@inproceedings{wu-etal-2024-multimodal,
title = "Multimodal Multi-loss Fusion Network for Sentiment Analysis",
author = "Wu, Zehui and
Gong, Ziwei and
Koo, Jaywon and
Hirschberg, Julia",
editor = "Duh, Kevin and
Gomez, Helena and
Bethard, Steven",
booktitle = "Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.naacl-long.197/",
doi = "10.18653/v1/2024.naacl-long.197",
pages = "3588--3602",
abstract = "This paper investigates the optimal selection and fusion of feature encoders across multiple modalities and combines these in one neural network to improve sentiment detection. We compare different fusion methods and examine the impact of multi-loss training within the multi-modality fusion network, identifying surprisingly important findings relating to subnet performance. We have also found that integrating context significantly enhances model performance. Our best model achieves state-of-the-art performance for three datasets (CMU-MOSI, CMU-MOSEI and CH-SIMS). These results suggest a roadmap toward an optimized feature selection and fusion approach for enhancing sentiment detection in neural networks."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wu-etal-2024-multimodal">
<titleInfo>
<title>Multimodal Multi-loss Fusion Network for Sentiment Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zehui</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ziwei</namePart>
<namePart type="family">Gong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jaywon</namePart>
<namePart type="family">Koo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="family">Hirschberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Duh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helena</namePart>
<namePart type="family">Gomez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper investigates the optimal selection and fusion of feature encoders across multiple modalities and combines these in one neural network to improve sentiment detection. We compare different fusion methods and examine the impact of multi-loss training within the multi-modality fusion network, identifying surprisingly important findings relating to subnet performance. We have also found that integrating context significantly enhances model performance. Our best model achieves state-of-the-art performance for three datasets (CMU-MOSI, CMU-MOSEI and CH-SIMS). These results suggest a roadmap toward an optimized feature selection and fusion approach for enhancing sentiment detection in neural networks.</abstract>
<identifier type="citekey">wu-etal-2024-multimodal</identifier>
<identifier type="doi">10.18653/v1/2024.naacl-long.197</identifier>
<location>
<url>https://aclanthology.org/2024.naacl-long.197/</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>3588</start>
<end>3602</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Multimodal Multi-loss Fusion Network for Sentiment Analysis
%A Wu, Zehui
%A Gong, Ziwei
%A Koo, Jaywon
%A Hirschberg, Julia
%Y Duh, Kevin
%Y Gomez, Helena
%Y Bethard, Steven
%S Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F wu-etal-2024-multimodal
%X This paper investigates the optimal selection and fusion of feature encoders across multiple modalities and combines these in one neural network to improve sentiment detection. We compare different fusion methods and examine the impact of multi-loss training within the multi-modality fusion network, identifying surprisingly important findings relating to subnet performance. We have also found that integrating context significantly enhances model performance. Our best model achieves state-of-the-art performance for three datasets (CMU-MOSI, CMU-MOSEI and CH-SIMS). These results suggest a roadmap toward an optimized feature selection and fusion approach for enhancing sentiment detection in neural networks.
%R 10.18653/v1/2024.naacl-long.197
%U https://aclanthology.org/2024.naacl-long.197/
%U https://doi.org/10.18653/v1/2024.naacl-long.197
%P 3588-3602
Markdown (Informal)
[Multimodal Multi-loss Fusion Network for Sentiment Analysis](https://aclanthology.org/2024.naacl-long.197/) (Wu et al., NAACL 2024)
- Multimodal Multi-loss Fusion Network for Sentiment Analysis (Wu et al., NAACL 2024)
ACL
- Zehui Wu, Ziwei Gong, Jaywon Koo, and Julia Hirschberg. 2024. Multimodal Multi-loss Fusion Network for Sentiment Analysis. In Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), pages 3588–3602, Mexico City, Mexico. Association for Computational Linguistics.