@inproceedings{elliott-etal-2016-multimodal,
    title = "Multimodal Learning and Reasoning",
    author = "Elliott, Desmond and
      Kiela, Douwe and
      Lazaridou, Angeliki",
    editor = "Birch, Alexandra and
      Zuidema, Willem",
    booktitle = "Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics: Tutorial Abstracts",
    month = aug,
    year = "2016",
    address = "Berlin, Germany",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/P16-5001",
    abstract = "Natural Language Processing has broadened in scope to tackle more and more challenging language understanding and reasoning tasks. The core NLP tasks remain predominantly unimodal, focusing on linguistic input, despite the fact that we, humans, acquire and use language while communicating in perceptually rich environments. Moving towards human-level AI will require the integration and modeling of multiple modalities beyond language. With this tutorial, our aim is to introduce researchers to the areas of NLP that have dealt with multimodal signals. The key advantage of using multimodal signals in NLP tasks is the complementarity of the data in different modalities. For example, we are less likely to find descriptions of yellow bananas or wooden chairs in text corpora, but these visual attributes can be readily extracted directly from images. Multimodal signals, such as visual, auditory or olfactory data, have proven useful for models of word similarity and relatedness, automatic image and video description, and even predicting the associated smells of words. Finally, multimodality offers a practical opportunity to study and apply multitask learning, a general machine learning paradigm that improves the generalization performance of a task by using the training signals of other related tasks. All material associated with the tutorial will be available at \url{http://multimodalnlp.github.io/}",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="elliott-etal-2016-multimodal">
    <titleInfo>
      <title>Multimodal Learning and Reasoning</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Desmond</namePart>
      <namePart type="family">Elliott</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Douwe</namePart>
      <namePart type="family">Kiela</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Angeliki</namePart>
      <namePart type="family">Lazaridou</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2016-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics: Tutorial Abstracts</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Alexandra</namePart>
        <namePart type="family">Birch</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Willem</namePart>
        <namePart type="family">Zuidema</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Berlin, Germany</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Natural Language Processing has broadened in scope to tackle more and more challenging language understanding and reasoning tasks. The core NLP tasks remain predominantly unimodal, focusing on linguistic input, despite the fact that we, humans, acquire and use language while communicating in perceptually rich environments. Moving towards human-level AI will require the integration and modeling of multiple modalities beyond language. With this tutorial, our aim is to introduce researchers to the areas of NLP that have dealt with multimodal signals. The key advantage of using multimodal signals in NLP tasks is the complementarity of the data in different modalities. For example, we are less likely to find descriptions of yellow bananas or wooden chairs in text corpora, but these visual attributes can be readily extracted directly from images. Multimodal signals, such as visual, auditory or olfactory data, have proven useful for models of word similarity and relatedness, automatic image and video description, and even predicting the associated smells of words. Finally, multimodality offers a practical opportunity to study and apply multitask learning, a general machine learning paradigm that improves the generalization performance of a task by using the training signals of other related tasks. All material associated with the tutorial will be available at http://multimodalnlp.github.io/</abstract>
    <identifier type="citekey">elliott-etal-2016-multimodal</identifier>
    <location>
      <url>https://aclanthology.org/P16-5001</url>
    </location>
    <part>
      <date>2016-08</date>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Multimodal Learning and Reasoning
%A Elliott, Desmond
%A Kiela, Douwe
%A Lazaridou, Angeliki
%Y Birch, Alexandra
%Y Zuidema, Willem
%S Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics: Tutorial Abstracts
%D 2016
%8 August
%I Association for Computational Linguistics
%C Berlin, Germany
%F elliott-etal-2016-multimodal
%X Natural Language Processing has broadened in scope to tackle more and more challenging language understanding and reasoning tasks. The core NLP tasks remain predominantly unimodal, focusing on linguistic input, despite the fact that we, humans, acquire and use language while communicating in perceptually rich environments. Moving towards human-level AI will require the integration and modeling of multiple modalities beyond language. With this tutorial, our aim is to introduce researchers to the areas of NLP that have dealt with multimodal signals. The key advantage of using multimodal signals in NLP tasks is the complementarity of the data in different modalities. For example, we are less likely to find descriptions of yellow bananas or wooden chairs in text corpora, but these visual attributes can be readily extracted directly from images. Multimodal signals, such as visual, auditory or olfactory data, have proven useful for models of word similarity and relatedness, automatic image and video description, and even predicting the associated smells of words. Finally, multimodality offers a practical opportunity to study and apply multitask learning, a general machine learning paradigm that improves the generalization performance of a task by using the training signals of other related tasks. All material associated with the tutorial will be available at http://multimodalnlp.github.io/
%U https://aclanthology.org/P16-5001
Markdown (Informal)

[Multimodal Learning and Reasoning](https://aclanthology.org/P16-5001) (Elliott et al., ACL 2016)

ACL

Desmond Elliott, Douwe Kiela, and Angeliki Lazaridou. 2016. Multimodal Learning and Reasoning. In Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics: Tutorial Abstracts, Berlin, Germany. Association for Computational Linguistics.