@inproceedings{asher-etal-2023-limits,
    title = "Limits for learning with language models",
    author = "Asher, Nicholas and
      Bhar, Swarnadeep and
      Chaturvedi, Akshay and
      Hunter, Julie and
      Paul, Soumya",
    editor = "Palmer, Alexis and
      Camacho-Collados, Jose",
    booktitle = "Proceedings of the 12th Joint Conference on Lexical and Computational Semantics (*SEM 2023)",
    month = jul,
    year = "2023",
    address = "Toronto, Canada",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2023.starsem-1.22/",
    doi = "10.18653/v1/2023.starsem-1.22",
    pages = "236--248",
    abstract = "With the advent of large language models (LLMs), the trend in NLP has been to train LLMs on vast amounts of data to solve diverse language understanding and generation tasks. The list of LLM successes is long and varied. Nevertheless, several recent papers provide empirical evidence that LLMs fail to capture important aspects of linguistic meaning. Focusing on universal quantification, we provide a theoretical foundation for these empirical findings by proving that LLMs cannot learn certain fundamental semantic properties including semantic entailment and consistency as they are defined in formal semantics. More generally, we show that LLMs are unable to learn concepts beyond the first level of the Borel Hierarchy, which imposes severe limits on the ability of LMs, both large and small, to capture many aspects of linguistic meaning. This means that LLMs will operate without formal guarantees on tasks that require entailments and deep linguistic understanding."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="asher-etal-2023-limits">
    <titleInfo>
        <title>Limits for learning with language models</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Nicholas</namePart>
        <namePart type="family">Asher</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Swarnadeep</namePart>
        <namePart type="family">Bhar</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Akshay</namePart>
        <namePart type="family">Chaturvedi</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Julie</namePart>
        <namePart type="family">Hunter</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Soumya</namePart>
        <namePart type="family">Paul</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2023-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the 12th Joint Conference on Lexical and Computational Semantics (*SEM 2023)</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Alexis</namePart>
            <namePart type="family">Palmer</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Jose</namePart>
            <namePart type="family">Camacho-Collados</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">Toronto, Canada</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>With the advent of large language models (LLMs), the trend in NLP has been to train LLMs on vast amounts of data to solve diverse language understanding and generation tasks. The list of LLM successes is long and varied. Nevertheless, several recent papers provide empirical evidence that LLMs fail to capture important aspects of linguistic meaning. Focusing on universal quantification, we provide a theoretical foundation for these empirical findings by proving that LLMs cannot learn certain fundamental semantic properties including semantic entailment and consistency as they are defined in formal semantics. More generally, we show that LLMs are unable to learn concepts beyond the first level of the Borel Hierarchy, which imposes severe limits on the ability of LMs, both large and small, to capture many aspects of linguistic meaning. This means that LLMs will operate without formal guarantees on tasks that require entailments and deep linguistic understanding.</abstract>
    <identifier type="citekey">asher-etal-2023-limits</identifier>
    <identifier type="doi">10.18653/v1/2023.starsem-1.22</identifier>
    <location>
        <url>https://aclanthology.org/2023.starsem-1.22/</url>
    </location>
    <part>
        <date>2023-07</date>
        <extent unit="page">
            <start>236</start>
            <end>248</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Limits for learning with language models
%A Asher, Nicholas
%A Bhar, Swarnadeep
%A Chaturvedi, Akshay
%A Hunter, Julie
%A Paul, Soumya
%Y Palmer, Alexis
%Y Camacho-Collados, Jose
%S Proceedings of the 12th Joint Conference on Lexical and Computational Semantics (*SEM 2023)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F asher-etal-2023-limits
%X With the advent of large language models (LLMs), the trend in NLP has been to train LLMs on vast amounts of data to solve diverse language understanding and generation tasks. The list of LLM successes is long and varied. Nevertheless, several recent papers provide empirical evidence that LLMs fail to capture important aspects of linguistic meaning. Focusing on universal quantification, we provide a theoretical foundation for these empirical findings by proving that LLMs cannot learn certain fundamental semantic properties including semantic entailment and consistency as they are defined in formal semantics. More generally, we show that LLMs are unable to learn concepts beyond the first level of the Borel Hierarchy, which imposes severe limits on the ability of LMs, both large and small, to capture many aspects of linguistic meaning. This means that LLMs will operate without formal guarantees on tasks that require entailments and deep linguistic understanding.
%R 10.18653/v1/2023.starsem-1.22
%U https://aclanthology.org/2023.starsem-1.22/
%U https://doi.org/10.18653/v1/2023.starsem-1.22
%P 236-248
Markdown (Informal)
[Limits for learning with language models](https://aclanthology.org/2023.starsem-1.22/) (Asher et al., *SEM 2023)
ACL
- Nicholas Asher, Swarnadeep Bhar, Akshay Chaturvedi, Julie Hunter, and Soumya Paul. 2023. Limits for learning with language models. In Proceedings of the 12th Joint Conference on Lexical and Computational Semantics (*SEM 2023), pages 236–248, Toronto, Canada. Association for Computational Linguistics.