% Conference paper (inproceedings) from the MWE 2023 workshop proceedings.
% The acronym in booktitle is brace-protected so that styles which recase
% titles keep its capitalisation.
% NOTE(review): address looks like the event location rather than the
% publisher's city -- confirm this is intended before relying on it.
@inproceedings{klubicka-etal-2023-idioms,
  author    = {Klubi{\v{c}}ka, Filip and
               Nedumpozhimana, Vasudevan and
               Kelleher, John},
  title     = {Idioms, Probing and Dangerous Things: Towards Structural Probing for Idiomaticity in Vector Space},
  editor    = {Bhatia, Archna and
               Evang, Kilian and
               Garcia, Marcos and
               Giouli, Voula and
               Han, Lifeng and
               Taslimipoor, Shiva},
  booktitle = {Proceedings of the 19th Workshop on Multiword Expressions ({MWE} 2023)},
  month     = may,
  year      = {2023},
  address   = {Dubrovnik, Croatia},
  publisher = {Association for Computational Linguistics},
  pages     = {45--57},
  doi       = {10.18653/v1/2023.mwe-1.8},
  url       = {https://aclanthology.org/2023.mwe-1.8},
  abstract  = {The goal of this paper is to learn more about how idiomatic information is structurally encoded in embeddings, using a structural probing method. We repurpose an existing English verbal multi-word expression (MWE) dataset to suit the probing framework and perform a comparative probing study of static (GloVe) and contextual (BERT) embeddings. Our experiments indicate that both encode some idiomatic information to varying degrees, but yield conflicting evidence as to whether idiomaticity is encoded in the vector norm, leaving this an open question. We also identify some limitations of the used dataset and highlight important directions for future work in improving its suitability for a probing analysis.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper with citekey klubicka-etal-2023-idioms:
     authors, host proceedings (with editors and publisher), abstract,
     identifiers, URL and page extent. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="klubicka-etal-2023-idioms">
    <titleInfo>
      <title>Idioms, Probing and Dangerous Things: Towards Structural Probing for Idiomaticity in Vector Space</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Filip</namePart>
      <namePart type="family">Klubička</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Vasudevan</namePart>
      <namePart type="family">Nedumpozhimana</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">John</namePart>
      <namePart type="family">Kelleher</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume this paper appears in. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 19th Workshop on Multiword Expressions (MWE 2023)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Archna</namePart>
        <namePart type="family">Bhatia</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kilian</namePart>
        <namePart type="family">Evang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marcos</namePart>
        <namePart type="family">Garcia</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Voula</namePart>
        <namePart type="family">Giouli</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lifeng</namePart>
        <namePart type="family">Han</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shiva</namePart>
        <namePart type="family">Taslimipoor</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Dubrovnik, Croatia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The goal of this paper is to learn more about how idiomatic information is structurally encoded in embeddings, using a structural probing method. We repurpose an existing English verbal multi-word expression (MWE) dataset to suit the probing framework and perform a comparative probing study of static (GloVe) and contextual (BERT) embeddings. Our experiments indicate that both encode some idiomatic information to varying degrees, but yield conflicting evidence as to whether idiomaticity is encoded in the vector norm, leaving this an open question. We also identify some limitations of the used dataset and highlight important directions for future work in improving its suitability for a probing analysis.</abstract>
    <identifier type="citekey">klubicka-etal-2023-idioms</identifier>
    <identifier type="doi">10.18653/v1/2023.mwe-1.8</identifier>
    <location>
      <url>https://aclanthology.org/2023.mwe-1.8</url>
    </location>
    <part>
      <date>2023-05</date>
      <extent unit="page">
        <start>45</start>
        <end>57</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Idioms, Probing and Dangerous Things: Towards Structural Probing for Idiomaticity in Vector Space
%A Klubička, Filip
%A Nedumpozhimana, Vasudevan
%A Kelleher, John
%Y Bhatia, Archna
%Y Evang, Kilian
%Y Garcia, Marcos
%Y Giouli, Voula
%Y Han, Lifeng
%Y Taslimipoor, Shiva
%S Proceedings of the 19th Workshop on Multiword Expressions (MWE 2023)
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F klubicka-etal-2023-idioms
%X The goal of this paper is to learn more about how idiomatic information is structurally encoded in embeddings, using a structural probing method. We repurpose an existing English verbal multi-word expression (MWE) dataset to suit the probing framework and perform a comparative probing study of static (GloVe) and contextual (BERT) embeddings. Our experiments indicate that both encode some idiomatic information to varying degrees, but yield conflicting evidence as to whether idiomaticity is encoded in the vector norm, leaving this an open question. We also identify some limitations of the used dataset and highlight important directions for future work in improving its suitability for a probing analysis.
%R 10.18653/v1/2023.mwe-1.8
%U https://aclanthology.org/2023.mwe-1.8
%U https://doi.org/10.18653/v1/2023.mwe-1.8
%P 45-57
Markdown (Informal)
[Idioms, Probing and Dangerous Things: Towards Structural Probing for Idiomaticity in Vector Space](https://aclanthology.org/2023.mwe-1.8) (Klubička et al., MWE 2023)
ACL