@inproceedings{bloem-etal-2019-evaluating,
title = "Evaluating the Consistency of Word Embeddings from Small Data",
author = "Bloem, Jelke and
Fokkens, Antske and
Herbelot, Aur{\'e}lie",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)",
month = sep,
year = "2019",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd.",
url = "https://aclanthology.org/R19-1016",
doi = "10.26615/978-954-452-056-4_016",
pages = "132--141",
abstract = "In this work, we address the evaluation of distributional semantic models trained on smaller, domain-specific texts, specifically, philosophical text. Specifically, we inspect the behaviour of models using a pre-trained background space in learning. We propose a measure of consistency which can be used as an evaluation metric when no in-domain gold-standard data is available. This measure simply computes the ability of a model to learn similar embeddings from different parts of some homogeneous data. We show that in spite of being a simple evaluation, consistency actually depends on various combinations of factors, including the nature of the data itself, the model used to train the semantic space, and the frequency of the learnt terms, both in the background space and in the in-domain data of interest.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bloem-etal-2019-evaluating">
<titleInfo>
<title>Evaluating the Consistency of Word Embeddings from Small Data</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jelke</namePart>
<namePart type="family">Bloem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antske</namePart>
<namePart type="family">Fokkens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aurélie</namePart>
<namePart type="family">Herbelot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd.</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this work, we address the evaluation of distributional semantic models trained on smaller, domain-specific texts, specifically, philosophical text. Specifically, we inspect the behaviour of models using a pre-trained background space in learning. We propose a measure of consistency which can be used as an evaluation metric when no in-domain gold-standard data is available. This measure simply computes the ability of a model to learn similar embeddings from different parts of some homogeneous data. We show that in spite of being a simple evaluation, consistency actually depends on various combinations of factors, including the nature of the data itself, the model used to train the semantic space, and the frequency of the learnt terms, both in the background space and in the in-domain data of interest.</abstract>
<identifier type="citekey">bloem-etal-2019-evaluating</identifier>
<identifier type="doi">10.26615/978-954-452-056-4_016</identifier>
<location>
<url>https://aclanthology.org/R19-1016</url>
</location>
<part>
<date>2019-09</date>
<extent unit="page">
<start>132</start>
<end>141</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluating the Consistency of Word Embeddings from Small Data
%A Bloem, Jelke
%A Fokkens, Antske
%A Herbelot, Aurélie
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)
%D 2019
%8 September
%I INCOMA Ltd.
%C Varna, Bulgaria
%F bloem-etal-2019-evaluating
%X In this work, we address the evaluation of distributional semantic models trained on smaller, domain-specific texts, specifically, philosophical text. Specifically, we inspect the behaviour of models using a pre-trained background space in learning. We propose a measure of consistency which can be used as an evaluation metric when no in-domain gold-standard data is available. This measure simply computes the ability of a model to learn similar embeddings from different parts of some homogeneous data. We show that in spite of being a simple evaluation, consistency actually depends on various combinations of factors, including the nature of the data itself, the model used to train the semantic space, and the frequency of the learnt terms, both in the background space and in the in-domain data of interest.
%R 10.26615/978-954-452-056-4_016
%U https://aclanthology.org/R19-1016
%U https://doi.org/10.26615/978-954-452-056-4_016
%P 132-141
Markdown (Informal)
[Evaluating the Consistency of Word Embeddings from Small Data](https://aclanthology.org/R19-1016) (Bloem et al., RANLP 2019)
ACL