@inproceedings{rangel-etal-2018-cross,
title = "Cross-corpus Native Language Identification via Statistical Embedding",
author = "Rangel, Francisco and
Rosso, Paolo and
Brooke, Julian and
Uitdenbogerd, Alexandra",
editor = "Brooke, Julian and
Flekova, Lucie and
Koppel, Moshe and
Solorio, Thamar",
booktitle = "Proceedings of the Second Workshop on Stylistic Variation",
month = jun,
year = "2018",
address = "New Orleans",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-1605",
doi = "10.18653/v1/W18-1605",
pages = "39--43",
abstract = "In this paper, we approach the task of native language identification in a realistic cross-corpus scenario where a model is trained with available data and has to predict the native language from data of a different corpus. The motivation behind this study is to investigate native language identification in the Australian academic scenario where a majority of students come from China, Indonesia, and Arabic-speaking nations. We have proposed a statistical embedding representation reporting a significant improvement over common single-layer approaches of the state of the art, identifying Chinese, Arabic, and Indonesian in a cross-corpus scenario. The proposed approach was shown to be competitive even when the data is scarce and imbalanced.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rangel-etal-2018-cross">
<titleInfo>
<title>Cross-corpus Native Language Identification via Statistical Embedding</title>
</titleInfo>
<name type="personal">
<namePart type="given">Francisco</namePart>
<namePart type="family">Rangel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paolo</namePart>
<namePart type="family">Rosso</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julian</namePart>
<namePart type="family">Brooke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexandra</namePart>
<namePart type="family">Uitdenbogerd</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Stylistic Variation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Julian</namePart>
<namePart type="family">Brooke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucie</namePart>
<namePart type="family">Flekova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Moshe</namePart>
<namePart type="family">Koppel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thamar</namePart>
<namePart type="family">Solorio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">New Orleans</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we approach the task of native language identification in a realistic cross-corpus scenario where a model is trained with available data and has to predict the native language from data of a different corpus. The motivation behind this study is to investigate native language identification in the Australian academic scenario where a majority of students come from China, Indonesia, and Arabic-speaking nations. We have proposed a statistical embedding representation reporting a significant improvement over common single-layer approaches of the state of the art, identifying Chinese, Arabic, and Indonesian in a cross-corpus scenario. The proposed approach was shown to be competitive even when the data is scarce and imbalanced.</abstract>
<identifier type="citekey">rangel-etal-2018-cross</identifier>
<identifier type="doi">10.18653/v1/W18-1605</identifier>
<location>
<url>https://aclanthology.org/W18-1605</url>
</location>
<part>
<date>2018-06</date>
<extent unit="page">
<start>39</start>
<end>43</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Cross-corpus Native Language Identification via Statistical Embedding
%A Rangel, Francisco
%A Rosso, Paolo
%A Brooke, Julian
%A Uitdenbogerd, Alexandra
%Y Brooke, Julian
%Y Flekova, Lucie
%Y Koppel, Moshe
%Y Solorio, Thamar
%S Proceedings of the Second Workshop on Stylistic Variation
%D 2018
%8 June
%I Association for Computational Linguistics
%C New Orleans
%F rangel-etal-2018-cross
%X In this paper, we approach the task of native language identification in a realistic cross-corpus scenario where a model is trained with available data and has to predict the native language from data of a different corpus. The motivation behind this study is to investigate native language identification in the Australian academic scenario where a majority of students come from China, Indonesia, and Arabic-speaking nations. We have proposed a statistical embedding representation reporting a significant improvement over common single-layer approaches of the state of the art, identifying Chinese, Arabic, and Indonesian in a cross-corpus scenario. The proposed approach was shown to be competitive even when the data is scarce and imbalanced.
%R 10.18653/v1/W18-1605
%U https://aclanthology.org/W18-1605
%U https://doi.org/10.18653/v1/W18-1605
%P 39-43
Markdown (Informal)
[Cross-corpus Native Language Identification via Statistical Embedding](https://aclanthology.org/W18-1605) (Rangel et al., Style-Var 2018)
ACL