% Workshop paper record (entry type: inproceedings).
% The camel-case system name in the title is brace-protected as one whole word,
% so styles that sentence-case titles keep its capitalisation without splitting
% the word into separately braced letters.
@inproceedings{iruskieta-braud-2019-eusdisparser,
    title = "{EusDisParser}: improving an under-resourced discourse parser with cross-lingual data",
    author = "Iruskieta, Mikel and
      Braud, Chlo{\'e}",
    editor = "Zeldes, Amir and
      Das, Debopam and
      Galani, Erick Maziero and
      Antonio, Juliano Desiderato and
      Iruskieta, Mikel",
    booktitle = "Proceedings of the Workshop on Discourse Relation Parsing and Treebanking 2019",
    month = jun,
    year = "2019",
    address = "Minneapolis, MN",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/W19-2709",
    doi = "10.18653/v1/W19-2709",
    pages = "62--71",
    abstract = "Development of discourse parsers to annotate the relational discourse structure of a text is crucial for many downstream tasks. However, most of the existing work focuses on English, assuming a quite large dataset. Discourse data have been annotated for Basque, but training a system on these data is challenging since the corpus is very small. In this paper, we create the first demonstrator based on RST for Basque, and we investigate the use of data in another language to improve the performance of a Basque discourse parser. More precisely, we build a monolingual system using the small set of data available and investigate the use of multilingual word embeddings to train a system for Basque using data annotated for another language. We found that our approach to building a system limited to the small set of data available for Basque allowed us to get an improvement over previous approaches making use of many data annotated in other languages. At best, we get 34.78 in F1 for the full discourse structure. More data annotation is necessary in order to improve the results obtained with these techniques. We also describe which relations match with the gold standard, in order to understand these results.",
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record collection holding a single record for one conference paper. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="iruskieta-braud-2019-eusdisparser">
    <!-- Paper title -->
    <titleInfo>
      <title>EusDisParser: improving an under-resourced discourse parser with cross-lingual data</title>
    </titleInfo>
    <!-- Paper authors, in listed order -->
    <name type="personal">
      <namePart type="given">Mikel</namePart>
      <namePart type="family">Iruskieta</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Chloé</namePart>
      <namePart type="family">Braud</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume that contains the paper -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Workshop on Discourse Relation Parsing and Treebanking 2019</title>
      </titleInfo>
      <!-- Proceedings editors, in listed order -->
      <name type="personal">
        <namePart type="given">Amir</namePart>
        <namePart type="family">Zeldes</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Debopam</namePart>
        <namePart type="family">Das</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Erick</namePart>
        <namePart type="given">Maziero</namePart>
        <namePart type="family">Galani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Juliano</namePart>
        <namePart type="given">Desiderato</namePart>
        <namePart type="family">Antonio</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mikel</namePart>
        <namePart type="family">Iruskieta</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Minneapolis, MN</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Development of discourse parsers to annotate the relational discourse structure of a text is crucial for many downstream tasks. However, most of the existing work focuses on English, assuming a quite large dataset. Discourse data have been annotated for Basque, but training a system on these data is challenging since the corpus is very small. In this paper, we create the first demonstrator based on RST for Basque, and we investigate the use of data in another language to improve the performance of a Basque discourse parser. More precisely, we build a monolingual system using the small set of data available and investigate the use of multilingual word embeddings to train a system for Basque using data annotated for another language. We found that our approach to building a system limited to the small set of data available for Basque allowed us to get an improvement over previous approaches making use of many data annotated in other languages. At best, we get 34.78 in F1 for the full discourse structure. More data annotation is necessary in order to improve the results obtained with these techniques. We also describe which relations match with the gold standard, in order to understand these results.</abstract>
    <!-- Identifiers and access location -->
    <identifier type="citekey">iruskieta-braud-2019-eusdisparser</identifier>
    <identifier type="doi">10.18653/v1/W19-2709</identifier>
    <location>
      <url>https://aclanthology.org/W19-2709</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2019-06</date>
      <extent unit="page">
        <start>62</start>
        <end>71</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T EusDisParser: improving an under-resourced discourse parser with cross-lingual data
%A Iruskieta, Mikel
%A Braud, Chloé
%Y Zeldes, Amir
%Y Das, Debopam
%Y Galani, Erick Maziero
%Y Antonio, Juliano Desiderato
%Y Iruskieta, Mikel
%S Proceedings of the Workshop on Discourse Relation Parsing and Treebanking 2019
%D 2019
%8 June
%I Association for Computational Linguistics
%C Minneapolis, MN
%F iruskieta-braud-2019-eusdisparser
%X Development of discourse parsers to annotate the relational discourse structure of a text is crucial for many downstream tasks. However, most of the existing work focuses on English, assuming a quite large dataset. Discourse data have been annotated for Basque, but training a system on these data is challenging since the corpus is very small. In this paper, we create the first demonstrator based on RST for Basque, and we investigate the use of data in another language to improve the performance of a Basque discourse parser. More precisely, we build a monolingual system using the small set of data available and investigate the use of multilingual word embeddings to train a system for Basque using data annotated for another language. We found that our approach to building a system limited to the small set of data available for Basque allowed us to get an improvement over previous approaches making use of many data annotated in other languages. At best, we get 34.78 in F1 for the full discourse structure. More data annotation is necessary in order to improve the results obtained with these techniques. We also describe which relations match with the gold standard, in order to understand these results.
%R 10.18653/v1/W19-2709
%U https://aclanthology.org/W19-2709
%U https://doi.org/10.18653/v1/W19-2709
%P 62-71
Markdown (Informal)
[EusDisParser: improving an under-resourced discourse parser with cross-lingual data](https://aclanthology.org/W19-2709) (Iruskieta & Braud, NAACL 2019)
ACL