@inproceedings{tsarfaty-etal-2019-whats,
title = "What{'}s Wrong with {H}ebrew {NLP}? And How to Make it Right",
author = "Tsarfaty, Reut and
Sadde, Shoval and
Klein, Stav and
Seker, Amit",
editor = "Pad{\'o}, Sebastian and
Huang, Ruihong",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP): System Demonstrations",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D19-3044",
doi = "10.18653/v1/D19-3044",
pages = "259--264",
abstract = "For languages with simple morphology such as English, automatic annotation pipelines such as spaCy or Stanford{'}s CoreNLP successfully serve projects in academia and the industry. For many morphologically-rich languages (MRLs), similar pipelines show sub-optimal performance that limits their applicability for text analysis in research and the industry. The sub-optimal performance is mainly due to errors in early morphological disambiguation decisions, that cannot be recovered later on in the pipeline, yielding incoherent annotations on the whole. This paper describes the design and use of the ONLP suite, a joint morpho-syntactic infrastructure for processing Modern Hebrew texts. The joint inference over morphology and syntax substantially limits error propagation, and leads to high accuracy. ONLP provides rich and expressive annotations which already serve diverse academic and commercial needs. Its accompanying demo further serves educational activities, introducing Hebrew NLP intricacies to researchers and non-researchers alike.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tsarfaty-etal-2019-whats">
<titleInfo>
<title>What’s Wrong with Hebrew NLP? And How to Make it Right</title>
</titleInfo>
<name type="personal">
<namePart type="given">Reut</namePart>
<namePart type="family">Tsarfaty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shoval</namePart>
<namePart type="family">Sadde</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stav</namePart>
<namePart type="family">Klein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amit</namePart>
<namePart type="family">Seker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP): System Demonstrations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Padó</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruihong</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>For languages with simple morphology such as English, automatic annotation pipelines such as spaCy or Stanford’s CoreNLP successfully serve projects in academia and the industry. For many morphologically-rich languages (MRLs), similar pipelines show sub-optimal performance that limits their applicability for text analysis in research and the industry. The sub-optimal performance is mainly due to errors in early morphological disambiguation decisions, that cannot be recovered later on in the pipeline, yielding incoherent annotations on the whole. This paper describes the design and use of the ONLP suite, a joint morpho-syntactic infrastructure for processing Modern Hebrew texts. The joint inference over morphology and syntax substantially limits error propagation, and leads to high accuracy. ONLP provides rich and expressive annotations which already serve diverse academic and commercial needs. Its accompanying demo further serves educational activities, introducing Hebrew NLP intricacies to researchers and non-researchers alike.</abstract>
<identifier type="citekey">tsarfaty-etal-2019-whats</identifier>
<identifier type="doi">10.18653/v1/D19-3044</identifier>
<location>
<url>https://aclanthology.org/D19-3044</url>
</location>
<part>
<date>2019-11</date>
<extent unit="page">
<start>259</start>
<end>264</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T What’s Wrong with Hebrew NLP? And How to Make it Right
%A Tsarfaty, Reut
%A Sadde, Shoval
%A Klein, Stav
%A Seker, Amit
%Y Padó, Sebastian
%Y Huang, Ruihong
%S Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP): System Demonstrations
%D 2019
%8 November
%I Association for Computational Linguistics
%C Hong Kong, China
%F tsarfaty-etal-2019-whats
%X For languages with simple morphology such as English, automatic annotation pipelines such as spaCy or Stanford’s CoreNLP successfully serve projects in academia and the industry. For many morphologically-rich languages (MRLs), similar pipelines show sub-optimal performance that limits their applicability for text analysis in research and the industry. The sub-optimal performance is mainly due to errors in early morphological disambiguation decisions, that cannot be recovered later on in the pipeline, yielding incoherent annotations on the whole. This paper describes the design and use of the ONLP suite, a joint morpho-syntactic infrastructure for processing Modern Hebrew texts. The joint inference over morphology and syntax substantially limits error propagation, and leads to high accuracy. ONLP provides rich and expressive annotations which already serve diverse academic and commercial needs. Its accompanying demo further serves educational activities, introducing Hebrew NLP intricacies to researchers and non-researchers alike.
%R 10.18653/v1/D19-3044
%U https://aclanthology.org/D19-3044
%U https://doi.org/10.18653/v1/D19-3044
%P 259-264
Markdown (Informal)
[What’s Wrong with Hebrew NLP? And How to Make it Right](https://aclanthology.org/D19-3044) (Tsarfaty et al., EMNLP-IJCNLP 2019)
ACL
- Reut Tsarfaty, Shoval Sadde, Stav Klein, and Amit Seker. 2019. What’s Wrong with Hebrew NLP? And How to Make it Right. In Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP): System Demonstrations, pages 259–264, Hong Kong, China. Association for Computational Linguistics.