@inproceedings{torres-aguilar-stutzmann-2021-named,
title = "Named Entity Recognition for {F}rench medieval charters",
author = "Torres Aguilar, Sergio and
Stutzmann, Dominique",
editor = {H{\"a}m{\"a}l{\"a}inen, Mika and
Alnajjar, Khalid and
Partanen, Niko and
Rueter, Jack},
booktitle = "Proceedings of the Workshop on Natural Language Processing for Digital Humanities",
month = dec,
year = "2021",
address = "NIT Silchar, India",
publisher = "NLP Association of India (NLPAI)",
url = "https://aclanthology.org/2021.nlp4dh-1.5",
pages = "37--46",
abstract = "This paper presents the process of annotating and modelling a corpus to automatically detect named entities in medieval charters in French. It introduces a new annotated corpus and a new system which outperforms state-of-the-art libraries. Charters are legal documents and among the most important historical sources for medieval studies as they reflect economic and social dynamics as well as the evolution of literacy and writing practices. Automatic detection of named entities greatly improves the access to these unstructured texts and facilitates historical research. The experiments described here are based on a corpus encompassing about 500k words (1200 charters) coming from three charter collections of the 13th and 14th centuries. We annotated the corpus and then trained two state-of-the-art NLP libraries for Named Entity Recognition (Spacy and Flair) and a custom neural model (Bi-LSTM-CRF). The evaluation shows that all three models achieve a high performance rate on the test set and a high generalization capacity against two external corpora unseen during training. This paper describes the corpus and the annotation model, and discusses the issues related to the linguistic processing of medieval French and formulaic discourse, so as to interpret the results within a larger historical perspective.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="torres-aguilar-stutzmann-2021-named">
<titleInfo>
<title>Named Entity Recognition for French medieval charters</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sergio</namePart>
<namePart type="family">Torres Aguilar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dominique</namePart>
<namePart type="family">Stutzmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Natural Language Processing for Digital Humanities</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mika</namePart>
<namePart type="family">Hämäläinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Alnajjar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Niko</namePart>
<namePart type="family">Partanen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jack</namePart>
<namePart type="family">Rueter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>NLP Association of India (NLPAI)</publisher>
<place>
<placeTerm type="text">NIT Silchar, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents the process of annotating and modelling a corpus to automatically detect named entities in medieval charters in French. It introduces a new annotated corpus and a new system which outperforms state-of-the-art libraries. Charters are legal documents and among the most important historical sources for medieval studies as they reflect economic and social dynamics as well as the evolution of literacy and writing practices. Automatic detection of named entities greatly improves the access to these unstructured texts and facilitates historical research. The experiments described here are based on a corpus encompassing about 500k words (1200 charters) coming from three charter collections of the 13th and 14th centuries. We annotated the corpus and then trained two state-of-the-art NLP libraries for Named Entity Recognition (Spacy and Flair) and a custom neural model (Bi-LSTM-CRF). The evaluation shows that all three models achieve a high performance rate on the test set and a high generalization capacity against two external corpora unseen during training. This paper describes the corpus and the annotation model, and discusses the issues related to the linguistic processing of medieval French and formulaic discourse, so as to interpret the results within a larger historical perspective.</abstract>
<identifier type="citekey">torres-aguilar-stutzmann-2021-named</identifier>
<location>
<url>https://aclanthology.org/2021.nlp4dh-1.5</url>
</location>
<part>
<date>2021-12</date>
<extent unit="page">
<start>37</start>
<end>46</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Named Entity Recognition for French medieval charters
%A Torres Aguilar, Sergio
%A Stutzmann, Dominique
%Y Hämäläinen, Mika
%Y Alnajjar, Khalid
%Y Partanen, Niko
%Y Rueter, Jack
%S Proceedings of the Workshop on Natural Language Processing for Digital Humanities
%D 2021
%8 December
%I NLP Association of India (NLPAI)
%C NIT Silchar, India
%F torres-aguilar-stutzmann-2021-named
%X This paper presents the process of annotating and modelling a corpus to automatically detect named entities in medieval charters in French. It introduces a new annotated corpus and a new system which outperforms state-of-the-art libraries. Charters are legal documents and among the most important historical sources for medieval studies as they reflect economic and social dynamics as well as the evolution of literacy and writing practices. Automatic detection of named entities greatly improves the access to these unstructured texts and facilitates historical research. The experiments described here are based on a corpus encompassing about 500k words (1200 charters) coming from three charter collections of the 13th and 14th centuries. We annotated the corpus and then trained two state-of-the-art NLP libraries for Named Entity Recognition (Spacy and Flair) and a custom neural model (Bi-LSTM-CRF). The evaluation shows that all three models achieve a high performance rate on the test set and a high generalization capacity against two external corpora unseen during training. This paper describes the corpus and the annotation model, and discusses the issues related to the linguistic processing of medieval French and formulaic discourse, so as to interpret the results within a larger historical perspective.
%U https://aclanthology.org/2021.nlp4dh-1.5
%P 37-46
Markdown (Informal)
[Named Entity Recognition for French medieval charters](https://aclanthology.org/2021.nlp4dh-1.5) (Torres Aguilar & Stutzmann, NLP4DH 2021)
ACL