BibTeX
@inproceedings{dua-etal-2019-comprehensive,
title = "Comprehensive Multi-Dataset Evaluation of Reading Comprehension",
author = "Dua, Dheeru and
Gottumukkala, Ananth and
Talmor, Alon and
Singh, Sameer and
Gardner, Matt",
editor = "Fisch, Adam and
Talmor, Alon and
Jia, Robin and
Seo, Minjoon and
Choi, Eunsol and
Chen, Danqi",
booktitle = "Proceedings of the 2nd Workshop on Machine Reading for Question Answering",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D19-5820",
doi = "10.18653/v1/D19-5820",
pages = "147--153",
abstract = "Reading comprehension is one of the crucial tasks for furthering research in natural language understanding. A lot of diverse reading comprehension datasets have recently been introduced to study various phenomena in natural language, ranging from simple paraphrase matching and entity typing to entity tracking and understanding the implications of the context. Given the availability of many such datasets, comprehensive and reliable evaluation is tedious and time-consuming for researchers working on this problem. We present an evaluation server, ORB, that reports performance on seven diverse reading comprehension datasets, encouraging and facilitating testing a single model{'}s capability in understanding a wide variety of reading phenomena. The evaluation server places no restrictions on how models are trained, so it is a suitable test bed for exploring training paradigms and representation learning for general reading facility. As more suitable datasets are released, they will be added to the evaluation server. We also collect and include synthetic augmentations for these datasets, testing how well models can handle out-of-domain questions.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dua-etal-2019-comprehensive">
<titleInfo>
<title>Comprehensive Multi-Dataset Evaluation of Reading Comprehension</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dheeru</namePart>
<namePart type="family">Dua</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ananth</namePart>
<namePart type="family">Gottumukkala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alon</namePart>
<namePart type="family">Talmor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sameer</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matt</namePart>
<namePart type="family">Gardner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on Machine Reading for Question Answering</title>
</titleInfo>
<name type="personal">
<namePart type="given">Adam</namePart>
<namePart type="family">Fisch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alon</namePart>
<namePart type="family">Talmor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Robin</namePart>
<namePart type="family">Jia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Minjoon</namePart>
<namePart type="family">Seo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eunsol</namePart>
<namePart type="family">Choi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Danqi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Reading comprehension is one of the crucial tasks for furthering research in natural language understanding. A lot of diverse reading comprehension datasets have recently been introduced to study various phenomena in natural language, ranging from simple paraphrase matching and entity typing to entity tracking and understanding the implications of the context. Given the availability of many such datasets, comprehensive and reliable evaluation is tedious and time-consuming for researchers working on this problem. We present an evaluation server, ORB, that reports performance on seven diverse reading comprehension datasets, encouraging and facilitating testing a single model’s capability in understanding a wide variety of reading phenomena. The evaluation server places no restrictions on how models are trained, so it is a suitable test bed for exploring training paradigms and representation learning for general reading facility. As more suitable datasets are released, they will be added to the evaluation server. We also collect and include synthetic augmentations for these datasets, testing how well models can handle out-of-domain questions.</abstract>
<identifier type="citekey">dua-etal-2019-comprehensive</identifier>
<identifier type="doi">10.18653/v1/D19-5820</identifier>
<location>
<url>https://aclanthology.org/D19-5820</url>
</location>
<part>
<date>2019-11</date>
<extent unit="page">
<start>147</start>
<end>153</end>
</extent>
</part>
</mods>
</modsCollection>
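As an illustrative aside (not part of the Anthology tooling), the MODS record above can be read with Python's standard xml.etree.ElementTree. The element paths and the namespace in the sketch below come directly from the record; the helper name and output shape are assumptions made for this example.

```python
# Minimal sketch: pull a few fields out of the MODS record above using the
# Python standard library. Paths and namespace match the record; the helper
# name and the returned dict layout are arbitrary choices for this example.
import xml.etree.ElementTree as ET

MODS_NS = {"m": "http://www.loc.gov/mods/v3"}

def summarize(mods_xml: str) -> dict:
    root = ET.fromstring(mods_xml)            # <modsCollection>
    mods = root.find("m:mods", MODS_NS)       # first <mods> record
    title = mods.findtext("m:titleInfo/m:title", namespaces=MODS_NS)
    authors = [
        " ".join(p.text for p in name.findall("m:namePart", MODS_NS))
        # findall on "m:name" matches direct children of <mods> only,
        # so the editors nested under <relatedItem> are excluded.
        for name in mods.findall("m:name", MODS_NS)
        if name.findtext("m:role/m:roleTerm", namespaces=MODS_NS) == "author"
    ]
    start = mods.findtext("m:part/m:extent/m:start", namespaces=MODS_NS)
    end = mods.findtext("m:part/m:extent/m:end", namespaces=MODS_NS)
    return {"title": title, "authors": authors, "pages": f"{start}-{end}"}
```

Calling summarize() on the XML above would return the paper title, the five author names, and the page range 147-153.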
Endnote
%0 Conference Proceedings
%T Comprehensive Multi-Dataset Evaluation of Reading Comprehension
%A Dua, Dheeru
%A Gottumukkala, Ananth
%A Talmor, Alon
%A Singh, Sameer
%A Gardner, Matt
%Y Fisch, Adam
%Y Talmor, Alon
%Y Jia, Robin
%Y Seo, Minjoon
%Y Choi, Eunsol
%Y Chen, Danqi
%S Proceedings of the 2nd Workshop on Machine Reading for Question Answering
%D 2019
%8 November
%I Association for Computational Linguistics
%C Hong Kong, China
%F dua-etal-2019-comprehensive
%X Reading comprehension is one of the crucial tasks for furthering research in natural language understanding. A lot of diverse reading comprehension datasets have recently been introduced to study various phenomena in natural language, ranging from simple paraphrase matching and entity typing to entity tracking and understanding the implications of the context. Given the availability of many such datasets, comprehensive and reliable evaluation is tedious and time-consuming for researchers working on this problem. We present an evaluation server, ORB, that reports performance on seven diverse reading comprehension datasets, encouraging and facilitating testing a single model’s capability in understanding a wide variety of reading phenomena. The evaluation server places no restrictions on how models are trained, so it is a suitable test bed for exploring training paradigms and representation learning for general reading facility. As more suitable datasets are released, they will be added to the evaluation server. We also collect and include synthetic augmentations for these datasets, testing how well models can handle out-of-domain questions.
%R 10.18653/v1/D19-5820
%U https://aclanthology.org/D19-5820
%U https://doi.org/10.18653/v1/D19-5820
%P 147-153
Markdown (Informal)
[Comprehensive Multi-Dataset Evaluation of Reading Comprehension](https://aclanthology.org/D19-5820) (Dua et al., 2019)
ACL
Dheeru Dua, Ananth Gottumukkala, Alon Talmor, Sameer Singh, and Matt Gardner. 2019. Comprehensive Multi-Dataset Evaluation of Reading Comprehension. In Proceedings of the 2nd Workshop on Machine Reading for Question Answering, pages 147–153, Hong Kong, China. Association for Computational Linguistics.