% Conference paper record (ACL Anthology ID 2020.emnlp-main.439).
% The second author's family name is the compound "Nogueira dos Santos".
% It is wrapped in braces so that BibTeX keeps it as one surname instead of
% treating the lowercase "dos" as the end of a von-part ("Nogueira dos" + "Santos").
@inproceedings{shakeri-etal-2020-end,
  title     = {End-to-End Synthetic Data Generation for Domain Adaptation of Question Answering Systems},
  author    = {Shakeri, Siamak and
               {Nogueira dos Santos}, Cicero and
               Zhu, Henghui and
               Ng, Patrick and
               Nan, Feng and
               Wang, Zhiguo and
               Nallapati, Ramesh and
               Xiang, Bing},
  editor    = {Webber, Bonnie and
               Cohn, Trevor and
               He, Yulan and
               Liu, Yang},
  booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  month     = nov,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2020.emnlp-main.439/},
  doi       = {10.18653/v1/2020.emnlp-main.439},
  pages     = {5445--5460},
  abstract  = {We propose an end-to-end approach for synthetic QA data generation. Our model comprises a single transformer-based encoder-decoder network that is trained end-to-end to generate both answers and questions. In a nutshell, we feed a passage to the encoder and ask the decoder to generate a question and an answer token-by-token. The likelihood produced in the generation process is used as a filtering score, which avoids the need for a separate filtering model. Our generator is trained by fine-tuning a pretrained LM using maximum likelihood estimation. The experimental results indicate significant improvements in the domain adaptation of QA models outperforming current state-of-the-art methods.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper with citekey shakeri-etal-2020-end. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="shakeri-etal-2020-end">
    <titleInfo>
      <title>End-to-End Synthetic Data Generation for Domain Adaptation of Question Answering Systems</title>
    </titleInfo>

    <!-- Authors -->
    <name type="personal">
      <namePart type="given">Siamak</namePart>
      <namePart type="family">Shakeri</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Cicero</namePart>
      <namePart type="family">Nogueira dos Santos</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Henghui</namePart>
      <namePart type="family">Zhu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Patrick</namePart>
      <namePart type="family">Ng</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Feng</namePart>
      <namePart type="family">Nan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zhiguo</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ramesh</namePart>
      <namePart type="family">Nallapati</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Bing</namePart>
      <namePart type="family">Xiang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2020-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors and publisher -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Bonnie</namePart>
        <namePart type="family">Webber</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Trevor</namePart>
        <namePart type="family">Cohn</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yulan</namePart>
        <namePart type="family">He</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yang</namePart>
        <namePart type="family">Liu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>

    <abstract>We propose an end-to-end approach for synthetic QA data generation. Our model comprises a single transformer-based encoder-decoder network that is trained end-to-end to generate both answers and questions. In a nutshell, we feed a passage to the encoder and ask the decoder to generate a question and an answer token-by-token. The likelihood produced in the generation process is used as a filtering score, which avoids the need for a separate filtering model. Our generator is trained by fine-tuning a pretrained LM using maximum likelihood estimation. The experimental results indicate significant improvements in the domain adaptation of QA models outperforming current state-of-the-art methods.</abstract>

    <!-- Identifiers and access location -->
    <identifier type="citekey">shakeri-etal-2020-end</identifier>
    <identifier type="doi">10.18653/v1/2020.emnlp-main.439</identifier>
    <location>
      <url>https://aclanthology.org/2020.emnlp-main.439/</url>
    </location>

    <!-- Date and page range of this paper -->
    <part>
      <date>2020-11</date>
      <extent unit="page">
        <start>5445</start>
        <end>5460</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T End-to-End Synthetic Data Generation for Domain Adaptation of Question Answering Systems
%A Shakeri, Siamak
%A Nogueira dos Santos, Cicero
%A Zhu, Henghui
%A Ng, Patrick
%A Nan, Feng
%A Wang, Zhiguo
%A Nallapati, Ramesh
%A Xiang, Bing
%Y Webber, Bonnie
%Y Cohn, Trevor
%Y He, Yulan
%Y Liu, Yang
%S Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F shakeri-etal-2020-end
%X We propose an end-to-end approach for synthetic QA data generation. Our model comprises a single transformer-based encoder-decoder network that is trained end-to-end to generate both answers and questions. In a nutshell, we feed a passage to the encoder and ask the decoder to generate a question and an answer token-by-token. The likelihood produced in the generation process is used as a filtering score, which avoids the need for a separate filtering model. Our generator is trained by fine-tuning a pretrained LM using maximum likelihood estimation. The experimental results indicate significant improvements in the domain adaptation of QA models outperforming current state-of-the-art methods.
%R 10.18653/v1/2020.emnlp-main.439
%U https://aclanthology.org/2020.emnlp-main.439/
%U https://doi.org/10.18653/v1/2020.emnlp-main.439
%P 5445-5460
Markdown (Informal)
[End-to-End Synthetic Data Generation for Domain Adaptation of Question Answering Systems](https://aclanthology.org/2020.emnlp-main.439/) (Shakeri et al., EMNLP 2020)
- End-to-End Synthetic Data Generation for Domain Adaptation of Question Answering Systems (Shakeri et al., EMNLP 2020)
ACL
- Siamak Shakeri, Cicero Nogueira dos Santos, Henghui Zhu, Patrick Ng, Feng Nan, Zhiguo Wang, Ramesh Nallapati, and Bing Xiang. 2020. End-to-End Synthetic Data Generation for Domain Adaptation of Question Answering Systems. In Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pages 5445–5460, Online. Association for Computational Linguistics.