@inproceedings{khayrallah-etal-2018-regularized,
title = "Regularized Training Objective for Continued Training for Domain Adaptation in Neural Machine Translation",
author = "Khayrallah, Huda and
Thompson, Brian and
Duh, Kevin and
Koehn, Philipp",
editor = "Birch, Alexandra and
Finch, Andrew and
Luong, Thang and
Neubig, Graham and
Oda, Yusuke",
booktitle = "Proceedings of the 2nd Workshop on Neural Machine Translation and Generation",
month = jul,
year = "2018",
address = "Melbourne, Australia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-2705/",
doi = "10.18653/v1/W18-2705",
pages = "36--44",
abstract = "Supervised domain adaptation{---}where a large generic corpus and a smaller in-domain corpus are both available for training{---}is a challenge for neural machine translation (NMT). Standard practice is to train a generic model and use it to initialize a second model, then continue training the second model on in-domain data to produce an in-domain model. We add an auxiliary term to the training objective during continued training that minimizes the cross entropy between the in-domain model`s output word distribution and that of the out-of-domain model to prevent the model`s output from differing too much from the original out-of-domain model. We perform experiments on EMEA (descriptions of medicines) and TED (rehearsed presentations), initialized from a general domain (WMT) model. Our method shows improvements over standard continued training by up to 1.5 BLEU."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="khayrallah-etal-2018-regularized">
<titleInfo>
<title>Regularized Training Objective for Continued Training for Domain Adaptation in Neural Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Huda</namePart>
<namePart type="family">Khayrallah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brian</namePart>
<namePart type="family">Thompson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Duh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Koehn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on Neural Machine Translation and Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alexandra</namePart>
<namePart type="family">Birch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="family">Finch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thang</namePart>
<namePart type="family">Luong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Graham</namePart>
<namePart type="family">Neubig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yusuke</namePart>
<namePart type="family">Oda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Melbourne, Australia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Supervised domain adaptation—where a large generic corpus and a smaller in-domain corpus are both available for training—is a challenge for neural machine translation (NMT). Standard practice is to train a generic model and use it to initialize a second model, then continue training the second model on in-domain data to produce an in-domain model. We add an auxiliary term to the training objective during continued training that minimizes the cross entropy between the in-domain model's output word distribution and that of the out-of-domain model to prevent the model's output from differing too much from the original out-of-domain model. We perform experiments on EMEA (descriptions of medicines) and TED (rehearsed presentations), initialized from a general domain (WMT) model. Our method shows improvements over standard continued training by up to 1.5 BLEU.</abstract>
<identifier type="citekey">khayrallah-etal-2018-regularized</identifier>
<identifier type="doi">10.18653/v1/W18-2705</identifier>
<location>
<url>https://aclanthology.org/W18-2705/</url>
</location>
<part>
<date>2018-07</date>
<extent unit="page">
<start>36</start>
<end>44</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Regularized Training Objective for Continued Training for Domain Adaptation in Neural Machine Translation
%A Khayrallah, Huda
%A Thompson, Brian
%A Duh, Kevin
%A Koehn, Philipp
%Y Birch, Alexandra
%Y Finch, Andrew
%Y Luong, Thang
%Y Neubig, Graham
%Y Oda, Yusuke
%S Proceedings of the 2nd Workshop on Neural Machine Translation and Generation
%D 2018
%8 July
%I Association for Computational Linguistics
%C Melbourne, Australia
%F khayrallah-etal-2018-regularized
%X Supervised domain adaptation—where a large generic corpus and a smaller in-domain corpus are both available for training—is a challenge for neural machine translation (NMT). Standard practice is to train a generic model and use it to initialize a second model, then continue training the second model on in-domain data to produce an in-domain model. We add an auxiliary term to the training objective during continued training that minimizes the cross entropy between the in-domain model's output word distribution and that of the out-of-domain model to prevent the model's output from differing too much from the original out-of-domain model. We perform experiments on EMEA (descriptions of medicines) and TED (rehearsed presentations), initialized from a general domain (WMT) model. Our method shows improvements over standard continued training by up to 1.5 BLEU.
%R 10.18653/v1/W18-2705
%U https://aclanthology.org/W18-2705/
%U https://doi.org/10.18653/v1/W18-2705
%P 36-44
Markdown (Informal)
[Regularized Training Objective for Continued Training for Domain Adaptation in Neural Machine Translation](https://aclanthology.org/W18-2705/) (Khayrallah et al., NGT 2018)
ACL
- Huda Khayrallah, Brian Thompson, Kevin Duh, and Philipp Koehn. 2018. Regularized Training Objective for Continued Training for Domain Adaptation in Neural Machine Translation. In Proceedings of the 2nd Workshop on Neural Machine Translation and Generation, pages 36–44, Melbourne, Australia. Association for Computational Linguistics.
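The abstract describes the regularized objective only at a high level: the usual cross-entropy loss on in-domain references plus an auxiliary cross-entropy that keeps the adapted model's output word distribution close to that of the original out-of-domain model. The following is a minimal, hypothetical PyTorch-style sketch of that idea, not the authors' implementation; the function name and the `aux_weight` mixing hyperparameter are assumptions made for illustration.

```python
# Hypothetical sketch of the regularized continued-training objective described
# in the abstract; names and the mixing weight are assumptions, not the paper's.
import torch.nn.functional as F

def regularized_loss(adapted_logits, out_of_domain_logits, targets, aux_weight=0.5):
    """Cross-entropy on in-domain references plus an auxiliary cross-entropy
    between the adapted model's word distribution and the frozen
    out-of-domain model's distribution."""
    # Standard NMT training loss on the in-domain reference tokens.
    nll = F.cross_entropy(adapted_logits, targets)

    # Auxiliary term: treat the out-of-domain model's distribution as a fixed
    # target and penalize divergence of the adapted model's distribution.
    reference_probs = F.softmax(out_of_domain_logits, dim=-1).detach()
    adapted_log_probs = F.log_softmax(adapted_logits, dim=-1)
    aux = -(reference_probs * adapted_log_probs).sum(dim=-1).mean()

    return nll + aux_weight * aux
```

In such a setup, `out_of_domain_logits` would come from a copy of the original general-domain model that is kept frozen while the adapted model continues training on in-domain data.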