@inproceedings{park-caragea-2020-scientific,
    title = "Scientific Keyphrase Identification and Classification by Pre-Trained Language Models Intermediate Task Transfer Learning",
    author = "Park, Seoyeon and
      Caragea, Cornelia",
    editor = "Scott, Donia and
      Bel, Nuria and
      Zong, Chengqing",
    booktitle = "Proceedings of the 28th International Conference on Computational Linguistics",
    month = dec,
    year = "2020",
    address = "Barcelona, Spain (Online)",
    publisher = "International Committee on Computational Linguistics",
    url = "https://aclanthology.org/2020.coling-main.472/",
    doi = "10.18653/v1/2020.coling-main.472",
    pages = "5409--5419",
abstract = "Scientific keyphrase identification and classification is the task of detecting and classifying keyphrases from scholarly text with their types from a set of predefined classes. This task has a wide range of benefits, but it is still challenging in performance due to the lack of large amounts of labeled data required for training deep neural models. In order to overcome this challenge, we explore pre-trained language models BERT and SciBERT with intermediate task transfer learning, using 42 data-rich related intermediate-target task combinations. We reveal that intermediate task transfer learning on SciBERT induces a better starting point for target task fine-tuning compared with BERT and achieves competitive performance in scientific keyphrase identification and classification compared to both previous works and strong baselines. Interestingly, we observe that BERT with intermediate task transfer learning fails to improve the performance of scientific keyphrase identification and classification potentially due to significant catastrophic forgetting. This result highlights that scientific knowledge achieved during the pre-training of language models on large scientific collections plays an important role in the target tasks. We also observe that sequence tagging related intermediate tasks, especially syntactic structure learning tasks such as POS Tagging, tend to work best for scientific keyphrase identification and classification."
}
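
The abstract describes a two-stage recipe: fine-tune a pre-trained encoder (BERT or SciBERT) on a data-rich intermediate task, then fine-tune the adapted encoder on the target keyphrase tagging task. Below is a minimal sketch of that recipe in Python using the Hugging Face transformers library; the POS label count, the 7-label target tag set, and the checkpoint path are illustrative assumptions, not the authors' exact experimental setup.

```python
from transformers import AutoModelForTokenClassification, AutoTokenizer

BASE = "allenai/scibert_scivocab_uncased"  # SciBERT; the paper also compares BERT
tokenizer = AutoTokenizer.from_pretrained(BASE)

# Stage 1: fine-tune the pre-trained encoder on a data-rich intermediate task.
# POS tagging (here assumed to use the 17 Universal POS tags) is one of the
# sequence-tagging intermediate tasks the abstract says works best.
intermediate = AutoModelForTokenClassification.from_pretrained(BASE, num_labels=17)
# ... train `intermediate` on the POS corpus (e.g., with transformers.Trainer),
# then persist the adapted weights ...
intermediate.save_pretrained("scibert-pos-intermediate")  # hypothetical path

# Stage 2: initialize the target model from the intermediate checkpoint.
# The token-classification head is re-initialized because the label space
# differs (hypothetically, BIO tags over 3 keyphrase types plus O = 7 labels);
# only the encoder weights transfer, which is the point of the technique.
target = AutoModelForTokenClassification.from_pretrained(
    "scibert-pos-intermediate",
    num_labels=7,
    ignore_mismatched_sizes=True,  # discard the 17-way intermediate head
)
# ... fine-tune `target` on the keyphrase identification/classification data ...
```

Swapping `BASE` for `bert-base-uncased` would give the BERT condition that the abstract contrasts with SciBERT; re-initializing the classification head in stage 2 ensures the transfer lives entirely in the encoder weights.
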
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="park-caragea-2020-scientific">
<titleInfo>
<title>Scientific Keyphrase Identification and Classification by Pre-Trained Language Models Intermediate Task Transfer Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Seoyeon</namePart>
<namePart type="family">Park</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cornelia</namePart>
<namePart type="family">Caragea</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 28th International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Donia</namePart>
<namePart type="family">Scott</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nuria</namePart>
<namePart type="family">Bel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chengqing</namePart>
<namePart type="family">Zong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Scientific keyphrase identification and classification is the task of detecting and classifying keyphrases from scholarly text with their types from a set of predefined classes. This task has a wide range of benefits, but it is still challenging in performance due to the lack of large amounts of labeled data required for training deep neural models. In order to overcome this challenge, we explore pre-trained language models BERT and SciBERT with intermediate task transfer learning, using 42 data-rich related intermediate-target task combinations. We reveal that intermediate task transfer learning on SciBERT induces a better starting point for target task fine-tuning compared with BERT and achieves competitive performance in scientific keyphrase identification and classification compared to both previous works and strong baselines. Interestingly, we observe that BERT with intermediate task transfer learning fails to improve the performance of scientific keyphrase identification and classification potentially due to significant catastrophic forgetting. This result highlights that scientific knowledge achieved during the pre-training of language models on large scientific collections plays an important role in the target tasks. We also observe that sequence tagging related intermediate tasks, especially syntactic structure learning tasks such as POS Tagging, tend to work best for scientific keyphrase identification and classification.</abstract>
<identifier type="citekey">park-caragea-2020-scientific</identifier>
<identifier type="doi">10.18653/v1/2020.coling-main.472</identifier>
<location>
<url>https://aclanthology.org/2020.coling-main.472/</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>5409</start>
<end>5419</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Scientific Keyphrase Identification and Classification by Pre-Trained Language Models Intermediate Task Transfer Learning
%A Park, Seoyeon
%A Caragea, Cornelia
%Y Scott, Donia
%Y Bel, Nuria
%Y Zong, Chengqing
%S Proceedings of the 28th International Conference on Computational Linguistics
%D 2020
%8 December
%I International Committee on Computational Linguistics
%C Barcelona, Spain (Online)
%F park-caragea-2020-scientific
%X Scientific keyphrase identification and classification is the task of detecting keyphrases in scholarly text and classifying them into a set of predefined classes. The task has a wide range of applications, but it remains challenging because the large amounts of labeled data required to train deep neural models are scarce. To overcome this challenge, we explore the pre-trained language models BERT and SciBERT with intermediate-task transfer learning, using 42 combinations of data-rich, related intermediate tasks and target tasks. We show that intermediate-task transfer learning on SciBERT yields a better starting point for target-task fine-tuning than BERT does, and achieves performance on scientific keyphrase identification and classification that is competitive with both prior work and strong baselines. Interestingly, we observe that BERT with intermediate-task transfer learning fails to improve performance on scientific keyphrase identification and classification, potentially due to significant catastrophic forgetting. This result highlights that the scientific knowledge acquired by pre-training language models on large scientific collections plays an important role in the target tasks. We also observe that sequence-tagging intermediate tasks, especially syntactic structure learning tasks such as POS tagging, tend to work best for scientific keyphrase identification and classification.
%R 10.18653/v1/2020.coling-main.472
%U https://aclanthology.org/2020.coling-main.472/
%U https://doi.org/10.18653/v1/2020.coling-main.472
%P 5409-5419
Markdown (Informal)
[Scientific Keyphrase Identification and Classification by Pre-Trained Language Models Intermediate Task Transfer Learning](https://aclanthology.org/2020.coling-main.472/) (Park & Caragea, COLING 2020)
ACL
Seoyeon Park and Cornelia Caragea. 2020. Scientific Keyphrase Identification and Classification by Pre-Trained Language Models Intermediate Task Transfer Learning. In Proceedings of the 28th International Conference on Computational Linguistics, pages 5409–5419, Barcelona, Spain (Online). International Committee on Computational Linguistics.