@inproceedings{durrett-etal-2017-identifying,
title = "Identifying Products in Online Cybercrime Marketplaces: A Dataset for Fine-grained Domain Adaptation",
author = "Durrett, Greg and
Kummerfeld, Jonathan K. and
Berg-Kirkpatrick, Taylor and
Portnoff, Rebecca and
Afroz, Sadia and
McCoy, Damon and
Levchenko, Kirill and
Paxson, Vern",
editor = "Palmer, Martha and
Hwa, Rebecca and
Riedel, Sebastian",
booktitle = "Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing",
month = sep,
year = "2017",
address = "Copenhagen, Denmark",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D17-1275",
doi = "10.18653/v1/D17-1275",
pages = "2598--2607",
abstract = "One weakness of machine-learned NLP models is that they typically perform poorly on out-of-domain data. In this work, we study the task of identifying products being bought and sold in online cybercrime forums, which exhibits particularly challenging cross-domain effects. We formulate a task that represents a hybrid of slot-filling information extraction and named entity recognition and annotate data from four different forums. Each of these forums constitutes its own {``}fine-grained domain{''} in that the forums cover different market sectors with different properties, even though all forums are in the broad domain of cybercrime. We characterize these domain differences in the context of a learning-based system: supervised models see decreased accuracy when applied to new forums, and standard techniques for semi-supervised learning and domain adaptation have limited effectiveness on this data, which suggests the need to improve these techniques. We release a dataset of 1,938 annotated posts from across the four forums.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="durrett-etal-2017-identifying">
<titleInfo>
<title>Identifying Products in Online Cybercrime Marketplaces: A Dataset for Fine-grained Domain Adaptation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Greg</namePart>
<namePart type="family">Durrett</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="given">K</namePart>
<namePart type="family">Kummerfeld</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Taylor</namePart>
<namePart type="family">Berg-Kirkpatrick</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rebecca</namePart>
<namePart type="family">Portnoff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sadia</namePart>
<namePart type="family">Afroz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Damon</namePart>
<namePart type="family">McCoy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kirill</namePart>
<namePart type="family">Levchenko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vern</namePart>
<namePart type="family">Paxson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Martha</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rebecca</namePart>
<namePart type="family">Hwa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Riedel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Copenhagen, Denmark</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>One weakness of machine-learned NLP models is that they typically perform poorly on out-of-domain data. In this work, we study the task of identifying products being bought and sold in online cybercrime forums, which exhibits particularly challenging cross-domain effects. We formulate a task that represents a hybrid of slot-filling information extraction and named entity recognition and annotate data from four different forums. Each of these forums constitutes its own “fine-grained domain” in that the forums cover different market sectors with different properties, even though all forums are in the broad domain of cybercrime. We characterize these domain differences in the context of a learning-based system: supervised models see decreased accuracy when applied to new forums, and standard techniques for semi-supervised learning and domain adaptation have limited effectiveness on this data, which suggests the need to improve these techniques. We release a dataset of 1,938 annotated posts from across the four forums.</abstract>
<identifier type="citekey">durrett-etal-2017-identifying</identifier>
<identifier type="doi">10.18653/v1/D17-1275</identifier>
<location>
<url>https://aclanthology.org/D17-1275</url>
</location>
<part>
<date>2017-09</date>
<extent unit="page">
<start>2598</start>
<end>2607</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Identifying Products in Online Cybercrime Marketplaces: A Dataset for Fine-grained Domain Adaptation
%A Durrett, Greg
%A Kummerfeld, Jonathan K.
%A Berg-Kirkpatrick, Taylor
%A Portnoff, Rebecca
%A Afroz, Sadia
%A McCoy, Damon
%A Levchenko, Kirill
%A Paxson, Vern
%Y Palmer, Martha
%Y Hwa, Rebecca
%Y Riedel, Sebastian
%S Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing
%D 2017
%8 September
%I Association for Computational Linguistics
%C Copenhagen, Denmark
%F durrett-etal-2017-identifying
%X One weakness of machine-learned NLP models is that they typically perform poorly on out-of-domain data. In this work, we study the task of identifying products being bought and sold in online cybercrime forums, which exhibits particularly challenging cross-domain effects. We formulate a task that represents a hybrid of slot-filling information extraction and named entity recognition and annotate data from four different forums. Each of these forums constitutes its own “fine-grained domain” in that the forums cover different market sectors with different properties, even though all forums are in the broad domain of cybercrime. We characterize these domain differences in the context of a learning-based system: supervised models see decreased accuracy when applied to new forums, and standard techniques for semi-supervised learning and domain adaptation have limited effectiveness on this data, which suggests the need to improve these techniques. We release a dataset of 1,938 annotated posts from across the four forums.
%R 10.18653/v1/D17-1275
%U https://aclanthology.org/D17-1275
%U https://doi.org/10.18653/v1/D17-1275
%P 2598-2607
Markdown (Informal)
[Identifying Products in Online Cybercrime Marketplaces: A Dataset for Fine-grained Domain Adaptation](https://aclanthology.org/D17-1275) (Durrett et al., EMNLP 2017)
ACL
- Greg Durrett, Jonathan K. Kummerfeld, Taylor Berg-Kirkpatrick, Rebecca Portnoff, Sadia Afroz, Damon McCoy, Kirill Levchenko, and Vern Paxson. 2017. Identifying Products in Online Cybercrime Marketplaces: A Dataset for Fine-grained Domain Adaptation. In Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing, pages 2598–2607, Copenhagen, Denmark. Association for Computational Linguistics.