BibTeX
@inproceedings{wiegand-etal-2019-detection,
    title = "{D}etection of {A}busive {L}anguage: the {P}roblem of {B}iased {D}atasets",
    author = "Wiegand, Michael and
      Ruppenhofer, Josef and
      Kleinbauer, Thomas",
    editor = "Burstein, Jill and
      Doran, Christy and
      Solorio, Thamar",
    booktitle = "Proceedings of the 2019 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)",
    month = jun,
    year = "2019",
    address = "Minneapolis, Minnesota",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/N19-1060",
    doi = "10.18653/v1/N19-1060",
    pages = "602--608",
    abstract = "We discuss the impact of data bias on abusive language detection. We show that classification scores on popular datasets reported in previous work are much lower under realistic settings in which this bias is reduced. Such biases are most notably observed on datasets that are created by focused sampling instead of random sampling. Datasets with a higher proportion of implicit abuse are more affected than datasets with a lower proportion.",
}
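If you want to consume this BibTeX record programmatically, a minimal sketch follows, assuming the third-party bibtexparser library (1.x API) and a hypothetical filename; note that the braces in the title ({D}etection, ...) are BibTeX capitalization guards and come through verbatim.

```python
# Minimal sketch: load the BibTeX record above (assumes bibtexparser 1.x,
# installed via `pip install bibtexparser`; the filename is hypothetical).
import bibtexparser

with open("wiegand-etal-2019-detection.bib") as f:
    db = bibtexparser.load(f)

entry = db.entries[0]          # the single @inproceedings record, as a dict
print(entry["ID"])             # "wiegand-etal-2019-detection"
print(entry["ENTRYTYPE"])      # "inproceedings"
print(entry["pages"])          # "602--608"
print(entry["title"])          # braces such as {D} protect capitalization
```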
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="wiegand-etal-2019-detection">
    <titleInfo>
      <title>Detection of Abusive Language: the Problem of Biased Datasets</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Michael</namePart>
      <namePart type="family">Wiegand</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Josef</namePart>
      <namePart type="family">Ruppenhofer</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Thomas</namePart>
      <namePart type="family">Kleinbauer</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Jill</namePart>
        <namePart type="family">Burstein</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Christy</namePart>
        <namePart type="family">Doran</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Thamar</namePart>
        <namePart type="family">Solorio</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Minneapolis, Minnesota</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We discuss the impact of data bias on abusive language detection. We show that classification scores on popular datasets reported in previous work are much lower under realistic settings in which this bias is reduced. Such biases are most notably observed on datasets that are created by focused sampling instead of random sampling. Datasets with a higher proportion of implicit abuse are more affected than datasets with a lower proportion.</abstract>
    <identifier type="citekey">wiegand-etal-2019-detection</identifier>
    <identifier type="doi">10.18653/v1/N19-1060</identifier>
    <location>
      <url>https://aclanthology.org/N19-1060</url>
    </location>
    <part>
      <date>2019-06</date>
      <extent unit="page">
        <start>602</start>
        <end>608</end>
      </extent>
    </part>
  </mods>
</modsCollection>
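Because the MODS record is namespaced XML, it can be read with Python's standard library alone. The sketch below pulls out the title, the three authors, and the page range; the filename is hypothetical.

```python
# Minimal sketch: extract fields from the MODS XML above using only the
# standard library. The filename is hypothetical.
import xml.etree.ElementTree as ET

NS = {"m": "http://www.loc.gov/mods/v3"}  # MODS v3 namespace

root = ET.parse("wiegand-etal-2019-detection.xml").getroot()
mods = root.find("m:mods", NS)

title = mods.findtext("m:titleInfo/m:title", namespaces=NS)
# Only direct <name> children of <mods> are the authors; the editors sit
# inside <relatedItem type="host"> and are therefore not matched here.
authors = [
    " ".join(p.text for p in name.findall("m:namePart", NS))
    for name in mods.findall("m:name", NS)
]
pages = (
    mods.findtext("m:part/m:extent/m:start", namespaces=NS),
    mods.findtext("m:part/m:extent/m:end", namespaces=NS),
)
print(title)    # Detection of Abusive Language: the Problem of Biased Datasets
print(authors)  # ['Michael Wiegand', 'Josef Ruppenhofer', 'Thomas Kleinbauer']
print(pages)    # ('602', '608')
```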
Endnote
%0 Conference Proceedings
%T Detection of Abusive Language: the Problem of Biased Datasets
%A Wiegand, Michael
%A Ruppenhofer, Josef
%A Kleinbauer, Thomas
%Y Burstein, Jill
%Y Doran, Christy
%Y Solorio, Thamar
%S Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)
%D 2019
%8 June
%I Association for Computational Linguistics
%C Minneapolis, Minnesota
%F wiegand-etal-2019-detection
%X We discuss the impact of data bias on abusive language detection. We show that classification scores on popular datasets reported in previous work are much lower under realistic settings in which this bias is reduced. Such biases are most notably observed on datasets that are created by focused sampling instead of random sampling. Datasets with a higher proportion of implicit abuse are more affected than datasets with a lower proportion.
%R 10.18653/v1/N19-1060
%U https://aclanthology.org/N19-1060
%U https://doi.org/10.18653/v1/N19-1060
%P 602-608
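The Endnote record uses the refer/tagged format: each line is a two-character %-tag followed by a value, and repeated tags such as %A (author) and %Y (editor) accumulate. A minimal, hand-rolled parser is sketched below; the filename is hypothetical.

```python
# Minimal sketch: parse an Endnote/refer record ("%X value" lines) into a
# tag -> list-of-values map, preserving repeated tags in order.
def parse_refer(text: str) -> dict[str, list[str]]:
    fields: dict[str, list[str]] = {}
    for line in text.splitlines():
        if line.startswith("%") and len(line) >= 3:
            tag, value = line[:2], line[2:].strip()
            fields.setdefault(tag, []).append(value)
    return fields

record = parse_refer(open("wiegand-etal-2019-detection.enw").read())  # hypothetical file
print(record["%A"])  # ['Wiegand, Michael', 'Ruppenhofer, Josef', 'Kleinbauer, Thomas']
print(record["%P"])  # ['602-608']
```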
Markdown (Informal)
[Detection of Abusive Language: the Problem of Biased Datasets](https://aclanthology.org/N19-1060) (Wiegand et al., NAACL 2019)

ACL
Michael Wiegand, Josef Ruppenhofer, and Thomas Kleinbauer. 2019. Detection of Abusive Language: the Problem of Biased Datasets. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), pages 602–608, Minneapolis, Minnesota. Association for Computational Linguistics.