@inproceedings{chen-etal-2021-detect,
title = "Detect Profane Language in Streaming Services to Protect Young Audiences",
author = "Chen, Jingxiang and
Wei, Kai and
Hao, Xiang",
editor = "Malmasi, Shervin and
Kallumadi, Surya and
Ueffing, Nicola and
Rokhlenko, Oleg and
Agichtein, Eugene and
Guy, Ido",
booktitle = "Proceedings of the 4th Workshop on e-Commerce and NLP",
month = aug,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.ecnlp-1.15",
doi = "10.18653/v1/2021.ecnlp-1.15",
pages = "123--131",
abstract = "With the rapid growth of online video streaming, recent years have seen increasing concerns about profane language in their content. Detecting profane language in streaming services is challenging due to the long sentences appeared in a video. While recent research on handling long sentences has focused on developing deep learning modeling techniques, little work has focused on techniques on improving data pipelines. In this work, we develop a data collection pipeline to address long sequence of texts and integrate this pipeline with a multi-head self-attention model. With this pipeline, our experiments show the self-attention model offers 12.5{\%} relative accuracy improvement over state-of-the-art distilBERT model on profane language detection while requiring only 3{\%} of parameters. This research designs a better system for informing users of profane language in video streaming services.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chen-etal-2021-detect">
<titleInfo>
<title>Detect Profane Language in Streaming Services to Protect Young Audiences</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jingxiang</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kai</namePart>
<namePart type="family">Wei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiang</namePart>
<namePart type="family">Hao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Workshop on e-Commerce and NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shervin</namePart>
<namePart type="family">Malmasi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Surya</namePart>
<namePart type="family">Kallumadi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nicola</namePart>
<namePart type="family">Ueffing</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oleg</namePart>
<namePart type="family">Rokhlenko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eugene</namePart>
<namePart type="family">Agichtein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ido</namePart>
<namePart type="family">Guy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>With the rapid growth of online video streaming, recent years have seen increasing concerns about profane language in their content. Detecting profane language in streaming services is challenging due to the long sentences appeared in a video. While recent research on handling long sentences has focused on developing deep learning modeling techniques, little work has focused on techniques on improving data pipelines. In this work, we develop a data collection pipeline to address long sequence of texts and integrate this pipeline with a multi-head self-attention model. With this pipeline, our experiments show the self-attention model offers 12.5% relative accuracy improvement over state-of-the-art distilBERT model on profane language detection while requiring only 3% of parameters. This research designs a better system for informing users of profane language in video streaming services.</abstract>
<identifier type="citekey">chen-etal-2021-detect</identifier>
<identifier type="doi">10.18653/v1/2021.ecnlp-1.15</identifier>
<location>
<url>https://aclanthology.org/2021.ecnlp-1.15</url>
</location>
<part>
<date>2021-08</date>
<extent unit="page">
<start>123</start>
<end>131</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Detect Profane Language in Streaming Services to Protect Young Audiences
%A Chen, Jingxiang
%A Wei, Kai
%A Hao, Xiang
%Y Malmasi, Shervin
%Y Kallumadi, Surya
%Y Ueffing, Nicola
%Y Rokhlenko, Oleg
%Y Agichtein, Eugene
%Y Guy, Ido
%S Proceedings of the 4th Workshop on e-Commerce and NLP
%D 2021
%8 August
%I Association for Computational Linguistics
%C Online
%F chen-etal-2021-detect
%X With the rapid growth of online video streaming, recent years have seen increasing concerns about profane language in their content. Detecting profane language in streaming services is challenging due to the long sentences appeared in a video. While recent research on handling long sentences has focused on developing deep learning modeling techniques, little work has focused on techniques on improving data pipelines. In this work, we develop a data collection pipeline to address long sequence of texts and integrate this pipeline with a multi-head self-attention model. With this pipeline, our experiments show the self-attention model offers 12.5% relative accuracy improvement over state-of-the-art distilBERT model on profane language detection while requiring only 3% of parameters. This research designs a better system for informing users of profane language in video streaming services.
%R 10.18653/v1/2021.ecnlp-1.15
%U https://aclanthology.org/2021.ecnlp-1.15
%U https://doi.org/10.18653/v1/2021.ecnlp-1.15
%P 123-131
Markdown (Informal)
[Detect Profane Language in Streaming Services to Protect Young Audiences](https://aclanthology.org/2021.ecnlp-1.15) (Chen et al., ECNLP 2021)
ACL