@inproceedings{kondragunta-etal-2023-skam,
title = "{SKAM} at {S}em{E}val-2023 Task 10: Linguistic Feature Integration and Continuous Pretraining for Online Sexism Detection and Classification",
author = "Kondragunta, Murali Manohar and
Chen, Amber and
Slot, Karlo and
Weering, Sanne and
Caselli, Tommaso",
editor = {Ojha, Atul Kr. and
Do{\u{g}}ru{\"o}z, A. Seza and
Da San Martino, Giovanni and
Tayyar Madabushi, Harish and
Kumar, Ritesh and
Sartori, Elisa},
booktitle = "Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.semeval-1.250/",
doi = "10.18653/v1/2023.semeval-1.250",
pages = "1805--1817",
abstract = "Sexism has been prevalent online. In this paper, we explored the effect of explicit linguistic features and continuous pretraining on the performance of pretrained language models in sexism detection. While adding linguistic features did not improve the performance of the model, continuous pretraining did slightly boost the performance of the model in Task B from a mean macro-F1 score of 0.6156 to 0.6246. The best mean macro-F1 score in Task A was achieved by a finetuned HateBERT model using regular pretraining (0.8331). We observed that the linguistic features did not improve the model's performance. At the same time, continuous pretraining proved beneficial only for nuanced downstream tasks like Task-B."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kondragunta-etal-2023-skam">
<titleInfo>
<title>SKAM at SemEval-2023 Task 10: Linguistic Feature Integration and Continuous Pretraining for Online Sexism Detection and Classification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Murali</namePart>
<namePart type="given">Manohar</namePart>
<namePart type="family">Kondragunta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amber</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Karlo</namePart>
<namePart type="family">Slot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sanne</namePart>
<namePart type="family">Weering</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tommaso</namePart>
<namePart type="family">Caselli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giovanni</namePart>
<namePart type="family">Da San Martino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harish</namePart>
<namePart type="family">Tayyar Madabushi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ritesh</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elisa</namePart>
<namePart type="family">Sartori</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Sexism has been prevalent online. In this paper, we explored the effect of explicit linguistic features and continuous pretraining on the performance of pretrained language models in sexism detection. While adding linguistic features did not improve the performance of the model, continuous pretraining did slightly boost the performance of the model in Task B from a mean macro-F1 score of 0.6156 to 0.6246. The best mean macro-F1 score in Task A was achieved by a finetuned HateBERT model using regular pretraining (0.8331). We observed that the linguistic features did not improve the model's performance. At the same time, continuous pretraining proved beneficial only for nuanced downstream tasks like Task-B.</abstract>
<identifier type="citekey">kondragunta-etal-2023-skam</identifier>
<identifier type="doi">10.18653/v1/2023.semeval-1.250</identifier>
<location>
<url>https://aclanthology.org/2023.semeval-1.250/</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>1805</start>
<end>1817</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SKAM at SemEval-2023 Task 10: Linguistic Feature Integration and Continuous Pretraining for Online Sexism Detection and Classification
%A Kondragunta, Murali Manohar
%A Chen, Amber
%A Slot, Karlo
%A Weering, Sanne
%A Caselli, Tommaso
%Y Ojha, Atul Kr.
%Y Doğruöz, A. Seza
%Y Da San Martino, Giovanni
%Y Tayyar Madabushi, Harish
%Y Kumar, Ritesh
%Y Sartori, Elisa
%S Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F kondragunta-etal-2023-skam
%X Sexism has been prevalent online. In this paper, we explored the effect of explicit linguistic features and continuous pretraining on the performance of pretrained language models in sexism detection. While adding linguistic features did not improve the performance of the model, continuous pretraining did slightly boost the performance of the model in Task B from a mean macro-F1 score of 0.6156 to 0.6246. The best mean macro-F1 score in Task A was achieved by a finetuned HateBERT model using regular pretraining (0.8331). We observed that the linguistic features did not improve the model's performance. At the same time, continuous pretraining proved beneficial only for nuanced downstream tasks like Task-B.
%R 10.18653/v1/2023.semeval-1.250
%U https://aclanthology.org/2023.semeval-1.250/
%U https://doi.org/10.18653/v1/2023.semeval-1.250
%P 1805-1817
Markdown (Informal)
[SKAM at SemEval-2023 Task 10: Linguistic Feature Integration and Continuous Pretraining for Online Sexism Detection and Classification](https://aclanthology.org/2023.semeval-1.250/) (Kondragunta et al., SemEval 2023)
ACL
Murali Manohar Kondragunta, Amber Chen, Karlo Slot, Sanne Weering, and Tommaso Caselli. 2023. SKAM at SemEval-2023 Task 10: Linguistic Feature Integration and Continuous Pretraining for Online Sexism Detection and Classification. In Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023), pages 1805–1817, Toronto, Canada. Association for Computational Linguistics.