BibTeX
@inproceedings{moon-etal-2024-virtual,
title = "Virtual Personas for Language Models via an Anthology of Backstories",
author = "Moon, Suhong and
Abdulhai, Marwa and
Kang, Minwoo and
Suh, Joseph and
Soedarmadji, Widyadewi and
Behar, Eran Kohen and
Chan, David",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-main.1110/",
doi = "10.18653/v1/2024.emnlp-main.1110",
pages = "19864--19897",
abstract = "Large language models (LLMs) are trained from vast repositories of text authored by millions of distinct authors, reflecting an enormous diversity of human traits. While these models bear the potential to be used as approximations of human subjects in behavioral studies, prior efforts have been limited in steering model responses to match individual human users. In this work, we introduce Anthology, a method for conditioning LLMs to particular virtual personas by harnessing open-ended life narratives, which we refer to as backstories. We show that our methodology enhances the consistency and reliability of experimental outcomes while ensuring better representation of diverse sub-populations. Across three nationally representative human surveys conducted as part of Pew Research Center's American Trends Panel (ATP), we demonstrate that Anthology achieves up to 18\% improvement in matching the response distributions of human respondents and 27\% improvement in consistency metrics."
}

MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="moon-etal-2024-virtual">
<titleInfo>
<title>Virtual Personas for Language Models via an Anthology of Backstories</title>
</titleInfo>
<name type="personal">
<namePart type="given">Suhong</namePart>
<namePart type="family">Moon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marwa</namePart>
<namePart type="family">Abdulhai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Minwoo</namePart>
<namePart type="family">Kang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Suh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Widyadewi</namePart>
<namePart type="family">Soedarmadji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eran</namePart>
<namePart type="given">Kohen</namePart>
<namePart type="family">Behar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Chan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large language models (LLMs) are trained from vast repositories of text authored by millions of distinct authors, reflecting an enormous diversity of human traits. While these models bear the potential to be used as approximations of human subjects in behavioral studies, prior efforts have been limited in steering model responses to match individual human users. In this work, we introduce Anthology, a method for conditioning LLMs to particular virtual personas by harnessing open-ended life narratives, which we refer to as backstories. We show that our methodology enhances the consistency and reliability of experimental outcomes while ensuring better representation of diverse sub-populations. Across three nationally representative human surveys conducted as part of Pew Research Center’s American Trends Panel (ATP), we demonstrate that Anthology achieves up to 18% improvement in matching the response distributions of human respondents and 27% improvement in consistency metrics.</abstract>
<identifier type="citekey">moon-etal-2024-virtual</identifier>
<identifier type="doi">10.18653/v1/2024.emnlp-main.1110</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-main.1110/</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>19864</start>
<end>19897</end>
</extent>
</part>
</mods>
</modsCollection>

Endnote
%0 Conference Proceedings
%T Virtual Personas for Language Models via an Anthology of Backstories
%A Moon, Suhong
%A Abdulhai, Marwa
%A Kang, Minwoo
%A Suh, Joseph
%A Soedarmadji, Widyadewi
%A Behar, Eran Kohen
%A Chan, David
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F moon-etal-2024-virtual
%X Large language models (LLMs) are trained from vast repositories of text authored by millions of distinct authors, reflecting an enormous diversity of human traits. While these models bear the potential to be used as approximations of human subjects in behavioral studies, prior efforts have been limited in steering model responses to match individual human users. In this work, we introduce Anthology, a method for conditioning LLMs to particular virtual personas by harnessing open-ended life narratives, which we refer to as backstories. We show that our methodology enhances the consistency and reliability of experimental outcomes while ensuring better representation of diverse sub-populations. Across three nationally representative human surveys conducted as part of Pew Research Center’s American Trends Panel (ATP), we demonstrate that Anthology achieves up to 18% improvement in matching the response distributions of human respondents and 27% improvement in consistency metrics.
%R 10.18653/v1/2024.emnlp-main.1110
%U https://aclanthology.org/2024.emnlp-main.1110/
%U https://doi.org/10.18653/v1/2024.emnlp-main.1110
%P 19864-19897

Markdown (Informal)
[Virtual Personas for Language Models via an Anthology of Backstories](https://aclanthology.org/2024.emnlp-main.1110/) (Moon et al., EMNLP 2024)

ACL
Suhong Moon, Marwa Abdulhai, Minwoo Kang, Joseph Suh, Widyadewi Soedarmadji, Eran Kohen Behar, and David Chan. 2024. Virtual Personas for Language Models via an Anthology of Backstories. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, pages 19864–19897, Miami, Florida, USA. Association for Computational Linguistics.