% Conference paper (CHI 2025 Extended Abstracts) by Sethi, Salminen, Amin, and Jansen.
% Maintainer notes:
%   - Author names use the unambiguous "Last, First" form; do not escape braces
%     around given names (escaped braces are typeset literally).
%   - The quoted phrase and the acronyms in title/booktitle are brace-protected so
%     that sentence-casing styles do not lowercase them.
%   - Conference dates and the copyright line are kept in the note field as exported.
%   - day and language are not standard classic-BibTeX fields; styles that do not
%     know them ignore them. They are retained for tools that do.
@inproceedings{5d5619a773b04f2f8514c6868cfb0ac1,
  title     = {{``When AI Writes Personas''}: Analyzing Lexical Diversity in {LLM}-Generated Persona Descriptions},
  abstract  = {Large language models (LLMs) are increasingly employed in generating user personas representing various groups of people. It is vital that these personas do not contain major sources of bias for stakeholders using the personas. To investigate linguistic bias in LLM-generated personas, we apply eleven lexical diversity metrics to analyze the association between linguistic diversity in 600 persona descriptions generated using five LLMs (GPT, Claude, Gemini, DeepSeek, Llama) and demographic attributes (age, gender, country) of the personas. We find that LLM-generated persona descriptions are lexically diverse independently of the personas{\textquoteright} demographic attributes. While we find no significant demographic bias in the persona profiles, we do find significant differences between the lexical diversity of persona descriptions generated by the LLMs. The persona descriptions generated by Gemini 1.5 Pro have the highest lexical diversity. The results imply that current LLMs can generate lexically diverse persona descriptions, but the selection of an LLM for specific applications is an important decision.},
  keywords  = {AI, Evaluation, LLMs, Lexical diversity, User personas},
  author    = {Sethi, Sankalp and Salminen, Joni and Amin, Danial and Jansen, Bernard J.},
  note      = {Publisher Copyright: {\textcopyright} 2025 Copyright held by the owner/author(s).; 2025 CHI Conference on Human Factors in Computing Systems, CHI EA 2025 ; Conference date: 26-04-2025 Through 01-05-2025},
  year      = {2025},
  month     = apr,
  day       = {26},
  doi       = {10.1145/3706599.3719712},
  language  = {English},
  series    = {Conference on Human Factors in Computing Systems - Proceedings},
  publisher = {Association for Computing Machinery},
  booktitle = {Extended Abstracts of the 2025 {CHI} Conference on Human Factors in Computing Systems, {CHI} 2025},
  address   = {New York, NY, USA},
}