@misc{ge2025autopresentdesigningstructured,
  title         = {{AutoPresent}: Designing Structured Visuals from Scratch},
  author        = {Ge, Jun and Wang, Zhengzhong and Zhou, Xuhui and Peng, Yuhang and Subramanian, Siddharth and Tan, Qian and Sap, Maarten and Suhr, Alane},
  year          = {2025},
  eprint        = {2501.00912},
  archiveprefix = {arXiv},
  primaryclass  = {cs.AI},
}
2024
arXiv
Bridging the Data Provenance Gap Across Text, Speech and Video
Shayne Longpre, Nikhil Singh, Manuel Cherep, and 40 more authors
@misc{longpre2024bridgingdataprovenancegap,
  title         = {Bridging the Data Provenance Gap Across Text, Speech and Video},
  author        = {Longpre, Shayne and Singh, Nikhil and Cherep, Manuel and Tiwary, Kushagra and Materzynska, Joanna and Brannon, William and Mahari, Robert and Dey, Manan and Hamdy, Mohammed and Saxena, Nayan and Anis, Ahmad Mustafa and Alghamdi, Emad A. and Chien, Vu Minh and Obeng-Marnu, Naana and Yin, Da and Qian, Kun and Li, Yizhi and Liang, Minnie and Dinh, An and Mohanty, Shrestha and Mataciunas, Deividas and South, Tobin and Zhang, Jianguo and Lee, Ariel N. and Lund, Campbell S. and Klamm, Christopher and Sileo, Damien and Misra, Diganta and Shippole, Enrico and Klyman, Kevin and Miranda, Lester JV and Muennighoff, Niklas and Ye, Seonghyeon and Kim, Seungone and Gupta, Vipul and Sharma, Vivek and Zhou, Xuhui and Xiong, Caiming and Villa, Luis and Biderman, Stella and Pentland, Alex and Hooker, Sara and Kabbara, Jad},
  year          = {2024},
  eprint        = {2412.17847},
  archiveprefix = {arXiv},
  primaryclass  = {cs.AI},
}
arXiv
TheAgentCompany: Benchmarking LLM Agents on Consequential Real World Tasks
Frank F Xu, Yiwei Song, Bowen Li, and 6 more authors
@misc{xu2024theagentcompanybenchmarkingllm,
  title         = {{TheAgentCompany}: Benchmarking {LLM} Agents on Consequential Real World Tasks},
  author        = {Xu, Frank F and Song, Yiwei and Li, Bowen and Tang, Yujia and Jain, Khushi and Bao, Mingyu and Wang, Zhengzhong and Zhou, Xuhui and Guo, Zhiyi},
  year          = {2024},
  eprint        = {2412.14161},
  archiveprefix = {arXiv},
  primaryclass  = {cs.AI},
}
NeurIPS
Consent in Crisis: The Rapid Decline of the AI Data Commons
Shayne Longpre, Robert Mahari, Ariel Lee, and 13 more authors
@inproceedings{longpre2024consentcrisisrapiddecline,
  title     = {Consent in Crisis: The Rapid Decline of the {AI} Data Commons},
  author    = {Longpre, Shayne and Mahari, Robert and Lee, Ariel and Lund, Chris and Oderinwale, Hakeem and Brannon, Will and Zhou, Xuhui and Li, Yizhi and Xiong, Caiming and Villa, Luis and Biderman, Stella and Li, Hanlin and Ippolito, Daphne and Hooker, Sara and Kabbara, Jad and Pentland, Sandy},
  year      = {2024},
  booktitle = {NeurIPS Datasets and Benchmarks},
}
arXiv
Minion: A Technology Probe for Resolving Value Conflicts through Expert-Driven and User-Driven Strategies in AI Companion Applications
Xianzhe Fan, Qing Xiao, Xuhui Zhou, and 4 more authors
@misc{fan2024minionatechnologyprobe,
  title         = {Minion: A Technology Probe for Resolving Value Conflicts through Expert-Driven and User-Driven Strategies in {AI} Companion Applications},
  author        = {Fan, Xianzhe and Xiao, Qing and Zhou, Xuhui and Su, Yuran and Lu, Zhicong and Sap, Maarten and Shen, Hong},
  year          = {2024},
  eprint        = {2411.07042},
  archiveprefix = {arXiv},
  primaryclass  = {cs.HC},
}
arXiv
BIG5-CHAT: Shaping LLM Personalities Through Training on Human-Grounded Data
Wenkai Li, Jiarui Liu, Andy Liu, and 3 more authors
@misc{li2024big5chatshapingllm,
  title         = {{BIG5-CHAT}: Shaping {LLM} Personalities Through Training on Human-Grounded Data},
  author        = {Li, Wenkai and Liu, Jiarui and Liu, Andy and Zhou, Xuhui and Diab, Mona and Sap, Maarten},
  year          = {2024},
  eprint        = {2410.16491},
  archiveprefix = {arXiv},
  primaryclass  = {cs.AI},
}
arXiv
HAICOSYSTEM: An Ecosystem for Sandboxing Safety Risks in Human-AI Interactions
Xuhui Zhou, Hyunwoo Kim, Faeze Brahman, and 9 more authors
@misc{zhou2024haicosystemecosystemsandboxingsafety,
  title         = {{HAICOSYSTEM}: An Ecosystem for Sandboxing Safety Risks in Human-{AI} Interactions},
  author        = {Zhou, Xuhui and Kim, Hyunwoo and Brahman, Faeze and Jiang, Liwei and Zhu, Hao and Lu, Ximing and Xu, Frank and Lin, Bill Yuchen and Choi, Yejin and Mireshghallah, Niloofar and Bras, Ronan Le and Sap, Maarten},
  year          = {2024},
  eprint        = {2409.16427},
  archiveprefix = {arXiv},
  primaryclass  = {cs.AI},
}
arXiv
AI-LieDar: Examine the Trade-off Between Utility and Truthfulness in LLM Agents
Zhe Su, Xuhui Zhou, Sanketh Rangreji, and 4 more authors
@misc{su2024ailiedarexaminetradeoffutility,
  title         = {{AI-LieDar}: Examine the Trade-off Between Utility and Truthfulness in {LLM} Agents},
  author        = {Su, Zhe and Zhou, Xuhui and Rangreji, Sanketh and Kabra, Anubha and Mendelsohn, Julia and Brahman, Faeze and Sap, Maarten},
  year          = {2024},
  eprint        = {2409.09013},
  archiveprefix = {arXiv},
  primaryclass  = {cs.AI},
}
arXiv
User-Driven Value Alignment: Understanding Users’ Perceptions and Strategies for Addressing Biased and Discriminatory Statements in AI Companions
Xianzhe Fan, Qing Xiao, Xuhui Zhou, and 4 more authors
@misc{fan2024userdrivenvaluealignmentunderstanding,
  title         = {User-Driven Value Alignment: Understanding Users' Perceptions and Strategies for Addressing Biased and Discriminatory Statements in {AI} Companions},
  author        = {Fan, Xianzhe and Xiao, Qing and Zhou, Xuhui and Pei, Jiaxin and Sap, Maarten and Lu, Zhicong and Shen, Hong},
  year          = {2024},
  eprint        = {2409.00862},
  archiveprefix = {arXiv},
  primaryclass  = {cs.HC},
}
arXiv
On the Resilience of Multi-Agent Systems with Malicious Agents
Jen-tse Huang, Jiaxu Zhou, Tailin Jin, and 6 more authors
@misc{huang2024resiliencemultiagentsystemsmalicious,
  title         = {On the Resilience of Multi-Agent Systems with Malicious Agents},
  author        = {Huang, Jen-tse and Zhou, Jiaxu and Jin, Tailin and Zhou, Xuhui and Chen, Zixi and Wang, Wenxuan and Yuan, Youliang and Sap, Maarten and Lyu, Michael R.},
  year          = {2024},
  eprint        = {2408.00989},
  archiveprefix = {arXiv},
  primaryclass  = {cs.AI},
}
EMNLP
Is this the real life? Is this just fantasy? The Misleading Success of Simulating Social Interactions With LLMs
Xuhui Zhou, Zhe Su, Tiwalayo Eisape, and 2 more authors
@inproceedings{zhou2024reallifejustfantasy,
  title         = {Is this the real life? Is this just fantasy? The Misleading Success of Simulating Social Interactions With {LLMs}},
  author        = {Zhou, Xuhui and Su, Zhe and Eisape, Tiwalayo and Kim, Hyunwoo and Sap, Maarten},
  year          = {2024},
  booktitle     = {EMNLP},
  eprint        = {2403.05020},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
}
ICLR
Can LLMs Keep a Secret? Testing Privacy Implications of Language Models via Contextual Integrity Theory
Niloofar Mireshghallah, Hyunwoo Kim, Xuhui Zhou, and 4 more authors
@inproceedings{Niloofar2024CanLLMsKeepASecret,
  title     = {Can {LLMs} Keep a Secret? Testing Privacy Implications of Language Models via Contextual Integrity Theory},
  author    = {Mireshghallah, Niloofar and Kim, Hyunwoo and Zhou, Xuhui and Tsvetkov, Yulia and Sap, Maarten and Shokri, Reza and Choi, Yejin},
  booktitle = {ICLR},
  year      = {2024},
  url       = {https://openreview.net/forum?id=gmg7t8b4s0},
  award     = {Spotlight (top 5%)},
}
ICLR
SOTOPIA: Interactive Evaluation for Social Intelligence in Language Agents
Xuhui Zhou*, Hao Zhu*, Leena Mathur, and 8 more authors
@inproceedings{zhou2024sotopia,
  title     = {{SOTOPIA}: Interactive Evaluation for Social Intelligence in Language Agents},
  author    = {Zhou*, Xuhui and Zhu*, Hao and Mathur, Leena and Zhang, Ruohong and Qi, Zhengyang and Yu, Haofei and Morency, Louis-Philippe and Bisk, Yonatan and Fried, Daniel and Neubig, Graham and Sap, Maarten},
  booktitle = {ICLR},
  year      = {2024},
  url       = {https://openreview.net/forum?id=mM7VurbA4r},
  award     = {Spotlight (top 5%)},
}
ICLR
WebArena: A Realistic Web Environment for Building Autonomous Agents
Shuyan Zhou, Frank F. Xu, Hao Zhu, and 8 more authors
@inproceedings{zhou2024arena,
  title     = {{WebArena}: A Realistic Web Environment for Building Autonomous Agents},
  author    = {Zhou, Shuyan and Xu, Frank F. and Zhu, Hao and Zhou, Xuhui and Lo, Robert and Sridhar, Abishek and Cheng, Xianyi and Bisk, Yonatan and Fried, Daniel and Alon, Uri and Neubig, Graham},
  booktitle = {ICLR},
  year      = {2024},
  url       = {https://openreview.net/forum?id=oKn9c6ytLx},
  demo      = {https://webarena.dev/#try-it-yourself},
}
EACL
Clever Hans or Neural Theory of Mind? Stress Testing Social Reasoning in Large Language Models
Natalie Shapira, Mosh Levy, Hossein Seyed Alavi, and 5 more authors
@inproceedings{shapira2024cleverHans,
  title     = {{Clever Hans} or Neural Theory of Mind? Stress Testing Social Reasoning in Large Language Models},
  author    = {Shapira, Natalie and Levy, Mosh and Seyed Alavi, Hossein and Zhou, Xuhui and Choi, Yejin and Goldberg, Yoav and Sap, Maarten and Shwartz, Vered},
  year      = {2024},
  booktitle = {EACL},
}
2023
ACL
COBRA 🐍 Frames: Contextual Reasoning about Effects and Harms of Offensive Statements
Xuhui Zhou, Hao Zhu, Akhila Yerukola, and 4 more authors
@inproceedings{zhou2023cobra,
  title     = {{COBRA} 🐍 Frames: Contextual Reasoning about Effects and Harms of Offensive Statements},
  author    = {Zhou, Xuhui and Zhu, Hao and Yerukola, Akhila and Davidson, Thomas and D. Hwang, Jena and Swayamdipta, Swabha and Sap, Maarten},
  year      = {2023},
  booktitle = {Findings of ACL},
  demo      = {https://cobra.allen.ai/},
}
EMNLP
“Don’t Take This Out of Context!” On the Need for Contextual Models and Evaluations for Stylistic Rewriting
@inproceedings{yerukola2023contextRewrite,
  title     = {``Don't Take This Out of Context!'' On the Need for Contextual Models and Evaluations for Stylistic Rewriting},
  author    = {Yerukola, Akhila and Zhou, Xuhui and Sap, Maarten},
  year      = {2023},
  booktitle = {EMNLP},
}
EMNLP
FANToM: A Benchmark for Stress-testing Machine Theory of Mind in Interactions
Hyunwoo Kim, Melanie Sclar, Xuhui Zhou, and 4 more authors
@inproceedings{kim2023fantom,
  title     = {{FANToM}: A Benchmark for Stress-testing Machine Theory of Mind in Interactions},
  author    = {Kim, Hyunwoo and Sclar, Melanie and Zhou, Xuhui and Le Bras, Ronan and Kim, Gunhee and Choi, Yejin and Sap, Maarten},
  year      = {2023},
  booktitle = {EMNLP},
}
EMNLP
Learning to translate by learning to communicate
C. Downey*, Xuhui Zhou*, L. Liu, and 1 more author
@inproceedings{Downey2022LearningTT,
  title     = {Learning to translate by learning to communicate},
  author    = {Downey*, C. and Zhou*, Xuhui and Liu, L. and Steinert-Threlkeld, Shane},
  booktitle = {EMNLP MRL},
  year      = {2023},
}
2022
NAACL
Annotators with Attitudes: How Annotator Beliefs And Identities Bias Toxic Language Detection
Maarten Sap, Swabha Swayamdipta, Laura Vianna, and 3 more authors
@inproceedings{sap2022annotatorsWithAttitudes,
  title     = {Annotators with Attitudes: How Annotator Beliefs And Identities Bias Toxic Language Detection},
  author    = {Sap, Maarten and Swayamdipta, Swabha and Vianna, Laura and Zhou, Xuhui and Choi, Yejin and Smith, Noah A.},
  year      = {2022},
  booktitle = {NAACL},
}
ACL ConvAI
Extracting and Inferring Personal Attributes from Dialogue
Zhilin Wang, Xuhui Zhou, Rik Koncel-Kedziorski, and 2 more authors
@inproceedings{zhilin2022exandinfpersonalatt,
  title     = {Extracting and Inferring Personal Attributes from Dialogue},
  author    = {Wang, Zhilin and Zhou, Xuhui and Koncel-Kedziorski, Rik and Marin, Alex and Xia, Fei},
  year      = {2022},
  booktitle = {ACL ConvAI},
}
ICLR EmeCom
Emergent Communication Fine-tuning (EC-FT) for Pretrained Language Models
Shane Steinert-Threlkeld, Xuhui Zhou, Zeyu Liu, and 1 more author
In Emergent Communication Workshop at ICLR 2022, 2022
@inproceedings{steinert-threlkeld2022emergent,
  title     = {Emergent Communication Fine-tuning ({EC}-{FT}) for Pretrained Language Models},
  author    = {Steinert-Threlkeld, Shane and Zhou, Xuhui and Liu, Zeyu and Downey, C.M.},
  booktitle = {Emergent Communication Workshop at ICLR 2022},
  award     = {Runner-up Best Paper},
  year      = {2022},
  url       = {https://openreview.net/forum?id=SUqrM7WR7W5},
}
2021
EACL
Challenges in Automated Debiasing for Toxic Language Detection
Xuhui Zhou, Maarten Sap, Swabha Swayamdipta, and 2 more authors
@inproceedings{zhou-etal-2020-debiasing,
  title     = {Challenges in Automated Debiasing for Toxic Language Detection},
  author    = {Zhou, Xuhui and Sap, Maarten and Swayamdipta, Swabha and Choi, Yejin and Smith, Noah A.},
  booktitle = {EACL},
  year      = {2021},
}
2020
BlackboxNLP
Linguistically-Informed Transformations (LIT): A Method for Automatically Generating Contrast Sets
Chuanrong Li, Lin Shengshuo, Zeyu Liu, and 3 more authors
In Proceedings of the Third BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP, Nov 2020
Although large-scale pretrained language models, such as BERT and RoBERTa, have achieved superhuman performance on in-distribution test sets, their performance suffers on out-of-distribution test sets (e.g., on contrast sets). Building contrast sets often requires human-expert annotation, which is expensive and hard to create on a large scale. In this work, we propose a Linguistically-Informed Transformation (LIT) method to automatically generate contrast sets, which enables practitioners to explore linguistic phenomena of interests as well as compose different phenomena. Experimenting with our method on SNLI and MNLI shows that current pretrained language models, although being claimed to contain sufficient linguistic knowledge, struggle on our automatically generated contrast sets. Furthermore, we improve models’ performance on the contrast sets by applying LIT to augment the training data, without affecting performance on the original data.
EMNLP
Multilevel Text Alignment with Cross-Document Attention
Xuhui Zhou, Nikolaos Pappas, and Noah A. Smith
In Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), Nov 2020
Text alignment finds application in tasks such as citation recommendation and plagiarism detection. Existing alignment methods operate at a single, predefined level and cannot learn to align texts at, for example, sentence and document levels. We propose a new learning approach that equips previously established hierarchical attention encoders for representing documents with a cross-document attention component, enabling structural comparisons across different levels (document-to-document and sentence-to-document). Our component is weakly supervised from document pairs and can align at multiple levels. Our evaluation on predicting document-to-document relationships and sentence-to-document relationships on the tasks of citation recommendation and plagiarism detection shows that our approach outperforms previously established hierarchical, attention encoders based on recurrent and transformer contextualization that are unaware of structural correspondence between documents.
ACL SRW
RPD: A Distance Function Between Word Embeddings
Xuhui Zhou, Shujian Huang, and Zaixiang Zheng
In Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop, Nov 2020
It is well-understood that different algorithms, training processes, and corpora produce different word embeddings. However, less is known about the relation between different embedding spaces, i.e. how far different sets of embeddings deviate from each other. In this paper, we propose a novel metric called Relative Pairwise Inner Product Distance (RPD) to quantify the distance between different sets of word embeddings. This unitary-invariant metric has a unified scale for comparing different sets of word embeddings. Based on the properties of RPD, we study the relations of word embeddings of different algorithms systematically and investigate the influence of different training processes and corpora. The results shed light on the poorly understood word embeddings and justify RPD as a measure of the distance of embedding space.
AAAI
Evaluating Commonsense in Pre-trained Language Models
Xuhui Zhou, Y. Zhang, Leyang Cui, and 1 more author
In Proceedings of the AAAI Conference on Artificial Intelligence, 34(05), Nov 2020