% SemDial 2025 full paper: head and hand movements at turn transitions, based on the FraGA corpus.
% The umlaut in the first author's surname is written as a BibTeX special character so that
% sorting and label generation also work with classic 8-bit BibTeX.
@inproceedings{Luecking:Voll:Rott:Henlein:Mehler:2025-fraga,
  title     = {Head and Hand Movements During Turn Transitions: Data-Based Multimodal
               Analysis Using the {Frankfurt VR Gesture--Speech Alignment Corpus}
               ({FraGA})},
  author    = {L{\"u}cking, Andy and Voll, Felix and Rott, Daniel and Henlein, Alexander
               and Mehler, Alexander},
  year      = {2025},
  booktitle = {Proceedings of the 29th Workshop on The Semantics and Pragmatics
               of Dialogue -- Full Papers},
  series    = {SemDial'25 -- Bialogue},
  publisher = {SEMDIAL},
  url       = {http://semdial.org/anthology/Z25-Luecking_semdial_3316.pdf},
  pages     = {146--156},
  keywords  = {gemdis}
}
% ACM Hypertext 2025 (HT '25) paper presenting the VR-ParlExplorer system.
% The system name in the title is brace-protected so sentence-casing styles keep its capitals.
@inproceedings{Abrami:et:al:2025:c,
  author    = {Abrami, Giuseppe and Bundan, Daniel and Manolis, Chrisowaladis
               and Mehler, Alexander},
  title     = {{VR-ParlExplorer}: A Hypertext System for the Collaborative Interaction
               in Parliamentary Debate Spaces},
  year      = {2025},
  isbn      = {9798400715341},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3720553.3746672},
  doi       = {10.1145/3720553.3746672},
  abstract  = {The enhanced visualization and interaction with information in
               collaborative VR environments enabled by chatbots is currently
               rather limited. To fill this gap and create a concrete application
               that combines spatial and virtual concepts of hypertext systems
               based on the use of LLMs, we present VR-ParlExplorer as a system
               for virtualizing plenary debates that allows users to interact
               with virtual members of parliament through chatbots. VR-ParlExplorer
               is implemented as a Plugin for Va.Si.Li-Lab to enable immersion
               in the dynamics of communication in parliamentary debates. The
               paper describes the functionality of VR-ParlExplorer and discusses
               specifics of the use case it addresses.},
  booktitle = {Proceedings of the 36th ACM Conference on Hypertext and Social Media},
  pages     = {177--183},
  numpages  = {7},
  location  = {Chicago, USA},
  series    = {HT '25},
  pdf       = {https://dl.acm.org/doi/pdf/10.1145/3720553.3746672}
}
KONVENS 2025 (21th Conference on Natural Language Processing)
% KONVENS 2025 paper on the Multimodal Docker Unified UIMA Interface.
% Proper nouns and acronyms in the title are brace-protected against style down-casing.
@inproceedings{Bundan:Abrami:Mehler:2025,
  author    = {Bundan, Daniel and Abrami, Giuseppe and Mehler, Alexander},
  title     = {Multimodal {Docker} Unified {UIMA} Interface: New Horizons for Distributed
               Microservice-Oriented Processing of Corpora using {UIMA}},
  booktitle = {Proceedings of the 21st Conference on Natural Language Processing
               (KONVENS 2025): Long and Short Papers},
  year      = {2025},
  editor    = {Wartena, Christian and Heid, Ulrich},
  location  = {Hildesheim, Germany},
  address   = {Hannover, Germany},
  publisher = {HsH Applied Academics},
  pages     = {257--268},
  series    = {KONVENS '25},
  url       = {https://aclanthology.org/2025.konvens-1.22/},
  pdf       = {https://aclanthology.org/2025.konvens-1.22.pdf},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2025/09/Poster_Multimodal_DUUI_KONVENS_2025.pdf},
  keywords  = {duui,neglab,new-data-spaces,circlet}
}
Our paper, “Filling the Temporal Void: Recovering Missing Publication Years in the Project Gutenberg Corpus Using LLMs“, has been accepted to the Findings of the 63rd Annual Meeting of the Association for Computational Linguistics (ACL 2025).
% Findings of ACL 2025 paper on recovering publication years for the Project Gutenberg corpus.
% The month is given as the bare predefined macro (not a braced string) so styles can
% expand and localise it.
@inproceedings{Momen:Schaaf:Mehler:2025,
  title     = {Filling the Temporal Void: Recovering Missing Publication Years
               in the {Project Gutenberg} Corpus Using {LLM}s},
  author    = {Momen, Omar and Schaaf, Manuel and Mehler, Alexander},
  editor    = {Che, Wanxiang and Nabende, Joyce and Shutova, Ekaterina and Pilehvar, Mohammad Taher},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2025},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-acl.890/},
  pages     = {17318--17334},
  isbn      = {979-8-89176-256-5},
  abstract  = {Analysing texts spanning long periods of time is critical for
               researchers in historical linguistics and related disciplines.
               However, publicly available corpora suitable for such analyses
               are scarce. The Project Gutenberg (PG) corpus presents a significant
               yet underutilized opportunity in this context, due to the absence
               of accurate temporal metadata. We take advantage of language models
               and information retrieval to explore four sources of information
               {--} Open Web, Wikipedia, Open Library API, and PG books texts
               {--} to add missing temporal metadata to the PG corpus. Through
               20 experiments employing state-of-the-art Large Language Models
               (LLMs) and Retrieval-Augmented Generation (RAG) methods, we estimate
               the production years of all PG books. We curate an enriched metadata
               repository for the PG corpus and propose a refined version for
               it, which includes 53,774 books with a total of 3.8 billion tokens
               in 11 languages, produced between 1600 and 2000. This work provides
               a new resource for computational linguistics and humanities studies
               focusing on diachronic analyses. The final dataset and all experiments
               data are publicly available (https://github.com/OmarMomen14/pg-dates).},
  pdf       = {https://aclanthology.org/2025.findings-acl.890.pdf}
}
We are delighted that our paper “Towards Unified, Dynamic, and Annotation-based Visualizations and Exploration of Annotated Big Data Corpora with the Help of Unified Corpus Explorer” has been awarded the Best Demo Paper at this year’s annual conference of the Nations of the Americas Chapter of the Association for Computational Linguistics (NAACL 2025).
% NAACL 2025 system demonstration paper introducing the Unified Corpus Explorer (UCE).
% The system name in the title is brace-protected so sentence-casing styles keep its capitals.
@inproceedings{Boenisch:et:al:2025,
  title     = {Towards Unified, Dynamic and Annotation-based Visualisations and
               Exploration of Annotated Big Data Corpora with the Help of {Unified
               Corpus Explorer}},
  author    = {B{\"o}nisch, Kevin and Abrami, Giuseppe and Mehler, Alexander},
  editor    = {Dziri, Nouha and Ren, Sean (Xiang) and Diao, Shizhe},
  booktitle = {Proceedings of the 2025 Conference of the Nations of the Americas
               Chapter of the Association for Computational Linguistics: Human
               Language Technologies (System Demonstrations)},
  year      = {2025},
  address   = {Albuquerque, New Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.naacl-demo.42/},
  pages     = {522--534},
  isbn      = {979-8-89176-191-9},
  abstract  = {The annotation and exploration of large text corpora, both automatic
               and manual, presents significant challenges across multiple disciplines,
               including linguistics, digital humanities, biology, and legal
               science. These challenges are exacerbated by the heterogeneity
               of processing methods, which complicates corpus visualization,
               interaction, and integration. To address these issues, we introduce
               the Unified Corpus Explorer (UCE), a standardized, dockerized,
               open-source and dynamic Natural Language Processing (NLP) application
               designed for flexible and scalable corpus navigation. Herein,
               UCE utilizes the UIMA format for NLP annotations as a standardized
               input, constructing interfaces and features around those annotations
               while dynamically adapting to the corpora and their extracted
               annotations. We evaluate UCE based on a user study and demonstrate
               its versatility as a corpus explorer based on generative AI.},
  note      = {Best Demo Award},
  pdf       = {https://aclanthology.org/2025.naacl-demo.42.pdf},
  keywords  = {uce,new-data-spaces,circlet,core,core_c08}
}