Modelling the Information Landscape (IL) for Assessing and Analyzing Domain-Specific and Generic Critical Online Reasoning
Project B05 investigates how linguistic features function as cues in the online information landscape (IL) and how they relate to students’ performance in critical online reasoning (COR) tasks. Although previous research has shown that linguistic cues influence text readability, source credibility, and performance in domain-specific knowledge tests in offline contexts, their applicability to online environments remains insufficiently examined. B05 addresses this gap by modeling the linguistic characteristics of texts that students engage with while solving COR tasks.
The main objective is to develop a theoretically grounded model of linguistic features that predicts COR processes and performance. The study analyzes differences in linguistic predictors between generic and domain-specific COR tasks across four domains (economics, medicine, social sciences, and physics) and across three cognitive facets of COR: online information acquisition, critical information evaluation, and reasoning through evidence, argumentation, and synthesis. It further examines the levels at which these features operate, ranging from individual texts to domains, genres, and the IL as a whole.
Methodologically, B05 integrates qualitative and quantitative approaches. Linguistic features related to evidentiality, information sources, and text organization are first identified qualitatively, then operationalized quantitatively, expanded using machine learning, and evaluated for predictive validity. This integration follows a computational hermeneutic approach in which quantitative modeling is grounded in and interpretable through prior linguistic analysis.
The project yields machine learning models that enable automated analysis of fine-grained linguistic features across multiple texts within the IL. Within the CORE research unit, B05 contributes detailed linguistic data that complement analyses of text, performance, media and content characteristics, narrative structures, and multimodal data in related projects.
Team TTLab
- Principal Investigator: Prof. Dr. Alexander Mehler
- Maxim Konca
Team JGU
- Principal Investigator: Prof. Dr. Walter Bisang
- Patryk Czerwinski
Publications
BibTeX
% Conference paper presenting the GhostWriter dataset for AI-generated text
% detection. The acceptance status is recorded in the note field.
@inproceedings{Schaaf:et:al:2026,
  author    = {Schaaf, Manuel and B{\"o}nisch, Kevin and Mehler, Alexander},
  title     = {{GhostWriter}: Hidden {AI}-Generated Texts Over Multiple Languages,
               Domains and Generators},
  booktitle = {Proceedings of the 15th International Conference on Language Resources
               and Evaluation (LREC 2026)},
  year      = {2026},
  keywords  = {Corpus, Natural Language Generation, Validation of LRs, AI-generated Text Detection, core, core_b05},
  abstract  = {The advent of Transformer-based Large Language Models (LLMs) has
               led to an unprecedented surge of AI-generated text (AIGT) across
               online platforms and academic domains. While these models exhibit
               near-human fluency and stylistic coherence, their widespread adoption
               has raised concerns about authorship integrity, research quality,
               and the recursive contamination of training corpora with synthetic
               data. These developments underscore the need for reliable AIGT
               detection methods and benchmark datasets, particularly for malicious
               or deceptive ghostwriting scenarios where AIGT is intentionally
               crafted to evade detection. To address this, we present GhostWriter,
               a large-scale, bilingual (German and English), multi-generator,
               and multi-domain dataset for AIGT detection. The dataset comprises
               human- and AI-authored texts produced under domain-specific ghostwriting
               conditions, including examples intentionally embedded within otherwise
               human-written texts to obscure their AI origin. With GhostWriter,
               we (i) aim to expand the resources available for German AIGT datasets,
               (ii) emphasize mixed or fused synthesizations---since most existing
               corpora are limited to the document level---and (iii) introduce
               specifically crafted malicious ghostwriting scenarios across multiple
               domains and generators.},
  note      = {accepted}
}
BibTeX
% Conference contribution for ITC 2026 (Auckland). The acceptance status is
% recorded in the note field.
@inproceedings{Bisang:Mehler:2026,
  author    = {Bisang, Walter and Mehler, Alexander},
  title     = {Linguistic Features as Predictors of Students' Performance in
               Domain-Specific Critical Online Reasoning Tasks},
  booktitle = {International Test Commission Conference (ITC) 2026},
  year      = {2026},
  eventdate = {2026-06-30/2026-07-03},
  location  = {Auckland, New Zealand},
  keywords  = {core,core_b05},
  note      = {accepted}
}
BibTeX
% SoftwareX article introducing DUUIgateway. The doi field holds the bare
% identifier (no resolver prefix); url points to the publisher landing page.
@article{Borkowski:et:al:2026,
  author   = {Borkowski, Cedric and Abrami, Giuseppe and Terefe, Dawit and Baumartz, Daniel
              and Mehler, Alexander},
  title    = {{DUUIgateway}: A Web Service for Platform-independent, Ubiquitous Big Data {NLP}},
  journal  = {SoftwareX},
  volume   = {34},
  pages    = {102549},
  year     = {2026},
  issn     = {2352-7110},
  doi      = {10.1016/j.softx.2026.102549},
  url      = {https://www.sciencedirect.com/science/article/pii/S2352711026000439},
  keywords = {duui, neglab, core, core_b05, core_c08, new-data-spaces, circlet},
  abstract = {Distributed processing of unstructured text data is a challenge
              in the rapidly changing and evolving natural language processing
              (NLP) landscape. This landscape is characterized by heterogeneous
              systems, models, and formats, and especially by the increasing
              influence of AI systems. While many of these systems handle text
              data, there are also unified systems that process multiple input
              and output formats, while allowing for distributed corpus processing.
              However, there are hardly any user-friendly interfaces that allow
              existing NLP frameworks to be used flexibly and extended in a
              user-controlled manner. Due to this gap and the increasing importance
              of NLP for various scientific disciplines, there has been a demand
              for a web and API based flexible software solution for deploying,
              managing and monitoring NLP systems. Such a solution is provided
              by Docker Unified UIMA-gateway. We introduce DUUIgateway and evaluate
              its API and user-driven approach to encapsulation. We also describe
              how these features improve the usability and accessibility of
              the NLP framework DUUI. We illustrate DUUIgateway in the field
              of process modeling in higher education and show how it closes
              the latter gap in NLP by making a variety of systems for processing
              text and multimodal data accessible to non-experts.}
}
BibTeX
% Journal article. The acceptance status is recorded in the note field; no
% volume or page data is given in the entry.
@article{Mehler:et:al:2026:a,
  author    = {Mehler, Alexander and Bisang, Walter and Konca, Maxim and Czerwinski, Patryk
               and Graf, Jeremias Josef and Fritsch, Jana},
  title     = {Linguistic Features of Student Responses as Indicators of Performance
               in Critical Online Reasoning Tasks},
  journal   = {Zeitschrift f{\"u}r Erziehungswissenschaft},
  year      = {2026},
  publisher = {Springer},
  keywords  = {core,core_b05},
  note      = {accepted}
}
BibTeX
% NAACL 2025 system demonstration paper introducing the Unified Corpus
% Explorer (UCE). The tool name is brace-protected so that sentence-casing
% styles keep its capitalisation.
@inproceedings{Boenisch:et:al:2025,
  author    = {B{\"o}nisch, Kevin and Abrami, Giuseppe and Mehler, Alexander},
  title     = {Towards Unified, Dynamic and Annotation-based Visualisations and
               Exploration of Annotated Big Data Corpora with the Help of {Unified
               Corpus Explorer}},
  editor    = {Dziri, Nouha and Ren, Sean (Xiang) and Diao, Shizhe},
  booktitle = {Proceedings of the 2025 Conference of the Nations of the Americas
               Chapter of the Association for Computational Linguistics: Human
               Language Technologies (System Demonstrations)},
  year      = {2025},
  address   = {Albuquerque, New Mexico},
  publisher = {Association for Computational Linguistics},
  pages     = {522--534},
  isbn      = {979-8-89176-191-9},
  url       = {https://aclanthology.org/2025.naacl-demo.42/},
  pdf       = {https://aclanthology.org/2025.naacl-demo.42.pdf},
  abstract  = {The annotation and exploration of large text corpora, both automatic
               and manual, presents significant challenges across multiple disciplines,
               including linguistics, digital humanities, biology, and legal
               science. These challenges are exacerbated by the heterogeneity
               of processing methods, which complicates corpus visualization,
               interaction, and integration. To address these issues, we introduce
               the Unified Corpus Explorer (UCE), a standardized, dockerized,
               open-source and dynamic Natural Language Processing (NLP) application
               designed for flexible and scalable corpus navigation. Herein,
               UCE utilizes the UIMA format for NLP annotations as a standardized
               input, constructing interfaces and features around those annotations
               while dynamically adapting to the corpora and their extracted
               annotations. We evaluate UCE based on a user study and demonstrate
               its versatility as a corpus explorer based on generative AI.},
  note      = {Best Demo Award},
  keywords  = {uce,new-data-spaces,circlet,core,core_c08}
}
BibTeX
% SoftwareX article on the Docker Unified UIMA Interface (DUUI). The doi field
% holds the bare identifier (no resolver prefix); url points to the publisher
% landing page.
@article{Abrami:et:al:2025:a,
  author   = {Abrami, Giuseppe and Genios, Markos and Fitzermann, Filip and Baumartz, Daniel
              and Mehler, Alexander},
  title    = {{Docker Unified UIMA Interface}: New perspectives for {NLP} on big data},
  journal  = {SoftwareX},
  volume   = {29},
  pages    = {102033},
  year     = {2025},
  issn     = {2352-7110},
  doi      = {10.1016/j.softx.2024.102033},
  url      = {https://www.sciencedirect.com/science/article/pii/S2352711024004047},
  keywords = {Docker, Kubernetes, UIMA, Distributed NLP, duui, biofid, neglab, new-data-spaces, circlet, core, core_c08},
  abstract = {Processing large amounts of natural language text using machine
              learning-based models is becoming important in many disciplines.
              This demand is being met by a variety of approaches, resulting
              in the heterogeneous deployment of separate, partly incompatible,
              not natively scalable applications. To overcome the technological
              bottleneck involved, we have developed Docker Unified UIMA Interface,
              a system for the standardized, parallel, platform-independent,
              distributed and microservices-based solution for processing large
              and extensive text corpora with any NLP method. We present DUUI
              as a framework that enables automated orchestration of GPU-based
              NLP processes beyond the existing Docker Swarm cluster variant,
              and in addition to the adaptation to new runtime environments
              such as Kubernetes. Therefore, a new driver for DUUI is introduced,
              which enables the lightweight orchestration of DUUI processes
              within a Kubernetes environment in a scalable setup. In this way,
              the paper opens up novel text-technological perspectives for existing
              practices in disciplines that deal with the scientific analysis
              of large amounts of data based on NLP.}
}
BibTeX
% DHd 2025 contribution on DUUI; the poster field links the poster deposit.
% The acronym is brace-protected so sentence-casing styles do not print "Duui".
@inproceedings{Abrami:et:al:2025:b,
  author    = {Abrami, Giuseppe and Baumartz, Daniel and Mehler, Alexander},
  title     = {{DUUI}: A Toolbox for the Construction of a new Kind of Natural
               Language Processing},
  booktitle = {Proceedings of the DHd 2025: Under Construction. Geisteswissenschaften
               und Data Humanities},
  series    = {DHd 2025},
  year      = {2025},
  location  = {Bielefeld, Germany},
  publisher = {Zenodo},
  pages     = {446--448},
  numpages  = {3},
  doi       = {10.5281/zenodo.14943128},
  url       = {https://doi.org/10.5281/zenodo.14943128},
  poster    = {https://zenodo.org/records/14944575},
  keywords  = {duui,core,core_c08}
}
BibTeX
% Chapter with its own title and authors inside an edited volume, so the entry
% is typed as incollection: classic styles then use booktitle and accept
% author and editor together. The system name is brace-protected in the title.
@incollection{Mehler:et:al:2024:a,
  author    = {Mehler, Alexander and Bagci, Mevl{\"u}t and Schrottenbacher, Patrick
               and Henlein, Alexander and Konca, Maxim and Abrami, Giuseppe and B{\"o}nisch, Kevin
               and Stoeckel, Manuel and Spiekermann, Christian and Engel, Juliane},
  editor    = {Zlatkin-Troitschanskaia, Olga and Nagel, Marie-Theres and Klose, Verena
               and Mehler, Alexander},
  title     = {Towards New Data Spaces for the Study of Multiple Documents with
               {Va.Si.Li-Lab}: A Conceptual Analysis},
  booktitle = {Students', Graduates' and Young Professionals' Critical Use of
               Online Information: Digital Performance Assessment and Training
               within and across Domains},
  year      = {2024},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  pages     = {259--303},
  abstract  = {The constitution of multiple documents has so far been studied
               essentially as a process in which a single learner consults a
               number (of segments) of different documents in the context of
               the task at hand in order to construct a mental model for the
               purpose of completing the task. As a result of this research focus,
               the constitution of multiple documents appears predominantly as
               a monomodal, non-interactive process in which mainly textual units
               are studied, supplemented by images, text-image relations and
               comparable artifacts. This approach is reflected in the contextual
               fixity of the research design, in which the learners under study
               search for information using suitably equipped computers. If,
               on the other hand, we consider the openness of multi-agent learning
               situations, this scenario lacks the aspects of interactivity,
               contextual openness and, above all, the multimodality of information
               objects, information processing and information exchange. This
               is where the chapter comes in. It describes Va.Si.Li-Lab as an
               instrument for multimodal measurement for studying and modeling
               multiple documents in the context of interactive learning in a
               multi-agent environment. To this end, the chapter places Va.Si.Li-Lab
               in the spectrum of evolutionary approaches that vary the combination
               of human and machine innovation and selection. It also combines
               the requirements of multimodal representational learning with
               various aspects of contextual plasticity to prepare Va.Si.Li-Lab
               as a system that can be used for experimental research. The chapter
               is conceptual in nature, designing a system of requirements using
               the example of Va.Si.Li-Lab to outline an experimental environment
               in which the study of Critical Online Reasoning (COR) as a group
               process becomes possible. Although the chapter illustrates some
               of these requirements with realistic data from the field of simulation-based
               learning, the focus is still conceptual rather than experimental,
               hypothesis-driven. That is, the chapter is concerned with the
               design of a technology for future research into COR processes.},
  isbn      = {978-3-031-69510-0},
  doi       = {10.1007/978-3-031-69510-0_12},
  url       = {https://doi.org/10.1007/978-3-031-69510-0_12},
  keywords  = {core, core_c08}
}
BibTeX
% ACM Hypertext 2024 paper on measuring group creativity in dialogic
% interaction. The page range uses the ASCII double hyphen and spans the
% 14 pages stated in numpages.
@inproceedings{Baumartz:et:al:2024,
  author    = {Baumartz, Daniel and Konca, Maxim and Mehler, Alexander and Schrottenbacher, Patrick
               and Braunheim, Dominik},
  title     = {Measuring Group Creativity of Dialogic Interaction Systems by
               Means of Remote Entailment Analysis},
  booktitle = {Proceedings of the 35th ACM Conference on Hypertext and Social Media},
  series    = {HT '24},
  year      = {2024},
  location  = {Poznan, Poland},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  pages     = {153--166},
  numpages  = {14},
  isbn      = {9798400705953},
  doi       = {10.1145/3648188.3675140},
  url       = {https://doi.org/10.1145/3648188.3675140},
  keywords  = {Creative AI, Creativity, Generative AI, Hermeneutics, NLP, core, core_b05, core_c08},
  abstract  = {We present a procedure for assessing group creativity that allows
               us to compare the contributions of human interlocutors and chatbots
               based on generative AI such as ChatGPT. We focus on everyday creativity
               in terms of dialogic communication and test four hypotheses about
               the difference between human and artificial communication. Our
               procedure is based on a test that requires interlocutors to cooperatively
               interpret a sequence of sentences for which we control for coherence
               gaps with reference to the notion of entailment. Using NLP methods,
               we automatically evaluate the spoken or written contributions
               of interlocutors (human or otherwise). The paper develops a routine
               for automatic transcription based on Whisper, for sampling texts
               based on their entailment relations, for analyzing dialogic contributions
               along their semantic embeddings, and for classifying interlocutors
               and interaction systems based on them. In this way, we highlight
               differences between human and artificial conversations under conditions
               that approximate free dialogic communication. We show that despite
               their obvious classificatory differences, it is difficult to see
               clear differences even in the domain of dialogic communication
               given the current instruments of NLP.}
}
BibTeX
% DH 2024 book-of-abstracts contribution on DUUI; poster and pdf link the
% supplementary files. The month uses the standard three-letter macro, and the
% acronyms in the title are brace-protected against sentence casing.
@inproceedings{Abrami:Mehler:2024,
  author    = {Abrami, Giuseppe and Mehler, Alexander},
  title     = {Efficient, uniform and scalable parallel {NLP} pre-processing with
               {DUUI}: Perspectives and Best Practice for the Digital Humanities},
  editor    = {Karajgikar, Jajwalya and Janco, Andrew and Otis, Jessica},
  booktitle = {Digital Humanities Conference 2024 - Book of Abstracts (DH 2024)},
  series    = {DH},
  year      = {2024},
  month     = aug,
  location  = {Washington, DC, USA},
  publisher = {Zenodo},
  pages     = {15--18},
  numpages  = {4},
  doi       = {10.5281/zenodo.13761079},
  url       = {https://doi.org/10.5281/zenodo.13761079},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2024/12/DH2024_Abstract.pdf},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2024/12/DH2024_Poster.pdf},
  keywords  = {duui, core, core_c08}
}
BibTeX
% Findings of EMNLP 2023 paper presenting the Docker Unified UIMA Interface
% (DUUI) framework; pdf links the full text.
@inproceedings{Leonhardt:et:al:2023,
  author    = {Leonhardt, Alexander and Abrami, Giuseppe and Baumartz, Daniel
               and Mehler, Alexander},
  editor    = {Bouamor, Houda and Pino, Juan and Bali, Kalika},
  title     = {Unlocking the Heterogeneous Landscape of Big Data {NLP} with {DUUI}},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2023},
  publisher = {Association for Computational Linguistics},
  address   = {Singapore},
  year      = {2023},
  pages     = {385--399},
  url       = {https://aclanthology.org/2023.findings-emnlp.29},
  pdf       = {https://aclanthology.org/2023.findings-emnlp.29.pdf},
  keywords  = {duui, core, core_c08},
  abstract  = {Automatic analysis of large corpora is a complex task, especially
               in terms of time efficiency. This complexity is increased by the
               fact that flexible, extensible text analysis requires the continuous
               integration of ever new tools. Since there are no adequate frameworks
               for these purposes in the field of NLP, and especially in the
               context of UIMA, that are not outdated or unusable for security
               reasons, we present a new approach to address the latter task:
               Docker Unified UIMA Interface (DUUI), a scalable, flexible, lightweight,
               and feature-rich framework for automatic distributed analysis
               of text corpora that leverages Big Data experience and virtualization
               with Docker. We evaluate DUUI{'}s communication approach against
               a state-of-the-art approach and demonstrate its outstanding behavior
               in terms of time efficiency, enabling the analysis of big text
               data.}
}
