PhD Student
Goethe-Universität Frankfurt am Main
Robert-Mayer-Straße 10
Room 401c
D-60325 Frankfurt am Main
D-60054 Frankfurt am Main (use for package delivery)
Postfach / P.O. Box: 154
Phone:
Mail:
Office Hour: TBA
Publications
2025
2025.
DUUI: A Toolbox for the Construction of a new Kind of Natural
Language Processing. Proceedings of the DHd 2025: Under Construction. Geisteswissenschaften
und Data Humanities.
accepted.
BibTeX
@inproceedings{Abrami:et:al:2025,
  author    = {Abrami, Giuseppe and Baumartz, Daniel and Mehler, Alexander},
  title     = {{DUUI}: A Toolbox for the Construction of a new Kind of Natural
               Language Processing},
  booktitle = {Proceedings of the DHd 2025: Under Construction. Geisteswissenschaften
               und Data Humanities},
  series    = {DHd 2025},
  location  = {Bielefeld, Germany},
  numpages  = {3},
  year      = {2025},
  keywords  = {duui},
  note      = {accepted}
}
2024
2024.
Measuring Group Creativity of Dialogic Interaction Systems by
Means of Remote Entailment Analysis. Proceedings of the 35th ACM Conference on Hypertext and Social Media, 153–166.
BibTeX
@inproceedings{Baumartz:et:al:2024,
  author    = {Baumartz, Daniel and Konca, Maxim and Mehler, Alexander and Schrottenbacher, Patrick
               and Braunheim, Dominik},
  title     = {Measuring Group Creativity of Dialogic Interaction Systems by
               Means of Remote Entailment Analysis},
  booktitle = {Proceedings of the 35th ACM Conference on Hypertext and Social Media},
  series    = {HT '24},
  pages     = {153--166},
  numpages  = {14},
  year      = {2024},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Poznan, Poland},
  isbn      = {9798400705953},
  doi       = {10.1145/3648188.3675140},
  url       = {https://doi.org/10.1145/3648188.3675140},
  keywords  = {Creative AI, Creativity, Generative AI, Hermeneutics, NLP},
  abstract  = {We present a procedure for assessing group creativity that allows
               us to compare the contributions of human interlocutors and chatbots
               based on generative AI such as ChatGPT. We focus on everyday creativity
               in terms of dialogic communication and test four hypotheses about
               the difference between human and artificial communication. Our
               procedure is based on a test that requires interlocutors to cooperatively
               interpret a sequence of sentences for which we control for coherence
               gaps with reference to the notion of entailment. Using NLP methods,
               we automatically evaluate the spoken or written contributions
               of interlocutors (human or otherwise). The paper develops a routine
               for automatic transcription based on Whisper, for sampling texts
               based on their entailment relations, for analyzing dialogic contributions
               along their semantic embeddings, and for classifying interlocutors
               and interaction systems based on them. In this way, we highlight
               differences between human and artificial conversations under conditions
               that approximate free dialogic communication. We show that despite
               their obvious classificatory differences, it is difficult to see
               clear differences even in the domain of dialogic communication
               given the current instruments of NLP.}
}
May, 2024.
Dependencies over Times and Tools (DoTT). Proceedings of the 2024 Joint International Conference on Computational
Linguistics, Language Resources and Evaluation (LREC-COLING 2024), 4641–4653.
BibTeX
@inproceedings{Luecking:et:al:2024,
  author    = {L{\"u}cking, Andy and Abrami, Giuseppe and Hammerla, Leon and Rahn, Marc
               and Baumartz, Daniel and Eger, Steffen and Mehler, Alexander},
  editor    = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro
               and Sakti, Sakriani and Xue, Nianwen},
  title     = {Dependencies over Times and Tools ({D}o{TT})},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational
               Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  pages     = {4641--4653},
  year      = {2024},
  month     = {may},
  publisher = {ELRA and ICCL},
  address   = {Torino, Italy},
  url       = {https://aclanthology.org/2024.lrec-main.415},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2024/05/LREC_2024_Poster_DoTT.pdf},
  abstract  = {Purpose: Based on the examples of English and German, we investigate
               to what extent parsers trained on modern variants of these languages
               can be transferred to older language levels without loss. Methods:
               We developed a treebank called DoTT (https://github.com/texttechnologylab/DoTT)
               which covers, roughly, the time period from 1800 until today,
               in conjunction with the further development of the annotation
               tool DependencyAnnotator. DoTT consists of a collection of diachronic
               corpora enriched with dependency annotations using 3 parsers,
               6 pre-trained language models, 5 newly trained models for German,
               and two tag sets (TIGER and Universal Dependencies). To assess
               how the different parsers perform on texts from different time
               periods, we created a gold standard sample as a benchmark. Results:
               We found that the parsers/models perform quite well on modern
               texts (document-level LAS ranging from 82.89 to 88.54) and slightly
               worse on older texts, as expected (average document-level LAS
               84.60 vs. 86.14), but not significantly. For German texts, the
               (German) TIGER scheme achieved slightly better results than UD.
               Conclusion: Overall, this result speaks for the transferability
               of parsers to past language levels, at least dating back until
               around 1800. This very transferability, it is however argued,
               means that studies of language change in the field of dependency
               syntax can draw on dependency distance but miss out on some grammatical
               phenomena.}
}
2023
2023.
Unlocking the Heterogeneous Landscape of Big Data NLP with DUUI. Findings of the Association for Computational Linguistics: EMNLP 2023, 385–399.
BibTeX
@inproceedings{Leonhardt:et:al:2023,
  author    = {Leonhardt, Alexander and Abrami, Giuseppe and Baumartz, Daniel
               and Mehler, Alexander},
  editor    = {Bouamor, Houda and Pino, Juan and Bali, Kalika},
  title     = {Unlocking the Heterogeneous Landscape of Big Data {NLP} with {DUUI}},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2023},
  pages     = {385--399},
  year      = {2023},
  publisher = {Association for Computational Linguistics},
  address   = {Singapore},
  url       = {https://aclanthology.org/2023.findings-emnlp.29},
  pdf       = {https://aclanthology.org/2023.findings-emnlp.29.pdf},
  keywords  = {duui},
  abstract  = {Automatic analysis of large corpora is a complex task, especially
               in terms of time efficiency. This complexity is increased by the
               fact that flexible, extensible text analysis requires the continuous
               integration of ever new tools. Since there are no adequate frameworks
               for these purposes in the field of NLP, and especially in the
               context of UIMA, that are not outdated or unusable for security
               reasons, we present a new approach to address the latter task:
               Docker Unified UIMA Interface (DUUI), a scalable, flexible, lightweight,
               and feature-rich framework for automatic distributed analysis
               of text corpora that leverages Big Data experience and virtualization
               with Docker. We evaluate DUUI{'}s communication approach against
               a state-of-the-art approach and demonstrate its outstanding behavior
               in terms of time efficiency, enabling the analysis of big text
               data.}
}
2023.
Semantic Scene Builder: Towards a Context Sensitive Text-to-3D Scene Framework. Digital Human Modeling and Applications in Health, Safety, Ergonomics
and Risk Management, 461–479.
BibTeX
@inproceedings{Henlein:et:al:2023b,
  author    = {Henlein, Alexander and Kett, Attila and Baumartz, Daniel and Abrami, Giuseppe
               and Mehler, Alexander and Bastian, Johannes and Blecher, Yannic and Budgenhagen, David
               and Christof, Roman and Ewald, Tim-Oliver and Fauerbach, Tim and Masny, Patrick
               and Mende, Julian and Schn{\"u}re, Paul and Viel, Marc},
  editor    = {Duffy, Vincent G.},
  title     = {Semantic Scene Builder: Towards a Context Sensitive Text-to-3D Scene Framework},
  booktitle = {Digital Human Modeling and Applications in Health, Safety, Ergonomics
               and Risk Management},
  pages     = {461--479},
  year      = {2023},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  isbn      = {978-3-031-35748-0},
  doi       = {10.1007/978-3-031-35748-0_32},
  abstract  = {We introduce Semantic Scene Builder (SeSB), a VR-based text-to-3D
               scene framework using SemAF (Semantic Annotation Framework) as
               a scheme for annotating discourse structures. SeSB integrates
               a variety of tools and resources by using SemAF and UIMA as a
               unified data structure to generate 3D scenes from textual descriptions.
               Based on VR, SeSB allows its users to change annotations through
               body movements instead of symbolic manipulations: from annotations
               in texts to corrections in editing steps to adjustments in generated
               scenes, all this is done by grabbing and moving objects. We evaluate
               SeSB in comparison with a state-of-the-art open source text-to-scene
               method (the only one which is publicly available) and find that
               our approach not only performs better, but also allows for modeling
               a greater variety of scenes.}
}
2021
2021.
From distinguishability to informativity. A quantitative text
model for detecting random texts. Language and Text: Data, models, information and applications, 356:145–162.
BibTeX
@article{Konca:et:al:2021,
  author    = {Konca, Maxim and Mehler, Alexander and Baumartz, Daniel and Hemati, Wahed},
  editor    = {Paw{\l}owski, Adam and Ma{\v{c}}utek, Jan and Embleton, Sheila and Mikros, George},
  title     = {From distinguishability to informativity. A quantitative text
               model for detecting random texts},
  journal   = {Language and Text: Data, models, information and applications},
  volume    = {356},
  pages     = {145--162},
  year      = {2021},
  publisher = {John Benjamins Publishing Company},
  doi       = {10.1075/cilt.356.10kon}
}
2021.
SemioGraphs: Visualizing Topic Networks as Multi-Codal Graphs. International Quantitative Linguistics Conference (QUALICO 2021).
BibTeX
@inproceedings{Mehler:Uslu:Baumartz:2021,
  author    = {Mehler, Alexander and Baumartz, Daniel and Uslu, Tolga},
  title     = {{SemioGraphs:} Visualizing Topic Networks as Multi-Codal Graphs},
  booktitle = {International Quantitative Linguistics Conference (QUALICO 2021)},
  series    = {QUALICO 2021},
  location  = {Tokyo, Japan},
  year      = {2021},
  poster    = {https://www.texttechnologylab.org/files/Qualico_2021_Semiograph_Poster.pdf}
}
2020
June, 2020.
BA Thesis: Automatic Topic Modeling in the Context of Digital Libraries:
Mehrsprachige Korpus-basierte Erweiterung von text2ddc - eine
experimentelle Studie.
BibTeX
@bathesis{Baumartz:2020,
  author  = {Baumartz, Daniel},
  title   = {{Automatic Topic Modeling in the Context of Digital Libraries:
             Mehrsprachige Korpus-basierte Erweiterung von text2ddc - eine
             experimentelle Studie}},
  school  = {Johann Wolfgang Goethe-Universität, Institute of Computer
             Science and Mathematics, Text Technology Lab},
  address = {Frankfurt, Germany},
  year    = {2020},
  month   = {6},
  url     = {https://publikationen.ub.uni-frankfurt.de/frontdoor/index/index/docId/56381},
  pdf     = {https://publikationen.ub.uni-frankfurt.de/files/56381/baumartz_bachelorarbeit_2020_pub.pdf}
}
2020.
The Frankfurt Latin Lexicon. From Morphological Expansion and
Word Embeddings to SemioGraphs. Studi e Saggi Linguistici, 58(1):121–155.
BibTeX
@article{Mehler:et:al:2020b,
  author   = {Mehler, Alexander and Jussen, Bernhard and Geelhaar, Tim and Henlein, Alexander
              and Abrami, Giuseppe and Baumartz, Daniel and Uslu, Tolga and Hemati, Wahed},
  title    = {{The Frankfurt Latin Lexicon. From Morphological Expansion and
              Word Embeddings to SemioGraphs}},
  journal  = {Studi e Saggi Linguistici},
  volume   = {58},
  number   = {1},
  pages    = {121--155},
  year     = {2020},
  doi      = {10.4454/ssl.v58i1.276},
  url      = {https://www.studiesaggilinguistici.it/index.php/ssl/article/view/276},
  pdf      = {https://www.studiesaggilinguistici.it/index.php/ssl/article/download/276/219},
  abstract = {In this article we present the Frankfurt Latin Lexicon (FLL),
              a lexical resource for Medieval Latin that is used both for the
              lemmatization of Latin texts and for the post-editing of lemmatizations.
              We describe recent advances in the development of lemmatizers
              and test them against the Capitularies corpus (comprising Frankish
              royal edicts, mid-6th to mid-9th century), a corpus created as
              a reference for processing Medieval Latin. We also consider the
              post-correction of lemmatizations using a limited crowdsourcing
              process aimed at continuous review and updating of the FLL. Starting
              from the texts resulting from this lemmatization process, we describe
              the extension of the FLL by means of word embeddings, whose interactive
              traversing by means of SemioGraphs completes the digital enhanced
              hermeneutic circle. In this way, the article argues for a more
              comprehensive understanding of lemmatization, encompassing classical
              machine learning as well as intellectual post-corrections and,
              in particular, human computation in the form of interpretation
              processes based on graph representations of the underlying lexical
              resources.}
}
2019
2019.
Computing Classifier-based Embeddings with the Help of text2ddc. Proceedings of the 20th International Conference on Computational
Linguistics and Intelligent Text Processing, (CICLing 2019).
BibTeX
@inproceedings{Uslu:Mehler:Baumartz:2019,
  author    = {Uslu, Tolga and Mehler, Alexander and Baumartz, Daniel},
  title     = {{Computing Classifier-based Embeddings with the Help of text2ddc}},
  booktitle = {{Proceedings of the 20th International Conference on Computational
               Linguistics and Intelligent Text Processing, (CICLing 2019)}},
  series    = {{CICLing 2019}},
  location  = {La Rochelle, France},
  year      = {2019}
}
2019.
BigSense: a Word Sense Disambiguator for Big Data. Proceedings of the Digital Humanities 2019, (DH2019).
BibTeX
@inproceedings{Uslu:Mehler:Schulz:Baumartz:2019,
  author    = {Uslu, Tolga and Mehler, Alexander and Schulz, Clemens and Baumartz, Daniel},
  title     = {{{BigSense}: a Word Sense Disambiguator for Big Data}},
  booktitle = {{Proceedings of the Digital Humanities 2019, (DH2019)}},
  series    = {{DH2019}},
  location  = {Utrecht, Netherlands},
  year      = {2019},
  url       = {https://dev.clariah.nl/files/dh2019/boa/0199.html}
}
2019.
text2ddc meets Literature - Ein Verfahren für die Analyse und
Visualisierung thematischer Makrostrukturen. Proceedings of the 6th Digital Humanities Conference in the German-speaking
Countries, DHd 2019.
BibTeX
@inproceedings{Mehler:Uslu:Gleim:Baumartz:2019,
  author    = {Mehler, Alexander and Uslu, Tolga and Gleim, Rüdiger and Baumartz, Daniel},
  title     = {{text2ddc meets Literature - Ein Verfahren für die Analyse und
               Visualisierung thematischer Makrostrukturen}},
  booktitle = {Proceedings of the 6th Digital Humanities Conference in the German-speaking
               Countries, DHd 2019},
  series    = {DHd 2019},
  location  = {Frankfurt, Germany},
  year      = {2019},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/Preprint_DHd2019_text2ddc_meets_Literature.pdf},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/DHD_Poster___text2ddc_meets_Literature_Poster.pdf}
}
2018
2018.
LTV: Labeled Topic Vector. Proceedings of COLING 2018, the 27th International Conference
on Computational Linguistics: System Demonstrations, August 20-26.
BibTeX
@inproceedings{Baumartz:Uslu:Mehler:2018,
  author    = {Daniel Baumartz and Tolga Uslu and Alexander Mehler},
  title     = {{LTV}: Labeled Topic Vector},
  booktitle = {Proceedings of {COLING 2018}, the 27th International Conference
               on Computational Linguistics: System Demonstrations, August 20-26},
  year      = {2018},
  publisher = {The COLING 2018 Organizing Committee},
  address   = {Santa Fe, New Mexico, USA},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/06/coling2018.pdf},
  abstract  = {In this paper, we present LTV, a website and an API that generate
               labeled topic classifications based on the Dewey Decimal Classification
               (DDC), an international standard for topic classification in libraries.
               We introduce nnDDC, a largely language-independent neural network-based
               classifier for DDC-related topic classification, which we optimized
               using a wide range of linguistic features to achieve an F-score
               of 87.4\%. To show that our approach is language-independent,
               we evaluate nnDDC using up to 40 different languages. We derive
               a topic model based on nnDDC, which generates probability distributions
               over semantic units for any input on sense-, word- and text-level.
               Unlike related approaches, however, these probabilities are estimated
               by means of nnDDC so that each dimension of the resulting vector
               representation is uniquely labeled by a DDC class. In this way,
               we introduce a neural network-based Classifier-Induced Semantic
               Space (nnCISS).}
}
2018.
Evaluating and Integrating Databases in the Area of NLP. International Quantitative Linguistics Conference (QUALICO 2018).
BibTeX
@inproceedings{Hemati:Mehler:Uslu:Baumartz:Abrami:2018,
  author    = {Wahed Hemati and Alexander Mehler and Tolga Uslu and Daniel Baumartz
               and Giuseppe Abrami},
  title     = {Evaluating and Integrating Databases in the Area of {NLP}},
  booktitle = {International Quantitative Linguistics Conference (QUALICO 2018)},
  location  = {Wroclaw, Poland},
  year      = {2018},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/04/Hemat-Mehler-Uslu-Baumartz-Abrami-Qualico-2018.pdf},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2018/10/qualico2018_databases_poster_hemati_mehler_uslu_baumartz_abrami.pdf}
}
2018.
VienNA: Auf dem Weg zu einer Infrastruktur für die verteilte
interaktive evolutionäre Verarbeitung natürlicher Sprache. Forschungsinfrastrukturen und digitale Informationssysteme in
der germanistischen Sprachwissenschaft, 6.
BibTeX
@incollection{Mehler:Hemati:Gleim:Baumartz:2018,
  author    = {Alexander Mehler and Wahed Hemati and Rüdiger Gleim and Daniel Baumartz},
  editor    = {Henning Lobin and Roman Schneider and Andreas Witt},
  title     = {{VienNA: }{Auf dem Weg zu einer Infrastruktur für die verteilte
               interaktive evolutionäre Verarbeitung natürlicher Sprache}},
  booktitle = {Forschungsinfrastrukturen und digitale Informationssysteme in
               der germanistischen Sprachwissenschaft},
  volume    = {6},
  year      = {2018},
  publisher = {De Gruyter},
  address   = {Berlin}
}
2018.
Towards a DDC-based Topic Network Model of Wikipedia. Proceedings of 2nd International Workshop on Modeling, Analysis,
and Management of Social Networks and their Applications (SOCNET
2018), February 28, 2018.
BibTeX
@inproceedings{Uslu:Mehler:Niekler:Baumartz:2018,
  author    = {Tolga Uslu and Alexander Mehler and Andreas Niekler and Daniel Baumartz},
  title     = {Towards a {DDC}-based Topic Network Model of Wikipedia},
  booktitle = {Proceedings of 2nd International Workshop on Modeling, Analysis,
               and Management of Social Networks and their Applications (SOCNET
               2018), February 28, 2018},
  year      = {2018},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/TowardsDDC.pdf}
}
2018.
fastSense: An Efficient Word Sense Disambiguation Classifier. Proceedings of the 11th edition of the Language Resources and
Evaluation Conference, May 7 - 12.
BibTeX
@inproceedings{Uslu:et:al:2018,
  author    = {Tolga Uslu and Alexander Mehler and Daniel Baumartz and Alexander Henlein
               and Wahed Hemati},
  title     = {fastSense: An Efficient Word Sense Disambiguation Classifier},
  booktitle = {Proceedings of the 11th edition of the Language Resources and
               Evaluation Conference, May 7 - 12},
  series    = {LREC 2018},
  address   = {Miyazaki, Japan},
  year      = {2018},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/fastSense.pdf}
}
2017
2017.
TextImager as a Generic Interface to R. Software Demonstrations of the 15th Conference of the European
Chapter of the Association for Computational Linguistics (EACL
2017).
BibTeX
@inproceedings{Uslu:Hemati:Mehler:Baumartz:2017,
  author    = {Tolga Uslu and Wahed Hemati and Alexander Mehler and Daniel Baumartz},
  title     = {{TextImager} as a Generic Interface to {R}},
  booktitle = {Software Demonstrations of the 15th Conference of the European
               Chapter of the Association for Computational Linguistics (EACL
               2017)},
  location  = {Valencia, Spain},
  year      = {2017},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/TextImager.pdf}
}