Daniel Baumartz – Text Technology Lab

PhD Student

Goethe-Universität Frankfurt am Main
Robert-Mayer-Straße 10
Room 401c
D-60325 Frankfurt am Main
D-60054 Frankfurt am Main (use for package delivery)
Postfach / P.O. Box: 154
Phone:
Mail:

Office Hour: TBA

Publications

2026

Sebastian Gombert, Gianluca Romano, Leon Camus, Daniel Baumartz, Fabiola Gonçalves Ribeiro, Alexander Mehler and Hendrik Drachsler. 2026. NeoBridge: A Scalable Platform for Assessment Orchestration and Log Data Collection in Online Reasoning Assessments. Proceedings of the Twenty-first European Conference on Technology Enhanced Learning. accepted.

BibTeX

@inproceedings{Gombert:et:al:2026:b,
  author    = {Gombert, Sebastian and Romano, Gianluca and Camus, Leon and Baumartz, Daniel
               and Gon{\c{c}}alves Ribeiro, Fabiola and Mehler, Alexander and Drachsler, Hendrik},
  title     = {{NeoBridge}: A Scalable Platform for Assessment Orchestration
               and Log Data Collection in Online Reasoning Assessments},
  booktitle = {Proceedings of the Twenty-first European Conference on Technology
               Enhanced Learning},
  series    = {ECTEL 2026},
  address   = {Valencia, Spain},
  year      = {2026},
  keywords  = {core, core_c08},
  eventdate = {2026-09-14/2026-09-18},
  note      = {accepted}
}

Cedric Borkowski, Giuseppe Abrami, Dawit Terefe, Daniel Baumartz and Alexander Mehler. 2026. DUUIgateway: A Web Service for Platform-independent, Ubiquitous Big Data NLP. SoftwareX, 34:102549.

BibTeX

@article{Borkowski:et:al:2026,
  title     = {{DUUIgateway}: A Web Service for Platform-independent, Ubiquitous Big Data NLP},
  journal   = {SoftwareX},
  volume    = {34},
  pages     = {102549},
  year      = {2026},
  issn      = {2352-7110},
  doi       = {https://doi.org/10.1016/j.softx.2026.102549},
  url       = {https://www.sciencedirect.com/science/article/pii/S2352711026000439},
  author    = {Borkowski, Cedric and Abrami, Giuseppe and Terefe, Dawit and Baumartz, Daniel
               and Mehler, Alexander},
  keywords  = {duui, neglab, core, core_b05, core_c08, new-data-spaces, circlet},
  abstract  = {Distributed processing of unstructured text data is a challenge
               in the rapidly changing and evolving natural language processing
               (NLP) landscape. This landscape is characterized by heterogeneous
               systems, models, and formats, and especially by the increasing
               influence of AI systems. While many of these systems handle text
               data, there are also unified systems that process multiple input
               and output formats, while allowing for distributed corpus processing.
               However, there are hardly any user-friendly interfaces that allow
               existing NLP frameworks to be used flexibly and extended in a
               user-controlled manner. Due to this gap and the increasing importance
               of NLP for various scientific disciplines, there has been a demand
               for a web and API based flexible software solution for deploying,
               managing and monitoring NLP systems. Such a solution is provided
               by Docker Unified UIMA-gateway. We introduce DUUIgateway and evaluate
               its API and user-driven approach to encapsulation. We also describe
               how these features improve the usability and accessibility of
               the NLP framework DUUI. We illustrate DUUIgateway in the field
               of process modeling in higher education and show how it closes
               the latter gap in NLP by making a variety of systems for processing
               text and multimodal data accessible to non-experts.}
}

2025

Giuseppe Abrami, Markos Genios, Filip Fitzermann, Daniel Baumartz and Alexander Mehler. 2025. Docker Unified UIMA Interface: New perspectives for NLP on big data. SoftwareX, 29:102033.

BibTeX

@article{Abrami:et:al:2025:a,
  title     = {Docker Unified UIMA Interface: New perspectives for NLP on big data},
  journal   = {SoftwareX},
  volume    = {29},
  pages     = {102033},
  year      = {2025},
  issn      = {2352-7110},
  doi       = {https://doi.org/10.1016/j.softx.2024.102033},
  url       = {https://www.sciencedirect.com/science/article/pii/S2352711024004047},
  author    = {Giuseppe Abrami and Markos Genios and Filip Fitzermann and Daniel Baumartz
               and Alexander Mehler},
  keywords  = {Docker, Kubernetes, UIMA, Distributed NLP, duui, biofid, neglab, new-data-spaces, circlet, core, core_c08},
  abstract  = {Processing large amounts of natural language text using machine
               learning-based models is becoming important in many disciplines.
               This demand is being met by a variety of approaches, resulting
               in the heterogeneous deployment of separate, partly incompatible,
               not natively scalable applications. To overcome the technological
               bottleneck involved, we have developed Docker Unified UIMA Interface,
               a system for the standardized, parallel, platform-independent,
               distributed and microservices-based solution for processing large
               and extensive text corpora with any NLP method. We present DUUI
               as a framework that enables automated orchestration of GPU-based
               NLP processes beyond the existing Docker Swarm cluster variant,
               and in addition to the adaptation to new runtime environments
               such as Kubernetes. Therefore, a new driver for DUUI is introduced,
               which enables the lightweight orchestration of DUUI processes
               within a Kubernetes environment in a scalable setup. In this way,
               the paper opens up novel text-technological perspectives for existing
               practices in disciplines that deal with the scientific analysis
               of large amounts of data based on NLP.}
}

Giuseppe Abrami, Daniel Baumartz and Alexander Mehler. 2025. DUUI: A Toolbox for the Construction of a new Kind of Natural Language Processing. Proceedings of the DHd 2025: Under Construction. Geisteswissenschaften und Data Humanities, 446–448.

BibTeX

@inproceedings{Abrami:et:al:2025:b,
  author    = {Abrami, Giuseppe and Baumartz, Daniel and Mehler, Alexander},
  title     = {DUUI: A Toolbox for the Construction of a new Kind of Natural
               Language Processing},
  year      = {2025},
  booktitle = {Proceedings of the DHd 2025: Under Construction. Geisteswissenschaften
               und Data Humanities},
  numpages  = {3},
  location  = {Bielefeld, Germany},
  series    = {DHd 2025},
  publisher = {Zenodo},
  keywords  = {duui,core,core_c08},
  pages     = {446--448},
  doi       = {10.5281/zenodo.14943128},
  url       = {https://doi.org/10.5281/zenodo.14943128},
  poster    = {https://zenodo.org/records/14944575}
}

2024

Maxim Konca, Alexander Mehler, Andy Lücking and Daniel Baumartz. 2024. Visualizing Domain-specific and Generic Critical Online Reasoning Related Structures of Online Texts: A Hybrid Approach. In: Students', Graduates' and Young Professionals' Critical Use of Online Information: Digital Performance Assessment and Training within and across Domains, 195–239. Ed. by Olga Zlatkin-Troitschanskaia, Marie-Theres Nagel, Verena Klose and Alexander Mehler. Springer Nature Switzerland.

BibTeX

@inbook{Konca:et:al:2024:a,
  author    = {Konca, Maxim and Mehler, Alexander and L{\"u}cking, Andy and Baumartz, Daniel},
  editor    = {Zlatkin-Troitschanskaia, Olga and Nagel, Marie-Theres and Klose, Verena
               and Mehler, Alexander},
  title     = {Visualizing Domain-specific and Generic Critical Online Reasoning
               Related Structures of Online Texts: A Hybrid Approach},
  booktitle = {Students', Graduates' and Young Professionals' Critical Use of
               Online Information: Digital Performance Assessment and Training
               within and across Domains},
  year      = {2024},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  pages     = {195--239},
  abstract  = {Besides ``traditional'' educational media, young professionals
               in higher education use the Internet to obtain information. To
               utilize their online research in professional contexts, they critically
               evaluate the information they access and its sources. One dimension
               of this evaluation is an assessment of the linguistic state of
               the online sources, either implicitly or explicitly. This computational
               educational linguistic study applies methods from computational
               linguistics to online sources visited by young professionals from
               three fields (law students, teacher trainees, and medicine student)
               and develops partly novel visualizations that allow to quickly
               discover similarities as well as differences between multi-heterogeneous
               Internet sources, that is, sources that exhibit various topics,
               genres, and textual structure, among others. The visualizations
               also allow a comparison of search behaviour between different
               professional fields. In this way, we found that (1) genre classification
               has a significant impact on reliability scores, (2) young professionals'
               search approaches vary by their professional field, and, (3) the
               best predictor of reliability is indeed the linguistic profile
               of an online source.},
  isbn      = {978-3-031-69510-0},
  doi       = {10.1007/978-3-031-69510-0_10},
  url       = {https://doi.org/10.1007/978-3-031-69510-0_10}
}

Daniel Baumartz, Maxim Konca, Alexander Mehler, Patrick Schrottenbacher and Dominik Braunheim. 2024. Measuring Group Creativity of Dialogic Interaction Systems by Means of Remote Entailment Analysis. Proceedings of the 35th ACM Conference on Hypertext and Social Media, 153––166.

BibTeX

@inproceedings{Baumartz:et:al:2024,
  author    = {Baumartz, Daniel and Konca, Maxim and Mehler, Alexander and Schrottenbacher, Patrick
               and Braunheim, Dominik},
  title     = {Measuring Group Creativity of Dialogic Interaction Systems by
               Means of Remote Entailment Analysis},
  year      = {2024},
  isbn      = {9798400705953},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3648188.3675140},
  doi       = {10.1145/3648188.3675140},
  abstract  = {We present a procedure for assessing group creativity that allows
               us to compare the contributions of human interlocutors and chatbots
               based on generative AI such as ChatGPT. We focus on everyday creativity
               in terms of dialogic communication and test four hypotheses about
               the difference between human and artificial communication. Our
               procedure is based on a test that requires interlocutors to cooperatively
               interpret a sequence of sentences for which we control for coherence
               gaps with reference to the notion of entailment. Using NLP methods,
               we automatically evaluate the spoken or written contributions
               of interlocutors (human or otherwise). The paper develops a routine
               for automatic transcription based on Whisper, for sampling texts
               based on their entailment relations, for analyzing dialogic contributions
               along their semantic embeddings, and for classifying interlocutors
               and interaction systems based on them. In this way, we highlight
               differences between human and artificial conversations under conditions
               that approximate free dialogic communication. We show that despite
               their obvious classificatory differences, it is difficult to see
               clear differences even in the domain of dialogic communication
               given the current instruments of NLP.},
  booktitle = {Proceedings of the 35th ACM Conference on Hypertext and Social Media},
  pages     = {153–-166},
  numpages  = {14},
  keywords  = {Creative AI, Creativity, Generative AI, Hermeneutics, NLP, core, core_b05, core_c08},
  location  = {Poznan, Poland},
  series    = {HT '24}
}

Andy Lücking, Giuseppe Abrami, Leon Hammerla, Marc Rahn, Daniel Baumartz, Steffen Eger and Alexander Mehler. May, 2024. Dependencies over Times and Tools (DoTT). Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), 4641–4653.

BibTeX

@inproceedings{Luecking:et:al:2024,
  abstract  = {Purpose: Based on the examples of English and German, we investigate
               to what extent parsers trained on modern variants of these languages
               can be transferred to older language levels without loss. Methods:
               We developed a treebank called DoTT (https://github.com/texttechnologylab/DoTT)
               which covers, roughly, the time period from 1800 until today,
               in conjunction with the further development of the annotation
               tool DependencyAnnotator. DoTT consists of a collection of diachronic
               corpora enriched with dependency annotations using 3 parsers,
               6 pre-trained language models, 5 newly trained models for German,
               and two tag sets (TIGER and Universal Dependencies). To assess
               how the different parsers perform on texts from different time
               periods, we created a gold standard sample as a benchmark. Results:
               We found that the parsers/models perform quite well on modern
               texts (document-level LAS ranging from 82.89 to 88.54) and slightly
               worse on older texts, as expected (average document-level LAS
               84.60 vs. 86.14), but not significantly. For German texts, the
               (German) TIGER scheme achieved slightly better results than UD.
               Conclusion: Overall, this result speaks for the transferability
               of parsers to past language levels, at least dating back until
               around 1800. This very transferability, it is however argued,
               means that studies of language change in the field of dependency
               syntax can draw on dependency distance but miss out on some grammatical
               phenomena.},
  address   = {Torino, Italy},
  author    = {L{\"u}cking, Andy and Abrami, Giuseppe and Hammerla, Leon and Rahn, Marc
               and Baumartz, Daniel and Eger, Steffen and Mehler, Alexander},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational
               Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  editor    = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro
               and Sakti, Sakriani and Xue, Nianwen},
  month     = {may},
  pages     = {4641--4653},
  publisher = {ELRA and ICCL},
  title     = {Dependencies over Times and Tools ({D}o{TT})},
  url       = {https://aclanthology.org/2024.lrec-main.415},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2024/05/LREC_2024_Poster_DoTT.pdf},
  year      = {2024}
}

2023

Alexander Leonhardt, Giuseppe Abrami, Daniel Baumartz and Alexander Mehler. 2023. Unlocking the Heterogeneous Landscape of Big Data NLP with DUUI. Findings of the Association for Computational Linguistics: EMNLP 2023, 385–399.

BibTeX

@inproceedings{Leonhardt:et:al:2023,
  title     = {Unlocking the Heterogeneous Landscape of Big Data {NLP} with {DUUI}},
  author    = {Leonhardt, Alexander and Abrami, Giuseppe and Baumartz, Daniel
               and Mehler, Alexander},
  editor    = {Bouamor, Houda and Pino, Juan and Bali, Kalika},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2023},
  year      = {2023},
  address   = {Singapore},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.findings-emnlp.29},
  pages     = {385--399},
  pdf       = {https://aclanthology.org/2023.findings-emnlp.29.pdf},
  abstract  = {Automatic analysis of large corpora is a complex task, especially
               in terms of time efficiency. This complexity is increased by the
               fact that flexible, extensible text analysis requires the continuous
               integration of ever new tools. Since there are no adequate frameworks
               for these purposes in the field of NLP, and especially in the
               context of UIMA, that are not outdated or unusable for security
               reasons, we present a new approach to address the latter task:
               Docker Unified UIMA Interface (DUUI), a scalable, flexible, lightweight,
               and feature-rich framework for automatic distributed analysis
               of text corpora that leverages Big Data experience and virtualization
               with Docker. We evaluate DUUI{'}s communication approach against
               a state-of-the-art approach and demonstrate its outstanding behavior
               in terms of time efficiency, enabling the analysis of big text
               data.},
  keywords  = {duui, core, core_c08}
}

Alexander Henlein, Attila Kett, Daniel Baumartz, Giuseppe Abrami, Alexander Mehler, Johannes Bastian, Yannic Blecher, David Budgenhagen, Roman Christof, Tim-Oliver Ewald, Tim Fauerbach, Patrick Masny, Julian Mende, Paul Schnüre and Marc Viel. 2023. Semantic Scene Builder: Towards a Context Sensitive Text-to-3D Scene Framework. Digital Human Modeling and Applications in Health, Safety, Ergonomics and Risk Management, 461–479.

BibTeX

@inproceedings{Henlein:et:al:2023b,
  author    = {Henlein, Alexander and Kett, Attila and Baumartz, Daniel and Abrami, Giuseppe
               and Mehler, Alexander and Bastian, Johannes and Blecher, Yannic and Budgenhagen, David
               and Christof, Roman and Ewald, Tim-Oliver and Fauerbach, Tim and Masny, Patrick
               and Mende, Julian and Schn{\"u}re, Paul and Viel, Marc},
  editor    = {Duffy, Vincent G.},
  title     = {Semantic Scene Builder: Towards a Context Sensitive Text-to-3D Scene Framework},
  booktitle = {Digital Human Modeling and Applications in Health, Safety, Ergonomics
               and Risk Management},
  year      = {2023},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  pages     = {461--479},
  abstract  = {We introduce Semantic Scene Builder (SeSB), a VR-based text-to-3D
               scene framework using SemAF (Semantic Annotation Framework) as
               a scheme for annotating discourse structures. SeSB integrates
               a variety of tools and resources by using SemAF and UIMA as a
               unified data structure to generate 3D scenes from textual descriptions.
               Based on VR, SeSB allows its users to change annotations through
               body movements instead of symbolic manipulations: from annotations
               in texts to corrections in editing steps to adjustments in generated
               scenes, all this is done by grabbing and moving objects. We evaluate
               SeSB in comparison with a state-of-the-art open source text-to-scene
               method (the only one which is publicly available) and find that
               our approach not only performs better, but also allows for modeling
               a greater variety of scenes.},
  isbn      = {978-3-031-35748-0},
  doi       = {10.1007/978-3-031-35748-0_32},
  keywords  = {gemdis}
}

2021

Maxim Konca, Alexander Mehler, Daniel Baumartz and Wahed Hemati. 2021. From distinguishability to informativity. A quantitative text model for detecting random texts.. Language and Text: Data, models, information and applications, 356:145–162.

BibTeX

@article{Konca:et:al:2021,
  title     = {From distinguishability to informativity. A quantitative text
               model for detecting random texts.},
  author    = {Konca, Maxim and Mehler, Alexander and Baumartz, Daniel and Hemati, Wahed},
  journal   = {Language and Text: Data, models, information and applications},
  volume    = {356},
  pages     = {145--162},
  year      = {2021},
  editor    = {Adam Paw{\l}owski, Jan Ma{\v{c}}utek, Sheila Embleton and George Mikros},
  publisher = {John Benjamins Publishing Company},
  doi       = {10.1075/cilt.356.10kon}
}

Alexander Mehler, Daniel Baumartz and Tolga Uslu. 2021. SemioGraphs: Visualizing Topic Networks as Mulit-Codal Graphs. International Quantitative Linguistics Conference (QUALICO 2021).

BibTeX

@inproceedings{Mehler:Uslu:Baumartz:2021,
  author    = {Mehler, Alexander and Baumartz, Daniel and Uslu, Tolga},
  title     = {{SemioGraphs:} Visualizing Topic Networks as Mulit-Codal Graphs},
  booktitle = {International Quantitative Linguistics Conference (QUALICO 2021)},
  series    = {QUALICO 2021},
  location  = {Tokyo, Japan},
  year      = {2021},
  poster    = {https://www.texttechnologylab.org/files/Qualico_2021_Semiograph_Poster.pdf}
}

2020

Daniel Baumartz. June, 2020. BA Thesis: Automatic Topic Modeling in the Context of Digital Libraries: Mehrsprachige Korpus-basierte Erweiterung von text2ddc - eine experimentelle Studie.

BibTeX

@bathesis{Baumartz:2020,
  author    = {Baumartz, Daniel},
  title     = {{Automatic Topic Modeling in the Context of Digital Libraries:
               Mehrsprachige Korpus-basierte Erweiterung von text2ddc - eine
               experimentelle Studie}},
  year      = {2020},
  month     = {6},
  school    = {Johann Wolfgang Goethe-Universität, Institute of Computer
Science and Mathematics, Text Technology Lab},
  address   = {Frankfurt, Germany},
  url       = {https://publikationen.ub.uni-frankfurt.de/frontdoor/index/index/docId/56381},
  pdf       = {https://publikationen.ub.uni-frankfurt.de/files/56381/baumartz_bachelorarbeit_2020_pub.pdf}
}

Alexander Mehler, Bernhard Jussen, Tim Geelhaar, Alexander Henlein, Giuseppe Abrami, Daniel Baumartz, Tolga Uslu and Wahed Hemati. 2020. The Frankfurt Latin Lexicon. From Morphological Expansion and Word Embeddings to SemioGraphs. Studi e Saggi Linguistici, 58(1):121–155.

BibTeX

@article{Mehler:et:al:2020b,
  author    = {Mehler, Alexander and Jussen, Bernhard and Geelhaar, Tim and Henlein, Alexander
               and Abrami, Giuseppe and Baumartz, Daniel and Uslu, Tolga and Hemati, Wahed},
  title     = {{The Frankfurt Latin Lexicon. From Morphological Expansion and
               Word Embeddings to SemioGraphs}},
  journal   = {Studi e Saggi Linguistici},
  doi       = {10.4454/ssl.v58i1.276},
  year      = {2020},
  volume    = {58},
  number    = {1},
  pages     = {121--155},
  abstract  = {In this article we present the Frankfurt Latin Lexicon (FLL),
               a lexical resource for Medieval Latin that is used both for the
               lemmatization of Latin texts and for the post-editing of lemmatizations.
               We describe recent advances in the development of lemmatizers
               and test them against the Capitularies corpus (comprising Frankish
               royal edicts, mid-6th to mid-9th century), a corpus created as
               a reference for processing Medieval Latin. We also consider the
               post-correction of lemmatizations using a limited crowdsourcing
               process aimed at continuous review and updating of the FLL. Starting
               from the texts resulting from this lemmatization process, we describe
               the extension of the FLL by means of word embeddings, whose interactive
               traversing by means of SemioGraphs completes the digital enhanced
               hermeneutic circle. In this way, the article argues for a more
               comprehensive understanding of lemmatization, encompassing classical
               machine learning as well as intellectual post-corrections and,
               in particular, human computation in the form of interpretation
               processes based on graph representations of the underlying lexical
               resources.},
  url       = {https://www.studiesaggilinguistici.it/index.php/ssl/article/view/276},
  pdf       = {https://www.studiesaggilinguistici.it/index.php/ssl/article/download/276/219}
}

2019

Tolga Uslu, Alexander Mehler and Daniel Baumartz. 2019. Computing Classifier-based Embeddings with the Help of text2ddc. Proceedings of the 20th International Conference on Computational Linguistics and Intelligent Text Processing, (CICLing 2019).

BibTeX

@inproceedings{Uslu:Mehler:Baumartz:2019,
  author    = {Uslu, Tolga and Mehler, Alexander and Baumartz, Daniel},
  booktitle = {{Proceedings of the 20th International Conference on Computational
               Linguistics and Intelligent Text Processing, (CICLing 2019)}},
  location  = {La Rochelle, France},
  series    = {{CICLing 2019}},
  title     = {{Computing Classifier-based Embeddings with the Help of text2ddc}},
  year      = {2019}
}

Tolga Uslu, Alexander Mehler, Clemens Schulz and Daniel Baumartz. 2019. BigSense: a Word Sense Disambiguator for Big Data. Proceedings of the Digital Humanities 2019, (DH2019).

BibTeX

@inproceedings{Uslu:Mehler:Schulz:Baumartz:2019,
  author    = {Uslu, Tolga and Mehler, Alexander and Schulz, Clemens and Baumartz, Daniel},
  booktitle = {{Proceedings of the Digital Humanities 2019, (DH2019)}},
  location  = {Utrecht, Netherlands},
  series    = {{DH2019}},
  title     = {{{BigSense}: a Word Sense Disambiguator for Big Data}},
  year      = {2019},
  url       = {https://dev.clariah.nl/files/dh2019/boa/0199.html}
}

Alexander Mehler, Tolga Uslu, Rüdiger Gleim and Daniel Baumartz. 2019. text2ddc meets Literature - Ein Verfahren für die Analyse und Visualisierung thematischer Makrostrukturen. Proceedings of the 6th Digital Humanities Conference in the German-speaking Countries, DHd 2019.

BibTeX

@inproceedings{Mehler:Uslu:Gleim:Baumartz:2019,
  author    = {Mehler, Alexander and Uslu, Tolga and Gleim, Rüdiger and Baumartz, Daniel},
  title     = {{text2ddc meets Literature - Ein Verfahren für die Analyse und
               Visualisierung thematischer Makrostrukturen}},
  booktitle = {Proceedings of the 6th Digital Humanities Conference in the German-speaking
               Countries, DHd 2019},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/DHD_Poster___text2ddc_meets_Literature_Poster.pdf},
  series    = {DHd 2019},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/Preprint_DHd2019_text2ddc_meets_Literature.pdf},
  location  = {Frankfurt, Germany},
  year      = {2019}
}

2018

Daniel Baumartz, Tolga Uslu and Alexander Mehler. 2018. LTV: Labeled Topic Vector. Proceedings of COLING 2018, the 27th International Conference on Computational Linguistics: System Demonstrations, August 20-26.

BibTeX

@inproceedings{Baumartz:Uslu:Mehler:2018,
  author    = {Daniel Baumartz and Tolga Uslu and Alexander Mehler},
  title     = {{LTV}: Labeled Topic Vector},
  booktitle = {Proceedings of {COLING 2018}, the 27th International Conference
               on Computational Linguistics: System Demonstrations, August 20-26},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {The COLING 2018 Organizing Committee},
  abstract  = {In this paper, we present LTV, a website and an API that generate
               labeled topic classifications based on the Dewey Decimal Classification
               (DDC), an international standard for topic classification in libraries.
               We introduce nnDDC, a largely language-independent neural network-based
               classifier for DDC-related topic classification, which we optimized
               using a wide range of linguistic features to achieve an F-score
               of 87.4\%. To show that our approach is language-independent,
               we evaluate nnDDC using up to 40 different languages. We derive
               a topic model based on nnDDC, which generates probability distributions
               over semantic units for any input on sense-, word- and text-level.
               Unlike related approaches, however, these probabilities are estimated
               by means of nnDDC so that each dimension of the resulting vector
               representation is uniquely labeled by a DDC class. In this way,
               we introduce a neural network-based Classifier-Induced Semantic
               Space (nnCISS).},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/06/coling2018.pdf}
}

Wahed Hemati, Alexander Mehler, Tolga Uslu, Daniel Baumartz and Giuseppe Abrami. 2018. Evaluating and Integrating Databases in the Area of NLP. International Quantitative Linguistics Conference (QUALICO 2018).

BibTeX

@inproceedings{Hemati:Mehler:Uslu:Baumartz:Abrami:2018,
  author    = {Wahed Hemati and Alexander Mehler and Tolga Uslu and Daniel Baumartz
               and Giuseppe Abrami},
  title     = {Evaluating and Integrating Databases in the Area of {NLP}},
  booktitle = {International Quantitative Linguistics Conference (QUALICO 2018)},
  year      = {2018},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/04/Hemat-Mehler-Uslu-Baumartz-Abrami-Qualico-2018.pdf},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2018/10/qualico2018_databases_poster_hemati_mehler_uslu_baumartz_abrami.pdf},
  location  = {Wroclaw, Poland}
}

Alexander Mehler, Wahed Hemati, Rüdiger Gleim and Daniel Baumartz. 2018. VienNA: Auf dem Weg zu einer Infrastruktur für die verteilte interaktive evolutionäre Verarbeitung natürlicher Sprache. Forschungsinfrastrukturen und digitale Informationssysteme in der germanistischen Sprachwissenschaft, 6.

BibTeX

@incollection{Mehler:Hemati:Gleim:Baumartz:2018,
  author    = {Alexander Mehler and Wahed Hemati and Rüdiger Gleim and Daniel Baumartz},
  title     = {{VienNA: }{Auf dem Weg zu einer Infrastruktur für die verteilte
               interaktive evolutionäre Verarbeitung natürlicher Sprache}},
  booktitle = {Forschungsinfrastrukturen und digitale Informationssysteme in
               der germanistischen Sprachwissenschaft},
  publisher = {De Gruyter},
  editor    = {Henning Lobin and Roman Schneider and Andreas Witt},
  volume    = {6},
  address   = {Berlin},
  year      = {2018}
}

Tolga Uslu, Alexander Mehler, Andreas Niekler and Daniel Baumartz. 2018. Towards a DDC-based Topic Network Model of Wikipedia. Proceedings of 2nd International Workshop on Modeling, Analysis, and Management of Social Networks and their Applications (SOCNET 2018), February 28, 2018.

BibTeX

@inproceedings{Uslu:Mehler:Niekler:Baumartz:2018,
  author    = {Tolga Uslu and Alexander Mehler and Andreas Niekler and Daniel Baumartz},
  title     = {Towards a {DDC}-based Topic Network Model of Wikipedia},
  booktitle = {Proceedings of 2nd International Workshop on Modeling, Analysis,
               and Management of Social Networks and their Applications (SOCNET
               2018), February 28, 2018},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/TowardsDDC.pdf},
  year      = {2018}
}

Tolga Uslu, Alexander Mehler, Daniel Baumartz, Alexander Henlein and Wahed Hemati. 2018. fastSense: An Efficient Word Sense Disambiguation Classifier. Proceedings of the 11th edition of the Language Resources and Evaluation Conference, May 7 - 12.

BibTeX

@inproceedings{Uslu:et:al:2018,
  author    = {Tolga Uslu and Alexander Mehler and Daniel Baumartz and Alexander Henlein
               and Wahed Hemati},
  title     = {fastSense: An Efficient Word Sense Disambiguation Classifier},
  booktitle = {Proceedings of the 11th edition of the Language Resources and
               Evaluation Conference, May 7 - 12},
  series    = {LREC 2018},
  address   = {Miyazaki, Japan},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/fastSense.pdf},
  year      = {2018}
}

2017

Tolga Uslu, Wahed Hemati, Alexander Mehler and Daniel Baumartz. 2017. TextImager as a Generic Interface to R. Software Demonstrations of the 15th Conference of the European Chapter of the Association for Computational Linguistics (EACL 2017).

BibTeX

@inproceedings{Uslu:Hemati:Mehler:Baumartz:2017,
  author    = {Tolga Uslu and Wahed Hemati and Alexander Mehler and Daniel Baumartz},
  title     = {{TextImager} as a Generic Interface to {R}},
  booktitle = {Software Demonstrations of the 15th Conference of the European
               Chapter of the Association for Computational Linguistics (EACL
               2017)},
  location  = {Valencia, Spain},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/TextImager.pdf},
  year      = {2017}
}