Daniel Baumartz

PhD Student

Goethe-Universität Frankfurt am Main
Robert-Mayer-Straße 10
Room 401c
D-60325 Frankfurt am Main
D-60054 Frankfurt am Main (use for package delivery)
Postfach / P.O. Box: 154
Phone:
Mail:

Office Hour: TBA

Publications

2024

Andy Lücking, Giuseppe Abrami, Leon Hammerla, Marc Rahn, Daniel Baumartz, Steffen Eger and Alexander Mehler. May, 2024. Dependencies over Times and Tools (DoTT). Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), 4641–4653.
BibTeX
@inproceedings{Luecking:et:al:2024,
  abstract  = {Purpose: Based on the examples of English and German, we investigate
               to what extent parsers trained on modern variants of these languages
               can be transferred to older language levels without loss. Methods:
               We developed a treebank called DoTT (https://github.com/texttechnologylab/DoTT)
               which covers, roughly, the time period from 1800 until today,
               in conjunction with the further development of the annotation
               tool DependencyAnnotator. DoTT consists of a collection of diachronic
               corpora enriched with dependency annotations using 3 parsers,
               6 pre-trained language models, 5 newly trained models for German,
               and two tag sets (TIGER and Universal Dependencies). To assess
               how the different parsers perform on texts from different time
               periods, we created a gold standard sample as a benchmark. Results:
               We found that the parsers/models perform quite well on modern
               texts (document-level LAS ranging from 82.89 to 88.54) and slightly
               worse on older texts, as expected (average document-level LAS
               84.60 vs. 86.14), but not significantly. For German texts, the
               (German) TIGER scheme achieved slightly better results than UD.
               Conclusion: Overall, this result speaks for the transferability
               of parsers to past language levels, at least dating back until
               around 1800. This very transferability, it is however argued,
               means that studies of language change in the field of dependency
               syntax can draw on dependency distance but miss out on some grammatical
               phenomena.},
  address   = {Torino, Italy},
  author    = {L{\"u}cking, Andy and Abrami, Giuseppe and Hammerla, Leon and Rahn, Marc
               and Baumartz, Daniel and Eger, Steffen and Mehler, Alexander},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational
               Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  editor    = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro
               and Sakti, Sakriani and Xue, Nianwen},
  month     = may,
  pages     = {4641--4653},
  publisher = {ELRA and ICCL},
  title     = {Dependencies over Times and Tools ({DoTT})},
  url       = {https://aclanthology.org/2024.lrec-main.415},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2024/05/LREC_2024_Poster_DoTT.pdf},
  year      = {2024}
}

2023

Alexander Henlein, Attila Kett, Daniel Baumartz, Giuseppe Abrami, Alexander Mehler, Johannes Bastian, Yannic Blecher, David Budgenhagen, Roman Christof, Tim-Oliver Ewald, Tim Fauerbach, Patrick Masny, Julian Mende, Paul Schnüre and Marc Viel. 2023. Semantic Scene Builder: Towards a context sensitive Text-to-3D Scene Framework. Semantic, artificial and computational interaction studies: Towards a behavioromics of multimodal communication, Held as Part of the 25th HCI International Conference, HCII 2023, Copenhagen, Denmark, July 23–28, 2023, Proceedings. accepted.
BibTeX
@inproceedings{Henlein:et:al:2023b,
  author    = {Henlein, Alexander and Kett, Attila and Baumartz, Daniel and Abrami, Giuseppe
               and Mehler, Alexander and Bastian, Johannes and Blecher, Yannic and Budgenhagen, David
               and Christof, Roman and Ewald, Tim-Oliver and Fauerbach, Tim and Masny, Patrick
               and Mende, Julian and Schn{\"u}re, Paul and Viel, Marc},
  booktitle = {Semantic, artificial and computational interaction studies: Towards
               a behavioromics of multimodal communication, Held as Part of the
               25th HCI International Conference, HCII 2023, Copenhagen, Denmark,
               July 23--28, 2023, Proceedings},
  note      = {accepted},
  publisher = {Springer},
  title     = {{Semantic Scene Builder}: Towards a context sensitive {Text-to-3D} Scene Framework},
  year      = {2023}
}
Alexander Leonhardt, Giuseppe Abrami, Daniel Baumartz and Alexander Mehler. 2023. Unlocking the Heterogeneous Landscape of Big Data NLP with DUUI. Findings of the Association for Computational Linguistics: EMNLP 2023, 385–399.
BibTeX
@inproceedings{Leonhardt:et:al:2023,
  author    = {Leonhardt, Alexander and Abrami, Giuseppe and Baumartz, Daniel
               and Mehler, Alexander},
  title     = {Unlocking the Heterogeneous Landscape of Big Data {NLP} with {DUUI}},
  editor    = {Bouamor, Houda and Pino, Juan and Bali, Kalika},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2023},
  pages     = {385--399},
  publisher = {Association for Computational Linguistics},
  address   = {Singapore},
  year      = {2023},
  abstract  = {Automatic analysis of large corpora is a complex task, especially
               in terms of time efficiency. This complexity is increased by the
               fact that flexible, extensible text analysis requires the continuous
               integration of ever new tools. Since there are no adequate frameworks
               for these purposes in the field of NLP, and especially in the
               context of UIMA, that are not outdated or unusable for security
               reasons, we present a new approach to address the latter task:
               Docker Unified UIMA Interface (DUUI), a scalable, flexible, lightweight,
               and feature-rich framework for automatic distributed analysis
               of text corpora that leverages Big Data experience and virtualization
               with Docker. We evaluate DUUI{'}s communication approach against
               a state-of-the-art approach and demonstrate its outstanding behavior
               in terms of time efficiency, enabling the analysis of big text
               data.},
  url       = {https://aclanthology.org/2023.findings-emnlp.29},
  pdf       = {https://aclanthology.org/2023.findings-emnlp.29.pdf}
}

2021

Alexander Mehler, Daniel Baumartz and Tolga Uslu. 2021. SemioGraphs: Visualizing Topic Networks as Multi-Codal Graphs. International Quantitative Linguistics Conference (QUALICO 2021).
BibTeX
@inproceedings{Mehler:Uslu:Baumartz:2021,
  author    = {Mehler, Alexander and Baumartz, Daniel and Uslu, Tolga},
  title     = {{SemioGraphs}: Visualizing Topic Networks as Multi-Codal Graphs},
  booktitle = {International Quantitative Linguistics Conference (QUALICO 2021)},
  series    = {QUALICO 2021},
  location  = {Tokyo, Japan},
  year      = {2021},
  poster    = {https://www.texttechnologylab.org/files/Qualico_2021_Semiograph_Poster.pdf}
}
Maxim Konca, Alexander Mehler, Daniel Baumartz and Wahed Hemati. 2021. From distinguishability to informativity. A quantitative text model for detecting random texts. Language and Text: Data, models, information and applications, 356:145–162.
BibTeX
@article{Konca:et:al:2021,
  title     = {From distinguishability to informativity. {A} quantitative text
               model for detecting random texts},
  author    = {Konca, Maxim and Mehler, Alexander and Baumartz, Daniel and Hemati, Wahed},
  journal   = {Language and Text: Data, models, information and applications},
  volume    = {356},
  pages     = {145--162},
  year      = {2021},
  editor    = {Paw{\l}owski, Adam and Ma{\v{c}}utek, Jan and Embleton, Sheila
               and Mikros, George},
  publisher = {John Benjamins Publishing Company},
  doi       = {10.1075/cilt.356.10kon}
}

2020

Alexander Mehler, Bernhard Jussen, Tim Geelhaar, Alexander Henlein, Giuseppe Abrami, Daniel Baumartz, Tolga Uslu and Wahed Hemati. 2020. The Frankfurt Latin Lexicon. From Morphological Expansion and Word Embeddings to SemioGraphs. Studi e Saggi Linguistici, 58(1):121–155.
BibTeX
@article{Mehler:et:al:2020b,
  author    = {Mehler, Alexander and Jussen, Bernhard and Geelhaar, Tim and Henlein, Alexander
               and Abrami, Giuseppe and Baumartz, Daniel and Uslu, Tolga and Hemati, Wahed},
  title     = {The {Frankfurt Latin Lexicon}. {From} Morphological Expansion and
               Word Embeddings to {SemioGraphs}},
  journal   = {Studi e Saggi Linguistici},
  doi       = {10.4454/ssl.v58i1.276},
  year      = {2020},
  volume    = {58},
  number    = {1},
  pages     = {121--155},
  abstract  = {In this article we present the Frankfurt Latin Lexicon (FLL),
               a lexical resource for Medieval Latin that is used both for the
               lemmatization of Latin texts and for the post-editing of lemmatizations.
               We describe recent advances in the development of lemmatizers
               and test them against the Capitularies corpus (comprising Frankish
               royal edicts, mid-6th to mid-9th century), a corpus created as
               a reference for processing Medieval Latin. We also consider the
               post-correction of lemmatizations using a limited crowdsourcing
               process aimed at continuous review and updating of the FLL. Starting
               from the texts resulting from this lemmatization process, we describe
               the extension of the FLL by means of word embeddings, whose interactive
               traversing by means of SemioGraphs completes the digital enhanced
               hermeneutic circle. In this way, the article argues for a more
               comprehensive understanding of lemmatization, encompassing classical
               machine learning as well as intellectual post-corrections and,
               in particular, human computation in the form of interpretation
               processes based on graph representations of the underlying lexical
               resources.},
  url       = {https://www.studiesaggilinguistici.it/index.php/ssl/article/view/276},
  pdf       = {https://www.studiesaggilinguistici.it/index.php/ssl/article/download/276/219}
}
Daniel Baumartz. June, 2020. BA Thesis: Automatic Topic Modeling in the Context of Digital Libraries: Mehrsprachige Korpus-basierte Erweiterung von text2ddc - eine experimentelle Studie.
BibTeX
@bathesis{Baumartz:2020,
  author    = {Baumartz, Daniel},
  title     = {Automatic Topic Modeling in the Context of Digital Libraries:
               {Mehrsprachige Korpus-basierte Erweiterung von text2ddc - eine
               experimentelle Studie}},
  year      = {2020},
  month     = jun,
  school    = {Johann Wolfgang Goethe-Universit{\"a}t, Institute of Computer
               Science and Mathematics, Text Technology Lab},
  address   = {Frankfurt, Germany},
  url       = {https://publikationen.ub.uni-frankfurt.de/frontdoor/index/index/docId/56381},
  pdf       = {https://publikationen.ub.uni-frankfurt.de/files/56381/baumartz_bachelorarbeit_2020_pub.pdf}
}

2019

Alexander Mehler, Tolga Uslu, Rüdiger Gleim and Daniel Baumartz. 2019. text2ddc meets Literature - Ein Verfahren für die Analyse und Visualisierung thematischer Makrostrukturen. Proceedings of the 6th Digital Humanities Conference in the German-speaking Countries, DHd 2019.
BibTeX
@inproceedings{Mehler:Uslu:Gleim:Baumartz:2019,
  author    = {Mehler, Alexander and Uslu, Tolga and Gleim, R{\"u}diger and Baumartz, Daniel},
  title     = {{text2ddc meets Literature - Ein Verfahren f{\"u}r die Analyse und
               Visualisierung thematischer Makrostrukturen}},
  booktitle = {Proceedings of the 6th Digital Humanities Conference in the German-speaking
               Countries, DHd 2019},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/DHD_Poster___text2ddc_meets_Literature_Poster.pdf},
  series    = {DHd 2019},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/Preprint_DHd2019_text2ddc_meets_Literature.pdf},
  location  = {Frankfurt, Germany},
  year      = {2019}
}
Tolga Uslu, Alexander Mehler, Clemens Schulz and Daniel Baumartz. 2019. BigSense: a Word Sense Disambiguator for Big Data. Proceedings of the Digital Humanities 2019, (DH2019).
BibTeX
@inproceedings{Uslu:Mehler:Schulz:Baumartz:2019,
  author    = {Uslu, Tolga and Mehler, Alexander and Schulz, Clemens and Baumartz, Daniel},
  booktitle = {Proceedings of the Digital Humanities 2019, (DH2019)},
  location  = {Utrecht, Netherlands},
  series    = {DH2019},
  title     = {{BigSense}: a Word Sense Disambiguator for Big Data},
  year      = {2019},
  url       = {https://dev.clariah.nl/files/dh2019/boa/0199.html}
}
Tolga Uslu, Alexander Mehler and Daniel Baumartz. 2019. Computing Classifier-based Embeddings with the Help of text2ddc. Proceedings of the 20th International Conference on Computational Linguistics and Intelligent Text Processing, (CICLing 2019).
BibTeX
@inproceedings{Uslu:Mehler:Baumartz:2019,
  author    = {Uslu, Tolga and Mehler, Alexander and Baumartz, Daniel},
  booktitle = {Proceedings of the 20th International Conference on Computational
               Linguistics and Intelligent Text Processing, (CICLing 2019)},
  location  = {La Rochelle, France},
  series    = {CICLing 2019},
  title     = {Computing Classifier-based Embeddings with the Help of {text2ddc}},
  year      = {2019}
}

2018

Tolga Uslu, Alexander Mehler, Daniel Baumartz, Alexander Henlein and Wahed Hemati. 2018. fastSense: An Efficient Word Sense Disambiguation Classifier. Proceedings of the 11th edition of the Language Resources and Evaluation Conference, May 7 - 12.
BibTeX
@inproceedings{Uslu:et:al:2018,
  author    = {Uslu, Tolga and Mehler, Alexander and Baumartz, Daniel and Henlein, Alexander
               and Hemati, Wahed},
  title     = {{fastSense}: An Efficient Word Sense Disambiguation Classifier},
  booktitle = {Proceedings of the 11th edition of the Language Resources and
               Evaluation Conference, May 7--12},
  series    = {LREC 2018},
  address   = {Miyazaki, Japan},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/fastSense.pdf},
  year      = {2018}
}
Tolga Uslu, Alexander Mehler, Andreas Niekler and Daniel Baumartz. 2018. Towards a DDC-based Topic Network Model of Wikipedia. Proceedings of 2nd International Workshop on Modeling, Analysis, and Management of Social Networks and their Applications (SOCNET 2018), February 28, 2018.
BibTeX
@inproceedings{Uslu:Mehler:Niekler:Baumartz:2018,
  author    = {Uslu, Tolga and Mehler, Alexander and Niekler, Andreas and Baumartz, Daniel},
  title     = {Towards a {DDC}-based Topic Network Model of {Wikipedia}},
  booktitle = {Proceedings of 2nd International Workshop on Modeling, Analysis,
               and Management of Social Networks and their Applications (SOCNET
               2018), February 28, 2018},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/TowardsDDC.pdf},
  year      = {2018}
}
Alexander Mehler, Wahed Hemati, Rüdiger Gleim and Daniel Baumartz. 2018. VienNA: Auf dem Weg zu einer Infrastruktur für die verteilte interaktive evolutionäre Verarbeitung natürlicher Sprache. Forschungsinfrastrukturen und digitale Informationssysteme in der germanistischen Sprachwissenschaft, 6.
BibTeX
@incollection{Mehler:Hemati:Gleim:Baumartz:2018,
  author    = {Mehler, Alexander and Hemati, Wahed and Gleim, R{\"u}diger and Baumartz, Daniel},
  title     = {{VienNA: Auf dem Weg zu einer Infrastruktur f{\"u}r die verteilte
               interaktive evolution{\"a}re Verarbeitung nat{\"u}rlicher Sprache}},
  booktitle = {Forschungsinfrastrukturen und digitale Informationssysteme in
               der germanistischen Sprachwissenschaft},
  publisher = {De Gruyter},
  editor    = {Lobin, Henning and Schneider, Roman and Witt, Andreas},
  volume    = {6},
  address   = {Berlin},
  year      = {2018}
}
Wahed Hemati, Alexander Mehler, Tolga Uslu, Daniel Baumartz and Giuseppe Abrami. 2018. Evaluating and Integrating Databases in the Area of NLP. International Quantitative Linguistics Conference (QUALICO 2018).
BibTeX
@inproceedings{Hemati:Mehler:Uslu:Baumartz:Abrami:2018,
  author    = {Hemati, Wahed and Mehler, Alexander and Uslu, Tolga and Baumartz, Daniel
               and Abrami, Giuseppe},
  title     = {Evaluating and Integrating Databases in the Area of {NLP}},
  booktitle = {International Quantitative Linguistics Conference (QUALICO 2018)},
  year      = {2018},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/04/Hemat-Mehler-Uslu-Baumartz-Abrami-Qualico-2018.pdf},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2018/10/qualico2018_databases_poster_hemati_mehler_uslu_baumartz_abrami.pdf},
  location  = {Wroclaw, Poland}
}
Daniel Baumartz, Tolga Uslu and Alexander Mehler. 2018. LTV: Labeled Topic Vector. Proceedings of COLING 2018, the 27th International Conference on Computational Linguistics: System Demonstrations, August 20-26.
BibTeX
@inproceedings{Baumartz:Uslu:Mehler:2018,
  author    = {Baumartz, Daniel and Uslu, Tolga and Mehler, Alexander},
  title     = {{LTV}: Labeled Topic Vector},
  booktitle = {Proceedings of {COLING 2018}, the 27th International Conference
               on Computational Linguistics: System Demonstrations, August 20--26},
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {The COLING 2018 Organizing Committee},
  abstract  = {In this paper, we present LTV, a website and an API that generate
               labeled topic classifications based on the Dewey Decimal Classification
               (DDC), an international standard for topic classification in libraries.
               We introduce nnDDC, a largely language-independent neural network-based
               classifier for DDC-related topic classification, which we optimized
               using a wide range of linguistic features to achieve an F-score
               of 87.4\%. To show that our approach is language-independent,
               we evaluate nnDDC using up to 40 different languages. We derive
               a topic model based on nnDDC, which generates probability distributions
               over semantic units for any input on sense-, word- and text-level.
               Unlike related approaches, however, these probabilities are estimated
               by means of nnDDC so that each dimension of the resulting vector
               representation is uniquely labeled by a DDC class. In this way,
               we introduce a neural network-based Classifier-Induced Semantic
               Space (nnCISS).},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/06/coling2018.pdf}
}

2017

Tolga Uslu, Wahed Hemati, Alexander Mehler and Daniel Baumartz. 2017. TextImager as a Generic Interface to R. Software Demonstrations of the 15th Conference of the European Chapter of the Association for Computational Linguistics (EACL 2017).
BibTeX
@inproceedings{Uslu:Hemati:Mehler:Baumartz:2017,
  author    = {Uslu, Tolga and Hemati, Wahed and Mehler, Alexander and Baumartz, Daniel},
  title     = {{TextImager} as a Generic Interface to {R}},
  booktitle = {Software Demonstrations of the 15th Conference of the European
               Chapter of the Association for Computational Linguistics (EACL
               2017)},
  location  = {Valencia, Spain},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/TextImager.pdf},
  year      = {2017}
}