Daniel Baumartz

Doctoral candidate

Goethe-Universität Frankfurt am Main
Robert-Mayer-Straße 10
Room 401c
D-60325 Frankfurt am Main
D-60054 Frankfurt am Main (use for package delivery)
Postfach / P.O. Box: 154
Phone: +49 69-798-24664
Fax: +49 69-798-28931

Contact | About me | Interests | Short CV | Publications
TBA
TBA
TBA

Total: 14

2021 (3)

  • [DOI] M. Konca, A. Mehler, D. Baumartz, and W. Hemati, “From distinguishability to informativity. A quantitative text model for detecting random texts,” Language and Text: Data, models, information and applications, vol. 356, pp. 145-162, 2021.
    [BibTeX]

    @article{Konca:et:al:2021,
      author    = {Konca, Maxim and Mehler, Alexander and Baumartz, Daniel and Hemati, Wahed},
      title     = {From distinguishability to informativity. A quantitative text model for detecting random texts},
      journal   = {Language and Text: Data, models, information and applications},
      editor    = {Paw{\l}owski, Adam and Ma{\v{c}}utek, Jan and Embleton, Sheila and Mikros, George},
      volume    = {356},
      pages     = {145--162},
      year      = {2021},
      publisher = {John Benjamins Publishing Company},
      doi       = {10.1075/cilt.356.10kon}
    }
  • A. Mehler, D. Baumartz, and T. Uslu, “SemioGraphs: Visualizing Topic Networks as Multi-Codal Graphs,” in International Quantitative Linguistics Conference (QUALICO 2021), 2021.
    [Poster][BibTeX]

    @inproceedings{Mehler:Uslu:Baumartz:2021,
      author    = {Mehler, Alexander and Baumartz, Daniel and Uslu, Tolga},
      title     = {{SemioGraphs}: Visualizing Topic Networks as Multi-Codal Graphs},
      booktitle = {International Quantitative Linguistics Conference (QUALICO 2021)},
      series    = {QUALICO 2021},
      location  = {Tokyo, Japan},
      year      = {2021},
      poster    = {https://www.texttechnologylab.org/files/Qualico_2021_Semiograph_Poster.pdf}
    }
  • A. Mehler, D. Baumartz, and T. Uslu, “SemioGraphs: Visualizing Topic Networks as Multi-Codal Graphs,” in International Quantitative Linguistics Conference (QUALICO 2021), 2021.
    [BibTeX]

    @inproceedings{Mehler:Uslu:Baumartz:2021,
      author    = {Mehler, Alexander and Baumartz, Daniel and Uslu, Tolga},
      title     = {{SemioGraphs}: Visualizing Topic Networks as Multi-Codal Graphs},
      booktitle = {International Quantitative Linguistics Conference (QUALICO 2021)},
      series    = {QUALICO 2021},
      location  = {Tokyo, Japan},
      year      = {2021}
    }

2020 (2)

  • [PDF] [https://publikationen.ub.uni-frankfurt.de/frontdoor/index/index/docId/56381] D. Baumartz, “Automatic Topic Modeling in the Context of Digital Libraries: Mehrsprachige Korpus-basierte Erweiterung von text2ddc – eine experimentelle Studie,” Bachelor’s thesis, Johann Wolfgang Goethe-Universität, Frankfurt, Germany, 2020.
    [BibTeX]

    @thesis{Baumartz:2020,
      author  = {Baumartz, Daniel},
      title   = {{Automatic Topic Modeling in the Context of Digital Libraries: Mehrsprachige Korpus-basierte Erweiterung von text2ddc - eine experimentelle Studie}},
      type    = {bathesis},
      school  = {Johann Wolfgang Goethe-Universität, Institute of Computer Science and Mathematics, Text Technology Lab},
      address = {Frankfurt, Germany},
      year    = 2020,
      month   = 6,
      url     = {https://publikationen.ub.uni-frankfurt.de/frontdoor/index/index/docId/56381},
      pdf     = {https://publikationen.ub.uni-frankfurt.de/files/56381/baumartz_bachelorarbeit_2020_pub.pdf}
    }
  • [PDF] [https://www.studiesaggilinguistici.it/index.php/ssl/article/view/276] [DOI] A. Mehler, B. Jussen, T. Geelhaar, A. Henlein, G. Abrami, D. Baumartz, T. Uslu, and W. Hemati, “The Frankfurt Latin Lexicon. From Morphological Expansion and Word Embeddings to SemioGraphs,” Studi e Saggi Linguistici, vol. 58, iss. 1, pp. 121-155, 2020.
    [Abstract] [BibTeX]

    In this article we present the Frankfurt Latin Lexicon (FLL), a lexical resource for Medieval Latin that is used both for the lemmatization of Latin texts and for the post-editing of lemmatizations. We describe recent advances in the development of lemmatizers and test them against the Capitularies corpus (comprising Frankish royal edicts, mid-6th to mid-9th century), a corpus created as a reference for processing Medieval Latin. We also consider the post-correction of lemmatizations using a limited crowdsourcing process aimed at continuous review and updating of the FLL. Starting from the texts resulting from this lemmatization process, we describe the extension of the FLL by means of word embeddings, whose interactive traversing by means of SemioGraphs completes the digital enhanced hermeneutic circle. In this way, the article argues for a more comprehensive understanding of lemmatization, encompassing classical machine learning as well as intellectual post-corrections and, in particular, human computation in the form of interpretation processes based on graph representations of the underlying lexical resources.
    @article{Mehler:et:al:2020b,
      author   = {Mehler, Alexander and Jussen, Bernhard and Geelhaar, Tim and Henlein, Alexander and Abrami, Giuseppe and Baumartz, Daniel and Uslu, Tolga and Hemati, Wahed},
      title    = {{The Frankfurt Latin Lexicon. From Morphological Expansion and Word Embeddings to SemioGraphs}},
      journal  = {Studi e Saggi Linguistici},
      volume   = {58},
      number   = {1},
      pages    = {121--155},
      year     = {2020},
      doi      = {10.4454/ssl.v58i1.276},
      url      = {https://www.studiesaggilinguistici.it/index.php/ssl/article/view/276},
      pdf      = {https://www.studiesaggilinguistici.it/index.php/ssl/article/download/276/219},
      abstract = {In this article we present the Frankfurt Latin Lexicon (FLL), a lexical resource for Medieval Latin that is used both for the lemmatization of Latin texts and for the post-editing of lemmatizations. We describe recent advances in the development of lemmatizers and test them against the Capitularies corpus (comprising Frankish royal edicts, mid-6th to mid-9th century), a corpus created as a reference for processing Medieval Latin. We also consider the post-correction of lemmatizations using a limited crowdsourcing process aimed at continuous review and updating of the FLL. Starting from the texts resulting from this lemmatization process, we describe the extension of the FLL by means of word embeddings, whose interactive traversing by means of SemioGraphs completes the digital enhanced hermeneutic circle. In this way, the article argues for a more comprehensive understanding of lemmatization, encompassing classical machine learning as well as intellectual post-corrections and, in particular, human computation in the form of interpretation processes based on graph representations of the underlying lexical resources.}
    }

2019 (3)

  • T. Uslu, A. Mehler, and D. Baumartz, “Computing Classifier-based Embeddings with the Help of text2ddc,” in Proceedings of the 20th International Conference on Computational Linguistics and Intelligent Text Processing, (CICLing 2019), 2019.
    [BibTeX]

    @inproceedings{Uslu:Mehler:Baumartz:2019,
      author    = {Uslu, Tolga and Mehler, Alexander and Baumartz, Daniel},
      title     = {{Computing Classifier-based Embeddings with the Help of text2ddc}},
      booktitle = {{Proceedings of the 20th International Conference on Computational Linguistics and Intelligent Text Processing, (CICLing 2019)}},
      series    = {{CICLing 2019}},
      location  = {La Rochelle, France},
      year      = {2019}
    }
  • [https://dev.clariah.nl/files/dh2019/boa/0199.html] T. Uslu, A. Mehler, C. Schulz, and D. Baumartz, “BigSense: a Word Sense Disambiguator for Big Data,” in Proceedings of the Digital Humanities 2019, (DH2019), 2019.
    [BibTeX]

    @inproceedings{Uslu:Mehler:Schulz:Baumartz:2019,
      author    = {Uslu, Tolga and Mehler, Alexander and Schulz, Clemens and Baumartz, Daniel},
      title     = {{{BigSense}: a Word Sense Disambiguator for Big Data}},
      booktitle = {{Proceedings of the Digital Humanities 2019, (DH2019)}},
      series    = {{DH2019}},
      location  = {Utrecht, Netherlands},
      year      = {2019},
      url       = {https://dev.clariah.nl/files/dh2019/boa/0199.html}
    }
  • [PDF] A. Mehler, T. Uslu, R. Gleim, and D. Baumartz, “text2ddc meets Literature – Ein Verfahren für die Analyse und Visualisierung thematischer Makrostrukturen,” in Proceedings of the 6th Digital Humanities Conference in the German-speaking Countries, DHd 2019, 2019.
    [Poster][BibTeX]

    @inproceedings{Mehler:Uslu:Gleim:Baumartz:2019,
      author    = {Mehler, Alexander and Uslu, Tolga and Gleim, Rüdiger and Baumartz, Daniel},
      title     = {{text2ddc meets Literature - Ein Verfahren für die Analyse und Visualisierung thematischer Makrostrukturen}},
      booktitle = {Proceedings of the 6th Digital Humanities Conference in the German-speaking Countries, DHd 2019},
      series    = {DHd 2019},
      location  = {Frankfurt, Germany},
      year      = {2019},
      pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/Preprint_DHd2019_text2ddc_meets_Literature.pdf},
      poster    = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/DHD_Poster___text2ddc_meets_Literature_Poster.pdf}
    }

2018 (5)

  • [PDF] D. Baumartz, T. Uslu, and A. Mehler, “LTV: Labeled Topic Vector,” in Proceedings of COLING 2018, the 27th International Conference on Computational Linguistics: System Demonstrations, August 20-26, Santa Fe, New Mexico, USA, 2018.
    [Abstract] [BibTeX]

    In this paper, we present LTV, a website and an API that generate labeled topic classifications based on the Dewey Decimal Classification (DDC), an international standard for topic classification in libraries. We introduce nnDDC, a largely language-independent neural network-based classifier for DDC-related topic classification, which we optimized using a wide range of linguistic features to achieve an F-score of 87.4%. To show that our approach is language-independent, we evaluate nnDDC using up to 40 different languages. We derive a topic model based on nnDDC, which generates probability distributions over semantic units for any input on sense-, word- and text-level. Unlike related approaches, however, these probabilities are estimated by means of nnDDC so that each dimension of the resulting vector representation is uniquely labeled by a DDC class. In this way, we introduce a neural network-based Classifier-Induced Semantic Space (nnCISS).
    @inproceedings{Baumartz:Uslu:Mehler:2018,
      author    = {Daniel Baumartz and Tolga Uslu and Alexander Mehler},
      title     = {{LTV}: Labeled Topic Vector},
      booktitle = {Proceedings of {COLING 2018}, the 27th International Conference on Computational Linguistics: System Demonstrations, August 20-26},
      publisher = {The COLING 2018 Organizing Committee},
      address   = {Santa Fe, New Mexico, USA},
      year      = {2018},
      pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/06/coling2018.pdf},
      abstract  = {In this paper, we present LTV, a website and an API that generate labeled topic classifications based on the Dewey Decimal Classification (DDC), an international standard for topic classification in libraries. We introduce nnDDC, a largely language-independent neural network-based classifier for DDC-related topic classification, which we optimized using a wide range of linguistic features to achieve an F-score of 87.4%. To show that our approach is language-independent, we evaluate nnDDC using up to 40 different languages. We derive a topic model based on nnDDC, which generates probability distributions over semantic units for any input on sense-, word- and text-level. Unlike related approaches, however, these probabilities are estimated by means of nnDDC so that each dimension of the resulting vector representation is uniquely labeled by a DDC class. In this way, we introduce a neural network-based Classifier-Induced Semantic Space (nnCISS).}
    }
  • [PDF] W. Hemati, A. Mehler, T. Uslu, D. Baumartz, and G. Abrami, “Evaluating and Integrating Databases in the Area of NLP,” in International Quantitative Linguistics Conference (QUALICO 2018), 2018.
    [Poster][BibTeX]

    @inproceedings{Hemati:Mehler:Uslu:Baumartz:Abrami:2018,
      author    = {Wahed Hemati and Alexander Mehler and Tolga Uslu and Daniel Baumartz and Giuseppe Abrami},
      title     = {Evaluating and Integrating Databases in the Area of {NLP}},
      booktitle = {International Quantitative Linguistics Conference (QUALICO 2018)},
      location  = {Wroclaw, Poland},
      year      = {2018},
      pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/04/Hemat-Mehler-Uslu-Baumartz-Abrami-Qualico-2018.pdf},
      poster    = {https://www.texttechnologylab.org/wp-content/uploads/2018/10/qualico2018_databases_poster_hemati_mehler_uslu_baumartz_abrami.pdf}
    }
  • A. Mehler, W. Hemati, R. Gleim, and D. Baumartz, “VienNA: Auf dem Weg zu einer Infrastruktur für die verteilte interaktive evolutionäre Verarbeitung natürlicher Sprache,” in Forschungsinfrastrukturen und digitale Informationssysteme in der germanistischen Sprachwissenschaft, H. Lobin, R. Schneider, and A. Witt, Eds., Berlin: De Gruyter, 2018, vol. 6.
    [BibTeX]

    @incollection{Mehler:Hemati:Gleim:Baumartz:2018,
      author    = {Alexander Mehler and Wahed Hemati and Rüdiger Gleim and Daniel Baumartz},
      title     = {{VienNA: Auf dem Weg zu einer Infrastruktur für die verteilte interaktive evolutionäre Verarbeitung natürlicher Sprache}},
      booktitle = {Forschungsinfrastrukturen und digitale Informationssysteme in der germanistischen Sprachwissenschaft},
      editor    = {Henning Lobin and Roman Schneider and Andreas Witt},
      volume    = {6},
      publisher = {De Gruyter},
      address   = {Berlin},
      year      = 2018
    }
  • [PDF] T. Uslu, A. Mehler, A. Niekler, and D. Baumartz, “Towards a DDC-based Topic Network Model of Wikipedia,” in Proceedings of 2nd International Workshop on Modeling, Analysis, and Management of Social Networks and their Applications (SOCNET 2018), February 28, 2018.
    [BibTeX]

    @inproceedings{Uslu:Mehler:Niekler:Baumartz:2018,
      author    = {Tolga Uslu and Alexander Mehler and Andreas Niekler and Daniel Baumartz},
      title     = {Towards a {DDC}-based Topic Network Model of {Wikipedia}},
      booktitle = {Proceedings of 2nd International Workshop on Modeling, Analysis, and Management of Social Networks and their Applications (SOCNET 2018)},
      eventdate = {2018-02-28},
      month     = feb,
      year      = 2018,
      pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/TowardsDDC.pdf}
    }
  • [PDF] T. Uslu, A. Mehler, D. Baumartz, A. Henlein, and W. Hemati, “fastSense: An Efficient Word Sense Disambiguation Classifier,” in Proceedings of the 11th edition of the Language Resources and Evaluation Conference, May 7 – 12, Miyazaki, Japan, 2018.
    [BibTeX]

    @inproceedings{Uslu:et:al:2018,
      author    = {Tolga Uslu and Alexander Mehler and Daniel Baumartz and Alexander Henlein and Wahed Hemati},
      title     = {{fastSense}: An Efficient Word Sense Disambiguation Classifier},
      booktitle = {Proceedings of the 11th edition of the Language Resources and Evaluation Conference, May 7 - 12},
      series    = {LREC 2018},
      address   = {Miyazaki, Japan},
      year      = 2018,
      pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/fastSense.pdf}
    }

2017 (1)

  • [PDF] T. Uslu, W. Hemati, A. Mehler, and D. Baumartz, “TextImager as a Generic Interface to R,” in Software Demonstrations of the 15th Conference of the European Chapter of the Association for Computational Linguistics (EACL 2017), 2017.
    [BibTeX]

    @inproceedings{Uslu:Hemati:Mehler:Baumartz:2017,
      author    = {Tolga Uslu and Wahed Hemati and Alexander Mehler and Daniel Baumartz},
      title     = {{TextImager} as a Generic Interface to {R}},
      booktitle = {Software Demonstrations of the 15th Conference of the European Chapter of the Association for Computational Linguistics (EACL 2017)},
      location  = {Valencia, Spain},
      year      = 2017,
      pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/TextImager.pdf}
    }