Publications – Text Technology Lab

Total: 505

2026

Cordula Artelt, Anika Schenck-Fontaine, Corinna Kleinert, Stefan Liebig, Alexander Mehler and Reinhard Pollak. February, 2026. Infrastructure Priority Programme "New Data Spaces for the Social Sciences" (SPP 2431) – Programme Overview.

BibTeX

@techreport{Artelt:et:al:2026,
  author    = {Artelt, Cordula and Schenck-Fontaine, Anika and Kleinert, Corinna
               and Liebig, Stefan and Mehler, Alexander and Pollak, Reinhard},
  title     = {Infrastructure Priority Programme ``New Data Spaces for the Social
               Sciences'' (SPP 2431) -- Programme Overview},
  institution = {New Data Spaces for the Social Sciences (SPP 2431)},
  series    = {New Data Spaces | Reports},
  number    = {1},
  year      = {2026},
  month     = feb,
  keywords  = {spp},
  doi       = {10.5157/SPP2431:WP1:1.0},
  pdf       = {https://www.new-data-spaces.de/Portals/11/adam/Publication/ZhRUzWdz0kyeOzHlsyzWOw/PDFfile/WP-1 Programme Description.pdf}
}

Patrick Schrottenbacher, Alexander Mehler, Vivienne Bernhardt, Leon Rohe and Giuseppe Abrami. 2026. ReEmote: Towards Emotion Representation in VR Through Va.Si.Li-Lab. Proceedings of XR Salento 2026. accepted.

BibTeX

@inproceedings{Schrottenbacher:et:al:2026:a,
  author    = {Schrottenbacher, Patrick and Mehler, Alexander and Bernhardt, Vivienne
               and Rohe, Leon and Abrami, Giuseppe},
  title     = {{ReEmote}: Towards Emotion Representation in {VR} Through {Va.Si.Li}-Lab},
  booktitle = {Proceedings of XR Salento 2026},
  year      = {2026},
  publisher = {Springer International Publishing},
  keywords  = {VR, XR, affective computing, virtual humans, emotion detection, FACES},
  abstract  = {Human social interactions are inherently multimodal, shaped not
               only by what speakers convey but also by cues such as facial expressions,
               posture, and gestures. Together, these channels shape both participants'
               perceptions and behaviors, further reinforcing conversational
               feedback loops. This multimodal system extends to VR, where avatars
               serve as proxies for human interaction, making both visual and
               auditory fidelity essential for engaging. To properly utilize
               the emotional expression space that virtual environments allow,
               we introduce ReEmote. ReEmote extends the capabilities of Va.Si.Li-Lab,
               a collaborative, multi-user VR platform built on Ubiq. While Va.Si.Li-Lab
               supports user emotional expression through facial and hand tracking,
               ReEmote extends this by introducing schema-based emotion mappings
               that affect both avatars and their environments. This fosters
               immersive, emotionally aware environments that are beneficial
               for human and chatbot agent interactions, where human users and
               virtual agents share an emotional expression space. By enabling
               richer emotional dynamics, ReEmote opens up new ways of designing
               affective and engaging virtual experiences. In this paper, we describe
               the design choices behind ReEmote and present an evaluation of
               the graphical validity of the emotion representation introduced
               by ReEmote. Our results indicate that emotions can be validly
               represented through avatar facial expressions that users can quickly
               identify as Ekman's basic emotions. This opens up several possibilities
               for extending emotion-related text-to-speech (TTS) applications
               in Extended Reality (XR) with ReEmote. The paper also outlines
               use cases for XR-based TTS applications.},
  note      = {accepted}
}

Yanran Chen, Wei Zhao, Anne Breitbarth, Manuel Stoeckel, Alexander Mehler, Dominik Schlechtweg and Steffen Eger. April, 2026. Syntactic language change in English and German: Metrics, parsers, and convergences. PLOS ONE, 21(4):1–33.

BibTeX

@article{Chen:et:al:2026,
  author    = {Chen, Yanran and Zhao, Wei and Breitbarth, Anne and Stoeckel, Manuel
               and Mehler, Alexander and Schlechtweg, Dominik and Eger, Steffen},
  title     = {Syntactic language change in English and German: Metrics, parsers,
               and convergences},
  journal   = {PLOS ONE},
  publisher = {Public Library of Science},
  year      = {2026},
  month     = apr,
  volume    = {21},
  number    = {4},
  pages     = {1--33},
  doi       = {10.1371/journal.pone.0346096},
  url       = {https://doi.org/10.1371/journal.pone.0346096},
  abstract  = {Syntactic language change has gained increasing attention in recent
               years. Previous computational work based on dependency relations
               has focused on diachronic trends in dependency distance, which
               measures the linear distance between dependent words, using dependency
               trees automatically predicted by a dependency parser (mostly the
               Stanford CoreNLP parser). In this work, we introduce a set of
               15 syntax metrics that extend the analysis beyond linear distance
               by incorporating both linear and tree graph properties of dependency
               trees, such as tree height and degree. Besides, we propose a multi-parser
               approach to reduce the impact of using specific parsers, thereby
               increasing the robustness of the detected language changes. Through
               a cross-lingual investigation of English and German in parliamentary
               debates from the last 160 years, using 6 different parsers (CoreNLP
               and five newer alternatives), we demonstrate that: (1) Relying
               on one single parser can be problematic, as the agreement on predicted
               trends can be low across parsers. (2) Our set of metrics can capture
               subtle patterns of syntactic changes. Our analysis shows that
               syntactic change over the time period inspected is largely similar
               between English and German, with only 2.2\% of cases yielding opposite
               trends in these metrics. (3) We also show that changes in syntactic
               metrics seem to be more frequent at the tails of sentence length
               distributions and often move in opposite directions for short
               and long sentences. To our best knowledge, ours is the most comprehensive
               computational analysis of syntactic language change using modern
               NLP technology in recent corpora of English and German.}
}

Samuel Richer, Mounika Marreddy and Alexander Mehler. 2026. TTLab at SemEval-2026 Task 10: Transformer-based Approaches for Psycholinguistic Conspiracy Detection in Social Media Discourse. Proceedings of the International Workshop on Semantic Evaluation (SemEval-2026). accepted.

BibTeX

@inproceedings{Richer:et:al:2026,
  author    = {Richer, Samuel and Marreddy, Mounika and Mehler, Alexander},
  title     = {TTLab at SemEval-2026 Task 10: Transformer-based Approaches for
               Psycholinguistic Conspiracy Detection in Social Media Discourse},
  booktitle = {Proceedings of the International Workshop on Semantic Evaluation (SemEval-2026)},
  publisher = {Association for Computational Linguistics},
  year      = {2026},
  note      = {accepted}
}

Noah Tratzsch, Asmaa Al-Raian, Mounika Marreddy and Alexander Mehler. 2026. SemEval-2026 Task 11: Reducing Content Effects Using Layered Activation Steering. Proceedings of the International Workshop on Semantic Evaluation (SemEval-2026). accepted.

BibTeX

@inproceedings{Tratzsch:et:al2026,
  author    = {Tratzsch, Noah and Al-Raian, Asmaa and Marreddy, Mounika and Mehler, Alexander},
  title     = {SemEval-2026 Task 11: Reducing Content Effects Using Layered Activation Steering},
  booktitle = {Proceedings of the International Workshop on Semantic Evaluation (SemEval-2026)},
  publisher = {Association for Computational Linguistics},
  year      = {2026},
  note      = {accepted}
}

Yahya Missaoui, Solomon Kebede, Mounika Marreddy and Alexander Mehler. 2026. SemEval-2026 Task 3: Dimensional Aspect-Based Sentiment Analysis. Proceedings of the International Workshop on Semantic Evaluation (SemEval-2026). accepted.

BibTeX

@inproceedings{Missaoui:et:al:2026,
  author    = {Missaoui, Yahya and Kebede, Solomon and Marreddy, Mounika and Mehler, Alexander},
  title     = {SemEval-2026 Task 3: Dimensional Aspect-Based Sentiment Analysis},
  booktitle = {Proceedings of the International Workshop on Semantic Evaluation (SemEval-2026)},
  publisher = {Association for Computational Linguistics},
  year      = {2026},
  note      = {accepted}
}

Julia Weiss, Axel Burger, Joss Roßmann, Jan Eric Meurer and Ali Abusaleh. 2026. From Images to Topics: Evaluating Vision-Language Models for Topic Classification of Election Advertising. Proceedings of the 18th ACM Web Science Conference 2026. accepted.

BibTeX

@inproceedings{weiss:et:al:2026,
  author    = {Weiss, Julia and Burger, Axel and Roßmann, Joss and Meurer, Jan Eric
               and Abusaleh, Ali},
  title     = {From Images to Topics: Evaluating Vision-Language Models for Topic
               Classification of Election Advertising},
  booktitle = {Proceedings of the 18th ACM Web Science Conference 2026},
  eventdate = {2026-05},
  location  = {Braunschweig, Germany},
  year      = {2026},
  keywords  = {Multimodal Large Language Models, Political communication, Privacy-aware AI, new-data-spaces, circlet},
  note      = {accepted}
}

Sonja Hahn, Leon Hammerla, Corinna Hankeln, Sebastian Gross, Marie Steinke, Christina M. Röper Korf and Ulf Kroehne. April, 2026. Using Artificial Intelligence for Eliciting Diagnostic Evidence From Students’ Drawings: A Case Study From a Formative Mathematics Assessment. Psychological Test Adaptation and Development, 7:73–90.

BibTeX

@article{hahn:etal:2026,
  author    = {Hahn, Sonja and Hammerla, Leon and Hankeln, Corinna and Gross, Sebastian
               and Steinke, Marie and R{\"o}per Korf, Christina M. and Kroehne, Ulf},
  title     = {Using Artificial Intelligence for Eliciting Diagnostic Evidence
               From Students’ Drawings: A Case Study From a Formative Mathematics
               Assessment},
  journal   = {Psychological Test Adaptation and Development},
  publisher = {Hogrefe Publishing Group},
  year      = {2026},
  month     = apr,
  volume    = {7},
  pages     = {73--90},
  issn      = {2698-1866},
  doi       = {10.1027/2698-1866/a000123},
  url       = {http://dx.doi.org/10.1027/2698-1866/a000123}
}

Leon Hammerla and Alexander Mehler. 2026. Gutenberg+: A More Temporally Faithful Corpus for Diachronic NLP. Proceedings Workshop on Structured Linguistic Data and Evaluation (SLiDE 2026), co-located with the Language Resources and Evaluation Conference (LREC 2026). accepted.

BibTeX

@inproceedings{Hammerla:Mehler:2026:a,
  author    = {Hammerla, Leon and Mehler, Alexander},
  title     = {{Gutenberg+}: A More Temporally Faithful Corpus for Diachronic {NLP}},
  booktitle = {Proceedings Workshop on Structured Linguistic Data and Evaluation
               (SLiDE 2026), co-located with the Language Resources and Evaluation
               Conference (LREC 2026)},
  address   = {Palma de Mallorca (Spain)},
  year      = {2026},
  note      = {accepted},
  keywords  = {neglab}
}

Ali Abusaleh, Bhuvanesh Verma and Alexander Mehler. 2026. TTLab at AraSentEval: SARF (صرف) Sentiment Analysis via Root-based Fusion for Multi-Dialectal Arabic. Proceedings of the 7th Workshop on Open-Source Arabic Corpora and Processing Tools (OSACT7), co-located with the Language Resources and Evaluation Conference (LREC 2026). accepted.

BibTeX

@inproceedings{Abusaleh:et:al:2026:sarf,
  author    = {Abusaleh, Ali and Verma, Bhuvanesh and Mehler, Alexander},
  title     = {TTLab at AraSentEval: SARF (صرف) Sentiment Analysis via Root-based
               Fusion for Multi-Dialectal Arabic},
  booktitle = {Proceedings of the 7th Workshop on Open-Source Arabic Corpora
               and Processing Tools (OSACT7), co-located with the Language Resources
               and Evaluation Conference (LREC 2026)},
  eventdate = {2026-05},
  location  = {Palma, Mallorca, Spain},
  year      = {2026},
  keywords  = {NLP, Sentiment Analysis, Arabic analysis, new-data-spaces, circlet, satek},
  abstract  = {Arabic sentiment analysis is challenged by morphological complexity
               and lexical variation across Arabic dialects, compounded by subjectivity
               in how speakers and writers express sentiment. In this paper,
               we present our submission for the AraSentEval 2026 Shared Task
               on Arabic Dialect Sentiment Analysis. We propose SARF (صرف) a
               multi-view architectural framework that integrates surface-level
               context with stemmed and rooted morphological perspectives using
               a shared MARBERTv2 encoder. Our system employs a hybrid BERT-CNN-BiLSTM-Attention
               architecture to capture both local sentiment n-grams and global
               sequential dependencies. Experimental results show that while
               individual morphological normalization strategies (stemming or
               rooting) may degrade performance, their joint integration via
               cross-morphological attention provides robust features across
               diverse dialects. Our final system achieved a competitive macro-F1-score
               of 0.9263, ranking 2nd out of 15 participating teams.},
  note      = {accepted}
}

Ali Abusaleh, Leon Hammerla and Alexander Mehler. 2026. Learning to Detect Cross-Modal Negation: An Analysis of Latent Representations and an Attention-Based Solution. 2026 8th International Conference on Natural Language Processing (ICNLP). accepted.

BibTeX

@inproceedings{Abusaleh:et:al:2026,
  author    = {Abusaleh, Ali and Hammerla, Leon and Mehler, Alexander},
  title     = {Learning to Detect Cross-Modal Negation: An Analysis of Latent
               Representations and an Attention-Based Solution},
  booktitle = {2026 8th International Conference on Natural Language Processing (ICNLP)},
  eventdate = {2026-03-20/2026-03-22},
  location  = {Xi'an, China},
  year      = {2026},
  keywords  = {Vision language model, Natural language processing, Cross-modal retrieval, negation detection, video analysis, Multimodal analysis, Political Communication, neglab, new-data-spaces, circlet},
  abstract  = {Detecting high-level semantic concepts like negation across modalities
               remains a challenge for current multimodal systems. We analyze
               this as a fundamental representation learning problem, providing
               the first evidence that negation does not form a linearly or non-linearly
               separable class in the latent spaces of standard vision-language
               models (VLMs). We demonstrate that pretrained embeddings primarily
               encode modality-specific features, lacking a generalizable negation
               signal. To overcome this, we propose a novel cross-modal attention
               architecture that explicitly models inter-modal dependencies,
               achieving performance gains of up to +7.03\% F1 over unimodal baselines.
               Our analysis reveals a key asymmetry: while textual negation often
               appears independently, visual negation is semantically dependent
               on linguistic context, a finding validated through our statistical
               analysis of 3,222 political video-text pairs automatically annotated
               via Qwen2.5-VL. By combining this analysis with self-supervised
               video representations (JEPA2), we advance the modeling of temporal
               negation. This work provides new methods and insights for learning
               robust, semantically-aligned representations in multimodal systems.},
  note      = {accepted}
}

Manuel Schaaf, Kevin Bönisch and Alexander Mehler. May, 2026. GhostWriter: Hidden AI-Generated Texts over Multiple Languages, Domains and Generators. Proceedings of the Fifteenth Language Resources and Evaluation Conference (LREC 2026), 10497–10516.

BibTeX

@inproceedings{Schaaf:et:al:2026,
  author    = {Schaaf, Manuel and Bönisch, Kevin and Mehler, Alexander},
  title     = {GhostWriter: Hidden AI-Generated Texts over Multiple Languages,
               Domains and Generators},
  booktitle = {Proceedings of the Fifteenth Language Resources and Evaluation
               Conference (LREC 2026)},
  editor    = {Piperidis, Stelios and Bel, Núria and van den Heuvel, Henk and Ide, Nancy
               and Krek, Simon and Toral, Antonio},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Palma, Mallorca, Spain},
  year      = {2026},
  month     = may,
  pages     = {10497--10516},
  doi       = {10.63317/57fd7juh5zek},
  keywords  = {Corpus, Natural Language Generation, Validation of LRs, AI-generated Text Detection, core, core_b05},
  abstract  = {The advent of Transformer-based Large Language Models (LLMs) has
               led to an unprecedented surge of AI-generated text (AIGT) across
               online platforms and academic domains. While these models exhibit
               near-human fluency and stylistic coherence, their widespread adoption
               has raised concerns about authorship integrity, research quality,
               and the recursive contamination of training corpora with synthetic
               data. These developments underscore the need for reliable AIGT
               detection methods and benchmark datasets, particularly for malicious
               or deceptive *ghostwriting* scenarios where AIGT is intentionally
               crafted to evade detection. To address this, we present **GhostWriter**,
               a large-scale, bilingual (German and English), multi-generator,
               and multi-domain dataset for AIGT detection. The dataset comprises
               human- and AI-authored texts produced under domain-specific *ghostwriting*
               conditions, including examples intentionally embedded within otherwise
               human-written texts to obscure their AI origin. With **GhostWriter**,
               we (i) aim to expand the resources available for German AIGT datasets,
               (ii) emphasize mixed or fused synthesizations—since most existing
               corpora are limited to the document level—and (iii) introduce
               specifically crafted malicious ghostwriting scenarios across multiple
               domains and generators.}
}

Thiemo Dahmann, Julian Schneider, Philipp Stephan, Giuseppe Abrami and Alexander Mehler. 2026. Towards the Generation and Application of Dynamic Web-Based Visualization of UIMA-based Annotations for Big-Data Corpora with the Help of Unified Dynamic Annotation Visualizer. Proceedings of the Fifteenth Language Resources and Evaluation Conference (LREC 2026), 6695–6705.

BibTeX

@inproceedings{Dahmann:et:al:2026,
  author    = {Dahmann, Thiemo and Schneider, Julian and Stephan, Philipp and Abrami, Giuseppe
               and Mehler, Alexander},
  title     = {Towards the Generation and Application of Dynamic Web-Based Visualization
               of UIMA-based Annotations for Big-Data Corpora with the Help of
               Unified Dynamic Annotation Visualizer},
  booktitle = {Proceedings of the Fifteenth Language Resources and Evaluation
               Conference (LREC 2026)},
  editor    = {Piperidis, Stelios and Bel, Núria and van den Heuvel, Henk and Ide, Nancy
               and Krek, Simon and Toral, Antonio},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Palma, Mallorca, Spain},
  year      = {2026},
  pages     = {6695--6705},
  doi       = {10.63317/5ce2aaity4yz},
  pdf       = {http://www.lrec-conf.org/proceedings/lrec2026/pdf/2026.lrec2026-1.533.pdf},
  video     = {https://www.youtube.com/watch?v=LFBiGlmEDog},
  keywords  = {NLP, UIMA, Annotations, dynamic visualization, uce},
  abstract  = {The automatic and manual annotation of unstructured corpora is
               a routine task in many scientific fields and is supported by a
               variety of existing software solutions. Despite this variety,
               few solutions currently support annotation visualization, especially
               for dynamic generation and interaction. To bridge this gap and
               visualize annotated corpora based on user-, project-, or corpus-specific
               aspects, we developed Unified Dynamic Annotation Visualizer (UDAV).
               UDAV is a web-based solution that implements features not supported
               by comparable tools, enabling a customizable and extensible toolbox
               for interacting with annotations and allowing integration into
               existing big-data frameworks. We exemplify UDAV through a range
               of visualizations and also provide an evaluation of corpus import
               and processing performance.}
}

Bhuvanesh Verma and Alexander Mehler. 2026. Predicting Topic (Co-)Occurrence Using Topic Networks Built from the Project Gutenberg Corpus. Proceedings of the Fifteenth Language Resources and Evaluation Conference (LREC 2026), 860–869.

BibTeX

@inproceedings{Verma:Mehler:2026,
  author    = {Verma, Bhuvanesh and Mehler, Alexander},
  title     = {Predicting Topic (Co-)Occurrence Using Topic Networks Built from
               the Project Gutenberg Corpus},
  booktitle = {Proceedings of the Fifteenth Language Resources and Evaluation
               Conference (LREC 2026)},
  editor    = {Piperidis, Stelios and Bel, Núria and van den Heuvel, Henk and Ide, Nancy
               and Krek, Simon and Toral, Antonio},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Palma, Mallorca, Spain},
  year      = {2026},
  pages     = {860--869},
  doi       = {10.63317/58x3h7gjbpb4},
  keywords  = {Topic Evolution, Topic Network, Time-aware Networks, Temporal Autocorrelation, Project Gutenberg, satek},
  abstract  = {Although temporal topic modeling has been widely applied to scientific
               and legal texts, literary corpora have largely been overlooked
               in this regard. To address this issue, we analyze topic evolution
               in a subset of the Project Gutenberg (PG) corpus. We model this
               subset as a sequence of topic networks that capture the emergence,
               persistence, and interaction of thematic structures over decades.
               Using supervised topic representations, we predict nodes (topics)
               and edges (topic pairings) to forecast future topics and their
               co-occurrence. Our experiments demonstrate moderate to strong
               temporal persistence in topic connectivity patterns across three
               topic systems, with ROC-AUC and AP values consistently above 0.85.
               We find that the temporal span of topic networks significantly
               impacts predictive performance: longer spans improve the stability
               and recall of topic presence, while shorter spans better capture
               evolving topic relationships. Overall, our findings demonstrate
               the predictability of topics in literary texts over time.},
  pdf       = {http://www.lrec-conf.org/proceedings/lrec2026/pdf/2026.lrec2026-1.65.pdf}
}

Walter Bisang and Alexander Mehler. 2026. Linguistic Features as Predictors of Students' Performance in Domain-Specific Critical Online Reasoning Tasks. International Test Commission Conference (ITC) 2026. accepted.

BibTeX

@inproceedings{Bisang:Mehler:2026,
  author    = {Bisang, Walter and Mehler, Alexander},
  title     = {Linguistic Features as Predictors of Students' Performance in
               Domain-Specific Critical Online Reasoning Tasks},
  booktitle = {International Test Commission Conference (ITC) 2026},
  location  = {Auckland, New Zealand},
  eventdate = {2026-06-30/2026-07-03},
  year      = {2026},
  keywords  = {core,core_b05},
  note      = {accepted}
}

Bhuvanesh Verma, Mounika Marreddy and Alexander Mehler. 2026. Predicting Convincingness in Political Speech: How Emotional Tone Shapes Persuasive Strength. Proceedings of the 15th Workshop on Computational Approaches to Subjectivity, Sentiment, & Social Media Analysis. accepted.

BibTeX

@inproceedings{Verma:et:al:2026,
  author    = {Verma, Bhuvanesh and Marreddy, Mounika and Mehler, Alexander},
  title     = {Predicting Convincingness in Political Speech: How Emotional Tone
               Shapes Persuasive Strength},
  booktitle = {Proceedings of the 15th Workshop on Computational Approaches to
               Subjectivity, Sentiment, \& Social Media Analysis},
  year      = {2026},
  note      = {accepted},
  keywords  = {Argument Detection, Argument Quality Assessment,Topic Modelling, Persuasiveness, Convincingness, Emotion Analysis, Argument Mining, satek},
  abstract  = {Emotional tone plays a central role in persuasion, yet its impact
               on computational assessments of political argument quality in
               real world election campaign speeches remains understudied. In
               this work, we investigate whether positive emotional framing correlates
               with higher perceived convincingness in political arguments. We
               fine-tune language models on argument quality datasets and test
               their ability to transfer convincingness predictions to real-world
               campaign speeches. Using a corpus of U.S. presidential campaign
               speeches, we analyze emotional polarity in relation to predicted
               persuasive strength to test whether positively framed arguments
               are judged more convincing than neutral or negative ones. Our
               empirical analysis shows that political parties rely heavily on
               argumentation during their election campaigns. Also, we found
               the evidence that politicians strategically employ emotional cues
               within their arguments during these campaign speeches, with positive
               emotions being more strongly associated with persuasive strength,
               for example in topics such as USMCA’s Effect on American Jobs
               and Agriculture, Border Control Policies, Progressive Tax Reforms.
               At the same time, we find that negative emotions have a weaker
               yet still non-negligible influence on voter persuasion in topics
               such as City Crime and Civil Unrest and White Supremacist Violence
               (Charlottesville Incident).}
}

Cedric Borkowski, Giuseppe Abrami, Dawit Terefe, Daniel Baumartz and Alexander Mehler. 2026. DUUIgateway: A Web Service for Platform-independent, Ubiquitous Big Data NLP. SoftwareX, 34:102549.

BibTeX

@article{Borkowski:et:al:2026,
  author    = {Borkowski, Cedric and Abrami, Giuseppe and Terefe, Dawit and Baumartz, Daniel
               and Mehler, Alexander},
  title     = {{DUUIgateway}: A Web Service for Platform-independent, Ubiquitous Big Data {NLP}},
  journal   = {SoftwareX},
  volume    = {34},
  pages     = {102549},
  year      = {2026},
  issn      = {2352-7110},
  doi       = {10.1016/j.softx.2026.102549},
  url       = {https://www.sciencedirect.com/science/article/pii/S2352711026000439},
  keywords  = {duui, neglab, core, core_b05, core_c08, new-data-spaces, circlet},
  abstract  = {Distributed processing of unstructured text data is a challenge
               in the rapidly changing and evolving natural language processing
               (NLP) landscape. This landscape is characterized by heterogeneous
               systems, models, and formats, and especially by the increasing
               influence of AI systems. While many of these systems handle text
               data, there are also unified systems that process multiple input
               and output formats, while allowing for distributed corpus processing.
               However, there are hardly any user-friendly interfaces that allow
               existing NLP frameworks to be used flexibly and extended in a
               user-controlled manner. Due to this gap and the increasing importance
               of NLP for various scientific disciplines, there has been a demand
               for a web and API based flexible software solution for deploying,
               managing and monitoring NLP systems. Such a solution is provided
               by Docker Unified UIMA-gateway. We introduce DUUIgateway and evaluate
               its API and user-driven approach to encapsulation. We also describe
               how these features improve the usability and accessibility of
               the NLP framework DUUI. We illustrate DUUIgateway in the field
               of process modeling in higher education and show how it closes
               the latter gap in NLP by making a variety of systems for processing
               text and multimodal data accessible to non-experts.}
}

Longwei Cong, Leon Hammerla, Sonja Hahn, Sebastian Gombert, Hendrik Drachsler and Ulf Kröhne. 2026. Automatic Short Answer Grading with LLMs: From Memorization to Reasoning. Proceedings of the 16th International Learning Analytics & Knowledge Conference (LAK26). accepted.

BibTeX

@inproceedings{Cong:et:al:2026a,
  title     = {Automatic Short Answer Grading with LLMs: From Memorization to Reasoning},
  author    = {Cong, Longwei and Hammerla, Leon and Hahn, Sonja and Gombert, Sebastian
               and Drachsler, Hendrik and Kr{\"o}hne, Ulf},
  booktitle = {Proceedings of the 16th International Learning Analytics \& Knowledge
               Conference (LAK26)},
  series    = {LAK26},
  location  = {Bergen, Norway},
  year      = {2026},
  pubstate  = {forthcoming},
  note      = {accepted},
  abstract  = {Short-answer questions provide valuable insights into students’
               understanding and cognitive processes for learning analytics.
               However, they are difficult to grade automatically as they require
               a high level of language comprehension. Automatic Short Answer
               Grading (ASAG) is therefore essential in large-scale educational
               settings. Recent work has applied encode-only pre-trained language
               models (PLMs), such as BERT, and generative large language models
               (LLMs) to ASAG. Although fine-tuned BERT-based models currently
               produce state-of-the-art results, they depend on substantial annotated
               datasets, which are frequently expensive and insufficient. This
               paper examines the performance of fine-tuning of several PLMs
               and LLMs for different dataset sizes and compares the results
               to those of prompt-based approaches. General-purpose and domain-specific
               models were fine-tuned on datasets ranging from 800 to 26,674
               student responses. Different prompt engineering strategies were
               tested including rubric-based prompts. Our results demonstrate
               that fine-tuned LLMs and rubric-based prompting can match or exceed
               the performance of BERT-based models. Rubric-based prompts with
               open-source model deliver comparable results without the need
               for annotation data or hardware-intensive training, while also
               mitigating data protection concerns. This work provides empirical
               evidence of the role of LLMs in ASAG and paves the way for future
               research into resource-efficient, interpretable and reasoning-driven
               grading.}
}

Alexander Mehler, Walter Bisang, Maxim Konca, Patryik Czerwinski, Jeremias Josef Graf and Jana Fritsch. 2026. Linguistic Features of Student Responses as Indicators of Performance in Critical Online Reasoning Tasks. Zeitschrift für Erziehungswissenschaft.

BibTeX

@article{Mehler:et:al:2026:a,
  author    = {Mehler, Alexander and Bisang, Walter and Konca, Maxim and Czerwinski, Patryik
               and Graf, Jeremias Josef and Fritsch, Jana},
  title     = {Linguistic Features of Student Responses as Indicators of Performance
               in Critical Online Reasoning Tasks},
  journal   = {Zeitschrift für Erziehungswissenschaft},
  publisher = {Springer Science and Business Media LLC},
  year      = {2026},
  issn      = {1862-5215},
  doi       = {10.1007/s11618-026-01388-6},
  url       = {http://dx.doi.org/10.1007/s11618-026-01388-6},
  keywords  = {core,core_b05}
}

Andy Lücking and Alexander Mehler. 2026–01–28/2026–01–30. Sprachbegleitende Gesten, KI und Virtuelle Realität. Invited talk.

BibTeX

@misc{Luecking:Mehler:2026,
  author    = {Lücking, Andy and Mehler, Alexander},
  title     = {{Sprachbegleitende Gesten, KI und Virtuelle Realität}},
  subtitle  = {{Multimodale Kommunikationsforschung im Schnittfeld von Linguistik und Computerwissenschaft}},
  howpublished = {Invited talk at DaFWEBKON26, Webkonferenz für
                  Deutschlehrende},
  date      = {2026-01-28/2026-01-30},
  url       = {https://dafwebkon.com/events/sprachbegleitende-gesten/},
  keywords  = {talk, cosgrin-vr},
  note      = {Invited talk},
  abstract  = {Alltagskommunikation ist üblicherweise multimodal (d.h., nutzt
               mehr als einen Informationskanal). Gesprochene Sprache wird beispielsweise
               von manuellen Gesten begleitet. Diese Gesten wiederum können über
               die linguistische Bedeutung hinausgehende Information beitragen.
               Sie sind also semantisch interessant.<br><br>Der Vortrag skizziert
               eine räumliche Gestensemantik und führt in KI-gestützte Gestenklassifikation
               ein. Um multimodale Verhaltensdaten zu erfassen und auszuwerten,
               werden zunehmend Methoden der Virtuellen Realität (VR) eingesetzt.
               Das Frankfurter Va.Si.Li-Lab kombiniert KI und VR für Multimodalitätsforschung.
               Auf diese Weise lassen sich z.B. multimodale, avatarbasierte VR-Interaktionen
               untersuchen und mit Face-to-face-Interaktionen vergleichen. Der
               Vortrag stellt erste Ergebnisse vor.}
}

Andy Lücking, Leon Hammerla and Alexander Mehler. 2026. Not every quantifier can be negated. Proceedings of Sinn und Bedeutung, Special Session “Philosophical and Linguistic Approaches to Negation (PhilLingNeg)”. accepted.

BibTeX

@inproceedings{Luecking:Hammerla:Mehler:2026,
  title     = {Not every quantifier can be negated},
  author    = {Lücking, Andy and Hammerla, Leon and Mehler, Alexander},
  booktitle = {Proceedings of \textit{Sinn und Bedeutung}, Special Session ``Philosophical
               and Linguistic Approaches to Negation (PhilLingNeg)''},
  series    = {SuB'30},
  year      = {2026},
  location  = {Frankfurt am Main},
  pubstate  = {forthcoming},
  note      = {accepted},
  keywords  = {neglab}
}

2025

Nasimeh Bahmanian, Mercedes Martinez Bruera, Andy Lücking, Leon Hammerla, Giuseppe Abrami, Manfred Sailer, Alexander Mehler and Sol Lago. 2025. Data management protocol for CRC 1629.

BibTeX

@techreport{Bahmanian:et:al:2025,
  title     = {Data management protocol for CRC 1629},
  author    = {Bahmanian, Nasimeh and Martinez Bruera, Mercedes
               and Lücking, Andy and Hammerla, Leon
               and Abrami, Giuseppe and Sailer, Manfred
               and Mehler, Alexander and Lago, Sol},
  institution = {CRC 1629 NegLaB - INF},
  number    = {1},
  year      = {2025},
  url       = {https://next.hessenbox.de/index.php/s/zQYBAfeXTJSDaib},
  keywords  = {neglab}
}

Mehdi Rahim and Ali Abusaleh. 2025. GENERATIVE AI ON CGM: TOWARDS A FOUNDATION MODEL FOR GLUCOSE PREDICTION, ROOT CAUSE ANALYSIS AND ANOMALY DETECTION. DIABETES TECHNOLOGY & THERAPEUTICS, 27:E144–E144.

BibTeX

@article{rahim2025generative,
  title     = {GENERATIVE AI ON CGM: TOWARDS A FOUNDATION MODEL FOR GLUCOSE PREDICTION,
               ROOT CAUSE ANALYSIS AND ANOMALY DETECTION},
  author    = {Rahim, Mehdi and Abusaleh, Ali},
  journal   = {DIABETES TECHNOLOGY \& THERAPEUTICS},
  volume    = {27},
  pages     = {E144--E144},
  year      = {2025},
  publisher = {MARY ANN LIEBERT, INC},
  address   = {140 HUGUENOT STREET, 3RD FL, NEW ROCHELLE, NY 10801 USA}
}

Andy Lücking. 2025–12–01/2025–12–02. Formal and Computational Iconic Gesture Semantics. Invited talk.

BibTeX

@misc{Luecking:2025-zif,
  author    = {Lücking, Andy},
  title     = {Formal and Computational Iconic Gesture Semantics},
  howpublished = {Invited talk at the ZiF Workshop \textit{Multimodal
                  Creativity}, Zentrum für interdisziplinäre
                  Forschung, Universität Bielefeld},
  date      = {2025-12-01/2025-12-02},
  note      = {Invited talk},
  keywords  = {cosgrin-vr}
}

Andy Lücking and Alexander Henlein. 2025–07–28/2025–08–08. Spatial Gesture Semantics. ESSLLI 2025 Advanced Course, Ruhr University Bochum.

BibTeX

@misc{Luecking:Henlein:2025-esslli,
  author    = {Lücking, Andy and Henlein, Alexander},
  title     = {Spatial Gesture Semantics},
  howpublished = {ESSLLI 2025 Advanced Course, Ruhr University Bochum},
  date      = {2025-07-28/2025-08-08},
  year      = {2025},
  note      = {ESSLLI 2025 Advanced Course, Ruhr University Bochum},
  url       = {https://aluecking.github.io/ESSLLI2025/},
  keywords  = {gemdis}
}

Andy Lücking. 2025–09–24. From Gesture Representation to Spatial Gesture Semantics. Invited talk.

BibTeX

@misc{Luecking:2025-mmsr,
  author    = {Lücking, Andy},
  title     = {From Gesture Representation to Spatial Gesture Semantics},
  howpublished = {Invited talk at the IWCS Workshop \textit{Beyond
                  Language: Multimodal Semantic Representations} (MMSR
                  II), Heinrich Heine University, Düsseldorf},
  date      = {2025-09-24},
  note      = {Invited talk},
  url       = {https://mmsr-workshop.github.io/},
  keywords  = {gemdis}
}

Leon Hammerla, Andy Lücking, Carolin Reinert and Alexander Mehler. December, 2025. D-Neg: Syntax-Aware Graph Reasoning for Negation Detection. Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics, 1432–1454.

BibTeX

@inproceedings{Hammerla:et:al:2025b,
  title     = {{D}-Neg: Syntax-Aware Graph Reasoning for Negation Detection},
  author    = {Hammerla, Leon and Lücking, Andy and Reinert, Carolin and Mehler, Alexander},
  booktitle = {Proceedings of the 14th International Joint Conference on Natural
               Language Processing and the 4th Conference of the Asia-Pacific
               Chapter of the Association for Computational Linguistics},
  editor    = {Inui, Kentaro and Sakti, Sakriani and Wang, Haofen and Wong, Derek F.
               and Bhattacharyya, Pushpak and Banerjee, Biplab and Ekbal, Asif and Chakraborty, Tanmoy
               and Singh, Dhirendra Pratap},
  publisher = {The Asian Federation of Natural Language Processing and The Association for Computational Linguistics},
  address   = {Mumbai, India},
  year      = {2025},
  month     = {dec},
  pages     = {1432--1454},
  isbn      = {979-8-89176-303-6},
  url       = {https://aclanthology.org/2025.findings-ijcnlp.89/},
  keywords  = {neglab},
  abstract  = {Despite the communicative importance of negation, its detection
               remains challenging. Previous approaches perform poorly in out-of-domain
               scenarios, and progress outside of English has been slow due to
               a lack of resources and robust models. To address this gap, we
               present D-Neg: a syntax-aware graph reasoning model based on a
               transformer that incorporates syntactic embeddings by attention-gating.
               D-Neg uses graph attention to represent syntactic structures,
               emulating the effectiveness of rule-based dependency approaches
               for negation detection. We train D-Neg using 7 English resources
               and their translations into 10 languages, all aligned at the annotation
               level. We conduct an evaluation of all these datasets in in-domain
               and out-of-domain settings. Our work represents a significant
               advance in negation detection, enabling more effective cross-lingual
               research.}
}

Leon Hammerla, Alexander Mehler and Giuseppe Abrami. December, 2025. Standardizing Heterogeneous Corpora with DUUR: A Dual Data- and Process-Oriented Approach to Enhancing NLP Pipeline Integration. Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics, 1410–1425.

BibTeX

@inproceedings{Hammerla:et:al:2025a,
  author    = {Hammerla, Leon and Mehler, Alexander and Abrami, Giuseppe},
  title     = {Standardizing Heterogeneous Corpora with {DUUR}: A Dual Data-
               and Process-Oriented Approach to Enhancing {NLP} Pipeline Integration},
  editor    = {Inui, Kentaro and Sakti, Sakriani and Wang, Haofen and Wong, Derek F.
               and Bhattacharyya, Pushpak and Banerjee, Biplab and Ekbal, Asif and Chakraborty, Tanmoy
               and Singh, Dhirendra Pratap},
  booktitle = {Proceedings of the 14th International Joint Conference on Natural
               Language Processing and the 4th Conference of the Asia-Pacific
               Chapter of the Association for Computational Linguistics},
  month     = {dec},
  year      = {2025},
  address   = {Mumbai, India},
  publisher = {The Asian Federation of Natural Language Processing and The Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-ijcnlp.87/},
  pages     = {1410--1425},
  isbn      = {979-8-89176-303-6},
  abstract  = {Despite their success, LLMs are too computationally expensive
               to replace task- or domain-specific NLP systems. However, the
               variety of corpus formats makes reusing these systems difficult.
               This underscores the importance of maintaining an interoperable
               NLP landscape. We address this challenge by pursuing two objectives:
               standardizing corpus formats and enabling massively parallel corpus
               processing. We present a unified conversion framework embedded
               in a massively parallel, microservice-based, programming language-independent
               NLP architecture designed for modularity and extensibility. It
               allows for the integration of external NLP conversion tools and
               supports the addition of new components that meet basic compatibility
               requirements. To evaluate our dual data- and process-oriented
               approach to standardization, we (1) benchmark its efficiency in
               terms of processing speed and memory usage, (2) demonstrate the
               benefits of standardized corpus formats for NLP downstream tasks,
               and (3) illustrate the advantages of incorporating custom formats
               into a corpus format ecosystem.},
  keywords  = {neglab,duui}
}

Sonja Hahn, Leon Hammerla, Corinna Hankeln, Sebastian Groß, Christina Röpers and Ulf Kröhne. 2025. Constructed Responses beyond NLP – Auswertungsansätze für graphische Antworten. Inproceedings of 12. Jahrestagung der Gesellschaft für empirische Bildungsforschung (GEBF 2025).

BibTeX

@inproceedings{Hahn:et:al:2025,
  title     = {Constructed Responses beyond NLP – Auswertungsansätze für graphische Antworten},
  author    = {Hahn, Sonja and Hammerla, Leon and Hankeln, Corinna and Groß, Sebastian
               and Röpers, Christina and Kröhne, Ulf},
  booktitle = {Inproceedings of 12. Jahrestagung der Gesellschaft für empirische
               Bildungsforschung (GEBF 2025)},
  year      = {2025},
  location  = {Mannheim, Deutschland}
}

Staffan Larsson, Jonathan Ginzburg, Robin Cooper and Andy Lücking. 2025. Finding Answers to Questions: Bridging between Type-based and Computational Neuroscience Approaches. Proceedings of the 16th International Conference on Computational Semantics, 128–136.

BibTeX

@inproceedings{Larsson:et:al:2025-spa-qna,
  title     = {Finding Answers to Questions: {Bridging} between Type-based and
               Computational Neuroscience Approaches},
  author    = {Larsson, Staffan and Ginzburg, Jonathan and Cooper, Robin and Lücking, Andy},
  booktitle = {Proceedings of the 16th International Conference on Computational Semantics},
  editor    = {Evang, Kilian and Kallmeyer, Laura and Pogodalla, Sylvain},
  location  = {Düsseldorf, Germany},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.iwcs-1.12/},
  pages     = {128--136},
  series    = {IWCS},
  year      = {2025}
}

Roman Christof, Farnaz Zeidi, Manuela Messelhäußer, Dirk Mentzer, Renate Koenig, Liam Childs and Alexander Mehler. November, 2025. MedLinkDE – MedDRA Entity Linking for German with Guided Chain of Thought Reasoning. Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing, 31569–31581.

BibTeX

@inproceedings{Christof:et:al:2025,
  title     = {{M}ed{L}ink{DE} {--} {M}ed{DRA} Entity Linking for {G}erman with
               Guided Chain of Thought Reasoning},
  author    = {Christof, Roman and Zeidi, Farnaz and Messelhäußer, Manuela and Mentzer, Dirk
               and Koenig, Renate and Childs, Liam and Mehler, Alexander},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural
               Language Processing},
  editor    = {Christodoulopoulos, Christos and Chakraborty, Tanmoy and Rose, Carolyn
               and Peng, Violet},
  year      = {2025},
  month     = {nov},
  publisher = {Association for Computational Linguistics},
  address   = {Suzhou, China},
  pages     = {31569--31581},
  isbn      = {979-8-89176-332-6},
  doi       = {10.18653/v1/2025.emnlp-main.1609},
  url       = {https://aclanthology.org/2025.emnlp-main.1609/},
  pdf       = {https://aclanthology.org/2025.emnlp-main.1609.pdf},
  abstract  = {In pharmacovigilance, effective automation of medical data structuring,
               especially linking entities to standardized terminologies such
               as MedDRA, is critical. This challenge is rarely addressed for
               German data. With MedLinkDE we address German MedDRA entity linking
               for adverse drug reactions in a two-step approach: (1) retrieval
               of medical terms with fine-tuned embedding models, followed (2)
               by guided chain-of-thought re-ranking using LLMs. To this end,
               we introduce RENOde, a German real-world MedDRA dataset consisting
               of reportings from patients and healthcare professionals. To overcome
               the challenges posed by the linguistic diversity of these reports,
               we generate synthetic data mapping the two reporting styles of
               patients and healthcare professionals. Our embedding models, fine-tuned
               on these synthetic, quasi-personalized datasets, show competitive
               performance with real datasets in terms of accuracy at high top-
               recall, providing a robust basis for re-ranking. Our subsequent
               guided Chain of Thought (CoT) re-ranking, informed by MedDRA coding
               guidelines, improves entity linking accuracy by approximately
               15{\%} (Acc@1) compared to embedding-only strategies. In this
               way, our approach demonstrates the feasibility of entity linking
               in medical reports under the constraints of data scarcity by relying
               on synthetic data reflecting different informant roles of reporting
               persons.}
}

Andy Lücking, Felix Voll, Daniel Rott, Alexander Henlein and Alexander Mehler. 2025. Head and Hand Movements During Turn Transitions: Data-Based Multimodal Analysis Using the Frankfurt VR Gesture–Speech Alignment Corpus (FraGA). Proceedings of the 29th Workshop on The Semantics and Pragmatics of Dialogue – Full Papers, 146–156.

BibTeX

@inproceedings{Luecking:Voll:Rott:Henlein:Mehler:2025-fraga,
  author    = {Lücking, Andy and Voll, Felix and Rott, Daniel and Henlein, Alexander
               and Mehler, Alexander},
  title     = {Head and Hand Movements During Turn Transitions: Data-Based Multimodal
               Analysis Using the {Frankfurt VR Gesture--Speech Alignment Corpus}
               ({FraGA})},
  booktitle = {Proceedings of the 29th Workshop on The Semantics and Pragmatics
               of Dialogue -- Full Papers},
  series    = {SemDial'25 -- Bialogue},
  publisher = {SEMDIAL},
  year      = {2025},
  pages     = {146--156},
  url       = {http://semdial.org/anthology/Z25-Luecking_semdial_3316.pdf},
  keywords  = {gemdis}
}

Andy Lücking. 2025. Referential Transparency Theory. Wörterbücher zur Sprach- und Kommunikationswissenschaft (WSK) Online.

BibTeX

@incollection{Luecking:2025-wsk-rtt,
  booktitle = {Wörterbücher zur Sprach- und Kommunikationswissenschaft (WSK) Online},
  url       = {https://www.degruyterbrill.com/database/WSK/entry/wsk__38780752/html},
  editor    = {Schierholz, Stefan J. and Giacomini, Laura},
  doi       = {10.1515/wsk},
  title     = {Referential Transparency Theory},
  author    = {Lücking, Andy},
  year      = {2025},
  publisher = {De Gruyter},
  address   = {Berlin and Boston},
  keywords  = {own,bookchapter,gemdis,neglab}
}

Giuseppe Abrami, Daniel Bundan, Chrisowaladis Manolis and Alexander Mehler. 2025. VR-ParlExplorer: A Hypertext System for the Collaborative Interaction in Parliamentary Debate Spaces. Proceedings of the 36th ACM Conference on Hypertext and Social Media, 177–183.

BibTeX

@inproceedings{Abrami:et:al:2025:c,
  author    = {Abrami, Giuseppe and Bundan, Daniel and Manolis, Chrisowaladis
               and Mehler, Alexander},
  title     = {VR-ParlExplorer: A Hypertext System for the Collaborative Interaction
               in Parliamentary Debate Spaces},
  booktitle = {Proceedings of the 36th ACM Conference on Hypertext and Social Media},
  series    = {HT '25},
  location  = {Chicago, USA},
  year      = {2025},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  pages     = {177--183},
  numpages  = {7},
  isbn      = {9798400715341},
  doi       = {10.1145/3720553.3746672},
  url       = {https://doi.org/10.1145/3720553.3746672},
  pdf       = {https://dl.acm.org/doi/pdf/10.1145/3720553.3746672},
  abstract  = {The enhanced visualization and interaction with information in
               collaborative VR environments enabled by chatbots is currently
               rather limited. To fill this gap and create a concrete application
               that combines spatial and virtual concepts of hypertext systems
               based on the use of LLMs, we present VR-ParlExplorer as a system
               for virtualizing plenary debates that allows users to interact
               with virtual members of parliament through chatbots. VR-ParlExplorer
               is implemented as a Plugin for Va.Si.Li-Lab to enable immersion
               in the dynamics of communication in parliamentary debates. The
               paper describes the functionality of VR-ParlExplorer and discusses
               specifics of the use case it addresses.}
}

Daniel Bundan, Giuseppe Abrami and Alexander Mehler. 2025. Multimodal Docker Unified UIMA Interface: New Horizons for Distributed Microservice-Oriented Processing of Corpora using UIMA. Proceedings of the 21st Conference on Natural Language Processing (KONVENS 2025): Long and Short Papers, 257–268.

BibTeX

@inproceedings{Bundan:Abrami:Mehler:2025,
  title     = {Multimodal Docker Unified {UIMA} Interface: New Horizons for Distributed
               Microservice-Oriented Processing of Corpora using {UIMA}},
  author    = {Bundan, Daniel and Abrami, Giuseppe and Mehler, Alexander},
  booktitle = {Proceedings of the 21st Conference on Natural Language Processing
               (KONVENS 2025): Long and Short Papers},
  series    = {KONVENS '25},
  editor    = {Wartena, Christian and Heid, Ulrich},
  location  = {Hildesheim, Germany},
  publisher = {HsH Applied Academics},
  address   = {Hannover, Germany},
  year      = {2025},
  pages     = {257--268},
  url       = {https://aclanthology.org/2025.konvens-1.22/},
  pdf       = {https://aclanthology.org/2025.konvens-1.22.pdf},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2025/09/Poster_Multimodal_DUUI_KONVENS_2025.pdf},
  keywords  = {duui,neglab,new-data-spaces,circlet}
}

Subba Reddy Oota, Khushbu Pahwa, Marreddy Mounika, Maneesh Singh, Manish Gupta and Bapi S. Raju. 2025. Multi-modal brain encoding models for multi-modal stimuli. Proceedings of the International Conference on Learning Representations (ICLR).

BibTeX

@inproceedings{marreddy2025iclr,
  title     = {Multi-modal brain encoding models for multi-modal stimuli},
  author    = {Oota, Subba Reddy and Pahwa, Khushbu and Marreddy, Mounika and Singh, Maneesh
               and Gupta, Manish and Raju, Bapi S.},
  booktitle = {Proceedings of the International Conference on Learning Representations (ICLR)},
  year      = {2025}
}

Mounika Marreddy, Subba Reddy Oota and Manish Gupta. 2025. Large language models are human-like annotators. European Conference on Information Retrieval, 291–299.

BibTeX

@inproceedings{marreddy:et:al:2025-ecir,
  title     = {Large language models are human-like annotators},
  author    = {Marreddy, Mounika and Oota, Subba Reddy and Gupta, Manish},
  booktitle = {European Conference on Information Retrieval},
  pages     = {291--299},
  year      = {2025},
  publisher = {Springer}
}

Mounika Marreddy, Subba Reddy Oota, Venkata Charan Chinni, Manish Gupta and Lucie Flek. 2025. USDC: A Dataset of User Stance and Dogmatism in Long Conversations. Findings of ACL.

BibTeX

@inproceedings{marreddy:et:al:2025,
  title     = {USDC: A Dataset of User Stance and Dogmatism in Long Conversations},
  author    = {Marreddy, Mounika and Oota, Subba Reddy and Chinni, Venkata Charan
               and Gupta, Manish and Flek, Lucie},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2025},
  year      = {2025}
}

Omar Momen, Manuel Schaaf and Alexander Mehler. July, 2025. Filling the Temporal Void: Recovering Missing Publication Years in the Project Gutenberg Corpus Using LLMs. Findings of the Association for Computational Linguistics: ACL 2025, 17318–17334.

BibTeX

@inproceedings{Momen:Schaaf:Mehler:2025,
  author    = {Momen, Omar and Schaaf, Manuel and Mehler, Alexander},
  title     = {Filling the Temporal Void: Recovering Missing Publication Years
               in the Project Gutenberg Corpus Using {LLM}s},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2025},
  editor    = {Che, Wanxiang and Nabende, Joyce and Shutova, Ekaterina and Pilehvar, Mohammad Taher},
  publisher = {Association for Computational Linguistics},
  address   = {Vienna, Austria},
  year      = {2025},
  month     = {jul},
  pages     = {17318--17334},
  isbn      = {979-8-89176-256-5},
  url       = {https://aclanthology.org/2025.findings-acl.890/},
  pdf       = {https://aclanthology.org/2025.findings-acl.890.pdf},
  abstract  = {Analysing texts spanning long periods of time is critical for
               researchers in historical linguistics and related disciplines.
               However, publicly available corpora suitable for such analyses
               are scarce. The Project Gutenberg (PG) corpus presents a significant
               yet underutilized opportunity in this context, due to the absence
               of accurate temporal metadata. We take advantage of language models
               and information retrieval to explore four sources of information
               {--} Open Web, Wikipedia, Open Library API, and PG books texts
               {--} to add missing temporal metadata to the PG corpus. Through
               20 experiments employing state-of-the-art Large Language Models
               (LLMs) and Retrieval-Augmented Generation (RAG) methods, we estimate
               the production years of all PG books. We curate an enriched metadata
               repository for the PG corpus and propose a refined version for
               it, which includes 53,774 books with a total of 3.8 billion tokens
               in 11 languages, produced between 1600 and 2000. This work provides
               a new resource for computational linguistics and humanities studies
               focusing on diachronic analyses. The final dataset and all experiments
               data are publicly available (https://github.com/OmarMomen14/pg-dates).}
}

Andy Lücking and Jonathan Ginzburg. 2025. Postmodern Quantification with Stuff. Proceedings of Sinn und Bedeutung, 29:917–934.

BibTeX

@inproceedings{Luecking:Ginzburg:2025-mass-nouns,
  title     = {Postmodern Quantification with Stuff},
  author    = {Lücking, Andy and Ginzburg, Jonathan},
  booktitle = {Proceedings of \textit{Sinn und Bedeutung}},
  volume    = {29},
  series    = {SuB'29},
  editor    = {Longo, Federica and Panizza, Daniele},
  year      = {2025},
  doi       = {10.18148/sub/2024.v29.1254},
  url       = {https://doi.org/10.18148/sub/2024.v29.1254},
  pdf       = {https://ojs.ub.uni-konstanz.de/sub/index.php/sub/article/view/1254/1207},
  pages     = {917--934},
  location  = {Noto (Syracuse), Italy}
}

Mevlüt Bagci, Alexander Mehler, Giuseppe Abrami, Patrick Schrottenbacher, Christian Spiekermann, Maxim Konca, Jakob Schreiber, Kevin Saukel, Marc Quintino and Juliane Engel. April, 2025. Simulation-Based Learning in Virtual Reality: Three Use Cases from Social Science and Technological Foundations in Terms of Va.Si.Li-Lab. Technology, Knowledge and Learning.

BibTeX

@article{Bagci:et:al:2025,
  author    = {Bagci, Mevl{\"u}t and Mehler, Alexander and Abrami, Giuseppe and Schrottenbacher, Patrick
               and Spiekermann, Christian and Konca, Maxim and Schreiber, Jakob and Saukel, Kevin
               and Quintino, Marc and Engel, Juliane},
  title     = {Simulation-Based Learning in Virtual Reality: Three Use Cases
               from Social Science and Technological Foundations in Terms of
               {Va.Si.Li-Lab}},
  journal   = {Technology, Knowledge and Learning},
  publisher = {Springer Nature},
  year      = {2025},
  month     = {apr},
  day       = {01},
  abstract  = {This article examines the predictability of communication scenarios
               within the context of simulation-based learning in virtual reality
               (VR). The aim is to investigate multimodal patterns of social
               interaction that accompany human communication in conflict situations.
               Understanding these patterns can ultimately enhance educational
               technologies' ability to address problematic learning situations
               and support learners in benefiting from VR-based learning. To
               achieve this, the system must accurately predict the task context.
               A central goal of this article is to shed light on this potential.
               Additionally, our research extends to visual communication beyond
               purely linguistic interactions, aiming to enhance VR immersion
               in communicative practices. To this end, the article examines
               the associations between multimodal information units generated
               by individuals interacting in three distinct learning scenarios:
               work organization, school pedagogy, and social life. Several experiments
               demonstrate that predictability exists when multimodal communication
               is analyzed at the level of eight coarse-grained modalities, including
               speech, head and body movements, and gestures. The interactions
               are observed in VR using Va.Si.Li-Lab, a simulation-based system
               that virtualizes learning scenarios, enabling participants to
               collaboratively manage potentially conflicting tasks through multimodal
               communication (Mehler et al. in: Duffy (ed) Digital human modeling
               and applications in health, safety, ergonomics and risk management,
               Springer Nature Switzerland, Cham, 2023). The article discusses
               the technology underlying Va.Si.Li-Lab, its database, and the
               post-processing of interaction data, including speech data. It
               provides theoretical motivation for the application scenarios
               and presents experimental data to illustrate the system's usefulness.
               Based on these data, the article details experiments on the multimodal
               detection of social scenarios, positioning Va.Si.Li-Lab as a use
               case in simulation-based learning.},
  issn      = {2211-1670},
  doi       = {10.1007/s10758-025-09837-7},
  url       = {https://doi.org/10.1007/s10758-025-09837-7}
}

Kevin Bönisch, Giuseppe Abrami and Alexander Mehler. 2025. Towards Unified, Dynamic and Annotation-based Visualisations and Exploration of Annotated Big Data Corpora with the Help of Unified Corpus Explorer. Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (System Demonstrations), 522–534. Best Demo Award.

BibTeX

@inproceedings{Boenisch:et:al:2025,
  author    = {B{\"o}nisch, Kevin and Abrami, Giuseppe and Mehler, Alexander},
  title     = {Towards Unified, Dynamic and Annotation-based Visualisations and
               Exploration of Annotated Big Data Corpora with the Help of Unified
               Corpus Explorer},
  booktitle = {Proceedings of the 2025 Conference of the Nations of the Americas
               Chapter of the Association for Computational Linguistics: Human
               Language Technologies (System Demonstrations)},
  editor    = {Dziri, Nouha and Ren, Sean (Xiang) and Diao, Shizhe},
  publisher = {Association for Computational Linguistics},
  address   = {Albuquerque, New Mexico},
  year      = {2025},
  pages     = {522--534},
  isbn      = {979-8-89176-191-9},
  url       = {https://aclanthology.org/2025.naacl-demo.42/},
  pdf       = {https://aclanthology.org/2025.naacl-demo.42.pdf},
  note      = {Best Demo Award},
  keywords  = {uce,new-data-spaces,circlet,core,core_c08},
  abstract  = {The annotation and exploration of large text corpora, both automatic
               and manual, presents significant challenges across multiple disciplines,
               including linguistics, digital humanities, biology, and legal
               science. These challenges are exacerbated by the heterogeneity
               of processing methods, which complicates corpus visualization,
               interaction, and integration. To address these issues, we introduce
               the Unified Corpus Explorer (UCE), a standardized, dockerized,
               open-source and dynamic Natural Language Processing (NLP) application
               designed for flexible and scalable corpus navigation. Herein,
               UCE utilizes the UIMA format for NLP annotations as a standardized
               input, constructing interfaces and features around those annotations
               while dynamically adapting to the corpora and their extracted
               annotations. We evaluate UCE based on a user study and demonstrate
               its versatility as a corpus explorer based on generative AI.}
}

Andy Lücking. 2025. Deixis. Wörterbücher zur Sprach- und Kommunikationswissenschaft (WSK) Online.

BibTeX

@incollection{Luecking:2025-wsk-deixis,
  author    = {Lücking, Andy},
  title     = {Deixis},
  booktitle = {Wörterbücher zur Sprach- und Kommunikationswissenschaft (WSK) Online},
  editor    = {Schierholz, Stefan J. and Giacomini, Laura},
  publisher = {De Gruyter},
  address   = {Berlin, Boston},
  year      = {2025},
  doi       = {10.1515/wsk},
  url       = {https://www.degruyterbrill.com/database/WSK/entry/wsk_idf6497d06-df52-478d-a30f-a3a99151d81e/html},
  keywords  = {gemdis}
}

Andy Lücking and Jonathan Ginzburg. 2025. Exceptions From Rules and Noteworthy Exceptions. Linguistics and Philosophy, 48:371–409.

BibTeX

@article{Luecking:Ginzburg:2025-exceptions,
  author    = {Lücking, Andy and Ginzburg, Jonathan},
  title     = {Exceptions From Rules and Noteworthy Exceptions},
  subtitle  = {The Balance Scale for Making Exceptions},
  journal   = {Linguistics and Philosophy},
  year      = {2025},
  volume    = {48},
  pages     = {371--409},
  url       = {https://doi.org/10.1007/s10988-024-09429-1},
  doi       = {10.1007/s10988-024-09429-1},
  keywords  = {gemdis,neglab}
}

Giuseppe Abrami, Markos Genios, Filip Fitzermann, Daniel Baumartz and Alexander Mehler. 2025. Docker Unified UIMA Interface: New perspectives for NLP on big data. SoftwareX, 29:102033.

BibTeX

@article{Abrami:et:al:2025:a,
  author    = {Giuseppe Abrami and Markos Genios and Filip Fitzermann and Daniel Baumartz
               and Alexander Mehler},
  title     = {Docker Unified UIMA Interface: New perspectives for NLP on big data},
  journal   = {SoftwareX},
  volume    = {29},
  pages     = {102033},
  year      = {2025},
  issn      = {2352-7110},
  doi       = {10.1016/j.softx.2024.102033},
  url       = {https://www.sciencedirect.com/science/article/pii/S2352711024004047},
  keywords  = {Docker, Kubernetes, UIMA, Distributed NLP, duui, biofid, neglab, new-data-spaces, circlet, core, core_c08},
  abstract  = {Processing large amounts of natural language text using machine
               learning-based models is becoming important in many disciplines.
               This demand is being met by a variety of approaches, resulting
               in the heterogeneous deployment of separate, partly incompatible,
               not natively scalable applications. To overcome the technological
               bottleneck involved, we have developed Docker Unified UIMA Interface,
               a system for the standardized, parallel, platform-independent,
               distributed and microservices-based solution for processing large
               and extensive text corpora with any NLP method. We present DUUI
               as a framework that enables automated orchestration of GPU-based
               NLP processes beyond the existing Docker Swarm cluster variant,
               and in addition to the adaptation to new runtime environments
               such as Kubernetes. Therefore, a new driver for DUUI is introduced,
               which enables the lightweight orchestration of DUUI processes
               within a Kubernetes environment in a scalable setup. In this way,
               the paper opens up novel text-technological perspectives for existing
               practices in disciplines that deal with the scientific analysis
               of large amounts of data based on NLP.}
}

Giuseppe Abrami, Daniel Baumartz and Alexander Mehler. 2025. DUUI: A Toolbox for the Construction of a new Kind of Natural Language Processing. Proceedings of the DHd 2025: Under Construction. Geisteswissenschaften und Data Humanities, 446–448.

BibTeX

@inproceedings{Abrami:et:al:2025:b,
  author    = {Abrami, Giuseppe and Baumartz, Daniel and Mehler, Alexander},
  title     = {DUUI: A Toolbox for the Construction of a new Kind of Natural
               Language Processing},
  booktitle = {Proceedings of the DHd 2025: Under Construction. Geisteswissenschaften
               und Data Humanities},
  series    = {DHd 2025},
  location  = {Bielefeld, Germany},
  publisher = {Zenodo},
  year      = {2025},
  pages     = {446--448},
  numpages  = {3},
  doi       = {10.5281/zenodo.14943128},
  url       = {https://doi.org/10.5281/zenodo.14943128},
  poster    = {https://zenodo.org/records/14944575},
  keywords  = {duui,core,core_c08}
}

Patrick Schrottenbacher, Alexander Mehler, Theresa Berg, Jasper Hustedt, Julian Gagel, Timo Lüttig and Giuseppe Abrami. 2025. Geo-spatial hypertext in virtual reality: mapping and navigating global news event spaces. New Review of Hypermedia and Multimedia, 31(1-2):76–105.

BibTeX

@article{Schrottenbacher:et:al:2025,
  author    = {Schrottenbacher, Patrick and Mehler, Alexander and Berg, Theresa
               and Hustedt, Jasper and Gagel, Julian and Lüttig, Timo and Abrami, Giuseppe},
  title     = {Geo-spatial hypertext in virtual reality: mapping and navigating
               global news event spaces},
  journal   = {New Review of Hypermedia and Multimedia},
  volume    = {31},
  number    = {1-2},
  pages     = {76--105},
  year      = {2025},
  publisher = {Taylor \& Francis},
  doi       = {10.1080/13614568.2024.2383601},
  url       = {https://doi.org/10.1080/13614568.2024.2383601},
  abstract  = {Every day, a myriad of events take place that are documented and
               shared online through news articles from a variety of sources.
               As a result, as users navigate the Web, the volume of data can
               lead to information overload, making it difficult to find specific
               details about an event. We present News in Time and Space (NiTS)
               to address this issue: NiTS is a fully immersive system integrated
               into Va.Si.Li-Lab that organises textual information in a geospatial
               hypertext system in virtual reality. With NiTS, users can visualise,
               filter and interact with information currently based on GDELT
               on a virtual globe providing document networks to analyse global
               events and trends. The article describes NiTS, its event semantics
               and architecture. It evaluates NiTS in comparison to a classic
               search engine website, extended by NiTS's information filtering
               capabilities to make it comparable. Our comparison with this website
               technology, which is directly linked to the user's usage habits,
               shows that NiTS enables comparable information exploration even
               if the users have little or no experience with VR. That is, we
               observe an equivalent search result behaviour, but with the advantage
               that VR allows users to get their results with a higher level
               of usability without distracting them from their tasks. Through
               its integration with Va.Si.Li-Lab, a simulation-based learning
               environment, NiTS can be used in simulations of learning processes
               aimed at studying critical online reasoning, where Va.Si.Li-Lab
               guarantees that this can be done in relation to individual or
               groups of learners.}
}

Kevin Bönisch, Alexander Mehler, Shaduan Babbili, Yannick Heinrich, Philipp Stephan and Giuseppe Abrami. 2025. Viki LibraRy: Collaborative Hypertext Browsing and Navigation in Virtual Reality. New Review of Hypermedia and Multimedia, 31(1-2):45–75.

BibTeX

@article{Boenisch:et:al:2025:b,
  author    = {B{\"o}nisch, Kevin and Mehler, Alexander and Babbili, Shaduan
               and Heinrich, Yannick and Stephan, Philipp and Abrami, Giuseppe},
  title     = {{Viki LibraRy}: Collaborative Hypertext Browsing and Navigation
               in Virtual Reality},
  journal   = {New Review of Hypermedia and Multimedia},
  volume    = {31},
  number    = {1-2},
  pages     = {45--75},
  year      = {2025},
  publisher = {Taylor \& Francis},
  doi       = {10.1080/13614568.2024.2383581},
  url       = {https://doi.org/10.1080/13614568.2024.2383581},
  abstract  = {We present Viki LibraRy, a dynamically built library in virtual
               reality (VR) designed to visualize hypertext systems, with an
               emphasis on collaborative interaction and spatial immersion. Viki
               LibraRy goes beyond traditional methods of text distribution by
               providing a platform where users can share, process, and engage
               with textual information. It operates at the interface of VR,
               collaborative learning and spatial data processing to make reading
               tangible and memorable in a spatially mediated way. The article
               describes the building blocks of Viki LibraRy, its underlying
               architecture, and several use cases. It evaluates Viki LibraRy
               in comparison to a conventional web interface for text retrieval
               and reading. The article shows that Viki LibraRy provides users
               with spatial references for structuring their recall, so that
               they can better remember consulted texts and their meta-information
               (e.g. in terms of subject areas and content categories).}
}

2024

Andy Lücking. 2024-03-14. Gesture semantics: Deictic Reference, deferred reference, and iconic co-speech gestures. Invited talk.

BibTeX

@misc{Luecking:2024-quebec,
  author    = {Lücking, Andy},
  title     = {Gesture semantics: Deictic Reference, deferred reference, and
               iconic co-speech gestures},
  howpublished = {Invited talk at Stevan Harnad's interdisciplinary
                  seminar series in Cognitive Informatics at the
                  Université du Québec à Montréal},
  note      = {Invited talk},
  date      = {2024-03-14},
  keywords  = {gemdis}
}

Ulf Kröhne, Leon Hammerla, Corinna Hankeln, Marc Müller and Sonja Hahn. 2024. How much training data are required? Automatic scoring using prompting compared to text classification tasks as fine-tuning large-language models. Inproceedings of 53. Kongress der Deutschen Gesellschaft für Psychologie / 15. ÖGP Conference.

BibTeX

@inproceedings{Kroehne:et:al:2024,
  author    = {Kröhne, Ulf and Hammerla, Leon and Hankeln, Corinna
               and Müller, Marc and Hahn, Sonja},
  title     = {How much training data are required? Automatic scoring using prompting
               compared to text classification tasks as fine-tuning large-language
               models},
  booktitle = {Inproceedings of 53. Kongress der Deutschen Gesellschaft für Psychologie
               / 15. ÖGP Conference},
  year      = {2024},
  location  = {Wien, Österreich}
}

Ali Abusaleh and Mehdi Rahim. 2024. A Multitask VAE for Time Series Preprocessing and Prediction of Blood Glucose Level.

BibTeX

@misc{Abusaleh:Rahim:2024,
  author    = {Abusaleh, Ali and Rahim, Mehdi},
  title     = {A Multitask VAE for Time Series Preprocessing and Prediction of
               Blood Glucose Level},
  year      = {2024},
  archiveprefix = {arXiv},
  eprint    = {2410.00015},
  primaryclass = {eess.SP},
  url       = {https://arxiv.org/abs/2410.00015}
}

Babajide Owoyele, Bhuvanesh Verma, Victor Omolaoye, Jonathan Antonio Edelman, Derk Loorbach and Gerard de Melo. 2024. Socio-Semantic X-Ray of Multi-Actor Constellations using Topics and Interstitial Authors: A Toolkit for Augmenting Computational Literature Reviews. Available at SSRN 4713155.

BibTeX

@article{Owoyele:et:al:2020,
  author    = {Owoyele, Babajide and Verma, Bhuvanesh and Omolaoye, Victor
               and Edelman, Jonathan Antonio and Loorbach, Derk and de Melo, Gerard},
  title     = {Socio-Semantic X-Ray of Multi-Actor Constellations using Topics
               and Interstitial Authors: A Toolkit for Augmenting Computational
               Literature Reviews},
  journal   = {Available at SSRN 4713155},
  year      = {2024},
  doi       = {10.2139/ssrn.4713155},
  url       = {https://dx.doi.org/10.2139/ssrn.4713155}
}

Babajide Alamu Owoyele, Martin Schilling, Rohan Sawahn, Niklas Kaemer, Pavel Zherebenkov, Bhuvanesh Verma, Wim Pouw and Gerard de Melo. 2024. MaskAnyone Toolkit: Offering Strategies for Minimizing Privacy Risks and Maximizing Utility in Audio-Visual Data Archiving.

BibTeX

@misc{Owoyele:et:al:2024,
  author    = {Owoyele, Babajide Alamu and Schilling, Martin and Sawahn, Rohan
               and Kaemer, Niklas and Zherebenkov, Pavel and Verma, Bhuvanesh
               and Pouw, Wim and de Melo, Gerard},
  title     = {MaskAnyone Toolkit: Offering Strategies for Minimizing Privacy
               Risks and Maximizing Utility in Audio-Visual Data Archiving},
  year      = {2024},
  archiveprefix = {arXiv},
  eprint    = {2408.03185},
  primaryclass = {cs.CR},
  url       = {https://arxiv.org/abs/2408.03185}
}

Bhuvanesh Verma and Lisa Raithel. 2024. DFKI-NLP at SemEval-2024 Task 2: Towards Robust LLMs Using Data Perturbations and MinMax Training.

BibTeX

@misc{Verma:Raithel:2024,
  author    = {Verma, Bhuvanesh and Raithel, Lisa},
  title     = {DFKI-NLP at SemEval-2024 Task 2: Towards Robust LLMs Using Data
               Perturbations and MinMax Training},
  year      = {2024},
  archiveprefix = {arXiv},
  eprint    = {2405.00321},
  primaryclass = {cs.CL},
  url       = {https://arxiv.org/abs/2405.00321}
}

Lisa Raithel, Philippe Thomas, Bhuvanesh Verma, Roland Roller, Hui-Syuan Yeh, Shuntaro Yada, Cyril Grouin, Shoko Wakamiya, Eiji Aramaki, Sebastian Möller and Pierre Zweigenbaum. August, 2024. Overview of #SMM4H 2024 – Task 2: Cross-Lingual Few-Shot Relation Extraction for Pharmacovigilance in French, German, and Japanese. Proceedings of The 9th Social Media Mining for Health Research and Applications (SMM4H 2024) Workshop and Shared Tasks, 170–182.

BibTeX

@inproceedings{Raithel:et:al:2024,
  author    = {Raithel, Lisa and Thomas, Philippe and Verma, Bhuvanesh and Roller, Roland
               and Yeh, Hui-Syuan and Yada, Shuntaro and Grouin, Cyril and Wakamiya, Shoko
               and Aramaki, Eiji and M{\"o}ller, Sebastian and Zweigenbaum, Pierre},
  title     = {Overview of {\#}{SMM}4{H} 2024 {--} Task 2: Cross-Lingual Few-Shot
               Relation Extraction for Pharmacovigilance in {F}rench, {G}erman,
               and {J}apanese},
  editor    = {Xu, Dongfang and Gonzalez-Hernandez, Graciela},
  booktitle = {Proceedings of The 9th Social Media Mining for Health Research
               and Applications (SMM4H 2024) Workshop and Shared Tasks},
  publisher = {Association for Computational Linguistics},
  address   = {Bangkok, Thailand},
  month     = {aug},
  year      = {2024},
  pages     = {170--182},
  url       = {https://aclanthology.org/2024.smm4h-1.39/},
  abstract  = {This paper provides an overview of Task 2 from the Social Media
               Mining for Health 2024 shared task ({\#}SMM4H 2024), which focused
               on Named Entity Recognition (NER, Subtask 2a) and the joint task
               of NER and Relation Extraction (RE, Subtask 2b) for detecting
               adverse drug reactions (ADRs) in German, Japanese, and French
               texts written by patients. Participants were challenged with a
               few-shot learning scenario, necessitating models that can effectively
               generalize from limited annotated examples. Despite the diverse
               strategies employed by the participants, the overall performance
               across submissions from three teams highlighted significant challenges.
               The results underscored the complexity of extracting entities
               and relations in multi-lingual contexts, especially from the noisy
               and informal nature of user-generated content. Further research
               is required to develop robust systems capable of accurately identifying
               and associating ADR-related information in low-resource and multilingual
               settings.}
}

Alexander Mehler, Mevlüt Bagci, Patrick Schrottenbacher, Alexander Henlein, Maxim Konca, Giuseppe Abrami, Kevin Bönisch, Manuel Stoeckel, Christian Spiekermann and Juliane Engel. 2024. Towards New Data Spaces for the Study of Multiple Documents with Va.Si.Li-Lab: A Conceptual Analysis. In: Students', Graduates' and Young Professionals' Critical Use of Online Information: Digital Performance Assessment and Training within and across Domains, 259–303. Ed. by Olga Zlatkin-Troitschanskaia, Marie-Theres Nagel, Verena Klose and Alexander Mehler. Springer Nature Switzerland.

BibTeX

@inbook{Mehler:et:al:2024:a,
  author    = {Mehler, Alexander and Bagci, Mevl{\"u}t and Schrottenbacher, Patrick
               and Henlein, Alexander and Konca, Maxim and Abrami, Giuseppe and B{\"o}nisch, Kevin
               and Stoeckel, Manuel and Spiekermann, Christian and Engel, Juliane},
  title     = {Towards New Data Spaces for the Study of Multiple Documents with
               Va.Si.Li-Lab: A Conceptual Analysis},
  editor    = {Zlatkin-Troitschanskaia, Olga and Nagel, Marie-Theres and Klose, Verena
               and Mehler, Alexander},
  booktitle = {Students', Graduates' and Young Professionals' Critical Use of
               Online Information: Digital Performance Assessment and Training
               within and across Domains},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  year      = {2024},
  pages     = {259--303},
  isbn      = {978-3-031-69510-0},
  doi       = {10.1007/978-3-031-69510-0_12},
  url       = {https://doi.org/10.1007/978-3-031-69510-0_12},
  keywords  = {core, core_c08},
  abstract  = {The constitution of multiple documents has so far been studied
               essentially as a process in which a single learner consults a
               number (of segments) of different documents in the context of
               the task at hand in order to construct a mental model for the
               purpose of completing the task. As a result of this research focus,
               the constitution of multiple documents appears predominantly as
               a monomodal, non-interactive process in which mainly textual units
               are studied, supplemented by images, text-image relations and
               comparable artifacts. This approach is reflected in the contextual
               fixity of the research design, in which the learners under study
               search for information using suitably equipped computers. If,
               on the other hand, we consider the openness of multi-agent learning
               situations, this scenario lacks the aspects of interactivity,
               contextual openness and, above all, the multimodality of information
               objects, information processing and information exchange. This
               is where the chapter comes in. It describes Va.Si.Li-Lab as an
               instrument for multimodal measurement for studying and modeling
               multiple documents in the context of interactive learning in a
               multi-agent environment. To this end, the chapter places Va.Si.Li-Lab
               in the spectrum of evolutionary approaches that vary the combination
               of human and machine innovation and selection. It also combines
               the requirements of multimodal representational learning with
               various aspects of contextual plasticity to prepare Va.Si.Li-Lab
               as a system that can be used for experimental research. The chapter
               is conceptual in nature, designing a system of requirements using
               the example of Va.Si.Li-Lab to outline an experimental environment
               in which the study of Critical Online Reasoning (COR) as a group
               process becomes possible. Although the chapter illustrates some
               of these requirements with realistic data from the field of simulation-based
               learning, the focus is still conceptual rather than experimental,
               hypothesis-driven. That is, the chapter is concerned with the
               design of a technology for future research into COR processes.}
}

Maxim Konca, Alexander Mehler, Andy Lücking and Daniel Baumartz. 2024. Visualizing Domain-specific and Generic Critical Online Reasoning Related Structures of Online Texts: A Hybrid Approach. In: Students', Graduates' and Young Professionals' Critical Use of Online Information: Digital Performance Assessment and Training within and across Domains, 195–239. Ed. by Olga Zlatkin-Troitschanskaia, Marie-Theres Nagel, Verena Klose and Alexander Mehler. Springer Nature Switzerland.

BibTeX

@inbook{Konca:et:al:2024:a,
  author    = {Konca, Maxim and Mehler, Alexander and L{\"u}cking, Andy and Baumartz, Daniel},
  title     = {Visualizing Domain-specific and Generic Critical Online Reasoning
               Related Structures of Online Texts: A Hybrid Approach},
  editor    = {Zlatkin-Troitschanskaia, Olga and Nagel, Marie-Theres and Klose, Verena
               and Mehler, Alexander},
  booktitle = {Students', Graduates' and Young Professionals' Critical Use of
               Online Information: Digital Performance Assessment and Training
               within and across Domains},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  year      = {2024},
  pages     = {195--239},
  isbn      = {978-3-031-69510-0},
  doi       = {10.1007/978-3-031-69510-0_10},
  url       = {https://doi.org/10.1007/978-3-031-69510-0_10},
  abstract  = {Besides ``traditional'' educational media, young professionals
               in higher education use the Internet to obtain information. To
               utilize their online research in professional contexts, they critically
               evaluate the information they access and its sources. One dimension
               of this evaluation is an assessment of the linguistic state of
               the online sources, either implicitly or explicitly. This computational
               educational linguistic study applies methods from computational
               linguistics to online sources visited by young professionals from
               three fields (law students, teacher trainees, and medicine student)
               and develops partly novel visualizations that allow to quickly
               discover similarities as well as differences between multi-heterogeneous
               Internet sources, that is, sources that exhibit various topics,
               genres, and textual structure, among others. The visualizations
               also allow a comparison of search behaviour between different
               professional fields. In this way, we found that (1) genre classification
               has a significant impact on reliability scores, (2) young professionals'
               search approaches vary by their professional field, and, (3) the
               best predictor of reliability is indeed the linguistic profile
               of an online source.}
}

Olga Zlatkin-Troitschanskaia, Marie-Theres Nagel, Verena Klose and Alexander Mehler, eds. 2024. Students’, Graduates’ and Young Professionals’ Critical Use of Online Information: Digital Performance Assessment and Training within and across Domains. Springer Cham.

BibTeX

@book{Zlatkin-Troitschanskaia:et:al:2024,
  title     = {Students’, Graduates’ and Young Professionals’ Critical Use of
               Online Information: Digital Performance Assessment and Training
               within and across Domains},
  editor    = {Zlatkin-Troitschanskaia, Olga and Nagel, Marie-Theres and Klose, Verena
               and Mehler, Alexander},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  year      = {2024},
  isbn      = {978-3-031-69510-0},
  doi       = {10.1007/978-3-031-69510-0},
  url       = {https://doi.org/10.1007/978-3-031-69510-0},
  abstract  = {This book addresses the topic of online information for everyday
               personal and professional use by students, graduates, and young
               professionals. It focuses on the development of the job-related
               use of online information by young professionals in their practical
               phases of education (traineeship/practical year) in the domains
               of law, teaching, and medicine. The research conducted in this
               context investigates the general and domain-specific use of online
               resources in educational contexts and examines the effectiveness
               of an innovative digital training approach in enhancing skills
               required for the competent use of online information. For this
               purpose, the presented research uses a yet unprecedented approach
               of data triangulation, in which self-rated data, digitally and
               in vivo assessed response process data and expert ratings are
               integrated into a theoretically founded assessment framework and
               are examined from various interdisciplinary perspectives with
               different analysis methods. Overall, this work addresses key research
               questions related to the use of online information in practical
               tasks as well as to the impact of digital training. It provides
               in-depth multidisciplinary analyses of multimodal processes and
               performance data, allowing implications equally relevant for practitioners,
               policymakers, and researchers in the field of education.}
}

Patrick Schrottenbacher. 2024. BA Thesis: Identifying toxic behaviour in online games. Goethe University.

BibTeX

@bathesis{schrottenbacher:2024,
  author    = {Patrick Schrottenbacher},
  title     = {Identifying toxic behaviour in online games},
  institution = {Goethe University},
  pages     = {35},
  year      = {2024},
  url       = {https://publikationen.ub.uni-frankfurt.de/files/81676/Toxic_video_game_classification.pdf},
  repository = {https://github.com/TheBv/toxic-video-games-gnn}
}

Alexander Henlein, Andy Lücking and Alexander Mehler. 2024. Virtually Restricting Modalities in Interactions: Va.Si.Li-Lab for Experimental Multimodal Research. Proceedings of the 2nd International Symposium on Multimodal Communication (MMSYM 2024), Frankfurt, 25-27 September 2024, 96–97.

BibTeX

@inproceedings{Henlein:Luecking:Mehler:2024,
  author    = {Henlein, Alexander and L{\"u}cking, Andy and Mehler, Alexander},
  title     = {Virtually Restricting Modalities in Interactions: Va.Si.Li-Lab
               for Experimental Multimodal Research},
  booktitle = {Proceedings of the 2nd International Symposium on Multimodal Communication
               (MMSYM 2024), Frankfurt, 25-27 September 2024},
  year      = {2024},
  pages     = {96--97},
  pdf       = {http://mmsym.org/wp-content/uploads/2024/09/BookOfAbstractsMMSYM2024-3.pdf},
  keywords  = {gemdis}
}

Andy Lücking, Alexander Mehler and Alexander Henlein. 2024. The Gesture–Prosody Link in Multimodal Grammar. Proceedings of the 2nd International Symposium on Multimodal Communication (MMSYM 2024), Frankfurt, 25-27 September 2024, 128–129.

BibTeX

@inproceedings{Luecking:Mehler:Henlein:2024,
  author    = {L{\"u}cking, Andy and Mehler, Alexander and Henlein, Alexander},
  title     = {The Gesture–Prosody Link in Multimodal Grammar},
  booktitle = {Proceedings of the 2nd International Symposium on Multimodal Communication
               (MMSYM 2024), Frankfurt, 25-27 September 2024},
  year      = {2024},
  pages     = {128--129},
  pdf       = {http://mmsym.org/wp-content/uploads/2024/09/BookOfAbstractsMMSYM2024-3.pdf},
  keywords  = {gemdis}
}

Jonathan Ginzburg, Chris Eliasmith and Andy Lücking. 2024. Swann's name: Towards a Dialogical Brain Semantics. Proceedings of the 28th Workshop on The Semantics and Pragmatics of Dialogue.

BibTeX

@inproceedings{Ginzburg:Eliasmith:Luecking:2024-swann,
  author    = {Ginzburg, Jonathan and Eliasmith, Chris and Lücking, Andy},
  title     = {Swann's name: {Towards} a Dialogical Brain Semantics},
  booktitle = {Proceedings of the 28th Workshop on The Semantics and Pragmatics of Dialogue},
  series    = {SemDial'24 -- TrentoLogue},
  location  = {Università di Trento, Palazzo Piomarta, Rovereto},
  year      = {2024},
  url       = {https://www.semdial.org/anthology/papers/Z/Z24/Z24-3007/},
  pdf       = {http://semdial.org/anthology/Z24-Ginzburg_semdial_0007.pdf}
}

Andy Lücking, Alexander Mehler and Alexander Henlein. 2024. The Linguistic Interpretation of Non-emblematic Gestures Must be agreed in Dialogue: Combining Perceptual Classifiers and Grounding/Clarification Mechanisms. Proceedings of the 28th Workshop on The Semantics and Pragmatics of Dialogue.

BibTeX

@inproceedings{Luecking:Mehler:Henlein:2024-classifier,
  author    = {Lücking, Andy and Mehler, Alexander and Henlein, Alexander},
  title     = {The Linguistic Interpretation of Non-emblematic Gestures Must
               be agreed in Dialogue: Combining Perceptual Classifiers and Grounding/Clarification
               Mechanisms},
  booktitle = {Proceedings of the 28th Workshop on The Semantics and Pragmatics of Dialogue},
  series    = {SemDial'24 -- TrentoLogue},
  location  = {Università di Trento, Palazzo Piomarta, Rovereto},
  year      = {2024},
  url       = {https://www.semdial.org/anthology/papers/Z/Z24/Z24-4031/},
  pdf       = {http://semdial.org/anthology/Z24-Lucking_semdial_0031.pdf},
  keywords  = {gemdis}
}

Dominik Mattern, Wahed Hemati, Andy Lücking and Alexander Mehler. Sep., 2024. On German verb sense disambiguation: A three-part approach based on linking a sense inventory (GermaNet) to a corpus through annotation (TGVCorp) and using the corpus to train a VSD classifier (TTvSense). Journal of Language Modelling, 12(1):155–212.

BibTeX

@article{Mattern:Hemati:Lücking:Mehler:2024,
  author    = {Mattern, Dominik and Hemati, Wahed and Lücking, Andy and Mehler, Alexander},
  title     = {On German verb sense disambiguation: A three-part approach based
               on linking a sense inventory (GermaNet) to a corpus through annotation
               (TGVCorp) and using the corpus to train a VSD classifier (TTvSense)},
  journal   = {Journal of Language Modelling},
  volume    = {12},
  number    = {1},
  year      = {2024},
  month     = {sep},
  pages     = {155--212},
  url       = {https://jlm.ipipan.waw.pl/index.php/JLM/article/view/356},
  abstract  = {We develop a three-part approach to Verb Sense Disambiguation
               (VSD) in German. After considering a set of lexical resources
               and corpora, we arrive at a statistically motivated selection
               of a subset of verbs and their senses from GermaNet. This sub-inventory
               is then used to disambiguate the occurrences of the corresponding
               verbs in a corpus resulting from the union of TüBa-D/Z, Salsa,
               and E-VALBU. The corpus annotated in this way is called TGVCorp.
               It is used in the third part of the paper for training a classifier
               for VSD and for its comparative evaluation with a state-of-the-art
               approach in this research area, namely EWISER. Our simple classifier
               outperforms the transformer-based approach on the same data in
               both accuracy and speed in German but not in English and we discuss
               possible reasons.}
}

Kevin Bönisch and Alexander Mehler. 2024. Finding Needles in Emb(a)dding Haystacks: Legal Document Retrieval via Bagging and SVR Ensembles. Proceedings of the 2nd Legal Information Retrieval meets Artificial Intelligence Workshop LIRAI 2024.

BibTeX

@inproceedings{Boenisch:Mehler:2024,
  title     = {Finding Needles in Emb(a)dding Haystacks: Legal Document Retrieval
               via Bagging and {SVR} Ensembles},
  author    = {B{\"o}nisch, Kevin and Mehler, Alexander},
  year      = {2024},
  booktitle = {Proceedings of the 2nd Legal Information Retrieval meets Artificial
               Intelligence Workshop LIRAI 2024},
  location  = {Poznan, Poland},
  publisher = {CEUR-WS.org},
  address   = {Aachen, Germany},
  series    = {CEUR Workshop Proceedings},
  abstract  = {We introduce a retrieval approach leveraging Support Vector Regression
               (SVR) ensembles, bootstrap aggregation (bagging), and embedding
               spaces on the German Dataset for Legal Information Retrieval (GerDaLIR).
               By conceptualizing the retrieval task in terms of multiple binary
               needle-in-a-haystack subtasks, we show improved recall over the
               baselines (0.849 > 0.803 | 0.829) using our voting ensemble, suggesting
               promising initial results, without training or fine-tuning any
               deep learning models. Our approach holds potential for further
               enhancement, particularly through refining the encoding models
               and optimizing hyperparameters.},
  archiveprefix = {arXiv},
  eprint    = {2501.05018},
  url       = {https://arxiv.org/pdf/2501.05018},
  keywords  = {legal information retrieval, support vector regression, word embeddings, bagging ensemble}
}

Kevin Bönisch, Manuel Stoeckel and Alexander Mehler. 2024. HyperCausal: Visualizing Causal Inference in 3D Hypertext. Proceedings of the 35th ACM Conference on Hypertext and Social Media, 330–336.

BibTeX

@inproceedings{Boenisch:et:al:2024,
  author    = {B{\"o}nisch, Kevin and Stoeckel, Manuel and Mehler, Alexander},
  title     = {{HyperCausal}: Visualizing Causal Inference in {3D} Hypertext},
  year      = {2024},
  isbn      = {9798400705953},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3648188.3677049},
  doi       = {10.1145/3648188.3677049},
  abstract  = {We present HyperCausal, a 3D hypertext visualization framework
               for exploring causal inference in generative Large Language Models
               (LLMs). HyperCausal maps the generative processes of LLMs into
               spatial hypertexts, where tokens are represented as nodes connected
               by probability-weighted edges. The edges are weighted by the prediction
               scores of next tokens, depending on the underlying language model.
               HyperCausal facilitates navigation through the causal space of
               the underlying LLM, allowing users to explore predicted word sequences
               and their branching. Through comparative analysis of LLM parameters
               such as token probabilities and search algorithms, HyperCausal
               provides insight into model behavior and performance. Implemented
               using the Hugging Face transformers library and Three.js, HyperCausal
               ensures cross-platform accessibility to advance research in natural
               language processing using concepts from hypertext research. We
               demonstrate several use cases of HyperCausal and highlight the
               potential for detecting hallucinations generated by LLMs using
               this framework. The connection with hypertext research arises
               from the fact that HyperCausal relies on user interaction to unfold
               graphs with hierarchically appearing branching alternatives in
               3D space. This approach refers to spatial hypertexts and early
               concepts of hierarchical hypertext structures. A third connection
               concerns hypertext fiction, since the branching alternatives mediated
               by HyperCausal manifest non-linearly organized reading threads
               along artificially generated texts that the user decides to follow
               optionally depending on the reading context.},
  booktitle = {Proceedings of the 35th ACM Conference on Hypertext and Social Media},
  pages     = {330--336},
  numpages  = {7},
  keywords  = {3D hypertext, large language models, visualization},
  location  = {Poznan, Poland},
  series    = {HT '24},
  video     = {https://www.youtube.com/watch?v=ANHFTupnKhI}
}

Daniel Baumartz, Maxim Konca, Alexander Mehler, Patrick Schrottenbacher and Dominik Braunheim. 2024. Measuring Group Creativity of Dialogic Interaction Systems by Means of Remote Entailment Analysis. Proceedings of the 35th ACM Conference on Hypertext and Social Media, 153–166.

BibTeX

@inproceedings{Baumartz:et:al:2024,
  author    = {Baumartz, Daniel and Konca, Maxim and Mehler, Alexander and Schrottenbacher, Patrick
               and Braunheim, Dominik},
  title     = {Measuring Group Creativity of Dialogic Interaction Systems by
               Means of Remote Entailment Analysis},
  year      = {2024},
  isbn      = {9798400705953},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3648188.3675140},
  doi       = {10.1145/3648188.3675140},
  abstract  = {We present a procedure for assessing group creativity that allows
               us to compare the contributions of human interlocutors and chatbots
               based on generative AI such as ChatGPT. We focus on everyday creativity
               in terms of dialogic communication and test four hypotheses about
               the difference between human and artificial communication. Our
               procedure is based on a test that requires interlocutors to cooperatively
               interpret a sequence of sentences for which we control for coherence
               gaps with reference to the notion of entailment. Using NLP methods,
               we automatically evaluate the spoken or written contributions
               of interlocutors (human or otherwise). The paper develops a routine
               for automatic transcription based on Whisper, for sampling texts
               based on their entailment relations, for analyzing dialogic contributions
               along their semantic embeddings, and for classifying interlocutors
               and interaction systems based on them. In this way, we highlight
               differences between human and artificial conversations under conditions
               that approximate free dialogic communication. We show that despite
               their obvious classificatory differences, it is difficult to see
               clear differences even in the domain of dialogic communication
               given the current instruments of NLP.},
  booktitle = {Proceedings of the 35th ACM Conference on Hypertext and Social Media},
  pages     = {153--166},
  numpages  = {14},
  keywords  = {Creative AI, Creativity, Generative AI, Hermeneutics, NLP, core, core_b05, core_c08},
  location  = {Poznan, Poland},
  series    = {HT '24}
}

Giuseppe Abrami, Dominik Alexander Wontke, Gurpreet Singh and Alexander Mehler. 2024. Va.Si.Li-ES: VR-based Dynamic Event Processing, Environment Change and User Feedback in Va.Si.Li-Lab. Proceedings of the 35th ACM Conference on Hypertext and Social Media, 357–368.

BibTeX

@inproceedings{Abrami:et:al:2024:b,
  author    = {Abrami, Giuseppe and Wontke, Dominik Alexander and Singh, Gurpreet
               and Mehler, Alexander},
  title     = {{Va.Si.Li-ES}: {VR}-based Dynamic Event Processing, Environment Change
               and User Feedback in {Va.Si.Li-Lab}},
  year      = {2024},
  isbn      = {9798400705953},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3648188.3675154},
  doi       = {10.1145/3648188.3675154},
  abstract  = {Flexibility, adaptability, modularity, and extensibility in the
               context of a collaborative system are critical features for multi-user
               hypertext systems. In addition to facilitating acceptance and
               increasing reusability, these features simplify development cycles
               and enable a larger range of application areas. However, especially
               in virtual 3D hypertext systems, many of the features are only
               partially available or not available at all. To fill this gap,
               we present an approach to virtual hypertext systems for the realization
               of dynamic event systems. Such an event system can be created
               and serialized simultaneously at run time regarding the modification
               of situational, environmental parameters. This includes informing
               users and allowing them to participate in the environmental dynamics
               of the system. We present Va.Si.Li-ES as a module of Va.Si.Li-Lab,
               describe several environmental scenarios that can be adapted,
               and provide use cases in the context of 3D hypertext systems.},
  booktitle = {Proceedings of the 35th ACM Conference on Hypertext and Social Media},
  pages     = {357--368},
  numpages  = {12},
  keywords  = {Collaborative Simulation, Environmental Event System, Hypertext, Ubiq, Va.Si.Li-Lab, Virtual Reality},
  location  = {Poznan, Poland},
  series    = {HT '24}
}

Alexander Henlein, Anastasia Bauer, Reetu Bhattacharjee, Aleksandra Ćwiek, Alina Gregori, Frank Kügler, Jens Lemanski, Andy Lücking, Alexander Mehler, Pilar Prieto, Paula G. Sánchez-Ramón, Job Schepens, Martin Schulte-Rüther, Stefan R. Schweinberger and Celina I. von Eiff. 2024. An Outlook for AI Innovation in Multimodal Communication Research. Digital Human Modeling and Applications in Health, Safety, Ergonomics and Risk Management, 182–234.

BibTeX

@inproceedings{Henlein:et:al:2024-vicom,
  title     = {An Outlook for {AI} Innovation in Multimodal Communication Research},
  author    = {Henlein, Alexander and Bauer, Anastasia and Bhattacharjee, Reetu
               and Ćwiek, Aleksandra and Gregori, Alina and Kügler, Frank and Lemanski, Jens
               and Lücking, Andy and Mehler, Alexander and Prieto, Pilar and Sánchez-Ramón, Paula G.
               and Schepens, Job and Schulte-Rüther, Martin and Schweinberger, Stefan R.
               and von Eiff, Celina I.},
  editor    = {Duffy, Vincent G.},
  year      = {2024},
  booktitle = {Digital Human Modeling and Applications in Health, Safety, Ergonomics
               and Risk Management},
  series    = {HCII 2024. Lecture Notes in Computer Science},
  publisher = {Springer},
  address   = {Cham},
  pages     = {182--234},
  isbn      = {978-3-031-61066-0},
  keywords  = {gemdis}
}

Giuseppe Abrami and Alexander Mehler. August, 2024. Efficient, uniform and scalable parallel NLP pre-processing with DUUI: Perspectives and Best Practice for the Digital Humanities. Digital Humanities Conference 2024 - Book of Abstracts (DH 2024), 15–18.

BibTeX

@inproceedings{Abrami:Mehler:2024,
  author    = {Abrami, Giuseppe and Mehler, Alexander},
  title     = {Efficient, uniform and scalable parallel {NLP} pre-processing with
               {DUUI}: Perspectives and Best Practice for the Digital Humanities},
  year      = {2024},
  month     = {aug},
  editor    = {Karajgikar, Jajwalya and Janco, Andrew and Otis, Jessica},
  booktitle = {Digital Humanities Conference 2024 - Book of Abstracts (DH 2024)},
  location  = {Washington, DC, USA},
  series    = {DH},
  keywords  = {duui, core, core_c08},
  publisher = {Zenodo},
  doi       = {10.5281/zenodo.13761079},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2024/12/DH2024_Poster.pdf},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2024/12/DH2024_Abstract.pdf},
  url       = {https://doi.org/10.5281/zenodo.13761079},
  pages     = {15--18},
  numpages  = {4}
}

Andy Lücking, Giuseppe Abrami, Leon Hammerla, Marc Rahn, Daniel Baumartz, Steffen Eger and Alexander Mehler. May, 2024. Dependencies over Times and Tools (DoTT). Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), 4641–4653.

BibTeX

@inproceedings{Luecking:et:al:2024,
  author    = {L{\"u}cking, Andy and Abrami, Giuseppe and Hammerla, Leon and Rahn, Marc
               and Baumartz, Daniel and Eger, Steffen and Mehler, Alexander},
  title     = {Dependencies over Times and Tools ({D}o{TT})},
  editor    = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro
               and Sakti, Sakriani and Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational
               Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  year      = {2024},
  month     = {may},
  pages     = {4641--4653},
  publisher = {ELRA and ICCL},
  address   = {Torino, Italy},
  url       = {https://aclanthology.org/2024.lrec-main.415},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2024/05/LREC_2024_Poster_DoTT.pdf},
  abstract  = {Purpose: Based on the examples of English and German, we investigate
               to what extent parsers trained on modern variants of these languages
               can be transferred to older language levels without loss. Methods:
               We developed a treebank called DoTT (https://github.com/texttechnologylab/DoTT)
               which covers, roughly, the time period from 1800 until today,
               in conjunction with the further development of the annotation
               tool DependencyAnnotator. DoTT consists of a collection of diachronic
               corpora enriched with dependency annotations using 3 parsers,
               6 pre-trained language models, 5 newly trained models for German,
               and two tag sets (TIGER and Universal Dependencies). To assess
               how the different parsers perform on texts from different time
               periods, we created a gold standard sample as a benchmark. Results:
               We found that the parsers/models perform quite well on modern
               texts (document-level LAS ranging from 82.89 to 88.54) and slightly
               worse on older texts, as expected (average document-level LAS
               84.60 vs. 86.14), but not significantly. For German texts, the
               (German) TIGER scheme achieved slightly better results than UD.
               Conclusion: Overall, this result speaks for the transferability
               of parsers to past language levels, at least dating back until
               around 1800. This very transferability, it is however argued,
               means that studies of language change in the field of dependency
               syntax can draw on dependency distance but miss out on some grammatical
               phenomena.}
}

Maxim Konca, Andy Lücking and Alexander Mehler. May, 2024. German SRL: Corpus Construction and Model Training. Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), 7717–7727.

BibTeX

@inproceedings{Konca:et:al:2024,
  author    = {Konca, Maxim and L{\"u}cking, Andy and Mehler, Alexander},
  title     = {{G}erman {SRL}: Corpus Construction and Model Training},
  editor    = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro
               and Sakti, Sakriani and Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational
               Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  year      = {2024},
  month     = {may},
  pages     = {7717--7727},
  publisher = {ELRA and ICCL},
  address   = {Torino, Italy},
  url       = {https://aclanthology.org/2024.lrec-main.682},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2024/05/LREC_2024_Poster_GERMAN_SRL.pdf},
  abstract  = {A useful semantic role-annotated resource for training semantic
               role models for the German language is missing. We point out some
               problems of previous resources and provide a new one due to a
               combined translation and alignment process: The gold standard
               CoNLL-2012 semantic role annotations are translated into German.
               Semantic role labels are transferred due to alignment models.
               The resulting dataset is used to train a German semantic role
               model. With F1-scores around 0.7, the major roles achieve competitive
               evaluation scores, but avoid limitations of previous approaches.
               The described procedure can be applied to other languages as well.}
}

Giuseppe Abrami, Mevlüt Bagci and Alexander Mehler. 2024. German Parliamentary Corpus (GerParCor) Reloaded. Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), 7707–7716.

BibTeX

@inproceedings{Abrami:et:al:2024:a,
  author    = {Abrami, Giuseppe and Bagci, Mevl{\"u}t and Mehler, Alexander},
  title     = {{G}erman Parliamentary Corpus ({G}er{P}ar{C}or) Reloaded},
  editor    = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro
               and Sakti, Sakriani and Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational
               Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  year      = {2024},
  pages     = {7707--7716},
  publisher = {ELRA and ICCL},
  address   = {Torino, Italy},
  url       = {https://aclanthology.org/2024.lrec-main.681},
  pdf       = {https://aclanthology.org/2024.lrec-main.681.pdf},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2024/05/GerParCor_Reloaded_Poster.pdf},
  video     = {https://www.youtube.com/watch?v=5X-w_oXOAYo},
  keywords  = {gerparcor,corpus},
  abstract  = {In 2022, the largest German-speaking corpus of parliamentary protocols
               from three different centuries, on a national and federal level
               from the countries of Germany, Austria, Switzerland and Liechtenstein,
               was collected and published - GerParCor. Through GerParCor, it
               became possible to provide for the first time various parliamentary
               protocols which were not available digitally and, moreover, could
               not be retrieved and processed in a uniform manner. Furthermore,
               GerParCor was additionally preprocessed using NLP methods and
               made available in XMI format. In this paper, GerParCor is significantly
               updated by including all new parliamentary protocols in the corpus,
               as well as adding and preprocessing further parliamentary protocols
               previously not covered, so that a period up to 1797 is now covered.
               Besides the integration of a new, state-of-the-art and appropriate
               NLP preprocessing for the handling of large text corpora, this
               update also provides an overview of the further reuse of GerParCor
               by presenting various provisioning capabilities such as API's,
               among others.}
}

2023

Pavan Kalyan Reddy Neerudu, Subba Reddy Oota, Mounika Marreddy, Venkateswara Rao Kagita and Manish Gupta. 2023. On robustness of finetuned transformer-based NLP models. arXiv preprint arXiv:2305.14453.

BibTeX

@article{Marreddy:et:al:2023emnlp,
  title     = {On robustness of finetuned transformer-based {NLP} models},
  author    = {Neerudu, Pavan Kalyan Reddy and Oota, Subba Reddy and Marreddy, Mounika
               and Kagita, Venkateswara Rao and Gupta, Manish},
  journal   = {arXiv preprint arXiv:2305.14453},
  archiveprefix = {arXiv},
  eprint    = {2305.14453},
  url       = {https://arxiv.org/abs/2305.14453},
  year      = {2023}
}

Subba Reddy Oota, Mounika Marreddy, Manish Gupta and Raju Bapi. 2023. How does the brain process syntactic structure while listening? Findings of the Association for Computational Linguistics: ACL 2023, 6624–6647.

BibTeX

@inproceedings{Marreddy:et:al:2023acl,
  author    = {Oota, Subba Reddy and Marreddy, Mounika and Gupta, Manish and Bapi, Raju},
  title     = {How does the brain process syntactic structure while listening?},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2023},
  year      = {2023},
  pages     = {6624--6647}
}

Subba Reddy Oota, Khushbu Pahwa, Mounika Marreddy, Manish Gupta and Bapi S Raju. 2023. Neural architecture of speech. ICASSP 2023-2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 1–5.

BibTeX

@inproceedings{Marreddy:et:al:2023icassp,
  author    = {Oota, Subba Reddy and Pahwa, Khushbu and Marreddy, Mounika and Gupta, Manish
               and Raju, Bapi S},
  title     = {Neural architecture of speech},
  booktitle = {ICASSP 2023-2023 IEEE International Conference on Acoustics, Speech
               and Signal Processing (ICASSP)},
  organization = {IEEE},
  year      = {2023},
  pages     = {1--5}
}

Alina Gregori, Federica Amici, Ingmar Brilmayer, Aleksandra Ćwiek, Lennart Fritzsche, Susanne Fuchs, Alexander Henlein, Oliver Herbort, Frank Kügler, Jens Lemanski, Katja Liebal, Andy Lücking, Alexander Mehler, Kim Tien Nguyen, Wim Pouw, Pilar Prieto, Patrick Louis Rohrer, Paula G. Sánchez-Ramón, Martin Schulte-Rüther, Petra B. Schumacher, Stefan R. Schweinberger, Volker Struckmeier, Patrick C. Trettenbrein and Celina I. von Eiff. 2023. A Roadmap for Technological Innovation in Multimodal Communication Research. Digital Human Modeling and Applications in Health, Safety, Ergonomics and Risk Management, 402–438.

BibTeX

@inproceedings{Gregori:et:al:2023-vicom,
  author    = {Gregori, Alina and Amici, Federica and Brilmayer, Ingmar and {\'{C}}wiek, Aleksandra
               and Fritzsche, Lennart and Fuchs, Susanne and Henlein, Alexander and Herbort, Oliver
               and K{\"u}gler, Frank and Lemanski, Jens and Liebal, Katja and L{\"u}cking, Andy
               and Mehler, Alexander and Nguyen, Kim Tien and Pouw, Wim and Prieto, Pilar
               and Rohrer, Patrick Louis and S{\'a}nchez-Ram{\'o}n, Paula G. and Schulte-R{\"u}ther, Martin
               and Schumacher, Petra B. and Schweinberger, Stefan R. and Struckmeier, Volker
               and Trettenbrein, Patrick C. and von Eiff, Celina I.},
  title     = {A Roadmap for Technological Innovation in Multimodal Communication Research},
  editor    = {Duffy, Vincent G.},
  booktitle = {Digital Human Modeling and Applications in Health, Safety, Ergonomics
               and Risk Management},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  year      = {2023},
  pages     = {402--438},
  isbn      = {978-3-031-35748-0},
  pdf       = {https://pure.mpg.de/rest/items/item_3511464_5/component/file_3520176/content},
  keywords  = {gemdis},
  abstract  = {Multimodal communication research focuses on how different means
               of signalling coordinate to communicate effectively. This line
               of research is traditionally influenced by fields such as cognitive
               and neuroscience, human-computer interaction, and linguistics.
               With new technologies becoming available in fields such as natural
               language processing and computer vision, the field can increasingly
               avail itself of new ways of analyzing and understanding multimodal
               communication. As a result, there is a general hope that multimodal
               research may be at the ``precipice of greatness'' due to technological
               advances in computer science and resulting extended empirical
               coverage. However, for this to come about there must be sufficient
               guidance on key (theoretical) needs of innovation in the field
               of multimodal communication. Absent such guidance, the research
               focus of computer scientists might increasingly diverge from crucial
               issues in multimodal communication. With this paper, we want to
               further promote interaction between these fields, which may enormously
               benefit both communities. The multimodal research community (represented
               here by a consortium of researchers from the Visual Communication
               [ViCom] Priority Programme) can engage in the innovation by clearly
               stating which technological tools are needed to make progress
               in the field of multimodal communication. In this article, we
               try to facilitate the establishment of a much needed common ground
               on feasible expectations (e.g., in terms of terminology and measures
               to be able to train machine learning algorithms) and to critically
               reflect possibly idle hopes for technical advances, informed by
               recent successes and challenges in computer science, social signal
               processing, and related domains.}
}

Kevin Bönisch. 2023. BA Thesis: Dialog generation using language models. Goethe University.

BibTeX

@bathesis{boenisch:2023,
  author    = {B{\"o}nisch, Kevin},
  title     = {Dialog generation using language models},
  institution = {Goethe University},
  pages     = {28},
  year      = {2023},
  url       = {https://publikationen.ub.uni-frankfurt.de/opus4/frontdoor/index/index/docId/79165},
  repository = {https://github.com/texttechnologylab/ROBERT}
}

Kevin Bönisch, Giuseppe Abrami, Sabine Wehnert and Alexander Mehler. 2023. Bundestags-Mine: Natural Language Processing for Extracting Key Information from Government Documents. Legal Knowledge and Information Systems.

BibTeX

@inproceedings{Boenisch:et:al:2023,
  title     = {{Bundestags-Mine}: Natural Language Processing for Extracting
               Key Information from Government Documents},
  isbn      = {9781643684734},
  issn      = {1879-8314},
  url       = {https://doi.org/10.3233/FAIA230996},
  doi       = {10.3233/faia230996},
  booktitle = {Legal Knowledge and Information Systems},
  publisher = {IOS Press},
  author    = {B{\"o}nisch, Kevin and Abrami, Giuseppe and Wehnert, Sabine and Mehler, Alexander},
  year      = {2023}
}

Alexander Leonhardt, Giuseppe Abrami, Daniel Baumartz and Alexander Mehler. 2023. Unlocking the Heterogeneous Landscape of Big Data NLP with DUUI. Findings of the Association for Computational Linguistics: EMNLP 2023, 385–399.

BibTeX

@inproceedings{Leonhardt:et:al:2023,
  author    = {Leonhardt, Alexander and Abrami, Giuseppe and Baumartz, Daniel
               and Mehler, Alexander},
  title     = {Unlocking the Heterogeneous Landscape of Big Data {NLP} with {DUUI}},
  editor    = {Bouamor, Houda and Pino, Juan and Bali, Kalika},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2023},
  publisher = {Association for Computational Linguistics},
  address   = {Singapore},
  year      = {2023},
  pages     = {385--399},
  url       = {https://aclanthology.org/2023.findings-emnlp.29},
  pdf       = {https://aclanthology.org/2023.findings-emnlp.29.pdf},
  keywords  = {duui, core, core_c08},
  abstract  = {Automatic analysis of large corpora is a complex task, especially
               in terms of time efficiency. This complexity is increased by the
               fact that flexible, extensible text analysis requires the continuous
               integration of ever new tools. Since there are no adequate frameworks
               for these purposes in the field of NLP, and especially in the
               context of UIMA, that are not outdated or unusable for security
               reasons, we present a new approach to address the latter task:
               Docker Unified UIMA Interface (DUUI), a scalable, flexible, lightweight,
               and feature-rich framework for automatic distributed analysis
               of text corpora that leverages Big Data experience and virtualization
               with Docker. We evaluate DUUI{'}s communication approach against
               a state-of-the-art approach and demonstrate its outstanding behavior
               in terms of time efficiency, enabling the analysis of big text
               data.}
}

Andy Lücking, Chiara Mazzocconi and Darinka Verdonik. 2023. Proceedings of the 27th Workshop On the Semantics and Pragmatics of Dialogue. SemDial 2023 – MariLogue. University of Maribor.

BibTeX

@proceedings{SemDial:2023-marilogue,
  editor    = {Lücking, Andy and Mazzocconi, Chiara and Verdonik, Darinka},
  editor+an = {1=highlight},
  title     = {Proceedings of the 27th Workshop On the Semantics and Pragmatics of Dialogue},
  series    = {SemDial 2023 -- MariLogue},
  publisher = {University of Maribor},
  year      = {2023},
  url       = {https://www.semdial.org/anthology/events/semdial-2023/},
  keywords  = {own,editor}
}

Jonathan Ginzburg and Andy Lücking. 2023. Referential Transparency and Inquisitivity. Proceedings of the 4th Workshop on Inquisitiveness Below and Beyond the Sentence Boundary, 11–20.

BibTeX

@inproceedings{Ginzburg:Luecking:2023-wh,
  author    = {Ginzburg, Jonathan and Lücking, Andy},
  author+an = {2=highlight},
  keywords  = {own,conference},
  title     = {Referential Transparency and Inquisitivity},
  booktitle = {Proceedings of the 4th Workshop on Inquisitiveness Below and Beyond
               the Sentence Boundary},
  series    = {InqBnB4'23},
  pages     = {11--20},
  location  = {Nancy, France, hosted with IWCS 2023},
  year      = {2023},
  url       = {https://aclanthology.org/2023.inqbnb-1.2/},
  pdf       = {https://aclanthology.org/2023.inqbnb-1.2.pdf}
}

Andy Lücking. 2023. Towards Referential Transparent Annotations of Quantified Noun Phrases. Proceedings of the 2023 Joint ACL–ISO Workshop on Interoperable Semantic Annotation, 47–55.

BibTeX

@inproceedings{Luecking:2023-rtt-annotation,
  author    = {Lücking, Andy},
  keywords  = {gemdis},
  title     = {Towards Referential Transparent Annotations of Quantified Noun Phrases},
  booktitle = {Proceedings of the 2023 Joint ACL--ISO Workshop on Interoperable
               Semantic Annotation},
  series    = {ISA-19},
  pages     = {47--55},
  location  = {Nancy, France, hosted with IWCS 2023},
  year      = {2023},
  url       = {https://aclanthology.org/2023.isa-1.7/},
  pdf       = {https://aclanthology.org/2023.isa-1.7.pdf}
}

Staffan Larsson, Robin Cooper, Jonathan Ginzburg and Andy Lücking. 2023. TTR at the SPA: Relating type-theoretical semantics to neural semantic pointers. Proceedings of Natural Logic Meets Machine Learning IV.

BibTeX

@inproceedings{Larsson:Cooper:Ginzburg:Luecking:2023-ttr-spa,
  author    = {Larsson, Staffan and Cooper, Robin and Ginzburg, Jonathan and Lücking, Andy},
  author+an = {4=highlight},
  title     = {{TTR} at the {SPA}: {Relating} type-theoretical semantics to neural
               semantic pointers},
  booktitle = {Proceedings of Natural Logic Meets Machine Learning IV},
  series    = {NALOMA'23},
  location  = {Nancy, France, hosted with IWCS 2023},
  year      = {2023},
  url       = {https://aclanthology.org/2023.naloma-1.5/},
  pdf       = {https://aclanthology.org/2023.naloma-1.5.pdf},
  keywords  = {own,conference}
}

Alexander Henlein, Andy Lücking, Mevlüt Bagci and Alexander Mehler. 2023. Towards grounding multimodal semantics in interaction data with Va.Si.Li-Lab. Proceedings of the 8th Conference on Gesture and Speech in Interaction (GESPIN).

BibTeX

@inproceedings{Henlein:et:al:2023c,
  title     = {Towards grounding multimodal semantics in interaction data with {Va.Si.Li-Lab}},
  author    = {Henlein, Alexander and Lücking, Andy and Bagci, Mevlüt and Mehler, Alexander},
  booktitle = {Proceedings of the 8th Conference on Gesture and Speech in Interaction (GESPIN)},
  location  = {Nijmegen, Netherlands},
  year      = {2023},
  keywords  = {vasililab, gemdis},
  pdf       = {https://www.gespin2023.nl/documents/talks_and_posters/GeSpIn_2023_papers/GeSpIn_2023_paper_1692.pdf}
}

Shaduan Babbili, Kevin Bönisch, Yannick Heinrich, Philipp Stephan, Giuseppe Abrami and Alexander Mehler. 2023. Viki LibraRy: A Virtual Reality Library for Collaborative Browsing and Navigation through Hypertext. Proceedings of the 34th ACM Conference on Hypertext and Social Media.

BibTeX

@inproceedings{Babbili:et:al:2023,
  author    = {Babbili, Shaduan and B{\"o}nisch, Kevin and Heinrich, Yannick
               and Stephan, Philipp and Abrami, Giuseppe and Mehler, Alexander},
  title     = {{Viki LibraRy}: A Virtual Reality Library for Collaborative Browsing
               and Navigation through Hypertext},
  year      = {2023},
  isbn      = {9798400702327},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3603163.3609079},
  doi       = {10.1145/3603163.3609079},
  abstract  = {We present Viki LibraRy, a virtual-reality-based system for generating
               and exploring online information as a spatial hypertext. It creates
               a virtual library based on Wikipedia in which Rooms are used to
               make data available via a RESTful backend. In these Rooms, users
               can browse through all articles of the corresponding Wikipedia
               category in the form of Books. In addition, users can access different
               Rooms, through virtual portals. Beyond that, the explorations
               can be done alone or collaboratively, using Ubiq.},
  booktitle = {Proceedings of the 34th ACM Conference on Hypertext and Social Media},
  articleno = {6},
  numpages  = {3},
  keywords  = {virtual reality simulation, virtual reality, virtual hypertext, virtual museum},
  location  = {Rome, Italy},
  series    = {HT '23},
  pdf       = {https://dl.acm.org/doi/pdf/10.1145/3603163.3609079}
}

Julian Gagel, Jasper Hustedt, Timo Lüttig, Theresa Berg, Giuseppe Abrami and Alexander Mehler. 2023. News in Time and Space: Global Event Exploration in Virtual Reality. Proceedings of the 34th ACM Conference on Hypertext and Social Media.

BibTeX

@inproceedings{Gagel:et:al:2023,
  author    = {Gagel, Julian and Hustedt, Jasper and L{\"u}ttig, Timo and Berg, Theresa
               and Abrami, Giuseppe and Mehler, Alexander},
  title     = {News in Time and Space: Global Event Exploration in Virtual Reality},
  year      = {2023},
  isbn      = {9798400702327},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3603163.3609080},
  doi       = {10.1145/3603163.3609080},
  abstract  = {We present News in Time and Space (NiTS), a virtual reality application
               for visualization, filtering and interaction with geo-referenced
               events based on GDELT. It can be used both via VR glasses and
               as a desktop solution for shared use by multiple users with Ubiq.
               The aim of NiTS is to provide overviews of global events and trends
               in order to create a resource for their monitoring and analysis.},
  booktitle = {Proceedings of the 34th ACM Conference on Hypertext and Social Media},
  articleno = {7},
  numpages  = {3},
  keywords  = {virtual hypertext, human data interaction, spatial computing, virtual reality simulation, geographic information systems, virtual reality},
  location  = {Rome, Italy},
  series    = {HT '23},
  pdf       = {https://dl.acm.org/doi/pdf/10.1145/3603163.3609080}
}

Giuseppe Abrami, Alexander Mehler, Mevlüt Bagci, Patrick Schrottenbacher, Alexander Henlein, Christian Spiekermann, Juliane Engel and Jakob Schreiber. 2023. Va.Si.Li-Lab as a Collaborative Multi-User Annotation Tool in Virtual Reality and Its Potential Fields of Application. Proceedings of the 34th ACM Conference on Hypertext and Social Media.

BibTeX

@inproceedings{Abrami:et:al:2023,
  author    = {Abrami, Giuseppe and Mehler, Alexander and Bagci, Mevl{\"u}t and Schrottenbacher, Patrick
               and Henlein, Alexander and Spiekermann, Christian and Engel, Juliane
               and Schreiber, Jakob},
  title     = {{Va.Si.Li-Lab} as a Collaborative Multi-User Annotation Tool in
               Virtual Reality and Its Potential Fields of Application},
  year      = {2023},
  isbn      = {9798400702327},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3603163.3609076},
  doi       = {10.1145/3603163.3609076},
  abstract  = {During the last thirty years a variety of hypertext approaches
               and virtual environments -- some virtual hypertext environments
               -- have been developed and discussed. Although the development
               of virtual and augmented reality technologies is rapid and improving,
               and many technologies can be used at affordable conditions, their
               usability for hypertext systems has not yet been explored. At
               the same time, even for virtual three-dimensional virtual and
               augmented environments, there is no generally accepted concept
               that is similar or nearly as elegant as hypertext. This gap will
               have to be filled in the next years and a good concept should
               be developed; in this article we aim to contribute in this direction
               and also introduce a prototype for a possible implementation of
               criteria for virtual hypertext simulations.},
  booktitle = {Proceedings of the 34th ACM Conference on Hypertext and Social Media},
  articleno = {22},
  numpages  = {9},
  keywords  = {VaSiLiLab, virtual hypertext, virtual reality, virtual reality simulation, authoring system, gemdis},
  location  = {Rome, Italy},
  series    = {HT '23},
  pdf       = {https://dl.acm.org/doi/pdf/10.1145/3603163.3609076}
}

Alexander Henlein. 2023. PhD Thesis: Toward context-based text-to-3D scene generation.

BibTeX

@phdthesis{Henlein:2023,
  author    = {Henlein, Alexander},
  title     = {Toward context-based text-to-{3D} scene generation},
  type      = {doctoralthesis},
  pages     = {199},
  school    = {Johann Wolfgang Goethe-Universität},
  doi       = {10.21248/gups.73448},
  year      = {2023},
  pdf       = {https://publikationen.ub.uni-frankfurt.de/files/73448/main.pdf},
  keywords  = {gemdis}
}

Alexander Henlein, Anju Gopinath, Nikhil Krishnaswamy, Alexander Mehler and James Pustejovsky. 2023. Grounding human-object interaction to affordance behavior in multimodal datasets. Frontiers in Artificial Intelligence, 6.

BibTeX

@article{Henlein:et:al:2023a,
  author    = {Henlein, Alexander and Gopinath, Anju and Krishnaswamy, Nikhil
               and Mehler, Alexander and Pustejovsky, James},
  title     = {Grounding human-object interaction to affordance behavior in multimodal datasets},
  journal   = {Frontiers in Artificial Intelligence},
  volume    = {6},
  year      = {2023},
  issn      = {2624-8212},
  doi       = {10.3389/frai.2023.1084740},
  url       = {https://www.frontiersin.org/articles/10.3389/frai.2023.1084740},
  keywords  = {gemdis}
}

Alexander Henlein, Attila Kett, Daniel Baumartz, Giuseppe Abrami, Alexander Mehler, Johannes Bastian, Yannic Blecher, David Budgenhagen, Roman Christof, Tim-Oliver Ewald, Tim Fauerbach, Patrick Masny, Julian Mende, Paul Schnüre and Marc Viel. 2023. Semantic Scene Builder: Towards a Context Sensitive Text-to-3D Scene Framework. Digital Human Modeling and Applications in Health, Safety, Ergonomics and Risk Management, 461–479.

BibTeX

@inproceedings{Henlein:et:al:2023b,
  author    = {Henlein, Alexander and Kett, Attila and Baumartz, Daniel and Abrami, Giuseppe
               and Mehler, Alexander and Bastian, Johannes and Blecher, Yannic and Budgenhagen, David
               and Christof, Roman and Ewald, Tim-Oliver and Fauerbach, Tim and Masny, Patrick
               and Mende, Julian and Schn{\"u}re, Paul and Viel, Marc},
  editor    = {Duffy, Vincent G.},
  title     = {Semantic Scene Builder: Towards a Context Sensitive Text-to-{3D} Scene Framework},
  booktitle = {Digital Human Modeling and Applications in Health, Safety, Ergonomics
               and Risk Management},
  year      = {2023},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  pages     = {461--479},
  abstract  = {We introduce Semantic Scene Builder (SeSB), a VR-based text-to-3D
               scene framework using SemAF (Semantic Annotation Framework) as
               a scheme for annotating discourse structures. SeSB integrates
               a variety of tools and resources by using SemAF and UIMA as a
               unified data structure to generate 3D scenes from textual descriptions.
               Based on VR, SeSB allows its users to change annotations through
               body movements instead of symbolic manipulations: from annotations
               in texts to corrections in editing steps to adjustments in generated
               scenes, all this is done by grabbing and moving objects. We evaluate
               SeSB in comparison with a state-of-the-art open source text-to-scene
               method (the only one which is publicly available) and find that
               our approach not only performs better, but also allows for modeling
               a greater variety of scenes.},
  isbn      = {978-3-031-35748-0},
  doi       = {10.1007/978-3-031-35748-0_32},
  keywords  = {gemdis}
}

Alexander Mehler, Mevlüt Bagci, Alexander Henlein, Giuseppe Abrami, Christian Spiekermann, Patrick Schrottenbacher, Maxim Konca, Andy Lücking, Juliane Engel, Marc Quintino, Jakob Schreiber, Kevin Saukel and Olga Zlatkin-Troitschanskaia. 2023. A Multimodal Data Model for Simulation-Based Learning with Va.Si.Li-Lab. Digital Human Modeling and Applications in Health, Safety, Ergonomics and Risk Management, 539–565.

BibTeX

@inproceedings{Mehler:et:al:2023:a,
  abstract  = {Simulation-based learning is a method in which learners learn
               to master real-life scenarios and tasks from simulated application
               contexts. It is particularly suitable for the use of VR technologies,
               as these allow immersive experiences of the targeted scenarios.
               VR methods are also relevant for studies on online learning, especially
               in groups, as they provide access to a variety of multimodal learning
               and interaction data. However, VR leads to a trade-off between
               technological conditions of the observability of such data and
               the openness of learner behavior. We present Va.Si.Li-Lab, a VR-Lab
               for Simulation-based Learning developed to address this trade-off.
               Va.Si.Li-Lab uses a graph-theoretical model based on hypergraphs
               to represent the data diversity of multimodal learning and interaction.
               We develop this data model in relation to mono- and multimodal,
               intra- and interpersonal data and interleave it with ISO-Space
               to describe distributed multiple documents from the perspective
               of their interactive generation. The paper adds three use cases
               to motivate the broad applicability of Va.Si.Li-Lab and its data
               model.},
  address   = {Cham},
  author    = {Mehler, Alexander and Bagci, Mevl{\"u}t and Henlein, Alexander
               and Abrami, Giuseppe and Spiekermann, Christian and Schrottenbacher, Patrick
               and Konca, Maxim and L{\"u}cking, Andy and Engel, Juliane and Quintino, Marc
               and Schreiber, Jakob and Saukel, Kevin and Zlatkin-Troitschanskaia, Olga},
  booktitle = {Digital Human Modeling and Applications in Health, Safety, Ergonomics
               and Risk Management},
  editor    = {Duffy, Vincent G.},
  isbn      = {978-3-031-35741-1},
  pages     = {539--565},
  publisher = {Springer Nature Switzerland},
  title     = {A Multimodal Data Model for Simulation-Based Learning with {Va.Si.Li-Lab}},
  year      = {2023},
  doi       = {10.1007/978-3-031-35741-1_39},
  keywords  = {gemdis}
}

2022

Andy Lücking. 2022-05-03. Pointing: From reference to attention and back. Invited talk.

BibTeX

@misc{Luecking:2022-bochum,
  title     = {Pointing: From reference to attention and back},
  author    = {Lücking, Andy},
  date      = {2022-05-03},
  howpublished = {Invited talk at the Language Colloquium,
                  Ruhr-Universit{\"a}t Bochum},
  note      = {Invited talk},
  keywords  = {gemdis}
}

Mounika Marreddy, Subba Reddy Oota, Lakshmi Sireesha Vakada, Venkata Charan Chinni and Radhika Mamidi. 2022. Multi-task text classification using graph convolutional networks for large-scale low resource language. 2022 International Joint Conference on Neural Networks (IJCNN), 1–8.

BibTeX

@inproceedings{marreddy:et:al:2022multi,
  title     = {Multi-task text classification using graph convolutional networks
               for large-scale low resource language},
  author    = {Marreddy, Mounika and Oota, Subba Reddy and Vakada, Lakshmi Sireesha
               and Chinni, Venkata Charan and Mamidi, Radhika},
  booktitle = {2022 International Joint Conference on Neural Networks ({IJCNN})},
  pages     = {1--8},
  year      = {2022},
  organization = {IEEE}
}

Subba Reddy Oota, Jashn Arora, Veeral Agarwal, Mounika Marreddy, Manish Gupta and Bapi Raju Surampudi. 2022. Neural language taskonomy: Which NLP tasks are the most predictive of fMRI brain activity? arXiv preprint arXiv:2205.01404.

BibTeX

@misc{Oota:et:al:2022,
  title     = {Neural language taskonomy: Which {NLP} tasks are the most predictive
               of {fMRI} brain activity?},
  author    = {Oota, Subba Reddy and Arora, Jashn and Agarwal, Veeral and Marreddy, Mounika
               and Gupta, Manish and Surampudi, Bapi Raju},
  year      = {2022},
  eprint    = {2205.01404},
  archiveprefix = {arXiv},
  url       = {https://arxiv.org/abs/2205.01404},
  abstract  = {Several popular Transformer based language models have been found
               to be successful for text-driven brain encoding. However, existing
               literature leverages only pretrained text Transformer models and
               has not explored the efficacy of task-specific learned Transformer
               representations. In this work, we explore transfer learning from
               representations learned for ten popular natural language processing
               tasks (two syntactic and eight semantic) for predicting brain
               responses from two diverse datasets: Pereira (subjects reading
               sentences from paragraphs) and Narratives (subjects listening
               to the spoken stories). Encoding models based on task features
               are used to predict activity in different regions across the whole
               brain. Features from coreference resolution, NER, and shallow
               syntax parsing explain greater variance for the reading activity.
               On the other hand, for the listening activity, tasks such as paraphrase
               generation, summarization, and natural language inference show
               better encoding performance. Experiments across all 10 task representations
               provide the following cognitive insights: (i) language left hemisphere
               has higher predictive brain activity versus language right hemisphere,
               (ii) posterior medial cortex, temporoparieto-occipital junction,
               dorsal frontal lobe have higher correlation versus early auditory
               and auditory association cortex, (iii) syntactic and semantic
               tasks display a good predictive performance across brain regions
               for reading and listening stimuli resp.},
  pdf       = {https://arxiv.org/pdf/2205.01404}
}

Mounika Marreddy, Subba Reddy Oota, Lakshmi Sireesha Vakada, Venkata Charan Chinni and Radhika Mamidi. 2022. Am I a resource-poor language? Data sets, embeddings, models and analysis for four different NLP tasks in Telugu language. ACM Transactions on Asian and Low-Resource Language Information Processing, 22(1).

BibTeX

@article{Marreddy:et:al:2022,
  title     = {Am {I} a resource-poor language? Data sets, embeddings, models and
               analysis for four different {NLP} tasks in {Telugu} language},
  author    = {Marreddy, Mounika and Oota, Subba Reddy and Vakada, Lakshmi Sireesha
               and Chinni, Venkata Charan and Mamidi, Radhika},
  journal   = {ACM Transactions on Asian and Low-Resource Language Information Processing},
  volume    = {22},
  number    = {1},
  numpages  = {34},
  articleno = {18},
  year      = {2022},
  issn      = {2375-4699},
  url       = {https://doi.org/10.1145/3531535},
  doi       = {10.1145/3531535},
  publisher = {Association for Computing Machinery},
  abstract  = {Due to the lack of a large annotated corpus, many resource-poor
               Indian languages struggle to reap the benefits of recent deep
               feature representations in Natural Language Processing (NLP).
               Moreover, adopting existing language models trained on large English
               corpora for Indian languages is often limited by data availability,
               rich morphological variation, syntax, and semantic differences.
               In this paper, we explore the traditional to recent efficient
               representations to overcome the challenges of a low resource language,
               Telugu. In particular, our main objective is to mitigate the low-resource
               problem for Telugu. Overall, we present several contributions
               to a resource-poor language viz. Telugu. (i) a large annotated
               data (35,142 sentences in each task) for multiple NLP tasks such
               as sentiment analysis, emotion identification, hate-speech detection,
               and sarcasm detection, (ii) we create different lexicons for sentiment,
               emotion, and hate-speech for improving the efficiency of the models,
               (iii) pretrained word and sentence embeddings, and (iv) different
               pretrained language models for Telugu such as ELMo-Te, BERT-Te,
               RoBERTa-Te, ALBERT-Te, and DistilBERT-Te on a large Telugu corpus
               consisting of 8,015,588 sentences (1,637,408 sentences from Telugu
               Wikipedia and 6,378,180 sentences crawled from different Telugu
               websites). Further, we show that these representations significantly
               improve the performance of four NLP tasks and present the benchmark
               results for Telugu. We argue that our pretrained embeddings are
               competitive or better than the existing multilingual pretrained
               models: mBERT, XLM-R, and IndicBERT. Lastly, the fine-tuning of
               pretrained models show higher performance than linear probing
               results on four NLP tasks with the following F1-scores: Sentiment
               (68.72), Emotion (58.04), Hate-Speech (64.27), and Sarcasm (77.93).
               We also experiment on publicly available Telugu datasets (Named
               Entity Recognition, Article Genre Classification, and Sentiment
               Analysis) and find that our Telugu pretrained language models
               (BERT-Te and RoBERTa-Te) outperform the state-of-the-art system
               except for the sentiment task. We open-source our corpus, four
               different datasets, lexicons, embeddings, and code https://github.com/Cha14ran/DREAM-T.
               The pretrained Transformer models for Telugu are available at
               https://huggingface.co/ltrctelugu.},
  pdf       = {https://dl.acm.org/doi/pdf/10.1145/3531535}
}

Arne Binder, Bhuvanesh Verma and Leonhard Hennig. 2022. Full-Text Argumentation Mining on Scientific Publications.

BibTeX

@misc{Binder:et:al:2022,
  author    = {Arne Binder and Bhuvanesh Verma and Leonhard Hennig},
  title     = {Full-Text Argumentation Mining on Scientific Publications},
  year      = {2022},
  archiveprefix = {arXiv},
  eprint    = {2210.13084},
  primaryclass = {cs.CL},
  url       = {https://arxiv.org/abs/2210.13084}
}

Cornelia Ebert, Andy Lücking and Alexander Mehler. 2022. Introduction to the 2nd Edition of “Semantic, Artificial and Computational Interaction Studies”. HCI International 2022 - Late Breaking Papers. Multimodality in Advanced Interaction Environments, 36–47.

BibTeX

@inproceedings{Ebert:et:al:2022,
  abstract  = {``Behavioromics'' is a term that has been invented to cover the
               study of multimodal interaction from various disciplines and points
               of view. These disciplines and points of view, however, lack a
               platform for exchange. The workshop session on ``Semantic, artificial
               and computational interaction studies'' provides such a platform.
               We motivate behavioromics, sketch its historical background, and
               summarize this year's contributions.},
  address   = {Cham},
  author    = {Ebert, Cornelia and L{\"u}cking, Andy and Mehler, Alexander},
  booktitle = {HCI International 2022 - Late Breaking Papers. Multimodality in
               Advanced Interaction Environments},
  editor    = {Kurosu, Masaaki and Yamamoto, Sakae and Mori, Hirohiko and Schmorrow, Dylan D.
               and Fidopiastis, Cali M. and Streitz, Norbert A. and Konomi, Shin'ichi},
  isbn      = {978-3-031-17618-0},
  pages     = {36--47},
  publisher = {Springer Nature Switzerland},
  title     = {Introduction to the 2nd Edition of ``Semantic, Artificial and
               Computational Interaction Studies''},
  doi       = {10.1007/978-3-031-17618-0_3},
  year      = {2022}
}

Sajawel Ahmed, Rob van der Goot, Misbahur Rehman, Carl Kruse, Ömer Özsoy, Alexander Mehler and Gemma Roig. October, 2022. Tafsir Dataset: A Novel Multi-Task Benchmark for Named Entity Recognition and Topic Modeling in Classical Arabic Literature. Proceedings of the 29th International Conference on Computational Linguistics, 3753–3768.

BibTeX

@inproceedings{Ahmed:et:al:2022,
  title     = {Tafsir Dataset: A Novel Multi-Task Benchmark for Named Entity
               Recognition and Topic Modeling in Classical {Arabic} Literature},
  author    = {Ahmed, Sajawel and van der Goot, Rob and Rehman, Misbahur and Kruse, Carl
               and {\"O}zsoy, {\"O}mer and Mehler, Alexander and Roig, Gemma},
  booktitle = {Proceedings of the 29th International Conference on Computational Linguistics},
  month     = {oct},
  year      = {2022},
  address   = {Gyeongju, Republic of Korea},
  publisher = {International Committee on Computational Linguistics},
  url       = {https://aclanthology.org/2022.coling-1.330},
  pages     = {3753--3768},
  abstract  = {Various historical languages, which used to be lingua franca of
               science and arts, deserve the attention of current NLP research.
               In this work, we take the first data-driven steps towards this
               research line for Classical Arabic (CA) by addressing named entity
               recognition (NER) and topic modeling (TM) on the example of CA
               literature. We manually annotate the encyclopedic work of Tafsir
               Al-Tabari with span-based NEs, sentence-based topics, and span-based
               subtopics, thus creating the Tafsir Dataset with over 51,000 sentences,
               the first large-scale multi-task benchmark for CA. Next, we analyze
               our newly generated dataset, which we make open-source available,
               with current language models (lightweight BiLSTM, transformer-based
               MaChAmP) along a novel script compression method, thereby achieving
               state-of-the-art performance for our target task CA-NER. We also
               show that CA-TM from the perspective of historical topic models,
               which are central to Arabic studies, is very challenging. With
               this interdisciplinary work, we lay the foundations for future
               research on automatic analysis of CA literature.}
}

Jonathan Ginzburg and Andy Lücking. 2022. The Integrated Model of Memory: A Dialogical Perspective. Proceedings of SemDial 2022, 6–17.

BibTeX

@inproceedings{Ginzburg:Luecking:2022:a,
  title     = {The Integrated Model of Memory: {A} Dialogical Perspective},
  author    = {Ginzburg, Jonathan and L{\"u}cking, Andy},
  booktitle = {Proceedings of SemDial 2022},
  series    = {SemDial 2022 -- DubDial},
  location  = {Dublin, Ireland},
  year      = {2022},
  editor    = {Gregoromichelaki, Eleni and Hough, Julian and Kelleher, John D.},
  pages     = {6--17},
  url       = {https://www.semdial.org/anthology/papers/Z/Z22/Z22-3004/},
  pdf       = {http://semdial.org/anthology/Z22-Ginzburg_semdial_0004.pdf}
}

Andy Lücking and Jonathan Ginzburg. 2022. Leading voices: Dialogue semantics, cognitive science, and the polyphonic structure of multimodal interaction. Language and Cognition.

BibTeX

@article{Luecking:Ginzburg:2022-lv,
  author    = {L{\"u}cking, Andy and Ginzburg, Jonathan},
  title     = {Leading voices: {Dialogue} semantics, cognitive science, and the
               polyphonic structure of multimodal interaction},
  journal   = {Language and Cognition},
  year      = {2022},
  keywords  = {gemdis},
  doi       = {10.1017/langcog.2022.30}
}

Andy Lücking and Jonathan Ginzburg. 2022. How to repair a slip of the tongue? Proceedings of SemDial 2022, 35–46.

BibTeX

@inproceedings{Luecking:Ginzburg:2022:a,
  title     = {How to repair a slip of the tongue?},
  author    = {Lücking, Andy and Ginzburg, Jonathan},
  booktitle = {Proceedings of SemDial 2022},
  series    = {SemDial 2022 -- DubDial},
  location  = {Dublin, Ireland},
  year      = {2022},
  editor    = {Gregoromichelaki, Eleni and Hough, Julian and Kelleher, John D.},
  pages     = {35--46},
  url       = {https://www.semdial.org/anthology/papers/Z/Z22/Z22-3007/},
  pdf       = {http://semdial.org/anthology/Z22-Lücking_semdial_0007.pdf}
}

Maxim Konca, Andy Lücking, Alexander Mehler, Marie-Theres Nagel and Olga Zlatkin-Troitschanskaia. April, 2022. Computational educational linguistics for ‘Critical Online Reasoning’ among young professionals in medicine, law and teaching.

BibTeX

@misc{Konca:et:al:2022,
  author    = {Konca, Maxim and L{\"u}cking, Andy and Mehler, Alexander and Nagel, Marie-Theres
               and Zlatkin-Troitschanskaia, Olga},
  title     = {Computational educational linguistics for `Critical Online Reasoning'
               among young professionals in medicine, law and teaching},
  howpublished = {Presentation given at the AERA annual meeting, 21.-26.04. 2022, WERA symposium},
  year      = {2022},
  month     = {04},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2022/10/BRIDGE_WERA_AERA-2022_reduce.pdf}
}

Andy Lücking and Jonathan Ginzburg. 2022. Referential transparency as the proper treatment of quantification. Semantics and Pragmatics, 15.

BibTeX

@article{Luecking:Ginzburg:2022,
  title     = {Referential transparency as the proper treatment of quantification},
  author    = {L{\"u}cking, Andy and Ginzburg, Jonathan},
  journal   = {Semantics and Pragmatics},
  volume    = {15},
  eid       = {4},
  year      = {2022},
  keywords  = {gemdis},
  doi       = {10.3765/sp.15.4}
}

Alexander Mehler, Maxim Konca, Marie-Theres Nagel, Andy Lücking and Olga Zlatkin-Troitschanskaia. March, 2022. On latent domain-specific textual preferences in solving Internet-based generic tasks among graduates/young professionals from three domains.

BibTeX

@misc{Mehler:et:al:2022,
  author    = {Mehler, Alexander and Konca, Maxim and Nagel, Marie-Theres and L{\"u}cking, Andy
               and Zlatkin-Troitschanskaia, Olga},
  year      = {2022},
  month     = {03},
  howpublished = {Presentation at BEBF 2022},
  title     = {On latent domain-specific textual preferences in solving Internet-based
               generic tasks among graduates/young professionals from three domains},
  abstract  = {Although Critical Online Reasoning (COR) is often viewed as a
               general competency (e.g. Alexander et al. 2016), studies have
               found evidence supporting their domain-specificity (Toplak et
               al. 2002). To investigate this assumption, we focus on commonalities
               and differences in textual preferences in solving COR-related
               tasks between graduates/young professionals from three domains.
               For this reason, we collected data by requiring participants to
               solve domain-specific (DOM-COR) and generic (GEN-COR) tasks in
               an authentic Internet-based COR performance assessment (CORA),
               allowing us to disentangle the assumed components of COR abilities.
               Here, we focus on GEN-COR to distinguish between different groups
               of graduates from the three disciplines in the context of generic
               COR tasks. We present a computational model for educationally
               relevant texts that combines features at multiple levels (lexical,
               syntactic, semantic). We use machine learning to predict domain-specific
               group membership based on documents consulted during task solving.
               A major contribution of our analyses is a multi-part text classification
               system that contrasts human annotation and rating of the documents
               used with a semi-automatic classification to predict the document
               type of web pages. That is, we work with competing classifications
               to support our findings. In this way, we develop a computational
               linguistic model that correlates GEN-COR abilities with properties
               of documents consulted for solving the GEN-COR tasks. Results
               show that participants from different domains indeed inquire different
               sets of online sources for the same task. Machine learning-based
               classifications show that the distributional differences can be
               reproduced by computational linguistic models.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2022/04/On_latent_domain-specific_textual_preferences_in_solving_Internet-based_generic_tasks_among_graduates__young_professionals_from_three_domains.pdf}
}

Alexander Henlein and Alexander Mehler. 2022. What do Toothbrushes do in the Kitchen? How Transformers Think our World is Structured. Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, 5791–5807.

BibTeX

@inproceedings{Henlein:Mehler:2022,
  author    = {Henlein, Alexander and Mehler, Alexander},
  title     = {What do Toothbrushes do in the Kitchen? How Transformers Think
               our World is Structured},
  booktitle = {Proceedings of the 2022 Conference of the North American Chapter
               of the Association for Computational Linguistics: Human Language
               Technologies},
  year      = {2022},
  address   = {Seattle, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.naacl-main.425},
  doi       = {10.18653/v1/2022.naacl-main.425},
  pages     = {5791--5807},
  abstract  = {Transformer-based models are now predominant in NLP. They outperform
               approaches based on static models in many respects. This success
               has in turn prompted research that reveals a number of biases
               in the language models generated by transformers. In this paper
               we utilize this research on biases to investigate to what extent
               transformer-based language models allow for extracting knowledge
               about object relations (X occurs in Y; X consists of Z; action
               A involves using X). To this end, we compare contextualized models
               with their static counterparts. We make this comparison dependent
               on the application of a number of similarity measures and classifiers.
               Our results are threefold: Firstly, we show that the models combined
               with the different similarity measures differ greatly in terms
               of the amount of knowledge they allow for extracting. Secondly,
               our results suggest that similarity measures perform much worse
               than classifier-based approaches. Thirdly, we show that, surprisingly,
               static models perform almost as well as contextualized models
               {--} in some cases even better.}
}

Giuseppe Abrami, Mevlüt Bagci, Leon Hammerla and Alexander Mehler. 2022. German Parliamentary Corpus (GerParCor). Proceedings of the Language Resources and Evaluation Conference, 1900–1906.

BibTeX

@inproceedings{Abrami:Bagci:Hammerla:Mehler:2022,
  author    = {Abrami, Giuseppe and Bagci, Mevl{\"u}t and Hammerla, Leon and Mehler, Alexander},
  editor    = {Calzolari, Nicoletta and B{\'e}chet, Fr{\'e}d{\'e}ric and Blache, Philippe
               and Choukri, Khalid and Cieri, Christopher and Declerck, Thierry and Goggi, Sara
               and Isahara, Hitoshi and Maegaard, Bente and Mariani, Joseph and Mazo, H{\'e}l{\`e}ne
               and Odijk, Jan and Piperidis, Stelios},
  title     = {German Parliamentary Corpus (GerParCor)},
  booktitle = {Proceedings of the Language Resources and Evaluation Conference},
  year      = {2022},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  pages     = {1900--1906},
  abstract  = {Parliamentary debates represent a large and partly unexploited
               treasure trove of publicly accessible texts. In the German-speaking
               area, there is a certain deficit of uniformly accessible and annotated
               corpora covering all German-speaking parliaments at the national
               and federal level. To address this gap, we introduce the German
               Parliamentary Corpus (GerParCor). GerParCor is a genre-specific
               corpus of (predominantly historical) German-language parliamentary
               protocols from three centuries and four countries, including state
               and federal level data. In addition, GerParCor contains conversions
               of scanned protocols and, in particular, of protocols in Fraktur
               converted via an OCR process based on Tesseract. All protocols
               were preprocessed by means of the NLP pipeline of spaCy3 and automatically
               annotated with metadata regarding their session date. GerParCor
               is made available in the XMI format of the UIMA project. In this
               way, GerParCor can be used as a large corpus of historical texts
               in the field of political communication for various tasks in NLP.},
  url       = {https://aclanthology.org/2022.lrec-1.202},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2022/06/GerParCor_LREC_2022.pdf},
  keywords  = {gerparcor},
  pdf       = {http://www.lrec-conf.org/proceedings/lrec2022/pdf/2022.lrec-1.202.pdf}
}

Andy Lücking, Manuel Stoeckel, Giuseppe Abrami and Alexander Mehler. 2022. I still have Time(s): Extending HeidelTime for German Texts. Proceedings of the 13th Language Resources and Evaluation Conference.

BibTeX

@inproceedings{Luecking:Stoeckel:Abrami:Mehler:2022,
  title     = {I still have Time(s): Extending {HeidelTime} for {German} Texts},
  author    = {L{\"u}cking, Andy and Stoeckel, Manuel and Abrami, Giuseppe and Mehler, Alexander},
  year      = {2022},
  booktitle = {Proceedings of the 13th Language Resources and Evaluation Conference},
  series    = {LREC 2022},
  location  = {Marseille, France},
  pdf       = {https://aclanthology.org/2022.lrec-1.505.pdf},
  url       = {https://aclanthology.org/2022.lrec-1.505}
}

2021

Mounika Marreddy, Subba Reddy Oota, Lakshmi Sireesha Vakada, Venkata Charan Chinni and Radhika Mamidi. 2021. Clickbait detection in Telugu: Overcoming NLP challenges in resource-poor languages using benchmarked techniques. 2021 International Joint Conference on Neural Networks (IJCNN), 1–8.

BibTeX

@inproceedings{Marreddy:et:al:2011,
  title     = {Clickbait detection in {Telugu}: Overcoming {NLP} challenges in
               resource-poor languages using benchmarked techniques},
  author    = {Marreddy, Mounika and Oota, Subba Reddy and Vakada, Lakshmi Sireesha
               and Chinni, Venkata Charan and Mamidi, Radhika},
  booktitle = {2021 International Joint Conference on Neural Networks (IJCNN)},
  pages     = {1--8},
  year      = {2021},
  organization = {IEEE},
  doi       = {10.1109/IJCNN52387.2021.9534382},
  url       = {https://ieeexplore.ieee.org/document/9534382},
  abstract  = {Clickbait headlines have become a nudge in social media and news
               websites. The methods to identify clickbaits are largely being
               developed for English. There is a need for the same in other
               languages as well with the increase in the usage of social media
               platforms in different languages. In this work, we present
               an annotated clickbait dataset of 112,657 headlines that can be
               used for building an automated clickbait detection system for
               Telugu, a resource-poor language. Our contribution in this paper
               includes (i) generation of the latest pre-trained language models,
               including RoBERTa, ALBERT, and ELECTRA trained on a large Telugu
               corpora of 8,015,588 sentences that we had collected, (ii) data
               analysis and benchmarking the performance of different approaches
               ranging from hand-crafted features to state-of-the-art models.
               We show that the pre-trained language models trained on Telugu
               outperform the existing pre-trained models viz. BERT-Mulingual-Case,
               XLM-MLM, and XLM-R on clickbait task. On a large Telugu clickbait
               dataset of 112,657 samples, the Light Gradient Boosted Machines
               (LGBM) model achieves an F1-score of 0.94 for clickbait headlines.
               For Non-Clickbait headlines, F1-score of 0.93 is obtained which
               is similar to that of Clickbait class. We open-source our dataset,
               pre-trained models, and code.}
}

Jonathan Ginzburg and Andy Lücking. 2021. Requesting clarifications with speech and gestures. Proceedings of the 1st Workshop on Multimodal Semantic Representations, 21–31.

BibTeX

@inproceedings{Ginzburg:Luecking:2021-clarifications,
  author    = {Ginzburg, Jonathan and L{\"u}cking, Andy},
  title     = {Requesting clarifications with speech and gestures},
  booktitle = {Proceedings of the 1st Workshop on Multimodal Semantic Representations},
  series    = {MMSR},
  location  = {Groningen, Netherlands (Online)},
  publisher = {Association for Computational Linguistics},
  year      = {2021},
  pages     = {21--31},
  url       = {https://aclanthology.org/2021.mmsr-1.3},
  pdf       = {https://aclanthology.org/2021.mmsr-1.3.pdf},
  abstract  = {In multimodal natural language interaction both speech and non-speech
               gestures are involved in the basic mechanism of grounding and
               repair. We discuss a couple of multimodal clarification requests
               and argue that gestures, as well as speech expressions, underlie
               comparable parallelism constraints. In order to make this precise,
               we slightly extend the formal dialogue framework KoS to cover
               also gestural counterparts of verbal locutionary propositions.}
}

Maxim Konca, Alexander Mehler, Daniel Baumartz and Wahed Hemati. 2021. From distinguishability to informativity. A quantitative text model for detecting random texts. Language and Text: Data, models, information and applications, 356:145–162.

BibTeX

@article{Konca:et:al:2021,
  title     = {From distinguishability to informativity. A quantitative text
               model for detecting random texts},
  author    = {Konca, Maxim and Mehler, Alexander and Baumartz, Daniel and Hemati, Wahed},
  journal   = {Language and Text: Data, models, information and applications},
  volume    = {356},
  pages     = {145--162},
  year      = {2021},
  editor    = {Paw{\l}owski, Adam and Ma{\v{c}}utek, Jan and Embleton, Sheila
               and Mikros, George},
  publisher = {John Benjamins Publishing Company},
  doi       = {10.1075/cilt.356.10kon}
}

Tatiana Lokot, Olga Abramov and Alexander Mehler. November, 2021. On the asymptotic behavior of the average geodesic distance L and the compactness CB of simple connected undirected graphs whose order approaches infinity. PLOS ONE, 16(11):1–13.

BibTeX

@article{Lokot:Abramov:Mehler:2021,
  doi       = {10.1371/journal.pone.0259776},
  author    = {Lokot, Tatiana and Abramov, Olga and Mehler, Alexander},
  journal   = {PLOS ONE},
  publisher = {Public Library of Science},
  title     = {On the asymptotic behavior of the average geodesic distance L
               and the compactness CB of simple connected undirected graphs whose
               order approaches infinity},
  year      = {2021},
  month     = nov,
  volume    = {16},
  number    = {11},
  url       = {https://doi.org/10.1371/journal.pone.0259776},
  pages     = {1--13},
  abstract  = {The average geodesic distance L Newman (2003) and the compactness
               CB Botafogo (1992) are important graph indices in applications
               of complex network theory to real-world problems. Here, for simple
               connected undirected graphs G of order n, we study the behavior
               of L(G) and CB(G), subject to the condition that their order |V(G)|
               approaches infinity. We prove that the limit of L(G)/n and CB(G)
               lies within the interval [0;1/3] and [2/3;1], respectively. Moreover,
               for any not necessarily rational number β ∈ [0;1/3] (α ∈ [2/3;1])
               we show how to construct the sequence of graphs {G}, |V(G)| =
               n → ∞, for which the limit of L(G)/n (CB(G)) is exactly β (α)
               (Theorems 1 and 2). Based on these results, our work points to
               novel classification possibilities of graphs at the node level
               as well as to the information-theoretic classification of the
               structural complexity of graph indices.}
}

Alexander Mehler, Daniel Baumartz and Tolga Uslu. 2021. SemioGraphs: Visualizing Topic Networks as Multi-Codal Graphs. International Quantitative Linguistics Conference (QUALICO 2021).

BibTeX

@inproceedings{Mehler:Uslu:Baumartz:2021,
  author    = {Mehler, Alexander and Baumartz, Daniel and Uslu, Tolga},
  title     = {{SemioGraphs}: Visualizing Topic Networks as Multi-Codal Graphs},
  booktitle = {International Quantitative Linguistics Conference (QUALICO 2021)},
  series    = {QUALICO 2021},
  location  = {Tokyo, Japan},
  year      = {2021},
  poster    = {https://www.texttechnologylab.org/files/Qualico_2021_Semiograph_Poster.pdf}
}

Andy Lücking, Jonathan Ginzburg and Robin Cooper. 2021. Grammar in dialogue. Head Driven Phrase Structure Grammar: The handbook, 1155–1199.

BibTeX

@incollection{Luecking:Ginzburg:Cooper:2021,
  author    = {L{\"u}cking, Andy and Ginzburg, Jonathan and Cooper, Robin},
  title     = {Grammar in dialogue},
  chapter   = {26},
  pages     = {1155--1199},
  url       = {https://langsci-press.org/catalog/book/259},
  editor    = {M{\"u}ller, Stefan and Abeill{\'e}, Anne and Borsley, Robert D.
               and Koenig, Jean-Pierre},
  booktitle = {{Head Driven Phrase Structure Grammar: The handbook}},
  year      = {2021},
  series    = {Empirically Oriented Theoretical Morphology and
               Syntax},
  number    = {9},
  address   = {Berlin},
  publisher = {Language Science Press},
  doi       = {10.5281/zenodo.5543318}
}

Andy Lücking. 2021. Gesture. Head Driven Phrase Structure Grammar: The handbook, 1201–1250.

BibTeX

@incollection{Luecking:2021,
  author    = {L{\"u}cking, Andy},
  title     = {Gesture},
  pages     = {1201--1250},
  chapter   = {27},
  url       = {https://langsci-press.org/catalog/book/259},
  editor    = {M{\"u}ller, Stefan and Abeill{\'e}, Anne and Borsley, Robert D.
               and Koenig, Jean-Pierre},
  booktitle = {{Head Driven Phrase Structure Grammar: The handbook}},
  year      = {2021},
  series    = {Empirically Oriented Theoretical Morphology and
               Syntax},
  number    = {9},
  address   = {Berlin},
  publisher = {Language Science Press},
  doi       = {10.5281/zenodo.5543318}
}

Andy Lücking and Jonathan Ginzburg. 2021. Saying and shaking `No'. Proceedings of the 28th International Conference on Head-Driven Phrase Structure Grammar, Online (Frankfurt/Main), 283–299.

BibTeX

@inproceedings{Luecking:Ginzburg:2021:a,
  author    = {L{\"u}cking, Andy and Ginzburg, Jonathan},
  title     = {Saying and shaking {`No'}},
  booktitle = {{Proceedings of the 28th International Conference on Head-Driven
               Phrase Structure Grammar, Online (Frankfurt/Main)}},
  editor    = {M{\"u}ller, Stefan and Melnik, Nurit},
  issn      = {1535-1793},
  doi       = {10.21248/hpsg.2021.15},
  publisher = {University Library},
  address   = {Frankfurt/Main},
  pages     = {283--299},
  year      = {2021}
}

Jonathan Ginzburg and Andy Lücking. 2021. I thought pointing is rude: A dialogue-semantic analysis of pointing at the addressee. Proceedings of Sinn und Bedeutung 25, 276–291. Special Session: Gestures and Natural Language Semantics.

BibTeX

@inproceedings{Ginzburg:Luecking:2021:b,
  author    = {Ginzburg, Jonathan and L{\"u}cking, Andy},
  title     = {I thought pointing is rude: {A} dialogue-semantic analysis of
               pointing at the addressee},
  booktitle = {Proceedings of {Sinn und Bedeutung} 25},
  series    = {SuB 25},
  year      = {2021},
  pages     = {276--291},
  editor    = {Grosz, Patrick and Mart{\'i}, Luisa and Pearson, Hazel and Sudo, Yasutada
               and Zobel, Sarah},
  note      = {Special Session: Gestures and Natural Language
               Semantics},
  location  = {University College London (Online)},
  url       = {https://ojs.ub.uni-konstanz.de/sub/index.php/sub/article/view/937}
}

Alexander Henlein, Giuseppe Abrami, Attila Kett, Christian Spiekermann and Alexander Mehler. 2021. Digital Learning, Teaching and Collaboration in an Era of ubiquitous Quarantine. Remote Learning in Times of Pandemic - Issues, Implications and Best Practice.

BibTeX

@incollection{Henlein:et:al:2021,
  author    = {Henlein, Alexander and Abrami, Giuseppe and Kett, Attila
               and Spiekermann, Christian and Mehler, Alexander},
  title     = {Digital Learning, Teaching and Collaboration in an Era of ubiquitous Quarantine},
  editor    = {Daniela, Linda and Visvizi, Anna},
  booktitle = {Remote Learning in Times of Pandemic - Issues, Implications and Best Practice},
  publisher = {Routledge},
  address   = {Abingdon-on-Thames, Oxfordshire, England, UK},
  year      = {2021},
  chapter   = {3}
}

Andy Lücking, Christine Driller, Manuel Stoeckel, Giuseppe Abrami, Adrian Pachzelt and Alexander Mehler. 2021. Multiple Annotation for Biodiversity: Developing an annotation framework among biology, linguistics and text technology. Language Resources and Evaluation.

BibTeX

@article{Luecking:et:al:2021,
  author    = {Andy Lücking and Christine Driller and Manuel Stoeckel and Giuseppe Abrami
               and Adrian Pachzelt and Alexander Mehler},
  title     = {Multiple Annotation for Biodiversity: Developing an annotation
               framework among biology, linguistics and text technology},
  journal   = {Language Resources and Evaluation},
  editor    = {Nancy Ide and Nicoletta Calzolari},
  year      = {2021},
  keywords  = {biofid},
  doi       = {10.1007/s10579-021-09553-5},
  pdf       = {https://link.springer.com/content/pdf/10.1007/s10579-021-09553-5.pdf}
}

Pascal Fischer, Alen Smajic, Giuseppe Abrami and Alexander Mehler. 2021. Multi-Type-TD-TSR - Extracting Tables from Document Images using a Multi-stage Pipeline for Table Detection and Table Structure Recognition: from OCR to Structured Table Representations. Proceedings of the 44th German Conference on Artificial Intelligence.

BibTeX

@inproceedings{Fischer:et:al:2021,
  title     = {Multi-Type-TD-TSR - Extracting Tables from Document Images using
               a Multi-stage Pipeline for Table Detection and Table Structure
               Recognition: from OCR to Structured Table Representations},
  author    = {Fischer, Pascal and Smajic, Alen and Abrami, Giuseppe and Mehler, Alexander},
  year      = {2021},
  booktitle = {Proceedings of the 44th German Conference on Artificial Intelligence},
  series    = {KI2021},
  location  = {Berlin, Germany},
  pdf       = {https://arxiv.org/pdf/2105.11021.pdf},
  url       = {https://www.springerprofessional.de/multi-type-td-tsr-extracting-tables-from-document-images-using-a/19711570}
}

Mark Klement, Alexander Henlein and Alexander Mehler. June, 2021. VoxML Annotation Tool Review and Suggestions for Improvement. Proceedings of the Seventeenth Joint ACL - ISO Workshop on Interoperable Semantic Annotation (ISA-17, Note for special track on visual information annotation).

BibTeX

@inproceedings{Klement:et:al:2021,
  author    = {Klement, Mark and Henlein, Alexander and Mehler, Alexander},
  title     = {VoxML Annotation Tool Review and Suggestions for Improvement},
  booktitle = {Proceedings of the Seventeenth Joint ACL - ISO Workshop on Interoperable
               Semantic Annotation (ISA-17, Note for special track on visual
               information annotation)},
  series    = {ISA-17},
  location  = {Groningen, Netherlands},
  month     = jun,
  year      = {2021},
  pdf       = {https://sigsem.uvt.nl/isa17/32_Klement-Paper.pdf}
}

Giuseppe Abrami, Alexander Henlein, Andy Lücking, Attila Kett, Pascal Adeberg and Alexander Mehler. June, 2021. Unleashing annotations with TextAnnotator: Multimedia, multi-perspective document views for ubiquitous annotation. Proceedings of the 17th Joint ACL - ISO Workshop on Interoperable Semantic Annotation, 65–75.

BibTeX

@inproceedings{Abrami:et:al:2021,
  author    = {Abrami, Giuseppe and Henlein, Alexander and Lücking, Andy and Kett, Attila
               and Adeberg, Pascal and Mehler, Alexander},
  title     = {Unleashing annotations with {TextAnnotator}: Multimedia, multi-perspective
               document views for ubiquitous annotation},
  booktitle = {Proceedings of the 17th Joint ACL - ISO Workshop on Interoperable
               Semantic Annotation},
  series    = {ISA-17},
  publisher = {Association for Computational Linguistics},
  address   = {Groningen, The Netherlands (online)},
  month     = jun,
  editor    = {Bunt, Harry},
  year      = {2021},
  url       = {https://aclanthology.org/2021.isa-1.7},
  pages     = {65--75},
  keywords  = {textannotator, biofid},
  pdf       = {https://iwcs2021.github.io/proceedings/isa/pdf/2021.isa-1.7.pdf},
  abstract  = {We argue that mainly due to technical innovation in the landscape
               of annotation tools, a conceptual change in annotation models
               and processes is also on the horizon. It is diagnosed that these
               changes are bound up with multi-media and multi-perspective facilities
               of annotation tools, in particular when considering virtual reality
               (VR) and augmented reality (AR) applications, their potential
               ubiquitous use, and the exploitation of externally trained natural
               language pre-processing methods. Such developments potentially
               lead to a dynamic and exploratory heuristic construction of the
               annotation process. With TextAnnotator an annotation suite is
               introduced which focuses on multi-mediality and multi-perspectivity
               with an interoperable set of task-specific annotation modules
               (e.g., for word classification, rhetorical structures, dependency
               trees, semantic roles, and more) and their linkage to VR and mobile
               implementations. The basic architecture and usage of TextAnnotator
               is described and related to the above mentioned shifts in the
               field.}
}

Andy Lücking, Sebastian Brückner, Giuseppe Abrami, Tolga Uslu and Alexander Mehler. 2021. Computational linguistic assessment of textbooks and online texts by means of threshold concepts in economics. Frontiers in Education.

BibTeX

@article{Luecking:Brueckner:Abrami:Uslu:Mehler:2021,
  author    = {L{\"u}cking, Andy and Br{\"u}ckner, Sebastian and Abrami, Giuseppe
               and Uslu, Tolga and Mehler, Alexander},
  title     = {Computational linguistic assessment of textbooks and online texts
               by means of threshold concepts in economics},
  journal   = {Frontiers in Education},
  year      = {2021},
  eid       = {578475},
  doi       = {10.3389/feduc.2020.578475},
  url       = {https://www.frontiersin.org/articles/10.3389/feduc.2020.578475/}
}

2020

Arati Paul, Bhuvanesh Verma and Debasish Chakraborty. 2020. Estimating electrification using multi-temporal DMSP/OLS night imagery as proxy measure of human well-being in India. Spatial Information Research, 28:469–473.

BibTeX

@article{Paul:et:al:2020,
  author    = {Paul, Arati and Verma, Bhuvanesh and Chakraborty, Debasish},
  title     = {Estimating electrification using multi-temporal DMSP/OLS night
               imagery as proxy measure of human well-being in India},
  journal   = {Spatial Information Research},
  publisher = {Springer},
  volume    = {28},
  pages     = {469--473},
  year      = {2020},
  issn      = {2366-3294},
  doi       = {10.1007/s41324-019-00307-8},
  url       = {http://dx.doi.org/10.1007/s41324-019-00307-8}
}

Tolga Uslu. 2020. PhD Thesis: Multi-document analysis : semantic analysis of large text corpora beyond topic modeling.

BibTeX

@phdthesis{Uslu:2020,
  author    = {Tolga Uslu},
  title     = {Multi-document analysis : semantic analysis of large text corpora
               beyond topic modeling},
  school    = {Johann Wolfgang Goethe-Universität Frankfurt am Main},
  pages     = {204},
  year      = {2020},
  url       = {http://publikationen.ub.uni-frankfurt.de/frontdoor/index/index/docId/56140},
  pdf       = {http://publikationen.ub.uni-frankfurt.de/files/56140/Dissertation_Tolga_Uslu.pdf}
}

Wahed Hemati. 2020. PhD Thesis: TextImager-VSD : large scale verb sense disambiguation and named entity recognition in the context of TextImager.

BibTeX

@phdthesis{Hemati:2020,
  author    = {Wahed Hemati},
  title     = {TextImager-VSD : large scale verb sense disambiguation and named
               entity recognition in the context of TextImager},
  school    = {Johann Wolfgang Goethe-Universität Frankfurt am Main},
  pages     = {174},
  year      = {2020},
  url       = {http://publikationen.ub.uni-frankfurt.de/frontdoor/index/index/docId/56089},
  pdf       = {http://publikationen.ub.uni-frankfurt.de/files/56089/dissertation_Wahed_Hemati.pdf}
}

Alexander Mehler, Wahed Hemati, Pascal Welke, Maxim Konca and Tolga Uslu. 2020. Multiple Texts as a Limiting Factor in Online Learning: Quantifying (Dis-)similarities of Knowledge Networks. Frontiers in Education, 5:206.

BibTeX

@article{Mehler:Hemati:Welke:Konca:Uslu:2020,
  author    = {Mehler, Alexander and Hemati, Wahed and Welke, Pascal and Konca, Maxim
               and Uslu, Tolga},
  title     = {Multiple Texts as a Limiting Factor in Online Learning: Quantifying
               (Dis-)similarities of Knowledge Networks},
  journal   = {Frontiers in Education},
  volume    = {5},
  pages     = {206},
  year      = {2020},
  issn      = {2504-284X},
  doi       = {10.3389/feduc.2020.562670},
  url       = {https://www.frontiersin.org/article/10.3389/feduc.2020.562670},
  pdf       = {https://www.frontiersin.org/articles/10.3389/feduc.2020.562670/pdf},
  abstract  = {We test the hypothesis that the extent to which one obtains information
               on a given topic through Wikipedia depends on the language in
               which it is consulted. Controlling the size factor, we investigate
               this hypothesis for a number of 25 subject areas. Since Wikipedia
               is a central part of the web-based information landscape, this
               indicates a language-related, linguistic bias. The article therefore
               deals with the question of whether Wikipedia exhibits this kind
               of linguistic relativity or not. From the perspective of educational
               science, the article develops a computational model of the information
               landscape from which multiple texts are drawn as typical input
               of web-based reading. For this purpose, it develops a hybrid model
               of intra- and intertextual similarity of different parts of the
               information landscape and tests this model on the example of 35
               languages and corresponding Wikipedias. In the way it measures
               the similarities of hypertexts, the article goes beyond existing
               approaches by examining their structural and semantic aspects
               intra- and intertextually. In this way it builds a bridge between
               reading research, educational science, Wikipedia research and
               computational linguistics.}
}

Andy Lücking, Sebastian Brückner, Giuseppe Abrami, Tolga Uslu and Alexander Mehler. 2020. Computational linguistic assessment of textbook and online learning media by means of threshold concepts in business education. CoRR, abs/2008.02096.

BibTeX

@article{Luecking:et:al:2020,
  title     = {Computational linguistic assessment of textbook and online learning
               media by means of threshold concepts in business education},
  author    = {Andy L{\"{u}}cking and Sebastian Br{\"{u}}ckner and Giuseppe Abrami
               and Tolga Uslu and Alexander Mehler},
  year      = {2020},
  journal   = {CoRR},
  volume    = {abs/2008.02096},
  eprint    = {2008.02096},
  archiveprefix = {arXiv},
  url       = {https://arxiv.org/abs/2008.02096},
  biburl    = {https://dblp.org/rec/journals/corr/abs-2008-02096.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 07 Aug 2020 15:07:21 +0200}
}

Daniel Baumartz. June, 2020. BA Thesis: Automatic Topic Modeling in the Context of Digital Libraries: Mehrsprachige Korpus-basierte Erweiterung von text2ddc - eine experimentelle Studie.

BibTeX

@bathesis{Baumartz:2020,
  author    = {Baumartz, Daniel},
  title     = {{Automatic Topic Modeling in the Context of Digital Libraries:
               Mehrsprachige Korpus-basierte Erweiterung von text2ddc - eine
               experimentelle Studie}},
  year      = {2020},
  month     = jun,
  school    = {Johann Wolfgang Goethe-Universität, Institute of Computer
               Science and Mathematics, Text Technology Lab},
  address   = {Frankfurt, Germany},
  url       = {https://publikationen.ub.uni-frankfurt.de/frontdoor/index/index/docId/56381},
  pdf       = {https://publikationen.ub.uni-frankfurt.de/files/56381/baumartz_bachelorarbeit_2020_pub.pdf}
}

Christine Driller, Markus Koch, Giuseppe Abrami, Wahed Hemati, Andy Lücking, Alexander Mehler, Adrian Pachzelt and Gerwin Kasperek. 2020. Fast and Easy Access to Central European Biodiversity Data with BIOfid. Biodiversity Information Science and Standards, 4:e59157.

BibTeX

@article{Driller:et:al:2020,
  author    = {Christine Driller and Markus Koch and Giuseppe Abrami and Wahed Hemati
               and Andy Lücking and Alexander Mehler and Adrian Pachzelt and Gerwin Kasperek},
  title     = {Fast and Easy Access to Central European Biodiversity Data with BIOfid},
  journal   = {Biodiversity Information Science and Standards},
  volume    = {4},
  pages     = {e59157},
  year      = {2020},
  doi       = {10.3897/biss.4.59157},
  url       = {https://doi.org/10.3897/biss.4.59157},
  publisher = {Pensoft Publishers},
  abstract  = {The storage of data in public repositories such as the Global
               Biodiversity Information Facility (GBIF) or the National Center
               for Biotechnology Information (NCBI) is nowadays stipulated in
               the policies of many publishers in order to facilitate data replication
               or proliferation. Species occurrence records contained in legacy
               printed literature are no exception to this. The extent of their
               digital and machine-readable availability, however, is still far
               from matching the existing data volume (Thessen and Parr 2014).
               But precisely these data are becoming more and more relevant to
               the investigation of ongoing loss of biodiversity. In order to
               extract species occurrence records at a larger scale from available
               publications, one has to apply specialised text mining tools.
               However, such tools are in short supply especially for scientific
               literature in the German language. The Specialised Information
               Service Biodiversity Research BIOfid (Koch et al. 2017) aims
               at reducing this desideratum, inter alia, by preparing a searchable
               text corpus semantically enriched by a new kind of multi-label
               annotation. For this purpose, we feed manual annotations into
               automatic, machine-learning annotators. This mixture of automatic
               and manual methods is needed, because BIOfid approaches a new
               application area with respect to language (mainly German of the
               19th century), text type (biological reports), and linguistic
               focus (technical and everyday language). We will present current
               results of the performance of BIOfid’s semantic search engine
               and the application of independent natural language processing
               (NLP) tools. Most of these are freely available online, such as
               TextImager (Hemati et al. 2016). We will show how TextImager is
               tied into the BIOfid pipeline and how it is made scalable (e.g.
               extendible by further modules) and usable on different systems
               (docker containers). Further, we will provide a short introduction
               to generating machine-learning training data using TextAnnotator
               (Abrami et al. 2019) for multi-label annotation. Annotation reproducibility
               can be assessed by the implementation of inter-annotator agreement
               methods (Abrami et al. 2020). Beyond taxon recognition and entity
               linking, we place particular emphasis on location and time information.
               For this purpose, our annotation tag-set combines general categories
               and biology-specific categories (including taxonomic names) with
               location and time ontologies. The application of the annotation
               categories is regimented by annotation guidelines (Lücking et
               al. 2020). Within the next years, our work deliverable will be
               a semantically accessible and data-extractable text corpus of
               around two million pages. In this way, BIOfid is creating a new
               valuable resource that expands our knowledge of biodiversity and
               its determinants.},
  keywords  = {biofid}
}

Jonathan Ginzburg and Andy Lücking. 2020. On Laughter and Forgetting and Reconversing: A neurologically-inspired model of conversational context. Proceedings of the 24th Workshop on the Semantics and Pragmatics of Dialogue.

BibTeX

@inproceedings{Ginzburg:Luecking:2020:a,
  author    = {Ginzburg, Jonathan and L{\"u}cking, Andy},
  title     = {On Laughter and Forgetting and Reconversing: {A} neurologically-inspired
               model of conversational context},
  booktitle = {Proceedings of the 24th Workshop on the Semantics and Pragmatics of Dialogue},
  series    = {SemDial/WatchDial},
  year      = {2020},
  location  = {Brandeis University, Waltham, Massachusetts (Online)},
  url       = {https://www.semdial.org/anthology/papers/Z/Z20/Z20-3008/},
  pdf       = {http://semdial.org/anthology/Z20-Ginzburg_semdial_0008.pdf}
}

Andy Lücking and Jonathan Ginzburg. 2020. Towards the score of communication. Proceedings of the 24th Workshop on the Semantics and Pragmatics of Dialogue.

BibTeX

@inproceedings{Luecking:Ginzburg:2020,
  author    = {L{\"u}cking, Andy and Ginzburg, Jonathan},
  title     = {Towards the score of communication},
  booktitle = {Proceedings of the 24th Workshop on the Semantics and Pragmatics of Dialogue},
  series    = {SemDial/WatchDial},
  year      = {2020},
  location  = {Brandeis University, Waltham, Massachusetts (Online)},
  url       = {https://www.semdial.org/anthology/papers/Z/Z20/Z20-3016/},
  pdf       = {http://semdial.org/anthology/Z20-Luecking_semdial_0016.pdf}
}

Giuseppe Abrami, Alexander Mehler and Manuel Stoeckel. 2020. TextAnnotator: A web-based annotation suite for texts. Proceedings of the Digital Humanities 2020.

BibTeX

@inproceedings{Abrami:Mehler:Stoeckel:2020,
  author    = {Abrami, Giuseppe and Mehler, Alexander and Stoeckel, Manuel},
  title     = {{TextAnnotator}: A web-based annotation suite for texts},
  booktitle = {Proceedings of the Digital Humanities 2020},
  series    = {DH 2020},
  location  = {Ottawa, Canada},
  year      = {2020},
  url       = {https://dh2020.adho.org/wp-content/uploads/2020/07/547_TextAnnotatorAwebbasedannotationsuitefortexts.html},
  doi       = {10.17613/tenm-4907},
  abstract  = {The TextAnnotator is a tool for simultaneous and collaborative
               annotation of texts with visual annotation support, integration
               of knowledge bases and, by pipelining the TextImager, a rich variety
               of pre-processing and automatic annotation tools. It includes
               a variety of modules for the annotation of texts, which contains
               the annotation of argumentative, rhetorical, propositional and
               temporal structures as well as a module for named entity linking
               and rapid annotation of named entities. Especially the modules
               for annotation of temporal, argumentative and propositional structures
               are currently unique in web-based annotation tools. The TextAnnotator,
               which allows the annotation of texts as a platform, is divided
               into a front- and a backend component. The backend is a web service
               based on WebSockets, which integrates the UIMA Database Interface
               to manage and use texts. Texts are made accessible by using the
               ResourceManager and the AuthorityManager, based on user and group
               access permissions. Different views of a document can be created
               and used depending on the scenario. Once a document has been opened,
               access is gained to the annotations stored within annotation views
               in which these are organized. Any annotation view can be assigned
               with access permissions and by default, each user obtains his
               or her own user view for every annotated document. In addition,
               with sufficient access permissions, all annotation views can also
               be used and curated. This allows the possibility to calculate
               an Inter-Annotator-Agreement for a document, which shows an agreement
               between the annotators. Annotators without sufficient rights cannot
               display this value so that the annotators do not influence each
               other. This contribution is intended to reflect the current state
               of development of TextAnnotator, demonstrate the possibilities
               of an instantaneous Inter-Annotator-Agreement and trigger a discussion
               about further functions for the community.},
  keywords  = {textannotator, biofid},
  poster    = {https://hcommons.org/deposits/download/hc:31816/CONTENT/dh2020_textannotator_poster.pdf}
}

Giuseppe Abrami, Manuel Stoeckel and Alexander Mehler. 2020. TextAnnotator: A UIMA Based Tool for the Simultaneous and Collaborative Annotation of Texts. Proceedings of The 12th Language Resources and Evaluation Conference, 891–900.

BibTeX

@inproceedings{Abrami:Stoeckel:Mehler:2020,
  author    = {Abrami, Giuseppe and Stoeckel, Manuel and Mehler, Alexander},
  title     = {{TextAnnotator}: A {UIMA} Based Tool for the Simultaneous and Collaborative
               Annotation of Texts},
  booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference},
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  pages     = {891--900},
  isbn      = {979-10-95546-34-4},
  abstract  = {The annotation of texts and other material in the field of digital
               humanities and Natural Language Processing (NLP) is a common task
               of research projects. At the same time, the annotation of corpora
               is certainly the most time- and cost-intensive component in research
               projects and often requires a high level of expertise according
               to the research interest. However, for the annotation of texts,
               a wide range of tools is available, both for automatic and manual
               annotation. Since the automatic pre-processing methods are not
               error-free and there is an increasing demand for the generation
               of training data, also with regard to machine learning, suitable
               annotation tools are required. This paper defines criteria of
               flexibility and efficiency of complex annotations for the assessment
               of existing annotation tools. To extend this list of tools, the
               paper describes TextAnnotator, a browser-based, multi-annotation
               system, which has been developed to perform platform-independent
               multimodal annotations and annotate complex textual structures.
               The paper illustrates the current state of development of TextAnnotator
               and demonstrates its ability to evaluate annotation quality (inter-annotator
               agreement) at runtime. In addition, it will be shown how annotations
               of different users can be performed simultaneously and collaboratively
               on the same document from different platforms using UIMA as the
               basis for annotation.},
  url       = {https://www.aclweb.org/anthology/2020.lrec-1.112},
  keywords  = {textannotator, biofid},
  pdf       = {http://www.lrec-conf.org/proceedings/lrec2020/pdf/2020.lrec-1.112.pdf}
}

Giuseppe Abrami, Alexander Henlein, Attila Kett and Alexander Mehler. 2020. Text2SceneVR: Generating Hypertexts with VAnnotatoR as a Pre-processing Step for Text2Scene Systems. Proceedings of the 31st ACM Conference on Hypertext and Social Media, 177–186.

BibTeX

@inproceedings{Abrami:Henlein:Kett:Mehler:2020,
  author    = {Abrami, Giuseppe and Henlein, Alexander and Kett, Attila and Mehler, Alexander},
  title     = {{Text2SceneVR}: Generating Hypertexts with {VAnnotatoR} as a Pre-processing
               Step for {Text2Scene} Systems},
  booktitle = {Proceedings of the 31st ACM Conference on Hypertext and Social Media},
  series    = {HT '20},
  year      = {2020},
  location  = {Virtual Event, USA},
  isbn      = {9781450370981},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3372923.3404791},
  doi       = {10.1145/3372923.3404791},
  pages     = {177--186},
  numpages  = {10},
  pdf       = {https://dl.acm.org/doi/pdf/10.1145/3372923.3404791}
}

Manuel Stoeckel, Alexander Henlein, Wahed Hemati and Alexander Mehler. May, 2020. Voting for POS tagging of Latin texts: Using the flair of FLAIR to better Ensemble Classifiers by Example of Latin. Proceedings of LT4HALA 2020 - 1st Workshop on Language Technologies for Historical and Ancient Languages, 130–135.

BibTeX

@inproceedings{Stoeckel:et:al:2020,
  author    = {Stoeckel, Manuel and Henlein, Alexander and Hemati, Wahed and Mehler, Alexander},
  title     = {Voting for {POS} tagging of {Latin} texts: Using the flair of {FLAIR}
               to better Ensemble Classifiers by Example of {Latin}},
  booktitle = {Proceedings of LT4HALA 2020 - 1st Workshop on Language Technologies
               for Historical and Ancient Languages},
  month     = {May},
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association (ELRA)},
  pages     = {130--135},
  abstract  = {Despite the great importance of the Latin language in the past,
               there are relatively few resources available today to develop
               modern NLP tools for this language. Therefore, the EvaLatin Shared
               Task for Lemmatization and Part-of-Speech (POS) tagging was published
               in the LT4HALA workshop. In our work, we dealt with the second
               EvaLatin task, that is, POS tagging. Since most of the available
               Latin word embeddings were trained on either few or inaccurate
               data, we trained several embeddings on better data in the first
               step. Based on these embeddings, we trained several state-of-the-art
               taggers and used them as input for an ensemble classifier called
               LSTMVoter. We were able to achieve the best results for both the
               cross-genre and the cross-time task (90.64\% and 87.00\%) without
               using additional annotated data (closed modality). In the meantime,
               we further improved the system and achieved even better results
               (96.91\% on classical, 90.87\% on cross-genre and 87.35\% on cross-time).},
  url       = {https://www.aclweb.org/anthology/2020.lt4hala-1.21},
  pdf       = {http://www.lrec-conf.org/proceedings/lrec2020/workshops/LT4HALA/pdf/2020.lt4hala-1.21.pdf}
}

Alexander Mehler, Bernhard Jussen, Tim Geelhaar, Alexander Henlein, Giuseppe Abrami, Daniel Baumartz, Tolga Uslu and Wahed Hemati. 2020. The Frankfurt Latin Lexicon. From Morphological Expansion and Word Embeddings to SemioGraphs. Studi e Saggi Linguistici, 58(1):121–155.

BibTeX

@article{Mehler:et:al:2020b,
  author    = {Mehler, Alexander and Jussen, Bernhard and Geelhaar, Tim and Henlein, Alexander
               and Abrami, Giuseppe and Baumartz, Daniel and Uslu, Tolga and Hemati, Wahed},
  title     = {The {Frankfurt Latin Lexicon}. {From} Morphological Expansion and
               Word Embeddings to {SemioGraphs}},
  journal   = {Studi e Saggi Linguistici},
  doi       = {10.4454/ssl.v58i1.276},
  year      = {2020},
  volume    = {58},
  number    = {1},
  pages     = {121--155},
  abstract  = {In this article we present the Frankfurt Latin Lexicon (FLL),
               a lexical resource for Medieval Latin that is used both for the
               lemmatization of Latin texts and for the post-editing of lemmatizations.
               We describe recent advances in the development of lemmatizers
               and test them against the Capitularies corpus (comprising Frankish
               royal edicts, mid-6th to mid-9th century), a corpus created as
               a reference for processing Medieval Latin. We also consider the
               post-correction of lemmatizations using a limited crowdsourcing
               process aimed at continuous review and updating of the FLL. Starting
               from the texts resulting from this lemmatization process, we describe
               the extension of the FLL by means of word embeddings, whose interactive
               traversing by means of SemioGraphs completes the digital enhanced
               hermeneutic circle. In this way, the article argues for a more
               comprehensive understanding of lemmatization, encompassing classical
               machine learning as well as intellectual post-corrections and,
               in particular, human computation in the form of interpretation
               processes based on graph representations of the underlying lexical
               resources.},
  url       = {https://www.studiesaggilinguistici.it/index.php/ssl/article/view/276},
  pdf       = {https://www.studiesaggilinguistici.it/index.php/ssl/article/download/276/219}
}

Alexander Henlein, Giuseppe Abrami, Attila Kett and Alexander Mehler. May, 2020. Transfer of ISOSpace into a 3D Environment for Annotations and Applications. Proceedings of the 16th Joint ACL - ISO Workshop on Interoperable Semantic Annotation, 32–35.

BibTeX

@inproceedings{Henlein:et:al:2020,
  author    = {Henlein, Alexander and Abrami, Giuseppe and Kett, Attila and Mehler, Alexander},
  title     = {Transfer of {ISOSpace} into a {3D} Environment for Annotations and Applications},
  booktitle = {Proceedings of the 16th Joint ACL - ISO Workshop on Interoperable
               Semantic Annotation},
  month     = {May},
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  pages     = {32--35},
  abstract  = {People's visual perception is very pronounced and therefore it
               is usually no problem for them to describe the space around them
               in words. Conversely, people also have no problems imagining a
               concept of a described space. In recent years many efforts have
               been made to develop a linguistic concept for spatial and spatial-temporal
               relations. However, the systems have not really caught on so far,
               which in our opinion is due to the complex models on which they
               are based and the lack of available training data and automated
               taggers. In this paper we describe a project to support spatial
               annotation, which could facilitate annotation by its many functions,
               but also enrich it with many more information. This is to be achieved
               by an extension by means of a VR environment, with which spatial
               relations can be better visualized and connected with real objects.
               And we want to use the available data to develop a new state-of-the-art
               tagger and thus lay the foundation for future systems such as
               improved text understanding for Text2Scene.},
  url       = {https://www.aclweb.org/anthology/2020.isa-1.4},
  pdf       = {http://www.lrec-conf.org/proceedings/lrec2020/workshops/ISA16/pdf/2020.isa-1.4.pdf}
}

Jonathan Hildebrand, Wahed Hemati and Alexander Mehler. May, 2020. Recognizing Sentence-level Logical Document Structures with the Help of Context-free Grammars. Proceedings of The 12th Language Resources and Evaluation Conference, 5282–5290.

BibTeX

@inproceedings{Hildebrand:Hemati:Mehler:2020,
  author    = {Hildebrand, Jonathan and Hemati, Wahed and Mehler, Alexander},
  title     = {Recognizing Sentence-level Logical Document Structures with the
               Help of Context-free Grammars},
  booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference},
  pages     = {5282--5290},
  publisher = {European Language Resources Association},
  address   = {Marseille, France},
  year      = {2020},
  month     = {May},
  url       = {https://www.aclweb.org/anthology/2020.lrec-1.650},
  pdf       = {http://www.lrec-conf.org/proceedings/lrec2020/pdf/2020.lrec-1.650.pdf},
  abstract  = {Current sentence boundary detectors split documents into sequentially
               ordered sentences by detecting their beginnings and ends. Sentences,
               however, are more deeply structured even on this side of constituent
               and dependency structure: they can consist of a main sentence
               and several subordinate clauses as well as further segments (e.g.
               inserts in parentheses); they can even recursively embed whole
               sentences and then contain multiple sentence beginnings and ends.
               In this paper, we introduce a tool that segments sentences into
               tree structures to detect this type of recursive structure. To
               this end, we retrain different constituency parsers with the help
               of modified training data to transform them into sentence segmenters.
               With these segmenters, documents are mapped to sequences of sentence-related
               “logical document structures”. The resulting segmenters aim to
               improve downstream tasks by providing additional structural information.
               In this context, we experiment with German dependency parsing.
               We show that for certain sentence categories, which can be determined
               automatically, improvements in German dependency parsing can be
               achieved using our segmenter for preprocessing. The assumption
               suggests that improvements in other languages and tasks can be
               achieved.}
}

Alexander Henlein and Alexander Mehler. May, 2020. On the Influence of Coreference Resolution on Word Embeddings in Lexical-semantic Evaluation Tasks. Proceedings of The 12th Language Resources and Evaluation Conference, 27–33.

BibTeX

@inproceedings{Henlein:Mehler:2020,
  author    = {Henlein, Alexander and Mehler, Alexander},
  title     = {On the Influence of Coreference Resolution on Word Embeddings
               in Lexical-semantic Evaluation Tasks},
  booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference},
  month     = {May},
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  pages     = {27--33},
  abstract  = {Coreference resolution (CR) aims to find all spans of a text that
               refer to the same entity. The F1-Scores on these task have been
               greatly improved by new developed End2End-approaches and transformer
               networks. The inclusion of CR as a pre-processing step is expected
               to lead to improvements in downstream tasks. The paper examines
               this effect with respect to word embeddings. That is, we analyze
               the effects of CR on six different embedding methods and evaluate
               them in the context of seven lexical-semantic evaluation tasks
               and instantiation/hypernymy detection. Especially in the last
               tasks we hoped for a significant increase in performance. We show
               that all word embedding approaches do not benefit significantly
               from pronoun substitution. The measurable improvements are only
               marginal (around 0.5\% in most test cases). We explain this result
               with the loss of contextual information, reduction of the relative
               occurrence of rare words and the lack of pronouns to be replaced.},
  url       = {https://www.aclweb.org/anthology/2020.lrec-1.4},
  pdf       = {http://www.lrec-conf.org/proceedings/lrec2020/pdf/2020.lrec-1.4.pdf}
}

Alexander Mehler, Rüdiger Gleim, Regina Gaitsch, Tolga Uslu and Wahed Hemati. 2020. From Topic Networks to Distributed Cognitive Maps: Zipfian Topic Universes in the Area of Volunteered Geographic Information. Complexity, 4:1–47.

BibTeX

@article{Mehler:Gleim:Gaitsch:Uslu:Hemati:2020,
  author    = {Alexander Mehler and R{\"{u}}diger Gleim and Regina Gaitsch and Tolga Uslu
               and Wahed Hemati},
  title     = {From Topic Networks to Distributed Cognitive Maps: {Zipfian} Topic
               Universes in the Area of Volunteered Geographic Information},
  journal   = {Complexity},
  volume    = {4},
  doi       = {10.1155/2020/4607025},
  pages     = {1--47},
  issuetitle = {Cognitive Network Science: A New Frontier},
  year      = {2020}
}

Vincent Kühn, Giuseppe Abrami and Alexander Mehler. 2020. WikNectVR: A Gesture-Based Approach for Interacting in Virtual Reality Based on WikNect and Gestural Writing. Virtual, Augmented and Mixed Reality. Design and Interaction - 12th International Conference, VAMR 2020, Held as Part of the 22nd HCI International Conference, HCII 2020, Copenhagen, Denmark, July 19-24, 2020, Proceedings, Part I, 12190:299–312.

BibTeX

@inproceedings{Kuehn:Abrami:Mehler:2020,
  author    = {Vincent K{\"{u}}hn and Giuseppe Abrami and Alexander Mehler},
  editor    = {Jessie Y. C. Chen and Gino Fragomeni},
  title     = {{WikNectVR}: {A} Gesture-Based Approach for Interacting in Virtual
               Reality Based on {WikNect} and Gestural Writing},
  booktitle = {Virtual, Augmented and Mixed Reality. Design and Interaction -
               12th International Conference, {VAMR} 2020, Held as Part of the
               22nd {HCI} International Conference, {HCII} 2020, Copenhagen,
               Denmark, July 19-24, 2020, Proceedings, Part {I}},
  series    = {Lecture Notes in Computer Science},
  volume    = {12190},
  pages     = {299--312},
  publisher = {Springer},
  year      = {2020},
  url       = {https://doi.org/10.1007/978-3-030-49695-1_20},
  doi       = {10.1007/978-3-030-49695-1_20},
  timestamp = {Tue, 14 Jul 2020 10:55:57 +0200},
  biburl    = {https://dblp.org/rec/conf/hci/KuhnAM20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

Giuseppe Abrami, Alexander Mehler, Christian Spiekermann, Attila Kett, Simon Lööck and Lukas Schwarz. 2020. Educational Technologies in the area of ubiquitous historical computing in virtual reality. In: New Perspectives on Virtual and Augmented Reality: Finding New Ways to Teach in a Transformed Learning Environment. Ed. by Linda Daniela. Taylor & Francis.

BibTeX

@inbook{Abrami:et:al:2020,
  author    = {Abrami, Giuseppe and Mehler, Alexander and Spiekermann, Christian
               and Kett, Attila and L{\"o}{\"o}ck, Simon and Schwarz, Lukas},
  title     = {Educational Technologies in the area of ubiquitous historical
               computing in virtual reality},
  editor    = {Daniela, Linda},
  booktitle = {New Perspectives on Virtual and Augmented Reality: Finding New
               Ways to Teach in a Transformed Learning Environment},
  publisher = {Taylor \& Francis},
  year      = {2020},
  isbn      = {978-0-367-43211-9},
  url       = {https://www.routledge.com/New-Perspectives-on-Virtual-and-Augmented-Reality-Finding-New-Ways-to-Teach/Daniela/p/book/9780367432119},
  abstract  = {At ever shorter intervals, new technologies are being developed
               that are opening up more and more areas of application. This regards,
               for example, Virtual Reality (VR) and Augmented Reality (AR) devices.
               In addition to the private sector, the public and education sectors,
               which already make intensive use of these devices, benefit from
               these technologies. However, especially in the field of historical
               education, there are not many frameworks for generating immersive
               virtual environments that can be used flexibly enough. This chapter
               addresses this gap by means of VAnnotatoR. VAnnotatoR is a versatile
               framework for the creation and use of virtual environments that
               serve to model historical processes in historical education. The
               paper describes the building blocks of VAnnotatoR and describes
               applications in historical education.}
}

Christian Stegbauer and Alexander Mehler. 2020. Ursachen der Entstehung von ubiquitären Zentrum-Peripheriestrukturen und ihre Folgen. Soziale Welt – Zeitschrift für sozialwissenschaftliche Forschung und Praxis (SozW), Sonderband 23:265–284.

BibTeX

@article{Stegbauer:Mehler:2020,
  author    = {Christian Stegbauer and Alexander Mehler},
  title     = {Ursachen der {Entstehung} von ubiquit{\"{a}}ren {Zentrum-Peripheriestrukturen}
               und ihre {Folgen}},
  journal   = {Soziale Welt -- Zeitschrift f{\"u}r sozialwissenschaftliche Forschung und Praxis (SozW)},
  volume    = {Sonderband 23},
  year      = {2020},
  pages     = {265--284}
}

2019

Olga Zlatkin-Troitschanskaia, Walter Bisang, Alexander Mehler, Mita Banerjee and Jochen Roeper. 2019. Positive Learning in the Internet Age: Developments and Perspectives in the PLATO Program. In: Frontiers and Advances in Positive Learning in the Age of InformaTiOn (PLATO), 1–5. Ed. by Olga Zlatkin-Troitschanskaia. Springer International Publishing.

BibTeX

@inbook{Zlatkin-Troitschanskaia:et:al:2019,
  author    = {Zlatkin-Troitschanskaia, Olga and Bisang, Walter and Mehler, Alexander
               and Banerjee, Mita and Roeper, Jochen},
  editor    = {Zlatkin-Troitschanskaia, Olga},
  title     = {Positive Learning in the {Internet} Age: Developments and Perspectives
               in the {PLATO} Program},
  booktitle = {Frontiers and Advances in Positive Learning in the Age of InformaTiOn (PLATO)},
  year      = {2019},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {1--5},
  abstract  = {The Internet has become the main informational entity, i.e., a
               public source of information. The Internet offers many new benefits
               and opportunities for human learning, teaching, and research.
               However, by providing a vast amount of information from innumerable
               sources, it also enables the manipulation of information; there
               are countless examples of disseminated misinformation and false
               data in mass and social media. Much of the information presented
               online is conflicting, preselected, or algorithmically obscure,
               often colliding with fundamental humanistic values and posing
               moral or ethical problems.},
  isbn      = {978-3-030-26578-6},
  doi       = {10.1007/978-3-030-26578-6_1},
  url       = {https://doi.org/10.1007/978-3-030-26578-6_1}
}

Alexander Mehler and Visvanathan Ramesh. 2019. TextInContext: On the Way to a Framework for Measuring the Context-Sensitive Complexity of Educationally Relevant Texts—A Combined Cognitive and Computational Linguistic Approach. In: Frontiers and Advances in Positive Learning in the Age of InformaTiOn (PLATO), 167–195. Ed. by Olga Zlatkin-Troitschanskaia. Springer International Publishing.

BibTeX

@inbook{Mehler:Ramesh:2019,
  author    = {Mehler, Alexander and Ramesh, Visvanathan},
  title     = {{TextInContext}: On the Way to a Framework for Measuring the Context-Sensitive
               Complexity of Educationally Relevant Texts---A Combined Cognitive
               and Computational Linguistic Approach},
  editor    = {Zlatkin-Troitschanskaia, Olga},
  booktitle = {Frontiers and Advances in Positive Learning in the Age of InformaTiOn (PLATO)},
  pages     = {167--195},
  publisher = {Springer International Publishing},
  address   = {Cham},
  year      = {2019},
  isbn      = {978-3-030-26578-6},
  doi       = {10.1007/978-3-030-26578-6_14},
  url       = {https://doi.org/10.1007/978-3-030-26578-6_14},
  abstract  = {We develop a framework for modeling the context sensitivity of
               text interpretation. As a point of reference, we focus on the
               complexity of educational texts. To open up a broader basis for
               representing phenomena of context sensitivity, we integrate a
               learning theory (i.e., the Cognitive Load Theory) with a theory
               of discourse comprehension (i.e., the Construction Integration
               Model) and a theory of cognitive semantics (i.e., the theory of
               Conceptual Spaces). The aim is to construct measures that view
               text complexity as a relational attribute by analogy to the relational
               concept of meaning in situation semantics. To this end, we reconstruct
               the situation semantic notion of relational meaning from the perspective
               of a computationally informed cognitive semantics. The aim is
               to prepare the development of measurements for predicting learning
               outcomes in the form of positive or negative learning. This prediction
               ideally depends on the underlying learning material, the learner's
               situational context, and knowledge retrieved from his or her long-term
               memory, which he or she uses to arrive at coherent mental representations
               of the underlying texts. Finally, our model refers to machine
               learning as a tool for modeling such memory content. In this way,
               the chapter integrates approaches from different disciplines (linguistic
               semantics, computational linguistics, cognitive science, and data
               science).}
}

Andy Lücking. 2019. Dialogue semantics: From cognitive structures to positive and negative learning. Frontiers and Advances in Positive Learning in the Age of InformaTiOn (PLATO), 197–205.

BibTeX

@incollection{Luecking:2019:a,
  author    = {L{\"u}cking, Andy},
  title     = {Dialogue semantics: {From} cognitive structures to positive and
               negative learning},
  year      = {2019},
  pages     = {197--205},
  publisher = {Springer Nature Switzerland AG},
  address   = {Cham, Switzerland},
  editor    = {Zlatkin-Troitschanskaia, Olga},
  booktitle = {Frontiers and Advances in Positive Learning in the Age of InformaTiOn (PLATO)},
  doi       = {10.1007/978-3-030-26578-6_15},
  url       = {https://link.springer.com/chapter/10.1007/978-3-030-26578-6_15}
}

Andy Lücking and Jonathan Ginzburg. 2019. Not few but all quantifiers can be negated: towards a referentially transparent semantics of quantified noun phrases. Proceedings of the Amsterdam Colloquium 2019, 269–278.

BibTeX

@inproceedings{Luecking:Ginzburg:2019,
  author    = {L{\"u}cking, Andy and Ginzburg, Jonathan},
  title     = {Not few but all quantifiers can be negated: towards a referentially
               transparent semantics of quantified noun phrases},
  booktitle = {Proceedings of the Amsterdam Colloquium 2019},
  series    = {AC'19},
  location  = {University of Amsterdam},
  year      = {2019},
  pages     = {269--278},
  url       = {http://events.illc.uva.nl/AC/AC2019/},
  pdf       = {http://events.illc.uva.nl/AC/AC2019/uploaded_files/inlineitem/L_cking_and_Ginzburg_Not_few_but_all_quantifiers_ca.pdf}
}

Stefan Schweter and Sajawel Ahmed. 2019. Deep-EOS: General-Purpose Neural Networks for Sentence Boundary Detection. Proceedings of the 15th Conference on Natural Language Processing (KONVENS).

BibTeX

@inproceedings{Schweter:Ahmed:2019,
  author    = {Stefan Schweter and Sajawel Ahmed},
  title     = {{Deep-EOS}: General-Purpose Neural Networks for Sentence Boundary Detection},
  booktitle = {Proceedings of the 15th Conference on Natural Language Processing (KONVENS)},
  location  = {Erlangen, Germany},
  year      = {2019}
}

Manuel Stoeckel, Wahed Hemati and Alexander Mehler. November, 2019. When Specialization Helps: Using Pooled Contextualized Embeddings to Detect Chemical and Biomedical Entities in Spanish. Proceedings of The 5th Workshop on BioNLP Open Shared Tasks, 11–15.

BibTeX

@inproceedings{Stoeckel:Hemati:Mehler:2019,
  author    = {Stoeckel, Manuel and Hemati, Wahed and Mehler, Alexander},
  title     = {When Specialization Helps: Using Pooled Contextualized Embeddings
               to Detect Chemical and Biomedical Entities in {Spanish}},
  booktitle = {Proceedings of The 5th Workshop on {BioNLP} Open Shared Tasks},
  publisher = {Association for Computational Linguistics},
  address   = {Hong Kong, China},
  year      = {2019},
  month     = {nov},
  pages     = {11--15},
  doi       = {10.18653/v1/D19-5702},
  url       = {https://www.aclweb.org/anthology/D19-5702},
  abstract  = {The recognition of pharmacological substances, compounds and proteins
               is an essential preliminary work for the recognition of relations
               between chemicals and other biomedically relevant units. In this
               paper, we describe an approach to Task 1 of the PharmaCoNER Challenge,
               which involves the recognition of mentions of chemicals and drugs
               in Spanish medical texts. We train a state-of-the-art BiLSTM-CRF
               sequence tagger with stacked Pooled Contextualized Embeddings,
               word and sub-word embeddings using the open-source framework FLAIR.
               We present a new corpus composed of articles and papers from Spanish
               health science journals, termed the Spanish Health Corpus, and
               use it to train domain-specific embeddings which we incorporate
               in our model training. We achieve a result of 89.76{\%} F1-score
               using pre-trained embeddings and are able to improve these results
               to 90.52{\%} F1-score using specialized embeddings.}
}

Sajawel Ahmed, Manuel Stoeckel, Christine Driller, Adrian Pachzelt and Alexander Mehler. 2019. BIOfid Dataset: Publishing a German Gold Standard for Named Entity Recognition in Historical Biodiversity Literature. Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL), 871–880.

BibTeX

@inproceedings{Ahmed:Stoeckel:Driller:Pachzelt:Mehler:2019,
  author    = {Ahmed, Sajawel and Stoeckel, Manuel and Driller, Christine
               and Pachzelt, Adrian and Mehler, Alexander},
  title     = {{BIOfid} Dataset: Publishing a {German} Gold Standard for Named Entity
               Recognition in Historical Biodiversity Literature},
  booktitle = {Proceedings of the 23rd Conference on Computational Natural Language
               Learning ({CoNLL})},
  publisher = {Association for Computational Linguistics},
  address   = {Hong Kong, China},
  year      = {2019},
  pages     = {871--880},
  doi       = {10.18653/v1/K19-1081},
  url       = {https://www.aclweb.org/anthology/K19-1081},
  keywords  = {biofid},
  abstract  = {The Specialized Information Service Biodiversity Research (BIOfid)
               has been launched to mobilize valuable biological data from printed
               literature hidden in German libraries for over the past 250 years.
               In this project, we annotate German texts converted by OCR from
               historical scientific literature on the biodiversity of plants,
               birds, moths and butterflies. Our work enables the automatic extraction
               of biological information previously buried in the mass of papers
               and volumes. For this purpose, we generated training data for
               the tasks of Named Entity Recognition (NER) and Taxa Recognition
               (TR) in biological documents. We use this data to train a number
               of leading machine learning tools and create a gold standard for
               TR in biodiversity literature. More specifically, we perform a
               practical analysis of our newly generated BIOfid dataset through
               various downstream-task evaluations and establish a new state
               of the art for TR with 80.23{\%} F-score. In this sense, our paper
               lays the foundations for future work in the field of information
               extraction in biology texts.}
}

Alexander Mehler and Giuseppe Abrami. October 10–11, 2019. VAnnotatoR: A framework for the multimodal reconstruction of historical situations and spaces. Proceedings of the Time Machine Conference.

BibTeX

@inproceedings{Mehler:Abrami:2019,
  author    = {Mehler, Alexander and Abrami, Giuseppe},
  title     = {{VAnnotatoR}: A framework for the multimodal reconstruction of
               historical situations and spaces},
  booktitle = {Proceedings of the Time Machine Conference},
  year      = {2019},
  month     = {oct},
  eventdate = {2019-10-10/2019-10-11},
  address   = {Dresden, Germany},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2019/09/TimeMachineConference.pdf}
}

Alex Hunziker, Hasanagha Mammadov, Wahed Hemati and Alexander Mehler. 2019. Corpus2Wiki: A MediaWiki-based Tool for Automatically Generating Wikiditions in Digital Humanities. INF-DH-2019.

BibTeX

@inproceedings{Hunziker:et:al:2019,
  author    = {Hunziker, Alex and Mammadov, Hasanagha and Hemati, Wahed and Mehler, Alexander},
  title     = {{Corpus2Wiki}: A {MediaWiki}-based Tool for Automatically Generating
               Wikiditions in Digital Humanities},
  booktitle = {INF-DH-2019},
  year      = {2019},
  editor    = {Burghardt, Manuel and M{\"u}ller-Birn, Claudia},
  publisher = {Gesellschaft f{\"u}r Informatik e.V.},
  address   = {Bonn}
}

Armin Hoenen. June, 2019. Rooting through Direction – New and Old Approaches. DHd 2019.

BibTeX

@inproceedings{Hoenen:2019dhd,
  author    = {Hoenen, Armin},
  title     = {Rooting through Direction -- New and Old Approaches},
  booktitle = {DHd 2019},
  year      = {2019},
  month     = {jun},
  url       = {https://zenodo.org/record/2596095}
}

Armin Hoenen. 2019. Interpreting and Post-Correcting the Minimum Spanning Tree. DGfS 2019.

BibTeX

@inproceedings{Hoenen:2019dgfs,
  author    = {Hoenen, Armin},
  title     = {Interpreting and Post-Correcting the Minimum Spanning Tree},
  booktitle = {DGfS 2019},
  year      = {2019},
  url       = {http://www.dgfs2019.uni-bremen.de/abstracts/poster/Hoenen.pdf}
}

Armin Hoenen. 2019. eLearning the URLCoFi – Digital Didactics for Humanists. AIUCD 2019.

BibTeX

@inproceedings{Hoenen:2019aiucd,
  author    = {Hoenen, Armin},
  title     = {{eLearning} the {URLCoFi} -- Digital Didactics for Humanists},
  booktitle = {AIUCD 2019},
  year      = {2019},
  url       = {http://aiucd2019.uniud.it/wp-content/uploads/2019/01/BoA-2019_PROVV.pdf}
}

Andy Lücking, Robin Cooper, Staffan Larsson and Jonathan Ginzburg. May, 2019. Distribution is not enough – Going Firther. Proceedings of Natural Language and Computer Science.

BibTeX

@inproceedings{Luecking:Cooper:Larsson:Ginzburg:2019,
  author    = {L{\"u}cking, Andy and Cooper, Robin and Larsson, Staffan and Ginzburg, Jonathan},
  title     = {Distribution is not enough -- Going {Firther}},
  booktitle = {Proceedings of Natural Language and Computer Science},
  maintitle = {The 13th International Conference on Computational
               Semantics (IWCS 2019)},
  series    = {NLCS 6},
  location  = {Gothenburg, Sweden},
  year      = {2019},
  month     = {may},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2019/05/Distribution_is_not_enough.pdf}
}

Wahed Hemati and Alexander Mehler. March, 2019. CRFVoter: gene and protein related object recognition using a conglomerate of CRF-based tools. Journal of Cheminformatics, 11(1):11.

BibTeX

@article{Hemati:Mehler:2019b,
  author    = {Hemati, Wahed and Mehler, Alexander},
  title     = {{CRFVoter}: gene and protein related object recognition using
               a conglomerate of {CRF}-based tools},
  journal   = {Journal of Cheminformatics},
  year      = {2019},
  month     = {mar},
  day       = {14},
  volume    = {11},
  number    = {1},
  pages     = {11},
  abstract  = {Gene and protein related objects are an important class of entities
               in biomedical research, whose identification and extraction from
               scientific articles is attracting increasing interest. In this
               work, we describe an approach to the BioCreative V.5 challenge
               regarding the recognition and classification of gene and protein
               related objects. For this purpose, we transform the task as posed
               by BioCreative V.5 into a sequence labeling problem. We present
               a series of sequence labeling systems that we used and adapted
               in our experiments for solving this task. Our experiments show
               how to optimize the hyperparameters of the classifiers involved.
               To this end, we utilize various algorithms for hyperparameter
               optimization. Finally, we present CRFVoter, a two-stage application
               of Conditional Random Field (CRF) that integrates the optimized
               sequence labelers from our study into one ensemble classifier.},
  issn      = {1758-2946},
  doi       = {10.1186/s13321-019-0343-x},
  url       = {https://doi.org/10.1186/s13321-019-0343-x}
}

Giuseppe Abrami, Alexander Mehler, Andy Lücking, Elias Rieb and Philipp Helfrich. May, 2019. TextAnnotator: A flexible framework for semantic annotations. Proceedings of the Fifteenth Joint ACL - ISO Workshop on Interoperable Semantic Annotation, (ISA-15).

BibTeX

@inproceedings{Abrami:et:al:2019,
  author    = {Abrami, Giuseppe and Mehler, Alexander and L{\"u}cking, Andy and Rieb, Elias
               and Helfrich, Philipp},
  title     = {{TextAnnotator}: A flexible framework for semantic annotations},
  booktitle = {Proceedings of the Fifteenth Joint ACL - ISO Workshop on Interoperable
               Semantic Annotation, (ISA-15)},
  series    = {ISA-15},
  location  = {Gothenburg, Sweden},
  year      = {2019},
  month     = {may},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/TextAnnotator_IWCS_Göteborg.pdf},
  keywords  = {textannotator, biofid},
  abstract  = {Modern annotation tools should meet at least the following general
               requirements: they can handle diverse data and annotation levels
               within one tool, and they support the annotation process with
               automatic (pre-)processing outcomes as much as possible. We developed
               a framework that meets these general requirements and that enables
               versatile and browser-based annotations of texts, the TextAnnotator.
               It combines NLP methods of pre-processing with methods of flexible
               post-processing. In fact, machine learning (ML) requires a lot
               of training and test data, but is usually far from achieving perfect
               results. Producing high-level annotations for ML and post-correcting
               its results are therefore necessary. This is the purpose of TextAnnotator,
               which is entirely implemented in ExtJS and provides a range of
               interactive visualizations of annotations. In addition, it allows
               for flexibly integrating knowledge resources, e.g. in the course
               of post-processing named entity recognition. The paper describes
               TextAnnotator’s architecture together with three use cases: annotating
               temporal structures, argument structures and named entity linking.}
}

Tolga Uslu, Alexander Mehler and Daniel Baumartz. 2019. Computing Classifier-based Embeddings with the Help of text2ddc. Proceedings of the 20th International Conference on Computational Linguistics and Intelligent Text Processing, (CICLing 2019).

BibTeX

@inproceedings{Uslu:Mehler:Baumartz:2019,
  author    = {Uslu, Tolga and Mehler, Alexander and Baumartz, Daniel},
  title     = {Computing Classifier-based Embeddings with the Help of text2ddc},
  booktitle = {Proceedings of the 20th International Conference on Computational
               Linguistics and Intelligent Text Processing, ({CICLing} 2019)},
  series    = {CICLing 2019},
  location  = {La Rochelle, France},
  year      = {2019}
}

Tolga Uslu, Alexander Mehler, Clemens Schulz and Daniel Baumartz. 2019. BigSense: a Word Sense Disambiguator for Big Data. Proceedings of the Digital Humanities 2019, (DH2019).

BibTeX

@inproceedings{Uslu:Mehler:Schulz:Baumartz:2019,
  author    = {Uslu, Tolga and Mehler, Alexander and Schulz, Clemens and Baumartz, Daniel},
  title     = {{BigSense}: a Word Sense Disambiguator for Big Data},
  booktitle = {Proceedings of the Digital Humanities 2019, (DH2019)},
  series    = {DH2019},
  location  = {Utrecht, Netherlands},
  year      = {2019},
  url       = {https://dev.clariah.nl/files/dh2019/boa/0199.html}
}

Wahed Hemati and Alexander Mehler. January, 2019. LSTMVoter: chemical named entity recognition using a conglomerate of sequence labeling tools. Journal of Cheminformatics, 11(1):7.

BibTeX

@article{Hemati:Mehler:2019a,
  author    = {Hemati, Wahed and Mehler, Alexander},
  title     = {{LSTMVoter}: chemical named entity recognition using a conglomerate
               of sequence labeling tools},
  journal   = {Journal of Cheminformatics},
  year      = {2019},
  month     = {jan},
  day       = {10},
  volume    = {11},
  number    = {1},
  pages     = {7},
  abstract  = {Chemical and biomedical named entity recognition (NER) is an essential
               preprocessing task in natural language processing. The identification
               and extraction of named entities from scientific articles is also
               attracting increasing interest in many scientific disciplines.
               Locating chemical named entities in the literature is an essential
               step in chemical text mining pipelines for identifying chemical
               mentions, their properties, and relations as discussed in the
               literature. In this work, we describe an approach to the BioCreative
               V.5 challenge regarding the recognition and classification of
               chemical named entities. For this purpose, we transform the task
               of NER into a sequence labeling problem. We present a series of
               sequence labeling systems that we used, adapted and optimized
               in our experiments for solving this task. To this end, we experiment
               with hyperparameter optimization. Finally, we present LSTMVoter,
               a two-stage application of recurrent neural networks that integrates
               the optimized sequence labelers from our study into a single ensemble
               classifier.},
  issn      = {1758-2946},
  doi       = {10.1186/s13321-018-0327-2},
  url       = {https://doi.org/10.1186/s13321-018-0327-2}
}

Giuseppe Abrami, Alexander Mehler and Christian Spiekermann. July, 2019. Graph-based Format for Modeling Multimodal Annotations in Virtual Reality by Means of VAnnotatoR. Proceedings of the 21st International Conference on Human-Computer Interaction, HCII 2019, 351–358.

BibTeX

@inproceedings{Abrami:Mehler:Spiekermann:2019,
  author    = {Abrami, Giuseppe and Mehler, Alexander and Spiekermann, Christian},
  title     = {Graph-based Format for Modeling Multimodal Annotations in Virtual
               Reality by Means of {VAnnotatoR}},
  booktitle = {Proceedings of the 21st International Conference on Human-Computer
               Interaction, {HCII} 2019},
  series    = {HCII 2019},
  location  = {Orlando, Florida, USA},
  editor    = {Stephanidis, Constantine and Antona, Margherita},
  year      = {2019},
  month     = {jul},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {351--358},
  abstract  = {Projects in the field of Natural Language Processing (NLP), the
               Digital Humanities (DH) and related disciplines dealing with machine
               learning of complex relationships between data objects need annotations
               to obtain sufficiently rich training and test sets. The visualization
               of such data sets and their underlying Human Computer Interaction
               (HCI) are perennial problems of computer science. However, despite
               some success stories, the clarity of information presentation
               and the flexibility of the annotation process may decrease with
               the complexity of the underlying data objects and their relationships.
               In order to face this problem, the so-called VAnnotatoR was developed,
               as a flexible annotation tool using 3D glasses and augmented reality
               devices, which enables annotation and visualization in three-dimensional
               virtual environments. In addition, multimodal objects are annotated
               and visualized within a graph-based approach.},
  isbn      = {978-3-030-30712-7},
  pdf       = {https://link.springer.com/content/pdf/10.1007\%2F978-3-030-30712-7_44.pdf}
}

Alexander Mehler, Tolga Uslu, Rüdiger Gleim and Daniel Baumartz. 2019. text2ddc meets Literature - Ein Verfahren für die Analyse und Visualisierung thematischer Makrostrukturen. Proceedings of the 6th Digital Humanities Conference in the German-speaking Countries, DHd 2019.

BibTeX

@inproceedings{Mehler:Uslu:Gleim:Baumartz:2019,
  author    = {Mehler, Alexander and Uslu, Tolga and Gleim, R{\"u}diger and Baumartz, Daniel},
  title     = {{text2ddc meets Literature - Ein Verfahren f{\"u}r die Analyse und
               Visualisierung thematischer Makrostrukturen}},
  booktitle = {Proceedings of the 6th Digital Humanities Conference in the German-speaking
               Countries, DHd 2019},
  series    = {DHd 2019},
  location  = {Frankfurt, Germany},
  year      = {2019},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/Preprint_DHd2019_text2ddc_meets_Literature.pdf},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/DHD_Poster___text2ddc_meets_Literature_Poster.pdf}
}

Giuseppe Abrami, Christian Spiekermann and Alexander Mehler. 2019. VAnnotatoR: Ein Werkzeug zur Annotation multimodaler Netzwerke in dreidimensionalen virtuellen Umgebungen. Proceedings of the 6th Digital Humanities Conference in the German-speaking Countries, DHd 2019.

BibTeX

@inproceedings{Abrami:Spiekermann:Mehler:2019,
  author    = {Abrami, Giuseppe and Spiekermann, Christian and Mehler, Alexander},
  title     = {{VAnnotatoR: Ein Werkzeug zur Annotation multimodaler
               Netzwerke in dreidimensionalen virtuellen Umgebungen}},
  booktitle = {Proceedings of the 6th Digital Humanities Conference in the
               German-speaking Countries, DHd 2019},
  year      = {2019},
  location  = {Frankfurt, Germany},
  series    = {DHd 2019},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/DHDVAnnotatoRPoster.pdf},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/Preprint_VAnnotatoR_DHd2019.pdf}
}

Wahed Hemati, Alexander Mehler, Tolga Uslu and Giuseppe Abrami. 2019. Der TextImager als Front- und Backend für das verteilte NLP von Big Digital Humanities Data. Proceedings of the 6th Digital Humanities Conference in the German-speaking Countries, DHd 2019.

BibTeX

@inproceedings{Hemati:Mehler:Uslu:Abrami:2019,
  author    = {Hemati, Wahed and Mehler, Alexander and Uslu, Tolga and Abrami, Giuseppe},
  title     = {{Der TextImager als Front- und Backend f{\"u}r das verteilte NLP von
               Big Digital Humanities Data}},
  booktitle = {Proceedings of the 6th Digital Humanities Conference in the German-speaking
               Countries, DHd 2019},
  series    = {DHd 2019},
  location  = {Frankfurt, Germany},
  year      = {2019},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/Der-TextImager-als-Fron-und-Backend.pdf},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/DHD19_TextImager.pdf}
}

Rüdiger Gleim, Steffen Eger, Alexander Mehler, Tolga Uslu, Wahed Hemati, Andy Lücking, Alexander Henlein, Sven Kahlsdorf and Armin Hoenen. 2019. A practitioner's view: a survey and comparison of lemmatization and morphological tagging in German and Latin. Journal of Language Modelling, 7(1).

BibTeX

@article{Gleim:Eger:Mehler:2019,
  author    = {Gleim, R{\"u}diger and Eger, Steffen and Mehler, Alexander and Uslu, Tolga
               and Hemati, Wahed and L{\"u}cking, Andy and Henlein, Alexander and Kahlsdorf, Sven
               and Hoenen, Armin},
  title     = {A practitioner's view: a survey and comparison of lemmatization
               and morphological tagging in {German} and {Latin}},
  journal   = {Journal of Language Modelling},
  year      = {2019},
  volume    = {7},
  number    = {1},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2019/07/jlm-tagging.pdf},
  doi       = {10.15398/jlm.v7i1.205},
  url       = {http://jlm.ipipan.waw.pl/index.php/JLM/article/view/205}
}

2018

Armin Hoenen. May 7–12, 2018. Multi Modal Distance - An Approach to Stemma Generation With Weighting. Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018).

BibTeX

@inproceedings{HOENEN18.285,
  author    = {Hoenen, Armin},
  title     = {Multi Modal Distance - An Approach to Stemma Generation With Weighting},
  booktitle = {Proceedings of the Eleventh International Conference on Language
               Resources and Evaluation (LREC 2018)},
  year      = {2018},
  month     = {may},
  eventdate = {2018-05-07/2018-05-12},
  location  = {Miyazaki, Japan},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Cieri, Christopher
               and Declerck, Thierry and Goggi, Sara and Hasida, Koiti and Isahara, Hitoshi
               and Maegaard, Bente and Mariani, Joseph and Mazo, H{\'e}l{\`e}ne and Moreno, Asuncion
               and Odijk, Jan and Piperidis, Stelios and Tokunaga, Takenobu},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Paris, France},
  isbn      = {979-10-95546-00-9},
  url       = {http://www.lrec-conf.org/proceedings/lrec2018/pdf/285.pdf},
  language  = {english}
}

Armin Hoenen. May 7–12, 2018. From Manuscripts to Archetypes through Iterative Clustering. Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018).

BibTeX

@inproceedings{HOENEN18.314,
  author    = {Hoenen, Armin},
  title     = {From Manuscripts to Archetypes through Iterative Clustering},
  booktitle = {Proceedings of the Eleventh International Conference on Language
               Resources and Evaluation (LREC 2018)},
  year      = {2018},
  month     = {may},
  eventdate = {2018-05-07/2018-05-12},
  location  = {Miyazaki, Japan},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Cieri, Christopher
               and Declerck, Thierry and Goggi, Sara and Hasida, Koiti and Isahara, Hitoshi
               and Maegaard, Bente and Mariani, Joseph and Mazo, H{\'e}l{\`e}ne and Moreno, Asuncion
               and Odijk, Jan and Piperidis, Stelios and Tokunaga, Takenobu},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Paris, France},
  isbn      = {979-10-95546-00-9},
  url       = {http://www.lrec-conf.org/proceedings/lrec2018/pdf/314.pdf},
  language  = {english}
}

Armin Hoenen and Niko Schenk. May 7–12, 2018. Knowing the Author by the Company His Words Keep. Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018).

BibTeX

@inproceedings{HOENEN18.349,
  author    = {Hoenen, Armin and Schenk, Niko},
  title     = {Knowing the Author by the Company His Words Keep},
  booktitle = {Proceedings of the Eleventh International Conference on Language
               Resources and Evaluation (LREC 2018)},
  year      = {2018},
  month     = {may},
  eventdate = {2018-05-07/2018-05-12},
  location  = {Miyazaki, Japan},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Cieri, Christopher
               and Declerck, Thierry and Goggi, Sara and Hasida, Koiti and Isahara, Hitoshi
               and Maegaard, Bente and Mariani, Joseph and Mazo, H{\'e}l{\`e}ne and Moreno, Asuncion
               and Odijk, Jan and Piperidis, Stelios and Tokunaga, Takenobu},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Paris, France},
  isbn      = {979-10-95546-00-9},
  url       = {http://www.lrec-conf.org/proceedings/lrec2018/pdf/349.pdf},
  language  = {english}
}

Armin Hoenen. May 7–12, 2018. Attempts at Visualization of Etymological Information. Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018).

BibTeX

@inproceedings{HOENEN18.9,
  author    = {Hoenen, Armin},
  title     = {Attempts at Visualization of Etymological Information},
  booktitle = {Proceedings of the Eleventh International Conference on Language
               Resources and Evaluation (LREC 2018)},
  year      = {2018},
  month     = {may},
  eventdate = {2018-05-07/2018-05-12},
  location  = {Miyazaki, Japan},
  editor    = {Kernerman, Ilan and Krek, Simon},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Paris, France},
  isbn      = {979-10-95546-28-3},
  language  = {english},
  url       = {http://lrec-conf.org/workshops/lrec2018/W33/pdf/book_of_proceedings.pdf}
}

Armin Hoenen. 2018. PhD Thesis: Tools, evaluation and preprocessing for stemmatology.

BibTeX

@phdthesis{Hoenen2018,
  author    = {Hoenen, Armin},
  title     = {Tools, evaluation and preprocessing for stemmatology},
  school    = {Goethe University Frankfurt},
  type      = {Dissertation},
  year      = {2018}
}

Armin Hoenen and Lela Samushia. 2018. Principles Aiding in Reading Abbreviations in Old Georgian and Latin. DHd 2018.

BibTeX

@inproceedings{Hoenen:Samushia:2018dhd,
  author    = {Hoenen, Armin and Samushia, Lela},
  title     = {Principles Aiding in Reading Abbreviations in {Old Georgian} and {Latin}},
  booktitle = {DHd 2018},
  year      = {2018},
  url       = {http://dhd2018.uni-koeln.de/wp-content/uploads/boa-DHd2018-web-ISBN.pdf}
}

Armin Hoenen. 2018. Wikipedia Mention Graphs by Example. EADH 2018.

BibTeX

@inproceedings{Hoenen:2018eadh,
  author    = {Hoenen, Armin},
  title     = {{Wikipedia} Mention Graphs by Example},
  booktitle = {EADH 2018},
  year      = {2018},
  url       = {https://eadh2018.exordo.com/files/papers/37/final_draft/MentionGraphsEADH.pdf}
}

Armin Hoenen. 2018. Annotated Timelines and Stacked Area Plots for Visualization in Lexicography. Elexis workshop at EADH 2018.

BibTeX

@inproceedings{Hoenen:2018elexis,
  author    = {Hoenen, Armin},
  title     = {Annotated Timelines and Stacked Area Plots for Visualization in Lexicography},
  booktitle = {Elexis workshop at EADH 2018},
  year      = {2018},
  url       = {https://lexdhai.insight-centre.org/Lex_DH__AI_2018_paper_2.pdf}
}

Armin Hoenen. 2018. Recurrence Analysis Function, a Dynamic Heatmap for the Visualization of Verse Text and Beyond. In: Visualisierung sprachlicher Daten: Visual Linguistics – Praxis – Tools. Heidelberg University Press.

BibTeX

@incollection{Hoenen:2018,
  author    = {Hoenen, Armin},
  title     = {Recurrence Analysis Function, a Dynamic Heatmap for the Visualization
               of Verse Text and Beyond},
  booktitle = {Visualisierung sprachlicher Daten: Visual Linguistics -- Praxis -- Tools},
  editor    = {Bubenhofer, Noah and Kupietz, Marc},
  publisher = {Heidelberg University Press},
  address   = {Heidelberg},
  year      = {2018},
  abstract  = {The Recurrence Analysis Function (ReAF) is a cross-linguistic
               visualization tool for (historical) verse text, especially handwritten
               epics. It can also provide a general visualization of various
               aspects of prose text. It aims to enable intuitive understanding
               through explorative data analysis of historical, especially bardic-oral
               texts. The assumption behind this is that bardic/born-oral and
               non-bardic/born-written texts differ drastically in the way they
               employ repetition. The ReAF in its first implementation, as presented
               here, is a language-independent tool that permits the visual exploration
               of such structures. Firstly, general aspects and formal characteristics
               of oral verse text are characterized, before the main technical
               details and some additional applications of the ReAF are explained
               and illustrated.},
  url       = {https://heiup.uni-heidelberg.de/reader/download/345/345-69-80909-2-10-20180411.pdf}
}

Tatiana Lokot, Alexander Mehler and Olga Abramov. November, 2018. On the limit value of compactness of some graph classes. PLOS ONE, 13(11):1–8.

BibTeX

@article{Lokot:Mehler:Abramov:2018,
  author    = {Lokot, Tatiana and Mehler, Alexander and Abramov, Olga},
  title     = {On the limit value of compactness of some graph classes},
  journal   = {PLOS ONE},
  publisher = {Public Library of Science},
  year      = {2018},
  month     = {nov},
  volume    = {13},
  number    = {11},
  pages     = {1--8},
  doi       = {10.1371/journal.pone.0207536},
  url       = {https://doi.org/10.1371/journal.pone.0207536},
  abstract  = {In this paper, we study the limit of compactness which is a graph
               index originally introduced for measuring structural characteristics
               of hypermedia. Applying compactness to large scale small-world
               graphs (Mehler, 2008) observed its limit behaviour to be equal
               1. The striking question concerning this finding was whether this
               limit behaviour resulted from the specifics of small-world graphs
               or was simply an artefact. In this paper, we determine the necessary
               and sufficient conditions for any sequence of connected graphs
               resulting in a limit value of CB = 1 which can be generalized
               with some consideration for the case of disconnected graph classes
               (Theorem 3). This result can be applied to many well-known classes
               of connected graphs. Here, we illustrate it by considering four
               examples. In fact, our proof-theoretical approach allows for quickly
               obtaining the limit value of compactness for many graph classes
               sparing computational costs.}
}

Eleanor Rutherford, Wahed Hemati and Alexander Mehler. 2018. Corpus2Wiki: A MediaWiki based Annotation & Visualisation Tool for the Digital Humanities. INF-DH-2018.

BibTeX

@inproceedings{Rutherford:et:al:2018,
  author    = {Rutherford, Eleanor and Hemati, Wahed and Mehler, Alexander},
  title     = {{Corpus2Wiki}: A {MediaWiki} based Annotation \& Visualisation Tool
               for the Digital Humanities},
  booktitle = {INF-DH-2018},
  year      = {2018},
  editor    = {Burghardt, Manuel and M{\"u}ller-Birn, Claudia},
  publisher = {Gesellschaft f{\"u}r Informatik e.V.},
  address   = {Bonn}
}

Giuseppe Abrami, Alexander Mehler, Philipp Helfrich and Elias Rieb. 2018. TextAnnotator: A Browser-based Framework for Annotating Textual Data in Digital Humanities. Proceedings of the Digital Humanities Austria 2018.

BibTeX

@inproceedings{Abrami:et:al:2018,
  author    = {Abrami, Giuseppe and Mehler, Alexander and Helfrich, Philipp
               and Rieb, Elias},
  title     = {{TextAnnotator}: A Browser-based Framework for Annotating
               Textual Data in Digital Humanities},
  booktitle = {Proceedings of the Digital Humanities Austria 2018},
  year      = {2018},
  location  = {Salzburg, Austria},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/TA__A_Browser_based_Framework_for_Annotating_Textual_Data_in_Digital_Humanities.pdf}
}

Sajawel Ahmed and Alexander Mehler. 2018. Resource-Size matters: Improving Neural Named Entity Recognition with Optimized Large Corpora. Proceedings of the 17th IEEE International Conference on Machine Learning and Applications (ICMLA).

BibTeX

@inproceedings{Ahmed:Mehler:2018,
  author    = {Sajawel Ahmed and Alexander Mehler},
  title     = {Resource-Size matters: Improving Neural Named Entity Recognition
               with Optimized Large Corpora},
  abstract  = {This study improves the performance of neural named entity recognition
               by a margin of up to 11\% in terms of F-score on the example of
               a low-resource language like German, thereby outperforming existing
               baselines and establishing a new state-of-the-art on each single
               open-source dataset (CoNLL 2003, GermEval 2014 and Tübingen Treebank
               2018). Rather than designing deeper and wider hybrid neural architectures,
               we gather all available resources and perform a detailed optimization
               and grammar-dependent morphological processing consisting of lemmatization
               and part-of-speech tagging prior to exposing the raw data to any
               training process. We test our approach in a threefold monolingual
               experimental setup of a) single, b) joint, and c) optimized training
               and shed light on the dependency of downstream-tasks on the size
               of corpora used to compute word embeddings.},
  booktitle = {Proceedings of the 17th IEEE International Conference on Machine
               Learning and Applications (ICMLA)},
  location  = {Orlando, Florida, USA},
  pdf       = {https://arxiv.org/pdf/1807.10675.pdf},
  year      = {2018}
}

Claus Weiland, Christine Driller, Markus Koch, Marco Schmidt, Giuseppe Abrami, Sajawel Ahmed, Alexander Mehler, Adrian Pachzelt, Gerwin Kasperek, Angela Hausinger and Thomas Hörnschemeyer. 2018. BioFID, a platform to enhance accessibility of biodiversity data. Proceedings of the 10th International Conference on Ecological Informatics.

BibTeX

@inproceedings{Weiland:et:al:2018,
  author    = {Claus Weiland and Christine Driller and Markus Koch
               and Marco Schmidt and Giuseppe Abrami and Sajawel Ahmed
               and Alexander Mehler and Adrian Pachzelt and Gerwin Kasperek
               and Angela Hausinger and Thomas Hörnschemeyer},
  title     = {{BioFID}, a platform to enhance accessibility of biodiversity data},
  booktitle = {Proceedings of the 10th International Conference on Ecological Informatics},
  location  = {Jena, Germany},
  year      = {2018},
  url       = {https://www.researchgate.net/profile/Marco_Schmidt3/publication/327940813_BIOfid_a_Platform_to_Enhance_Accessibility_of_Biodiversity_Data/links/5bae3e3e92851ca9ed2cd60f/BIOfid-a-Platform-to-Enhance-Accessibility-of-Biodiversity-Data.pdf?origin=publication_detail}
}

Attila Kett, Giuseppe Abrami, Alexander Mehler and Christian Spiekermann. 2018. Resources2City Explorer: A System for Generating Interactive Walkable Virtual Cities out of File Systems. Proceedings of the 31st ACM User Interface Software and Technology Symposium.

BibTeX

@inproceedings{Kett:et:al:2018,
  author    = {Attila Kett and Giuseppe Abrami
               and Alexander Mehler and Christian Spiekermann},
  title     = {{Resources2City Explorer}: A System for Generating Interactive
               Walkable Virtual Cities out of File Systems},
  booktitle = {Proceedings of the 31st ACM User Interface Software and Technology Symposium},
  location  = {Berlin, Germany},
  year      = {2018},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2018/10/UIST2018Final.pdf},
  abstract  = {We present Resources2City Explorer (R2CE), a tool for representing
               file systems as interactive, walkable virtual cities. R2CE visualizes
               file systems based on concepts of spatial, 3D information processing.
               For this purpose, it extends the range of functions of conventional
               file browsers considerably. Visual elements in a city generated
               by R2CE represent (relations of) objects of the underlying file
               system. The paper describes the functional spectrum of R2CE and
               illustrates it by visualizing a sample of 940 files.}
}

Andy Lücking. 2018. Witness-loaded and Witness-free Demonstratives. Atypical Demonstratives.

BibTeX

@incollection{Luecking:2018:a,
  author    = {Andy L{\"u}cking},
  title     = {Witness-loaded and Witness-free Demonstratives},
  booktitle = {Atypical Demonstratives},
  publisher = {De Gruyter},
  year      = {2018},
  editor    = {Marco Coniglio and Andrew Murphy and Eva Schlachter and Tonjes Veenstra},
  isbn      = {978-3-11-056029-9},
  url       = {https://www.degruyter.com/view/product/495228},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2020/05/Luecking-witness-loading-rg.pdf}
}

Andy Lücking and Jonathan Ginzburg. 2018. `Most people but not Bill': integrating sets, individuals and negation into a cognitively plausible account of noun phrase interpretation. Proceedings of Cognitive Structures: Linguistic, Philosophical and Psychological Perspectives.

BibTeX

@inproceedings{Luecking:Ginzburg:2018,
  author    = {L{\"u}cking, Andy and Ginzburg, Jonathan},
  title     = {`Most people but not {Bill}': integrating sets, individuals and
               negation into a cognitively plausible account of noun phrase interpretation},
  booktitle = {Proceedings of Cognitive Structures: Linguistic, Philosophical
               and Psychological Perspectives},
  series    = {CoSt'18},
  year      = {2018}
}

Tolga Uslu and Alexander Mehler. 2018. PolyViz: a Visualization System for a Special Kind of Multipartite Graphs. Proceedings of the IEEE VIS 2018.

BibTeX

@inproceedings{Uslu:Mehler:2018,
  author    = {Tolga Uslu and Alexander Mehler},
  title     = {{PolyViz}: a Visualization System
               for a Special Kind of Multipartite Graphs},
  booktitle = {Proceedings of the IEEE VIS 2018},
  series    = {IEEE VIS 2018},
  year      = {2018},
  location  = {Berlin, Germany},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/07/polyviz-visualization-system.pdf}
}

Daniel Baumartz, Tolga Uslu and Alexander Mehler. 2018. LTV: Labeled Topic Vector. Proceedings of COLING 2018, the 27th International Conference on Computational Linguistics: System Demonstrations, August 20-26.

BibTeX

@inproceedings{Baumartz:Uslu:Mehler:2018,
  author    = {Daniel Baumartz and Tolga Uslu and Alexander Mehler},
  title     = {{LTV}: Labeled Topic Vector},
  booktitle = {Proceedings of {COLING 2018}, the 27th International Conference
               on Computational Linguistics: System Demonstrations, August 20-26},
  publisher = {The COLING 2018 Organizing Committee},
  address   = {Santa Fe, New Mexico, USA},
  year      = {2018},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/06/coling2018.pdf},
  abstract  = {In this paper, we present LTV, a website and an API that generate
               labeled topic classifications based on the Dewey Decimal Classification
               (DDC), an international standard for topic classification in libraries.
               We introduce nnDDC, a largely language-independent neural network-based
               classifier for DDC-related topic classification, which we optimized
               using a wide range of linguistic features to achieve an F-score
               of 87.4\%. To show that our approach is language-independent,
               we evaluate nnDDC using up to 40 different languages. We derive
               a topic model based on nnDDC, which generates probability distributions
               over semantic units for any input on sense-, word- and text-level.
               Unlike related approaches, however, these probabilities are estimated
               by means of nnDDC so that each dimension of the resulting vector
               representation is uniquely labeled by a DDC class. In this way,
               we introduce a neural network-based Classifier-Induced Semantic
               Space (nnCISS).}
}

Christine Driller, Markus Koch, Marco Schmidt, Claus Weiland, Thomas Hörnschemeyer, Thomas Hickler, Giuseppe Abrami, Sajawel Ahmed, Rüdiger Gleim, Wahed Hemati, Tolga Uslu, Alexander Mehler, Adrian Pachzelt, Jashar Rexhepi, Thomas Risse, Janina Schuster, Gerwin Kasperek and Angela Hausinger. 2018. Workflow and Current Achievements of BIOfid, an Information Service Mobilizing Biodiversity Data from Literature Sources. Biodiversity Information Science and Standards, 2:e25876.

BibTeX

@article{Driller:et:al:2018,
  author    = {Christine Driller and Markus Koch and Marco Schmidt and Claus Weiland
               and Thomas Hörnschemeyer and Thomas Hickler and Giuseppe Abrami and Sajawel Ahmed
               and Rüdiger Gleim and Wahed Hemati and Tolga Uslu and Alexander Mehler
               and Adrian Pachzelt and Jashar Rexhepi and Thomas Risse and Janina Schuster
               and Gerwin Kasperek and Angela Hausinger},
  title     = {Workflow and Current Achievements of {BIOfid}, an Information Service
               Mobilizing Biodiversity Data from Literature Sources},
  volume    = {2},
  year      = {2018},
  doi       = {10.3897/biss.2.25876},
  publisher = {Pensoft Publishers},
  abstract  = {BIOfid is a specialized information service currently being developed
               to mobilize biodiversity data dormant in printed historical and
               modern literature and to offer a platform for open access journals
               on the science of biodiversity. Our team of librarians, computer
               scientists and biologists produce high-quality text digitizations,
               develop new text-mining tools and generate detailed ontologies
               enabling semantic text analysis and semantic search by means of
               user-specific queries. In a pilot project we focus on German publications
               on the distribution and ecology of vascular plants, birds, moths
               and butterflies extending back to the Linnaeus period about 250
               years ago. The three organism groups have been selected according
               to current demands of the relevant research community in Germany.
               The text corpus defined for this purpose comprises over 400 volumes
               with more than 100,000 pages to be digitized and will be complemented
               by journals from other digitization projects, copyright-free and
               project-related literature. With TextImager (Natural Language
               Processing \& Text Visualization) and TextAnnotator (Discourse
               Semantic Annotation) we have already extended and launched tools
               that focus on the text-analytical section of our project. Furthermore,
               taxonomic and anatomical ontologies elaborated by us for the taxa
               prioritized by the project’s target group - German institutions
               and scientists active in biodiversity research - are constantly
               improved and expanded to maximize scientific data output. Our
               poster describes the general workflow of our project ranging from
               literature acquisition via software development, to data availability
               on the BIOfid web portal (http://biofid.de/), and the implementation
               into existing platforms which serve to promote global accessibility
               of biodiversity data.},
  pages     = {e25876},
  url       = {https://doi.org/10.3897/biss.2.25876},
  eprint    = {https://doi.org/10.3897/biss.2.25876},
  journal   = {Biodiversity Information Science and Standards},
  keywords  = {biofid}
}

Alexander Mehler, Giuseppe Abrami, Christian Spiekermann and Matthias Jostock. 2018. VAnnotatoR: A Framework for Generating Multimodal Hypertexts. Proceedings of the 29th ACM Conference on Hypertext and Social Media.

BibTeX

@inproceedings{Mehler:Abrami:Spiekermann:Jostock:2018,
  author    = {Mehler, Alexander and Abrami, Giuseppe and Spiekermann, Christian
               and Jostock, Matthias},
  title     = {{VAnnotatoR}: {A} Framework for Generating Multimodal Hypertexts},
  booktitle = {Proceedings of the 29th ACM Conference on Hypertext and Social Media},
  series    = {HT '18},
  year      = {2018},
  location  = {Baltimore, Maryland},
  publisher = {ACM},
  address   = {New York, NY, USA},
  pdf       = {http://delivery.acm.org/10.1145/3210000/3209572/p150-mehler.pdf}
}

Wahed Hemati, Alexander Mehler, Tolga Uslu, Daniel Baumartz and Giuseppe Abrami. 2018. Evaluating and Integrating Databases in the Area of NLP. International Quantitative Linguistics Conference (QUALICO 2018).

BibTeX

@inproceedings{Hemati:Mehler:Uslu:Baumartz:Abrami:2018,
  author    = {Wahed Hemati and Alexander Mehler and Tolga Uslu
               and Daniel Baumartz and Giuseppe Abrami},
  title     = {Evaluating and Integrating Databases in the Area of {NLP}},
  booktitle = {International Quantitative Linguistics Conference (QUALICO 2018)},
  location  = {Wroclaw, Poland},
  year      = {2018},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2018/10/qualico2018_databases_poster_hemati_mehler_uslu_baumartz_abrami.pdf},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/04/Hemat-Mehler-Uslu-Baumartz-Abrami-Qualico-2018.pdf}
}

Giuseppe Abrami, Gertrud Boden and Lisa Gleiß. 2018. World of the Khwe Bushmen: Accessing Khwe Cultural Heritage data by means of a digital ontology based on OWLnotator. Proceedings of the Digital Humanities 2018.

BibTeX

@inproceedings{Abrami:Boden:Gleiss:2018,
  author    = {Abrami, Giuseppe and Boden, Gertrud and Glei{\ss}, Lisa},
  title     = {World of the {Khwe} {Bushmen}: Accessing {Khwe} Cultural Heritage data
               by means of a digital ontology based on {OWLnotator}},
  booktitle = {Proceedings of the Digital Humanities 2018},
  series    = {DH2018},
  location  = {Mexico City, Mexico},
  year      = {2018}
}

Alexander Mehler, Wahed Hemati, Rüdiger Gleim and Daniel Baumartz. 2018. VienNA: Auf dem Weg zu einer Infrastruktur für die verteilte interaktive evolutionäre Verarbeitung natürlicher Sprache. Forschungsinfrastrukturen und digitale Informationssysteme in der germanistischen Sprachwissenschaft, 6.

BibTeX

@incollection{Mehler:Hemati:Gleim:Baumartz:2018,
  author    = {Alexander Mehler and Wahed Hemati and Rüdiger Gleim and Daniel Baumartz},
  title     = {{VienNA}: Auf dem {Weg} zu einer {Infrastruktur} für die verteilte
               interaktive evolutionäre {Verarbeitung} natürlicher {Sprache}},
  booktitle = {Forschungsinfrastrukturen und digitale Informationssysteme in
               der germanistischen Sprachwissenschaft},
  publisher = {De Gruyter},
  editor    = {Henning Lobin and Roman Schneider and Andreas Witt},
  volume    = {6},
  address   = {Berlin},
  year      = {2018}
}

Alexander Mehler, Wahed Hemati, Tolga Uslu and Andy Lücking. 2018. A Multidimensional Model of Syntactic Dependency Trees for Authorship Attribution. Quantitative analysis of dependency structures.

BibTeX

@incollection{Mehler:Hemati:Uslu:Luecking:2018,
  author    = {Alexander Mehler and Wahed Hemati and Tolga Uslu and Andy Lücking},
  title     = {A Multidimensional Model of Syntactic Dependency Trees for Authorship
               Attribution},
  booktitle = {Quantitative analysis of dependency structures},
  publisher = {De Gruyter},
  editor    = {Jingyang Jiang and Haitao Liu},
  address   = {Berlin/New York},
  abstract  = {In this chapter we introduce a multidimensional model
               of syntactic dependency trees. Our ultimate goal is to generate
               fingerprints of such trees to predict the author of the underlying
               sentences. The chapter makes a first attempt to create such fingerprints
               for sentence categorization via the detour of text categorization.
               We show that at text level, aggregated dependency structures actually
               provide information about authorship. At the same time, we show
               that this does not hold for topic detection. We evaluate our model
               using a quarter of a million sentences collected in two corpora:
               the first is sampled from literary texts, the second from Wikipedia
               articles. As a second finding of our approach, we show that quantitative
               models of dependency structure do not yet allow for detecting
               syntactic alignment in written communication. We conclude that
               this is mainly due to effects of lexical alignment on syntactic
               alignment.},
  keywords  = {Dependency structure, Authorship attribution, Text
               categorization, Syntactic Alignment},
  year      = {2018}
}

Tolga Uslu, Alexander Mehler and Dirk Meyer. 2018. LitViz: Visualizing Literary Data by Means of text2voronoi. Proceedings of the Digital Humanities 2018.

BibTeX

@inproceedings{Uslu:Mehler:Meyer:2018,
  author    = {Tolga Uslu and Alexander Mehler and Dirk Meyer},
  title     = {{LitViz}: Visualizing Literary Data by Means of {text2voronoi}},
  booktitle = {Proceedings of the Digital Humanities 2018},
  series    = {DH2018},
  location  = {Mexico City, Mexico},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/LitViz.pdf},
  year      = {2018}
}

Christian Spiekermann, Giuseppe Abrami and Alexander Mehler. 2018. VAnnotatoR: a Gesture-driven Annotation Framework for Linguistic and Multimodal Annotation. Proceedings of the Annotation, Recognition and Evaluation of Actions (AREA 2018) Workshop.

BibTeX

@inproceedings{Spiekerman:Abrami:Mehler:2018,
  author    = {Christian Spiekermann and Giuseppe Abrami and Alexander Mehler},
  title     = {{VAnnotatoR}: a Gesture-driven Annotation Framework
               for Linguistic and Multimodal Annotation},
  booktitle = {Proceedings of the Annotation, Recognition and Evaluation
               of Actions (AREA 2018) Workshop},
  series    = {AREA},
  year      = {2018},
  location  = {Miyazaki, Japan},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/VAnnotatoR.pdf}
}

Tolga Uslu, Lisa Miebach, Steffen Wolfsgruber, Michael Wagner, Klaus Fließbach, Rüdiger Gleim, Wahed Hemati, Alexander Henlein and Alexander Mehler. 2018. Automatic Classification in Memory Clinic Patients and in Depressive Patients. Proceedings of Resources and ProcessIng of linguistic, para-linguistic and extra-linguistic Data from people with various forms of cognitive/psychiatric impairments (RaPID-2).

BibTeX

@inproceedings{Uslu:et:al:2018:a,
  author    = {Tolga Uslu and Lisa Miebach and Steffen Wolfsgruber and Michael Wagner
               and Klaus Fließbach and Rüdiger Gleim and Wahed Hemati and Alexander Henlein
               and Alexander Mehler},
  title     = {Automatic Classification in Memory Clinic Patients and in Depressive Patients},
  booktitle = {Proceedings of Resources and ProcessIng of linguistic, para-linguistic
               and extra-linguistic Data from people with various forms of cognitive/psychiatric
               impairments (RaPID-2)},
  series    = {RaPID},
  location  = {Miyazaki, Japan},
  year      = {2018}
}

Alexander Mehler, Rüdiger Gleim, Andy Lücking, Tolga Uslu and Christian Stegbauer. 2018. On the Self-similarity of Wikipedia Talks: a Combined Discourse-analytical and Quantitative Approach. Glottometrics, 40:1–44.

BibTeX

@article{Mehler:Gleim:Luecking:Uslu:Stegbauer:2018,
  author    = {Alexander Mehler and Rüdiger Gleim and Andy Lücking and Tolga Uslu
               and Christian Stegbauer},
  title     = {On the Self-similarity of {Wikipedia} Talks: a Combined Discourse-analytical
               and Quantitative Approach},
  journal   = {Glottometrics},
  volume    = {40},
  pages     = {1--44},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/Glottometrics-Mehler.pdf},
  year      = {2018}
}

Tolga Uslu, Alexander Mehler, Andreas Niekler and Daniel Baumartz. 2018. Towards a DDC-based Topic Network Model of Wikipedia. Proceedings of 2nd International Workshop on Modeling, Analysis, and Management of Social Networks and their Applications (SOCNET 2018), February 28, 2018.

BibTeX

@inproceedings{Uslu:Mehler:Niekler:Baumartz:2018,
  author    = {Tolga Uslu and Alexander Mehler and Andreas Niekler and Daniel Baumartz},
  title     = {Towards a {DDC}-based Topic Network Model of {Wikipedia}},
  booktitle = {Proceedings of 2nd International Workshop on Modeling, Analysis,
               and Management of Social Networks and their Applications (SOCNET
               2018), February 28, 2018},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/TowardsDDC.pdf},
  year      = {2018}
}

Tolga Uslu, Alexander Mehler, Daniel Baumartz, Alexander Henlein and Wahed Hemati. 2018. fastSense: An Efficient Word Sense Disambiguation Classifier. Proceedings of the 11th edition of the Language Resources and Evaluation Conference, May 7 - 12.

BibTeX

@inproceedings{Uslu:et:al:2018,
  author    = {Tolga Uslu and Alexander Mehler and Daniel Baumartz and Alexander Henlein
               and Wahed Hemati},
  title     = {{fastSense}: An Efficient Word Sense Disambiguation Classifier},
  booktitle = {Proceedings of the 11th edition of the Language Resources and
               Evaluation Conference, May 7 - 12},
  series    = {LREC 2018},
  address   = {Miyazaki, Japan},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/fastSense.pdf},
  year      = {2018}
}

Rüdiger Gleim, Alexander Mehler and Sung Y. Song. 2018. WikiDragon: A Java Framework For Diachronic Content And Network Analysis Of MediaWikis. Proceedings of the 11th edition of the Language Resources and Evaluation Conference, May 7 - 12.

BibTeX

@inproceedings{Gleim:Mehler:Song:2018,
  author    = {R{\"u}diger Gleim and Alexander Mehler and Sung Y. Song},
  title     = {{WikiDragon}: A {Java} Framework For Diachronic Content And Network
               Analysis Of {MediaWikis}},
  booktitle = {Proceedings of the 11th edition of the Language Resources and
               Evaluation Conference, May 7 - 12},
  series    = {LREC 2018},
  address   = {Miyazaki, Japan},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/WikiDragon.pdf},
  year      = {2018}
}

Philipp Helfrich, Elias Rieb, Giuseppe Abrami, Andy Lücking and Alexander Mehler. 2018. TreeAnnotator: Versatile Visual Annotation of Hierarchical Text Relations. Proceedings of the 11th edition of the Language Resources and Evaluation Conference, May 7 - 12.

BibTeX

@inproceedings{Helfrich:et:al:2018,
  author    = {Philipp Helfrich and Elias Rieb and Giuseppe Abrami and Andy L{\"u}cking
               and Alexander Mehler},
  title     = {{TreeAnnotator}: Versatile Visual Annotation of Hierarchical Text Relations},
  booktitle = {Proceedings of the 11th edition of the Language Resources and
               Evaluation Conference, May 7 - 12},
  series    = {LREC 2018},
  address   = {Miyazaki, Japan},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/TreeAnnotator.pdf},
  year      = {2018}
}

Giuseppe Abrami and Alexander Mehler. May, 2018. A UIMA Database Interface for Managing NLP-related Text Annotations. Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018).

BibTeX

@inproceedings{Abrami:Mehler:2018,
  address   = {Miyazaki, Japan},
  author    = {Abrami, Giuseppe and Mehler, Alexander},
  booktitle = {Proceedings of the Eleventh International Conference on Language
               Resources and Evaluation ({LREC} 2018)},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Cieri, Christopher
               and Declerck, Thierry and Goggi, Sara and Hasida, Koiti and Isahara, Hitoshi
               and Maegaard, Bente and Mariani, Joseph and Mazo, H{\'e}l{\`e}ne and Moreno, Asuncion
               and Odijk, Jan and Piperidis, Stelios and Tokunaga, Takenobu},
  month     = may,
  series    = {LREC 2018},
  keywords  = {UIMA},
  pdf       = {https://aclanthology.org/L18-1212.pdf},
  publisher = {European Language Resources Association (ELRA)},
  title     = {A {UIMA} Database Interface for Managing {NLP}-related Text Annotations},
  url       = {https://aclanthology.org/L18-1212},
  year      = {2018}
}

Alexander Mehler, Christian Stegbauer and Barbara Frank-Job. 2018. Ferdinand de Saussure. 1916. Cours de linguistique générale. Payot, Lausanne/Paris. In: Schlüsselwerke der Netzwerkforschung. Ed. by Christian Stegbauer and Boris Holzer. Springer VS.

BibTeX

@inbook{Mehler:Stegbauer:Frank-Job:2018,
  author    = {Alexander Mehler and Christian Stegbauer
               and Barbara Frank-Job},
  title     = {{Ferdinand de Saussure. 1916. Cours de linguistique générale.
               Payot, Lausanne/Paris}},
  booktitle = {Schlüsselwerke der Netzwerkforschung},
  editor    = {Christian Stegbauer and Boris Holzer},
  publisher = {Springer VS},
  address   = {Wiesbaden},
  year      = {2018},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2017/11/Saussure2.pdf}
}

Alexander Mehler, Olga Zlatkin-Troitschanskaia, Wahed Hemati, Dimitri Molerov, Andy Lücking and Susanne Schmidt. 2018. Integrating Computational Linguistic Analysis of Multilingual Learning Data and Educational Measurement Approaches to Explore Learning in Higher Education. In: Positive Learning in the Age of Information: A Blessing or a Curse?, 145–193. Ed. by Olga Zlatkin-Troitschanskaia, Gabriel Wittum and Andreas Dengel. Springer Fachmedien Wiesbaden.

BibTeX

@inbook{Mehler:et:al:2018,
  author    = {Mehler, Alexander and Zlatkin-Troitschanskaia, Olga and Hemati, Wahed
               and Molerov, Dimitri and L{\"u}cking, Andy and Schmidt, Susanne},
  title     = {Integrating Computational Linguistic Analysis of Multilingual
               Learning Data and Educational Measurement Approaches to Explore
               Learning in Higher Education},
  booktitle = {Positive Learning in the Age of Information: A Blessing or a Curse?},
  editor    = {Zlatkin-Troitschanskaia, Olga and Wittum, Gabriel and Dengel, Andreas},
  publisher = {Springer Fachmedien Wiesbaden},
  address   = {Wiesbaden},
  pages     = {145--193},
  year      = {2018},
  isbn      = {978-3-658-19567-0},
  doi       = {10.1007/978-3-658-19567-0_10},
  url       = {https://doi.org/10.1007/978-3-658-19567-0_10},
  abstract  = {This chapter develops a computational linguistic model for analyzing
               and comparing multilingual data as well as its application to
               a large body of standardized assessment data from higher education.
               The approach employs both an automatic and a manual annotation
               of the data on several linguistic layers (including parts of speech,
               text structure and content). Quantitative features of the textual
               data are explored that are related to both the students' (domain-specific
               knowledge) test results and their level of academic experience.
               The respective analysis involves statistics of distance correlation,
               text categorization with respect to text types (questions and
               response options) as well as languages (English and German), and
               network analysis to assess dependencies between features. The
               correlation between correct test results of students and linguistic
               features of the verbal presentations of tests indicate to what
               extent language influences higher education test performance.
               It has also been found that this influence relates to specialized
               language. Thus, this integrative modeling approach contributes
               a test basis for a large-scale analysis of learning data and points
               to a number of subsequent, more detailed research questions.}
}

Giuseppe Abrami, Sajawel Ahmed, Rüdiger Gleim, Wahed Hemati, Alexander Mehler and Tolga Uslu. March, 2018. Natural Language Processing and Text Mining for BIOfid.

BibTeX

@misc{Abrami:et:al:2018b,
  author    = {Abrami, Giuseppe and Ahmed, Sajawel and Gleim, R{\"u}diger and Hemati, Wahed
               and Mehler, Alexander and Uslu, Tolga},
  title     = {Natural Language Processing and Text Mining for {BIOfid}},
  howpublished = {Presentation at the 1st Meeting of the Scientific Advisory Board of the BIOfid Project},
  address   = {Goethe-University, Frankfurt am Main, Germany},
  year      = {2018},
  month     = mar,
  day       = {08}
}

2017

Alexander Mehler and Andy Lücking. 2017. Modelle sozialer Netzwerke und Natural Language Processing: eine methodologische Randnotiz. Soziologie, 46(1):43–47.

BibTeX

@article{Mehler:Luecking:2017,
  author    = {Alexander Mehler and Andy Lücking},
  title     = {Modelle sozialer Netzwerke und Natural Language Processing: eine
               methodologische Randnotiz},
  journal   = {Soziologie},
  volume    = {46},
  number    = {1},
  pages     = {43--47},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/Soziologe-NetzwerkeundNLP.pdf},
  year      = {2017}
}

Wahed Hemati, Alexander Mehler and Tolga Uslu. 2017. CRFVoter: Chemical Entity Mention, Gene and Protein Related Object recognition using a conglomerate of CRF based tools. BioCreative V.5. Proceedings.

BibTeX

@inproceedings{Hemati:Mehler:Uslu:2017,
  author    = {Wahed Hemati and Alexander Mehler and Tolga Uslu},
  title     = {{CRFVoter}: Chemical Entity Mention, Gene and Protein Related
               Object recognition using a conglomerate of {CRF} based tools},
  booktitle = {BioCreative V.5. Proceedings},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/CRFVoter.pdf},
  year      = {2017}
}

Wahed Hemati, Tolga Uslu and Alexander Mehler. 2017. TextImager as an interface to BeCalm. BioCreative V.5. Proceedings.

BibTeX

@inproceedings{Hemati:Uslu:Mehler:2017,
  author    = {Wahed Hemati and Tolga Uslu
               and Alexander Mehler},
  title     = {{TextImager} as an interface to {BeCalm}},
  booktitle = {BioCreative V.5. Proceedings},
  year      = {2017},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/TextImager_BeCalm.pdf}
}

Alexander Mehler, Giuseppe Abrami, Steffen Bruendel, Lisa Felder, Thomas Ostertag and Christian Spiekermann. 2017. Stolperwege: An App for a Digital Public History of the Holocaust. Proceedings of the 28th ACM Conference on Hypertext and Social Media, 319–320.

BibTeX

@inproceedings{Mehler:et:al:2017:a,
  author    = {Alexander Mehler and Giuseppe Abrami and Steffen Bruendel and Lisa Felder
               and Thomas Ostertag and Christian Spiekermann},
  title     = {{Stolperwege}: An App for a Digital Public History of the {Holocaust}},
  booktitle = {Proceedings of the 28th ACM Conference on Hypertext and Social Media},
  series    = {HT '17},
  pages     = {319--320},
  address   = {New York, NY, USA},
  publisher = {ACM},
  abstract  = {We present the Stolperwege app, a web-based framework for ubiquitous
               modeling of historical processes. Starting from the art project
               Stolpersteine of Gunter Demnig, it allows for virtually connecting
               these stumbling blocks with information about the biographies
               of victims of Nazism. According to the practice of public history,
               the aim of Stolperwege is to deepen public knowledge of the Holocaust
               in the context of our everyday environment. Stolperwege uses an
               information model that allows for modeling social networks of
               agents starting from information about portions of their life.
               The paper exemplifies how Stolperwege is informationally enriched
               by means of historical maps and 3D animations of (historical)
               buildings.},
  acmid     = {3078748},
  doi       = {10.1145/3078714.3078748},
  isbn      = {978-1-4503-4708-2},
  keywords  = {3d, geocaching, geotagging, historical maps,
               historical processes, public history of the holocaust,
               ubiquitous computing},
  location  = {Prague, Czech Republic},
  numpages  = {2},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2017/07/poster_ht2017.pdf},
  url       = {http://doi.acm.org/10.1145/3078714.3078748},
  year      = {2017}
}

Alexander Mehler, Rüdiger Gleim, Wahed Hemati and Tolga Uslu. 2017. Skalenfreie online soziale Lexika am Beispiel von Wiktionary. Proceedings of 53rd Annual Conference of the Institut für Deutsche Sprache (IDS), March 14-16, Mannheim, Germany. In German. Title translates into: Scale-free online-social Lexika by Example of Wiktionary.

BibTeX

@inproceedings{Mehler:Gleim:Hemati:Uslu:2017,
  author    = {Alexander Mehler and Rüdiger Gleim and Wahed Hemati and Tolga Uslu},
  title     = {{Skalenfreie online soziale Lexika am Beispiel von Wiktionary}},
  booktitle = {Proceedings of 53rd Annual Conference of the Institut für Deutsche
               Sprache (IDS), March 14--16, Mannheim, Germany},
  editor    = {Stefan Engelberg and Henning Lobin and Kathrin Steyer and Sascha Wolfer},
  address   = {Berlin},
  publisher = {De Gruyter},
  note      = {In German. Title translates into: Scale-free
               online-social Lexika by Example of Wiktionary},
  abstract  = {In English: The paper deals with characteristics of the structural,
               thematic and participatory dynamics of collaboratively generated
               lexical networks. This is done by example of Wiktionary. Starting
               from a network-theoretical model in terms of so-called multi-layer
               networks, we describe Wiktionary as a scale-free lexicon. Systems
               of this sort are characterized by the fact that their content-related
               dynamics is determined by the underlying dynamics of collaborating
               authors. This happens in a way that social structure imprints
               on content structure. According to this conception, the unequal
               distribution of the activities of authors results in a correspondingly
               unequal distribution of the information units documented within
               the lexicon. The paper focuses on foundations for describing such
               systems starting from a parameter space which requires to deal
               with Wiktionary as an issue in big data analysis. In German: Der
               Beitrag thematisiert Eigenschaften der strukturellen, thematischen
               und partizipativen Dynamik kollaborativ erzeugter lexikalischer
               Netzwerke am Beispiel von Wiktionary. Ausgehend von einem netzwerktheoretischen
               Modell in Form so genannter Mehrebenennetzwerke wird Wiktionary
               als ein skalenfreies Lexikon beschrieben. Systeme dieser Art zeichnen
               sich dadurch aus, dass ihre inhaltliche Dynamik durch die zugrundeliegende
               Kollaborationsdynamik bestimmt wird, und zwar so, dass sich die
               soziale Struktur der entsprechenden inhaltlichen Struktur aufprägt.
               Dieser Auffassung gemäß führt die Ungleichverteilung der Aktivitäten
               von Lexikonproduzenten zu einer analogen Ungleichverteilung der
               im Lexikon dokumentierten Informationseinheiten. Der Beitrag thematisiert
               Grundlagen zur Beschreibung solcher Systeme ausgehend von einem
               Parameterraum, welcher die netzwerkanalytische Betrachtung von
               Wiktionary als Big-Data-Problem darstellt.},
  year      = {2017}
}

Armin Hoenen, Steffen Eger and Ralf Gehrke. 2017. How Many Stemmata with Root Degree k? Proceedings of the 15th Meeting on the Mathematics of Language, 11–21.

BibTeX

@inproceedings{Hoenen:Eger:Gehrke:2017,
  author    = {Hoenen, Armin and Eger, Steffen and Gehrke, Ralf},
  title     = {How Many Stemmata with Root Degree {k}?},
  booktitle = {Proceedings of the 15th Meeting on the Mathematics of Language},
  pages     = {11--21},
  publisher = {Association for Computational Linguistics},
  location  = {London, UK},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/How_Many_Stemmata_with_Root_Degree_k.pdf},
  url       = {http://aclweb.org/anthology/W17-3402},
  year      = {2017}
}

Armin Hoenen. 2017. Using Word Embeddings for Computing Distances Between Texts and for Authorship Attribution. International Conference on Applications of Natural Language to Information Systems, 274–277.

BibTeX

@inproceedings{Hoenen:2017:b,
  author    = {Hoenen, Armin},
  title     = {Using Word Embeddings for Computing Distances Between Texts and
               for Authorship Attribution},
  booktitle = {International Conference on Applications of Natural Language to
               Information Systems},
  pages     = {274--277},
  publisher = {Springer},
  doi       = {10.1007/978-3-319-59569-6_33},
  url       = {https://link.springer.com/chapter/10.1007/978-3-319-59569-6_33},
  year      = {2017}
}

Tolga Uslu, Wahed Hemati, Alexander Mehler and Daniel Baumartz. 2017. TextImager as a Generic Interface to R. Software Demonstrations of the 15th Conference of the European Chapter of the Association for Computational Linguistics (EACL 2017).

BibTeX

@inproceedings{Uslu:Hemati:Mehler:Baumartz:2017,
  author    = {Uslu, Tolga and Hemati, Wahed and Mehler, Alexander and Baumartz, Daniel},
  title     = {{TextImager} as a Generic Interface to {R}},
  booktitle = {Software Demonstrations of the 15th Conference of the European Chapter
               of the Association for Computational Linguistics (EACL 2017)},
  year      = {2017},
  location  = {Valencia, Spain},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/TextImager.pdf}
}

Armin Hoenen. 2017. Beyond the tree – a theoretical model of contamination and a software to generate multilingual stemmata. Book of Abstracts of the annual conference of the AIUCD 2017, Sapienza, Rome.

BibTeX

@incollection{Hoenen:2017,
  author    = {Hoenen, Armin},
  title     = {Beyond the tree -- a theoretical model of contamination and a
               software to generate multilingual stemmata},
  booktitle = {Book of Abstracts of the annual conference of the AIUCD 2017, Sapienza, Rome},
  publisher = {AIUCD},
  url       = {http://aiucd2017.aiucd.it/wp-content/uploads/2017/01/book-of-abstract-AIUCD-2017.pdf},
  year      = {2017}
}

Andy Lücking. 2017. Indexicals as Weak Descriptors. Proceedings of the 12th International Conference on Computational Semantics.

BibTeX

@inproceedings{Luecking:2017:c,
  author    = {L{\"u}cking, Andy},
  title     = {Indexicals as Weak Descriptors},
  booktitle = {Proceedings of the 12th International Conference on Computational Semantics},
  series    = {IWCS 2017},
  address   = {Montpellier (France)},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/descriptive-indexicals_rev.pdf},
  year      = {2017}
}

2016

Steffen Eger, Armin Hoenen and Alexander Mehler. 2016. Language classification from bilingual word embedding graphs. Proceedings of COLING 2016.

BibTeX

@inproceedings{Eger:Hoenen:Mehler:2016,
  author    = {Eger, Steffen and Hoenen, Armin and Mehler, Alexander},
  title     = {Language classification from bilingual word embedding graphs},
  booktitle = {Proceedings of COLING 2016},
  year      = {2016},
  publisher = {ACL},
  location  = {Osaka},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2016/10/eger_hoenen_mehler_COLING2016.pdf}
}

Wahed Hemati, Tolga Uslu and Alexander Mehler. 2016. TextImager: a Distributed UIMA-based System for NLP. Proceedings of the COLING 2016 System Demonstrations.

BibTeX

@inproceedings{Hemati:Uslu:Mehler:2016,
  author    = {Wahed Hemati and Tolga Uslu and Alexander Mehler},
  title     = {{TextImager}: a Distributed {UIMA}-based System for {NLP}},
  booktitle = {Proceedings of the COLING 2016 System Demonstrations},
  organization = {Federated Conference on Computer Science and
               Information Systems},
  location  = {Osaka, Japan},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/TextImager2016.pdf},
  year      = {2016}
}

Andy Lücking. 2016. Modeling Co-Verbal Gesture Perception in Type Theory with Records. Proceedings of the 2016 Federated Conference on Computer Science and Information Systems, 8:383–392. Best Paper Award.

BibTeX

@inproceedings{Luecking:2016:b,
  author    = {L{\"u}cking, Andy},
  title     = {Modeling Co-Verbal Gesture Perception in Type Theory with Records},
  booktitle = {Proceedings of the 2016 Federated Conference on Computer Science
               and Information Systems},
  editor    = {M. Ganzha and L. Maciaszek and M. Paprzycki},
  volume    = {8},
  series    = {Annals of Computer Science and Information Systems},
  pages     = {383--392},
  address   = {Gdansk, Poland},
  publisher = {IEEE},
  note      = {Best Paper Award},
  doi       = {10.15439/2016F83},
  pdf       = {http://annals-csis.org/Volume_8/pliks/83.pdf},
  url       = {http://annals-csis.org/Volume_8/drp/83.html},
  year      = {2016}
}

Alexander Mehler, Tolga Uslu and Wahed Hemati. 2016. Text2voronoi: An Image-driven Approach to Differential Diagnosis. Proceedings of the 5th Workshop on Vision and Language (VL'16) hosted by the 54th Annual Meeting of the Association for Computational Linguistics (ACL), Berlin.

BibTeX

@inproceedings{Mehler:Uslu:Hemati:2016,
  author    = {Mehler, Alexander and Uslu, Tolga and Hemati, Wahed},
  title     = {Text2voronoi: An Image-driven Approach to Differential Diagnosis},
  booktitle = {Proceedings of the 5th Workshop on Vision and Language (VL'16) hosted
               by the 54th Annual Meeting of the Association for Computational
               Linguistics (ACL), Berlin},
  year      = {2016},
  pdf       = {https://aclweb.org/anthology/W/W16/W16-3212.pdf}
}

Steffen Eger and Alexander Mehler. 2016. On the linearity of semantic change: Investigating meaning variation via dynamic graph models. Proceedings of ACL 2016.

BibTeX

@inproceedings{Eger:Mehler:2016,
  author    = {Steffen Eger and Alexander Mehler},
  title     = {On the linearity of semantic change: Investigating meaning variation
               via dynamic graph models},
  booktitle = {Proceedings of ACL 2016},
  location  = {Berlin},
  pdf       = {https://www.aclweb.org/anthology/P/P16/P16-2009.pdf},
  year      = {2016}
}

Steffen Eger, Tim vor der Brück and Alexander Mehler. 2016. A Comparison of Four Character-Level String-to-String Translation Models for (OCR) Spelling Error Correction. The Prague Bulletin of Mathematical Linguistics, 105:77–99.

BibTeX

@article{Eger:vorDerBrueck:Mehler:2016,
  author    = {Eger, Steffen and vor der Brück, Tim and Mehler, Alexander},
  title     = {A Comparison of Four Character-Level String-to-String Translation
               Models for (OCR) Spelling Error Correction},
  journal   = {The Prague Bulletin of Mathematical Linguistics},
  volume    = {105},
  pages     = {77--99},
  doi       = {10.1515/pralin-2016-0004},
  pdf       = {https://ufal.mff.cuni.cz/pbml/105/art-eger-vor-der-brueck.pdf},
  year      = {2016}
}

Armin Hoenen. 2016. Silva Portentosissima – Computer-Assisted Reflections on Bifurcativity in Stemmas. Digital Humanities 2016: Conference Abstracts. Jagiellonian University & Pedagogical University, 557–560.

BibTeX

@inproceedings{Hoenen:2016DH,
  author    = {Hoenen, Armin},
  title     = {{Silva Portentosissima} -- Computer-Assisted Reflections on Bifurcativity
               in Stemmas},
  booktitle = {Digital Humanities 2016: Conference Abstracts. Jagiellonian University
               \& Pedagogical University},
  series    = {DH 2016},
  pages     = {557--560},
  abstract  = {In 1928, the philologue Joseph Bédier explored contemporary stemmas
               and found them to contain a suspiciously large amount of bifurcations.
               In this paper, the argument is investigated that, with a large
               amount of lost manuscripts, the amount of bifurcations in the
               true stemmas would naturally be high because the probability for
               siblings to survive becomes very low is assessed via a computer
               simulation.},
  location  = {Kraków},
  url       = {http://dh2016.adho.org/abstracts/311},
  year      = {2016}
}

Alexander Mehler, Benno Wagner and Rüdiger Gleim. 2016. Wikidition: Towards A Multi-layer Network Model of Intertextuality. Proceedings of DH 2016, 12-16 July.

BibTeX

@inproceedings{Mehler:Wagner:Gleim:2016,
  author    = {Mehler, Alexander and Wagner, Benno and Gleim, R{\"u}diger},
  title     = {Wikidition: Towards A Multi-layer Network Model of Intertextuality},
  booktitle = {Proceedings of DH 2016, 12--16 July},
  series    = {DH 2016},
  abstract  = {The paper presents Wikidition, a novel text mining tool for generating
               online editions of text corpora. It explores lexical, sentential
               and textual relations to span multi-layer networks (linkification)
               that allow for browsing syntagmatic and paradigmatic relations
               among the constituents of its input texts. In this way, relations
               of text reuse can be explored together with lexical relations
               within the same literary memory information system. Beyond that,
               Wikidition contains a module for automatic lexiconisation to extract
               author specific vocabularies. Based on linkification and lexiconisation,
               Wikidition does not only allow for traversing input corpora on
               different (lexical, sentential and textual) levels. Rather, its
               readers can also study the vocabulary of authors on several levels
               of resolution including superlemmas, lemmas, syntactic words and
               wordforms. We exemplify Wikidition by a range of literary texts
               and evaluate it by means of the apparatus of quantitative network
               analysis.},
  location  = {Kraków},
  url       = {http://dh2016.adho.org/abstracts/250},
  year      = {2016}
}

Tim vor der Brück and Alexander Mehler. 2016. TLT-CRF: A Lexicon-supported Morphological Tagger for Latin Based on Conditional Random Fields. Proceedings of the 10th International Conference on Language Resources and Evaluation.

BibTeX

@inproceedings{vorderBrueck:Mehler:2016,
  author    = {vor der Br{\"u}ck, Tim and Mehler, Alexander},
  title     = {{TLT-CRF}: A Lexicon-supported Morphological Tagger for {Latin}
               Based on Conditional Random Fields},
  booktitle = {Proceedings of the 10th International Conference on Language Resources
               and Evaluation},
  series    = {LREC 2016},
  location  = {Portoro\v{z} (Slovenia)},
  pdf       = {http://www.texttechnologylab.org/wp-content/uploads/2016/04/lrec2016_tagger.pdf},
  year      = {2016}
}

Steffen Eger, Rüdiger Gleim and Alexander Mehler. 2016. Lemmatization and Morphological Tagging in German and Latin: A comparison and a survey of the state-of-the-art. Proceedings of the 10th International Conference on Language Resources and Evaluation.

BibTeX

@inproceedings{Eger:Mehler:Gleim:2016,
  author    = {Eger, Steffen and Gleim, R{\"u}diger and Mehler, Alexander},
  title     = {Lemmatization and Morphological Tagging in {German} and {Latin}:
               A comparison and a survey of the state-of-the-art},
  booktitle = {Proceedings of the 10th International Conference on Language Resources
               and Evaluation},
  series    = {LREC 2016},
  location  = {Portoro\v{z} (Slovenia)},
  pdf       = {http://www.texttechnologylab.org/wp-content/uploads/2016/04/lrec_eger_gleim_mehler.pdf},
  year      = {2016}
}

Andy Lücking, Alexander Mehler, Désirée Walther, Marcel Mauri and Dennis Kurfürst. 2016. Finding Recurrent Features of Image Schema Gestures: the FIGURE corpus. Proceedings of the 10th International Conference on Language Resources and Evaluation.

BibTeX

@inproceedings{Luecking:Mehler:Walther:Mauri:Kurfuerst:2016,
  author    = {L{\"u}cking, Andy and Mehler, Alexander and Walther, D{\'e}sir{\'e}e
               and Mauri, Marcel and Kurf{\"u}rst, Dennis},
  title     = {Finding Recurrent Features of Image Schema Gestures: the {FIGURE} corpus},
  booktitle = {Proceedings of the 10th International Conference on Language Resources
               and Evaluation},
  series    = {LREC 2016},
  location  = {Portoro\v{z} (Slovenia)},
  pdf       = {http://www.texttechnologylab.org/wp-content/uploads/2016/04/lrec2016-gesture-study-final-version-short.pdf},
  year      = {2016}
}

Andy Lücking, Armin Hoenen and Alexander Mehler. 2016. TGermaCorp – A (Digital) Humanities Resource for (Computational) Linguistics. Proceedings of the 10th International Conference on Language Resources and Evaluation.

BibTeX

@inproceedings{Luecking:Hoenen:Mehler:2016,
  author    = {L{\"u}cking, Andy and Hoenen, Armin and Mehler, Alexander},
  title     = {{TGermaCorp} -- A (Digital) Humanities Resource for (Computational) Linguistics},
  booktitle = {Proceedings of the 10th International Conference on Language Resources
               and Evaluation},
  series    = {LREC 2016},
  islrn     = {536-382-801-278-5},
  location  = {Portoro\v{z} (Slovenia)},
  pdf       = {http://www.texttechnologylab.org/wp-content/uploads/2016/04/lrec2016-ttgermacorp-final.pdf},
  year      = {2016}
}

Benno Wagner, Alexander Mehler and Hanno Biber. 2016. Transbiblionome Daten in der Literaturwissenschaft. Texttechnologische Erschließung und digitale Visualisierung intertextueller Beziehungen digitaler Korpora. DHd 2016.

BibTeX

@inproceedings{Wagner:Mehler:Biber:2016,
  author    = {Wagner, Benno and Mehler, Alexander and Biber, Hanno},
  title     = {{Transbiblionome Daten in der Literaturwissenschaft.
               Texttechnologische Erschließung und digitale Visualisierung
               intertextueller Beziehungen digitaler Korpora}},
  booktitle = {DHd 2016},
  year      = {2016},
  url       = {http://www.dhd2016.de/abstracts/sektionen-005.html#index.xml-body.1_div.4}
}

Alexander Mehler, Rüdiger Gleim, Tim vor der Brück, Wahed Hemati, Tolga Uslu and Steffen Eger. 2016. Wikidition: Automatic Lexiconization and Linkification of Text Corpora. Information Technology, 58:70–79.

BibTeX

@article{Mehler:et:al:2016,
  author    = {Alexander Mehler and Rüdiger Gleim and Tim vor der Brück and Wahed Hemati
               and Tolga Uslu and Steffen Eger},
  title     = {Wikidition: Automatic Lexiconization and Linkification of Text Corpora},
  journal   = {Information Technology},
  volume    = {58},
  pages     = {70--79},
  abstract  = {We introduce a new text technology, called Wikidition, which automatically
               generates large scale editions of corpora of natural language
               texts. Wikidition combines a wide range of text mining tools for
               automatically linking lexical, sentential and textual units. This
               includes the extraction of corpus-specific lexica down to the
               level of syntactic words and their grammatical categories. To
               this end, we introduce a novel measure of text reuse and exemplify
               Wikidition by means of the capitularies, that is, a corpus of
               Medieval Latin texts.},
  doi       = {10.1515/itit-2015-0035},
  year      = {2016}
}

Armin Hoenen. 2016. Wikipedia Titles As Noun Tag Predictors. Proceedings of the 10th International Conference on Language Resources and Evaluation.

BibTeX

@inproceedings{Hoenen:2016x,
  author    = {Hoenen, Armin},
  title     = {{Wikipedia} Titles As Noun Tag Predictors},
  booktitle = {Proceedings of the 10th International Conference on Language Resources
               and Evaluation},
  series    = {LREC 2016},
  location  = {Portoro\v{z} (Slovenia)},
  pdf       = {http://www.lrec-conf.org/proceedings/lrec2016/pdf/18_Paper.pdf},
  year      = {2016}
}

Armin Hoenen. 2016. Das erste dynamische Stemma, Pionier des digitalen Zeitalters? Accepted in the Proceedings of the Jahrestagung der Digital Humanities im deutschsprachigen Raum.

BibTeX

@inproceedings{Hoenen:2016y,
  author    = {Hoenen, Armin},
  title     = {Das erste dynamische {Stemma}, {Pionier} des digitalen {Zeitalters}?},
  booktitle = {Accepted in the Proceedings of the Jahrestagung der Digital Humanities
               im deutschsprachigen Raum},
  url       = {http://www.dhd2016.de/abstracts/posters-060.html},
  year      = {2016}
}

Armin Hoenen, Alexander Mehler and Jost Gippert. 2016. Corpora and Resources for (Historical) Low Resource Languages. 31(2). JLCL.

BibTeX

@collection{GSCL:JLCL:2016:2,
  bibsource = {GSCL, http://www.gscl.info/},
  editor    = {Armin Hoenen and Alexander Mehler and Jost Gippert},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2017/10/Titelblatt-Heft2-2016.png},
  issn      = {2190-6858},
  number    = {2},
  pdf       = {http://www.jlcl.org/2016_Heft2/Heft2-2016.pdf},
  publisher = {JLCL},
  title     = {Corpora and Resources for (Historical) Low Resource Languages},
  volume    = {31},
  year      = {2016}
}

Armin Hoenen, Alexander Mehler and Jost Gippert. 2016. Editorial. JLCL, 31(2):iii–iv.

BibTeX

@article{Hoenen:Mehler:Gippert:2016,
  author    = {Hoenen, Armin and Mehler, Alexander and Gippert, Jost},
  title     = {Editorial},
  journal   = {JLCL},
  year      = {2016},
  volume    = {31},
  number    = {2},
  pages     = {iii--iv},
  pdf       = {http://www.jlcl.org/2016_Heft2/Heft2-2016.pdf}
}

Armin Hoenen and Lela Samushia. 2016. Gepi: An Epigraphic Corpus for Old Georgian and a Tool Sketch for Aiding Reconstruction. JLCL, 31(2):25–38.

BibTeX

@article{Hoenen:Samushia:2016,
  author    = {Armin Hoenen and Lela Samushia},
  title     = {Gepi: An Epigraphic Corpus for {Old Georgian} and a Tool Sketch
               for Aiding Reconstruction},
  journal   = {JLCL},
  volume    = {31},
  number    = {2},
  pages     = {25--38},
  year      = {2016}
}

2015

Armin Hoenen and Franziska Mader. 2015. A New LMF Schema Application by Example of an Austrian Lexicon Applied to the Historical Corpus of the Writer Hugo von Hofmannsthal. Historical Corpora.

BibTeX

@inproceedings{Hoenen:Mader:2015,
  author    = {Hoenen, Armin and Mader, Franziska},
  title     = {A New {LMF} Schema Application by Example of an {Austrian} Lexicon
               Applied to the Historical Corpus of the Writer {Hugo von Hofmannsthal}},
  booktitle = {Historical Corpora},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/HoenenMader2013-a-new-lmf-schema-application.pdf},
  website   = {http://www.narr-shop.de/historical-corpora.html},
  year      = {2015}
}

Chris Biemann and Alexander Mehler, eds. 2015. Text Mining: From Ontology Learning to Automated Text Processing Applications. Festschrift in Honor of Gerhard Heyer. Theory and Applications of Natural Language Processing. Springer.

BibTeX

@book{Biemann:Mehler:2015,
  editor    = {Biemann, Chris and Mehler, Alexander},
  title     = {Text Mining: From Ontology Learning to Automated Text Processing
               Applications. {Festschrift} in Honor of {Gerhard Heyer}},
  publisher = {Springer},
  series    = {Theory and Applications of Natural Language Processing},
  address   = {Heidelberg},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/TextMiningsmall.jpg},
  year      = {2015}
}

Mohammad Zahurul Islam. 2015. PhD Thesis: Multilingual text classification using information-theoretic features.

BibTeX

@phdthesis{Islam:2015,
  author    = {Mohammad Zahurul Islam},
  title     = {Multilingual text classification using information-theoretic features},
  school    = {Johann Wolfgang Goethe-Universität Frankfurt am Main},
  pages     = {189},
  year      = {2015},
  pdf       = {http://publikationen.ub.uni-frankfurt.de/files/38157/thesis.pdf},
  abstract  = {The number of multilingual texts in the World Wide Web (WWW) is
               increasing dramatically and a multilingual economic zone like
               the European Union (EU) requires the availability of multilingual
               Natural Language Processing (NLP) tools. Due to a rapid development
               of NLP tools, many lexical, syntactic, semantic and other linguistic
               features have been used in different NLP applications. However,
               there are some situations where these features can not be used
               due the application type or unavailability of NLP resources for
               some of the languages. That is why an application that is intended
               to handle multilingual texts must have features that are not dependent
               on a particular language and specific linguistic tools. In this
               thesis, we will focus on two such applications: text readability
               and source and translation classification. In this thesis, we
               provide 18 features that are not only suitable for both applications,
               but are also language and linguistic tools independent. In order
               to build a readability classifier, we use texts from three different
               languages: English, German and Bangla. Our proposed features achieve
               a classification accuracy that is comparable with a classifier
               using 40 linguistic features. The readability classifier achieves
               a classification F-score of 74.21\% on the English Wikipedia corpus,
               an F-score of 75.47\% on the English textbook corpus, an F-score
               of 86.46\% on the Bangla textbook corpus and an F-score of 86.26\%
               on the German GEO/GEOLino corpus. We used more than two million
               sentence pairs from 21 European languages in order to build the
               source and translation classifier. The classifier using the same
               eighteen features achieves a classification accuracy of 86.63\%.
               We also used the same features to build a classifier that classifies
               translated texts based on their origin. The classifier achieves
               classification accuracy of 75\% for texts from 10 European languages.
               In this thesis, we also provide four different corpora, three
               for text readability analysis and one for corpus based translation
               studies.}
}

Natia Dundua, Armin Hoenen and Lela Samushia. 2015. A Parallel Corpus of the Old Georgian Gospel Manuscripts and their Stemmatology. The Georgian Journal for Language Logic Computation, IV:176–185.

BibTeX

@article{Dundua:Hoenen:Samushia:2015,
  author    = {Dundua, Natia and Hoenen, Armin and Samushia, Lela},
  title     = {A Parallel Corpus of the {Old Georgian} Gospel Manuscripts and
               their Stemmatology},
  journal   = {The Georgian Journal for Language Logic Computation},
  volume    = {IV},
  pages     = {176--185},
  publisher = {CLLS, Tbilisi State University and Kurt G{\"o}del
               Society},
  year      = {2015}
}

Tim vor der Brück, Steffen Eger and Alexander Mehler. 2015. Complex Decomposition of the Negative Distance Kernel. IEEE International Conference on Machine Learning and Applications.

BibTeX

@inproceedings{vor:der:Bruck:Eger:Mehler:2015,
  author    = {vor der Br{\"u}ck, Tim and
               Eger, Steffen and
               Mehler, Alexander},
  title     = {Complex Decomposition of the Negative Distance Kernel},
  booktitle = {IEEE International Conference on Machine Learning and Applications},
  year      = {2015},
  location  = {Miami, Florida, USA}
}

Steffen Eger. 2015. Do we need bigram alignment models? On the effect of alignment quality on transduction accuracy in G2P. Proceedings of EMNLP.

BibTeX

@inproceedings{Eger:2015_EMNLP,
  author    = {Eger, Steffen},
  title     = {Do we need bigram alignment models? {On} the effect of alignment
               quality on transduction accuracy in {G2P}},
  booktitle = {Proceedings of EMNLP},
  year      = {2015},
  pdf       = {https://www.aclweb.org/anthology/D15-1139}
}

Tim vor der Brück and Steffen Eger. 2015. Deriving a primal form for the quadratic power kernel. Proceedings of the 38th German Conference on Artificial Intelligence (KI).

BibTeX

@inproceedings{vorDerBrueck:Eger:2015,
  author    = {vor der Brück, Tim and
               Eger, Steffen},
  title     = {Deriving a primal form for the quadratic power kernel},
  year      = {2015},
  booktitle = {Proceedings of the 38th German Conference on
               Artificial Intelligence ({KI})}
}

Steffen Eger. 2015. Improving G2P from Wiktionary and other (web) resources. Proceedings of Interspeech.

BibTeX

@inproceedings{Eger:2015_Interspeech,
  author    = {Eger, Steffen},
  title     = {Improving {G2P} from {Wiktionary} and other (web) resources},
  booktitle = {Proceedings of Interspeech},
  pdf       = {https://pdfs.semanticscholar.org/bba8/30015d9cbfc40b975c25d0ec186280da6ab0.pdf},
  year      = {2015}
}

Steffen Eger, Tim vor der Brück and Alexander Mehler. 2015. Lexicon-assisted tagging and lemmatization in Latin: A comparison of six taggers and two lemmatization methods. Proceedings of the 9th Workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities (LaTeCH 2015).

BibTeX

@inproceedings{Eger:vor:der:Brueck:Mehler:2015,
  author    = {Eger, Steffen and
               vor der Brück, Tim and
               Mehler, Alexander},
  title     = {Lexicon-assisted tagging and lemmatization in {Latin}:
               A comparison of six taggers and two lemmatization methods},
  booktitle = {Proceedings of the 9th Workshop on Language Technology
               for Cultural Heritage, Social Sciences, and Humanities
               ({LaTeCH 2015})},
  year      = {2015},
  address   = {Beijing, China},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Lexicon-assisted_tagging.pdf}
}

Alexander Mehler, Andy Lücking, Sven Banisch, Philippe Blanchard and Barbara Frank-Job, eds. 2015. Towards a Theoretical Framework for Analyzing Complex Linguistic Networks. Understanding Complex Systems. Springer.

BibTeX

@book{Mehler:Luecking:Banisch:Blanchard:Frank-Job:2015,
  editor    = {Mehler, Alexander and Lücking, Andy and Banisch, Sven and Blanchard, Philippe
               and Frank-Job, Barbara},
  title     = {Towards a Theoretical Framework for Analyzing Complex Linguistic Networks},
  publisher = {Springer},
  series    = {Understanding Complex Systems},
  address   = {Berlin and New York},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/UCS_17-2-tmp.png},
  isbn      = {978-3-662-47237-8},
  year      = {2015}
}

Alexander Mehler and Rüdiger Gleim. 2015. Linguistic Networks – An Online Platform for Deriving Collocation Networks from Natural Language Texts. Towards a Theoretical Framework for Analyzing Complex Linguistic Networks.

BibTeX

@incollection{Mehler:Gleim:2015:a,
  author    = {Mehler, Alexander and Gleim, Rüdiger},
  title     = {Linguistic Networks -- An Online Platform for Deriving
               Collocation Networks from Natural Language Texts},
  editor    = {Mehler, Alexander and Lücking, Andy and Banisch, Sven
               and Blanchard, Philippe and Frank-Job, Barbara},
  booktitle = {Towards a Theoretical Framework for Analyzing Complex Linguistic Networks},
  series    = {Understanding Complex Systems},
  publisher = {Springer},
  year      = {2015}
}

Steffen Eger. 2015. Multiple Many-To-Many Sequence Alignment For Combining String-Valued Variables: A G2P Experiment. ACL.

BibTeX

@inproceedings{Eger:2015_ACL,
  author    = {Eger, Steffen},
  title     = {Multiple Many-To-Many Sequence Alignment For Combining String-Valued
               Variables: A {G2P} Experiment},
  booktitle = {{ACL}},
  publisher = {Association for Computational Linguistics},
  year      = {2015}
}

Steffen Eger. 2015. Designing and comparing G2P-type lemmatizers for a morphology-rich language. Fourth International Workshop on Systems and Frameworks for Computational Morphology.

BibTeX

@inproceedings{Eger:2015_SFCM,
  author    = {Eger, Steffen},
  title     = {Designing and comparing {G2P}-type lemmatizers for a morphology-rich language},
  booktitle = {Fourth International Workshop on Systems and
               Frameworks for Computational Morphology},
  year      = {2015}
}

Steffen Eger, Niko Schenk and Alexander Mehler. June, 2015. Towards Semantic Language Classification: Inducing and Clustering Semantic Association Networks from Europarl. Proceedings of the Fourth Joint Conference on Lexical and Computational Semantics, 127–136.

BibTeX

@inproceedings{Eger:Schenk:Mehler:2015,
  author    = {Eger, Steffen and Schenk, Niko and Mehler, Alexander},
  title     = {Towards Semantic Language Classification: Inducing and Clustering
               Semantic Association Networks from {Europarl}},
  booktitle = {Proceedings of the Fourth Joint Conference on Lexical and Computational
               Semantics},
  pages     = {127--136},
  publisher = {Association for Computational Linguistics},
  month     = jun,
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/starsem2015-corrected-version.pdf},
  url       = {http://www.aclweb.org/anthology/S15-1014},
  year      = {2015}
}

Steffen Eger. 2015. Identities for Partial Bell Polynomials Derived from Identities for Weighted Integer Compositions. Aequationes Mathematicae.

BibTeX

@article{Eger:2015b,
  author    = {Eger, Steffen},
  title     = {Identities for Partial {Bell} Polynomials Derived from Identities
               for Weighted Integer Compositions},
  journal   = {Aequationes Mathematicae},
  doi       = {10.1007/s00010-015-0338-2},
  year      = {2015}
}

Steffen Eger. 2015. Some Elementary Congruences for the Number of Weighted Integer Compositions. Journal of Integer Sequences (electronic only), 18(4).

BibTeX

@article{Eger:2015a,
  author    = {Eger, Steffen},
  title     = {Some Elementary Congruences for the Number of Weighted Integer Compositions},
  journal   = {Journal of Integer Sequences (electronic only)},
  volume    = {18},
  number    = {4},
  pdf       = {https://cs.uwaterloo.ca/journals/JIS/VOL18/Eger/eger11.pdf},
  publisher = {School of Computer Science, University of Waterloo,
               Waterloo, ON},
  year      = {2015}
}

Andy Lücking, Thies Pfeiffer and Hannes Rieser. 2015. Pointing and Reference Reconsidered. Journal of Pragmatics, 77:56–79.

BibTeX

@article{Luecking:Pfeiffer:Rieser:2015,
  author    = {Lücking, Andy and Pfeiffer, Thies and Rieser, Hannes},
  title     = {Pointing and Reference Reconsidered},
  journal   = {Journal of Pragmatics},
  volume    = {77},
  pages     = {56--79},
  abstract  = {Current semantic theory on indexical expressions claims that demonstratively
               used indexicals such as this lack a referent-determining meaning
               but instead rely on an accompanying demonstration act like a pointing
               gesture. While this view allows to set up a sound logic of demonstratives,
               the direct-referential role assigned to pointing gestures has
               never been scrutinized thoroughly in semantics or pragmatics.
               We investigate the semantics and pragmatics of co-verbal pointing
               from a foundational perspective combining experiments, statistical
               investigation, computer simulation and theoretical modeling techniques
               in a novel manner. We evaluate various referential hypotheses
               with a corpus of object identification games set up in experiments
               in which body movement tracking techniques have been extensively
               used to generate precise pointing measurements. Statistical investigation
               and computer simulations show that especially distal areas in
               the pointing domain falsify the semantic direct-referential hypotheses
               concerning pointing gestures. As an alternative, we propose that
               reference involving pointing rests on a default inference which
               we specify using the empirical data. These results raise numerous
               problems for classical semantics–pragmatics interfaces: we argue
               for pre-semantic pragmatics in order to account for inferential
               reference in addition to classical post-semantic Gricean pragmatics.},
  doi       = {10.1016/j.pragma.2014.12.013},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Luecking_Pfeiffer_Rieser_Pointing_and_Reference_Reconsiderd.pdf},
  website   = {http://www.sciencedirect.com/science/article/pii/S037821661500003X},
  year      = {2015}
}

Alexander Mehler, Tim vor der Brück, Rüdiger Gleim and Tim Geelhaar. 2015. Towards a Network Model of the Coreness of Texts: An Experiment in Classifying Latin Texts using the TTLab Latin Tagger. Text Mining: From Ontology Learning to Automated Text Processing Applications, 87–112.

BibTeX

@incollection{Mehler:Brueck:Gleim:Geelhaar:2015,
  author    = {Mehler, Alexander and vor der Brück, Tim and Gleim, Rüdiger and Geelhaar, Tim},
  title     = {Towards a Network Model of the Coreness of Texts: An Experiment
               in Classifying {Latin} Texts using the {TTLab Latin Tagger}},
  booktitle = {Text Mining: From Ontology Learning to Automated Text Processing Applications},
  publisher = {Springer},
  editor    = {Chris Biemann and Alexander Mehler},
  series    = {Theory and Applications of Natural Language Processing},
  pages     = {87--112},
  address   = {Berlin/New York},
  abstract  = {The analysis of longitudinal corpora of historical texts requires
               the integrated development of tools for automatically preprocessing
               these texts and for building representation models of their genre-
               and register-related dynamics. In this chapter we present such
               a joint endeavor that ranges from resource formation via preprocessing
               to network-based text representation and classification. We start
               with presenting the so-called TTLab Latin Tagger (TLT) that preprocesses
               texts of classical and medieval Latin. Its lexical resource in
               the form of the Frankfurt Latin Lexicon (FLL) is also briefly
               introduced. As a first test case for showing the expressiveness
               of these resources, we perform a tripartite classification task
               of authorship attribution, genre detection and a combination thereof.
               To this end, we introduce a novel text representation model that
               explores the core structure (the so-called coreness) of lexical
               network representations of texts. Our experiment shows the expressiveness
               of this representation format and mediately of our Latin preprocessor.},
  website   = {http://link.springer.com/chapter/10.1007/978-3-319-12655-5_5},
  year      = {2015}
}

Armin Hoenen. 2015. Das artifizielle Manuskriptkorpus TASCFE. Accepted in the Proceedings of the Jahrestagung der Digital Humanities im deutschsprachigen Raum.

BibTeX

@inproceedings{Hoenen:2015,
  author    = {Hoenen, Armin},
  title     = {Das artifizielle Manuskriptkorpus {TASCFE}},
  booktitle = {Accepted in the Proceedings of the Jahrestagung der Digital Humanities
               im deutschsprachigen Raum},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Hoenen_tascfeDH2015.pdf},
  year      = {2015}
}

Rüdiger Gleim and Alexander Mehler. 2015. TTLab Preprocessor – Eine generische Web-Anwendung für die Vorverarbeitung von Texten und deren Evaluation. Accepted in the Proceedings of the Jahrestagung der Digital Humanities im deutschsprachigen Raum.

BibTeX

@inproceedings{Gleim:Mehler:2015,
  author    = {Gleim, Rüdiger and Mehler, Alexander},
  title     = {{TTLab} Preprocessor -- Eine generische Web-Anwendung für die Vorverarbeitung
               von Texten und deren Evaluation},
  booktitle = {Accepted in the Proceedings of the Jahrestagung der Digital Humanities
               im deutschsprachigen Raum},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Gleim_Mehler_PrePro_DHGraz2015.pdf},
  year      = {2015}
}

Giuseppe Abrami, Alexander Mehler and Susanne Zeunert. 2015. Ontologiegestütze geisteswissenschaftliche Annotationen mit dem OWLnotator. Proceedings of the Jahrestagung der Digital Humanities im deutschsprachigen Raum.

BibTeX

@inproceedings{Abrami:Mehler:Zeunert:2015:a,
  author    = {Abrami, Giuseppe and Mehler, Alexander and Zeunert, Susanne},
  title     = {Ontologiegestütze geisteswissenschaftliche Annotationen mit dem {OWLnotator}},
  booktitle = {Proceedings of the Jahrestagung der Digital Humanities im deutschsprachigen Raum},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Abrami_Mehler_Zeunert_DHd_2015_abstract.pdf},
  year      = {2015}
}

Giuseppe Abrami, Alexander Mehler and Dietmar Pravida. 2015. Fusing Text and Image Data with the Help of the OWLnotator. Human Interface and the Management of Information. Information and Knowledge Design, 9172:261–272.

BibTeX

@incollection{Abrami:Mehler:Pravida:2015:b,
  author    = {Abrami, Giuseppe and Mehler, Alexander and Pravida, Dietmar},
  title     = {Fusing Text and Image Data with the Help of the {OWLnotator}},
  booktitle = {Human Interface and the Management of Information. Information
               and Knowledge Design},
  publisher = {Springer International Publishing},
  editor    = {Yamamoto, Sakae},
  volume    = {9172},
  series    = {Lecture Notes in Computer Science},
  pages     = {261--272},
  doi       = {10.1007/978-3-319-20612-7_25},
  isbn      = {978-3-319-20611-0},
  language  = {English},
  website   = {http://dx.doi.org/10.1007/978-3-319-20612-7_25},
  year      = {2015}
}

Armin Hoenen. 2015. Lachmannian Archetype Reconstruction for Ancient Manuscript Corpora. Proceedings of the 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL HLT). Citation: Trovato is published in 2014 not in 2009.

BibTeX

@inproceedings{Hoenen:2015a,
  author    = {Hoenen, Armin},
  title     = {{Lachmannian} Archetype Reconstruction for Ancient Manuscript Corpora},
  booktitle = {Proceedings of the 2015 Conference of the North American Chapter
               of the Association for Computational Linguistics: Human Language
               Technologies ({NAACL HLT})},
  note      = {Citation: Trovato is published in 2014 not in 2009.},
  abstract  = {Two goals are targeted by computer philology for ancient manuscript
               corpora: firstly, making an edition, that is roughly speaking
               one text version representing the whole corpus, which contains
               variety induced through copy errors and other processes and secondly,
               producing a stemma. A stemma is a graph-based visualization of
               the copy history with manuscripts as nodes and copy events as
               edges. Its root, the so-called archetype is the supposed original
               text or urtext from which all subsequent copies are made. Our
               main contribution is to present one of the first computational
               approaches to automatic archetype reconstruction and to introduce
               the first text-based evaluation for automatically produced archetypes.
               We compare a philologically generated archetype with one generated
               by bio-informatic software.},
  website   = {http://www.aclweb.org/anthology/N15-1127},
  year      = {2015}
}

Armin Hoenen. 2015. Simulating Misreading. Proceedings of the 20th International Conference on Applications of Natural Language to Information Systems (NLDB).

BibTeX

@inproceedings{Hoenen:2015b,
  author    = {Hoenen, Armin},
  title     = {Simulating Misreading},
  booktitle = {Proceedings of the 20th International Conference on Applications
               of Natural Language to Information Systems ({NLDB})},
  abstract  = {Physical misreading (as opposed to interpretational misreading)
               is an unnoticed substitution in silent reading. Especially for
               legally important documents or instruction manuals, this can lead
               to serious consequences. We present a prototype of an automatic
               highlighter targeting words which can most easily be misread in
               a given text using a dynamic orthographic neighbour concept. We
               propose measures of fit of a misread token based on Natural Language
               Processing and detect a list of short most easily misread tokens
               in the English language. We design a highlighting scheme for avoidance
               of misreading.},
  website   = {http://link.springer.com/chapter/10.1007/978-3-319-19581-0_34},
  year      = {2015}
}

Giuseppe Abrami, Michael Freiberg and Paul Warner. 2015. Managing and Annotating Historical Multimodal Corpora with the eHumanities Desktop – An outline of the current state of the LOEWE project Illustrations of Goethe's Faust. Historical Corpora, 353–363.

BibTeX

@inproceedings{Abrami:Freiberg:Warner:2015,
  author    = {Abrami, Giuseppe and Freiberg, Michael and Warner, Paul},
  title     = {Managing and Annotating Historical Multimodal Corpora with the
               {eHumanities Desktop} -- An outline of the current state of the {LOEWE}
               project {Illustrations of Goethe's Faust}},
  booktitle = {Historical Corpora},
  pages     = {353--363},
  abstract  = {Text corpora are structured sets of text segments that can be
               annotated or interrelated. Expanding on this, we can define a
               database of images as an iconographic multimodal corpus with annotated
               images and the relations between images as well as between images
               and texts. The Goethe-Museum in Frankfurt holds a significant
               collection of art work and texts relating to Goethe’s Faust from
               the early 19th century until the present. In this project we create
               a database containing digitized items from this collection, and
               extend a tool, the ImageDB in the eHumanities Desktop, to annotate
               and provide relations between resources. This article gives an
               overview of the project and provides some technical details. Furthermore
               we show newly implemented features, explain the challenge of creating
               an ontology on multimodal corpora and give a forecast for future
               work.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/AbramiFreibergWarner_HC_2012.pdf},
  website   = {http://www.narr-shop.de/historical-corpora.html},
  year      = {2015}
}

2014

Armin Hoenen. 2014. Stemmatology, an interdisciplinary endeavour. Book of Abstracts zum DHd Workshop Informatik und die Digital Humanities.

BibTeX

@incollection{Hoenen:2014plz,
  author    = {Hoenen, Armin},
  title     = {Stemmatology, an interdisciplinary endeavour},
  booktitle = {Book of Abstracts zum {DHd} Workshop Informatik und die Digital Humanities},
  publisher = {DHd},
  url       = {http://dhd-wp.hab.de/files/book_of_abstracts.pdf},
  year      = {2014}
}

Xinying Chen. 2014. Language as a whole – A new framework for linguistic knowledge integration: Comment on "Approaching human language with complex networks" by Cong and Liu. Physics of Life Reviews, 11(4):628–629.

BibTeX

@article{Chen:2014:a,
  author    = {Chen, Xinying},
  title     = {Language as a whole -- A new framework for linguistic knowledge
               integration: Comment on "Approaching human language with complex
               networks" by {Cong} and {Liu}},
  journal   = {Physics of Life Reviews},
  volume    = {11},
  number    = {4},
  pages     = {628--629},
  doi       = {10.1016/j.plrev.2014.07.011},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Language-as-a-whole-Chen.pdf},
  url       = {http://www.sciencedirect.com/science/article/pii/S1571064514001249},
  year      = {2014}
}

Tao Gong, Yau Wai Lam, Xinying Chen and Menghan Zhang. 2014. Review: Evolutionary Linguistics in the Past Two Decades – EVOLANG10: the 10th International Conference on Language Evolution. Journal of Chinese Linguistics, 42(2):499–530.

BibTeX

@article{Gong:Lam:Chen:Zhang:2014,
  author    = {Gong, Tao and Lam, Yau Wai and Chen, Xinying and Zhang, Menghan},
  title     = {Review: Evolutionary Linguistics in the Past Two Decades -- {EVOLANG10}:
               the 10th International Conference on Language Evolution},
  journal   = {Journal of Chinese Linguistics},
  volume    = {42},
  number    = {2},
  pages     = {499--530},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/JCL-EvolangReview.pdf},
  year      = {2014}
}

Giuseppe Abrami, Alexander Mehler, Dietmar Pravida and Susanne Zeunert. December, 2014. Rubrik: Neues aus dem Netz. Kunstchronik, 12:623.

BibTeX

@article{Abrami:Mehler:Pravida:Zeunert:2014,
  author    = {Abrami, Giuseppe and Mehler, Alexander and Pravida, Dietmar and Zeunert, Susanne},
  title     = {Rubrik: Neues aus dem Netz},
  journal   = {Kunstchronik},
  volume    = {12},
  pages     = {623},
  address   = {München},
  month     = dec,
  publisher = {Zentralinstitut für Kunstgeschichte},
  website   = {http://www.zikg.eu/publikationen/laufende-publikationen/kunstchronik},
  year      = {2014}
}

Steffen Eger. 2014. A proof of the Mann-Shanks primality criterion conjecture for extended binomial coefficients. Integers: The Electronic Journal of Combinatorial Number Theory, 14.

BibTeX

@article{Eger:2014:a,
  author    = {Eger, Steffen},
  title     = {A proof of the {Mann-Shanks} primality criterion conjecture for
               extended binomial coefficients},
  journal   = {Integers: The Electronic Journal of Combinatorial
               Number Theory},
  volume    = {14},
  abstract  = {We show that the Mann-Shanks primality criterion holds for weighted
               extended binomial coefficients (which count the number of weighted
               integer compositions), not only for the ordinary binomial coefficients.},
  pdf       = {http://www.emis.de/journals/INTEGERS/papers/o60/o60.pdf},
  website   = {http://www.emis.de/journals/INTEGERS/vol14.html},
  year      = {2014}
}

Steffen Eger. 2014. Stirling's approximation for central extended binomial coefficients. The American Mathematical Monthly, 121(4):344–349.

BibTeX

@article{Eger:2014:b,
  author    = {Eger, Steffen},
  title     = {{Stirling's} approximation for central extended binomial coefficients},
  journal   = {The American Mathematical Monthly},
  volume    = {121},
  number    = {4},
  pages     = {344--349},
  abstract  = {We derive asymptotic formulas for central extended binomial coefficients,
               which are generalizations of binomial coefficients, using the
               distribution of the sum of independent discrete uniform random
               variables with the Central Limit Theorem and a local limit variant.},
  website   = {http://www.jstor.org/stable/10.4169/amer.math.monthly.121.04.344},
  year      = {2014}
}

Alexander Mehler. 2014. On the Expressiveness, Validity and Reproducibility of Models of Language Evolution. Comment on 'Modelling language evolution: Examples and predictions' by Tao Gong, Shuai Lan, and Menghan Zhang. Physics of Life Reviews.

BibTeX

@article{Mehler:2014,
  author    = {Mehler, Alexander},
  title     = {On the Expressiveness, Validity and Reproducibility of Models
               of Language Evolution. Comment on 'Modelling language evolution:
               Examples and predictions' by {Tao Gong}, {Shuai Lan}, and {Menghan
               Zhang}},
  journal   = {Physics of Life Reviews},
  pdf       = {http://www.sciencedirect.com/science/article/pii/S1571064514000529/pdfft?md5=6a2cbbfc083d7bc3adfd26d431cc55d8&pid=1-s2.0-S1571064514000529-main.pdf},
  website   = {https://www.researchgate.net/publication/261290946_On_the_expressiveness_validity_and_reproducibility_of_models_of_language_evolution_Comment_on_Modelling_language_evolution_Examples_and_predictions_by_Tao_Gong_Shuai_Lan_and_Menghan_Zhang},
  year      = {2014}
}

Chris Biemann, Gregory R. Crane, Christiane D. Fellbaum and Alexander Mehler. 2014. Computational Humanities - bridging the gap between Computer Science and Digital Humanities (Dagstuhl Seminar 14301). Dagstuhl Reports, 4(7):80–111.

BibTeX

@article{Biemann:Crane:Fellbaum:Mehler:2014,
  author    = {Chris Biemann and Gregory R. Crane and Christiane D. Fellbaum
               and Alexander Mehler},
  title     = {Computational Humanities -- bridging the gap between Computer Science
               and Digital Humanities ({Dagstuhl} Seminar 14301)},
  journal   = {Dagstuhl Reports},
  volume    = {4},
  number    = {7},
  pages     = {80--111},
  abstract  = {Research in the field of Digital Humanities, also known as Humanities
               Computing, has seen a steady increase over the past years. Situated
               at the intersection of computing science and the humanities, present
               efforts focus on making resources such as texts, images, musical
               pieces and other semiotic artifacts digitally available, searchable
               and analysable. To this end, computational tools enabling textual
               search, visual analytics, data mining, statistics and natural
               language processing are harnessed to support the humanities researcher.
               The processing of large data sets with appropriate software opens
               up novel and fruitful approaches to questions in the traditional
               humanities. This report summarizes the Dagstuhl seminar 14301
               on “Computational Humanities – bridging the gap between Computer
               Science and Digital Humanities”},
  issn      = {2192-5283},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/dagrep_v004_i007_p080_s14301.pdf},
  publisher = {Schloss Dagstuhl--Leibniz-Zentrum für Informatik},
  year      = {2014}
}

Md. Zahurul Islam, Md. Rashedur Rahman and Alexander Mehler. 2014. Readability Classification of Bangla Texts. 15th International Conference on Intelligent Text Processing and Computational Linguistics (cicLing), Kathmandu, Nepal.

BibTeX

@inproceedings{Islam:Rahman:Mehler:2014,
  author    = {Islam, Md. Zahurul and Rahman, Md. Rashedur and Mehler, Alexander},
  title     = {Readability Classification of {Bangla} Texts},
  booktitle = {15th International Conference on Intelligent Text Processing and
               Computational Linguistics ({CICLing}), Kathmandu, Nepal},
  abstract  = {Readability classification is an important application of Natural
               Language Processing. It aims at judging the quality of documents
               and to assist writers to identify possible problems. This paper
               presents a readability classifier for Bangla textbooks using information-theoretic
               and lexical features. All together 18 features are explored to
               achieve an F-score of 86.46},
  year      = {2014}
}

Alexander Mehler, Tim vor der Brück and Andy Lücking. 2014. Comparing Hand Gesture Vocabularies for HCI. Proceedings of HCI International 2014, 22 - 27 June 2014, Heraklion, Greece.

BibTeX

@incollection{Mehler:vor:der:Brueck:Luecking:2014,
  author    = {Mehler, Alexander and vor der Brück, Tim and Lücking, Andy},
  title     = {Comparing Hand Gesture Vocabularies for {HCI}},
  booktitle = {Proceedings of {HCI} International 2014, 22--27 June 2014, Heraklion, Greece},
  publisher = {Springer},
  address   = {Berlin/New York},
  abstract  = {HCI systems are often equipped with gestural interfaces drawing
               on a predefined set of admitted gestures. We provide an assessment
               of the fitness of such gesture vocabularies in terms of their
               learnability and naturalness. This is done by example of rivaling
               gesture vocabularies of the museum information system WikiNect.
               In this way, we do not only provide a procedure for evaluating
               gesture vocabularies, but additionally contribute to design criteria
               to be followed by the gestures.},
  keywords  = {wikinect},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Comparing-Gesture-Vocabularies-1_1.pdf},
  website   = {{http://link.springer.com/chapter/10.1007/978-3-319-07230-2_8#page-1}},
  year      = {2014}
}

Alexander Mehler, Andy Lücking and Giuseppe Abrami. 2014. WikiNect: Image Schemata as a Basis of Gestural Writing for Kinetic Museum Wikis. Universal Access in the Information Society, 1–17.

BibTeX

@article{Mehler:Luecking:Abrami:2014,
  author    = {Mehler, Alexander and Lücking, Andy and Abrami, Giuseppe},
  title     = {{WikiNect}: Image Schemata as a Basis of Gestural Writing for
               Kinetic Museum Wikis},
  journal   = {Universal Access in the Information Society},
  pages     = {1--17},
  abstract  = {This paper provides a theoretical assessment of gestures in the
               context of authoring image-related hypertexts by example of the
               museum information system WikiNect. To this end, a first implementation
               of gestural writing based on image schemata is provided (Lakoff
               in Women, fire, and dangerous things: what categories reveal about
               the mind. University of Chicago Press, Chicago, 1987). Gestural
               writing is defined as a sort of coding in which propositions are
               only expressed by means of gestures. In this respect, it is shown
               that image schemata allow for bridging between natural language
               predicates and gestural manifestations. Further, it is demonstrated
               that gestural writing primarily focuses on the perceptual level
               of image descriptions (Hollink et al. in Int J Hum Comput Stud
               61(5):601–626, 2004). By exploring the metaphorical potential
               of image schemata, it is finally illustrated how to extend the
               expressiveness of gestural writing in order to reach the conceptual
               level of image descriptions. In this context, the paper paves
               the way for implementing museum information systems like WikiNect
               as systems of kinetic hypertext authoring based on full-fledged
               gestural writing.},
  doi       = {10.1007/s10209-014-0386-8},
  issn      = {1615-5289},
  keywords  = {wikinect},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/art_10.1007_s10209-014-0386-8.pdf},
  website   = {http://dx.doi.org/10.1007/s10209-014-0386-8},
  year      = {2014}
}

Tim vor der Brück, Alexander Mehler and Md. Zahurul Islam. 2014. ColLex.EN: Automatically Generating and Evaluating a Full-form Lexicon for English. Proceedings of LREC 2014.

BibTeX

@inproceedings{vor:der:Brueck:Mehler:Islam:2014,
  author    = {vor der Brück, Tim and Mehler, Alexander and Islam, Md. Zahurul},
  title     = {{ColLex.EN}: Automatically Generating and Evaluating a Full-form
               Lexicon for {English}},
  booktitle = {Proceedings of LREC 2014},
  address   = {Reykjavik, Iceland},
  abstract  = {Currently, a large number of different lexica is available for
               English. However, substantial and freely available fullform lexica
               with a high number of named entities are rather rare even in the
               case of this lingua franca. Existing lexica are often limited
               in several respects as explained in Section 2. What is missing
               so far is a freely available substantial machine-readable lexical
               resource of English that contains a high number of word forms
               and a large collection of named entities. In this paper, we describe
               a procedure to generate such a resource by example of English.
               This lexicon, henceforth called ColLex.EN (for Collecting Lexica
               for English ), will be made freely available to the public 1.
               In this paper, we describe how ColLex.EN was collected from existing
               lexical resources and specify the statistical procedures that
               we developed to extend and adjust it. No manual modifications
               were done on the generated word forms and lemmas. Our fully automatic
               procedure has the advantage that whenever new versions of the
               source lexica are available, a new version of ColLex.EN can be
               automatically generated with low effort.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/vdbrueck_mehler_islam_collex_lrec.pdf},
  website   = {http://aclanthology.info/papers/collex-en-automatically-generating-and-evaluating-a-full-form-lexicon-for-english},
  year      = {2014}
}

Armin Hoenen. 2014. Simulation of Scribal Letter Substitution. Analysis of Ancient and Medieval Texts and Manuscripts: Digital Approaches.

BibTeX

@inproceedings{Hoenen:2014,
  author    = {Hoenen, Armin},
  title     = {Simulation of Scribal Letter Substitution},
  booktitle = {Analysis of Ancient and Medieval Texts and Manuscripts: Digital Approaches},
  editor    = {Andrews, T. L. and Macé, C.},
  owner     = {hoenen},
  website   = {http://www.brepols.net/Pages/ShowProduct.aspx?prod_id=IS-9782503552682-1},
  year      = {2014}
}

2013

Ineta Sejane and Steffen Eger. 2013. Semantic typologies by means of network analysis of bilingual dictionaries. Approaches to Measuring Linguistic Differences, 447–474.

BibTeX

@incollection{Sejane:Eger:2013,
  author    = {Sejane, Ineta and Eger, Steffen},
  title     = {Semantic typologies by means of network analysis of bilingual dictionaries},
  booktitle = {Approaches to Measuring Linguistic Differences},
  publisher = {De Gruyter},
  editor    = {Borin, Lars and Saxena, Anju},
  pages     = {447--474},
  bibtexkey = {eger-sejane_network-typologies2013},
  doi       = {10.1515/9783110305258.447},
  inlg      = {English [eng]},
  src       = {degruyter},
  srctrickle = {degruyter#/books/9783110305258/9783110305258.447/9783110305258.447.xml},
  url       = {http://www.degruyter.com/view/books/9783110305258/9783110305258.447/9783110305258.447.xml},
  year      = {2013}
}

Steffen Eger. 2013. Sequence Segmentation by Enumeration: An Exploration. Prague Bull. Math. Linguistics, 100:113–131.

BibTeX

@article{Eger:2013:a,
  author    = {Eger, Steffen},
  title     = {Sequence Segmentation by Enumeration: An Exploration},
  journal   = {Prague Bull. Math. Linguistics},
  volume    = {100},
  pages     = {113--131},
  abstract  = {We investigate exhaustive enumeration and subsequent language
               model evaluation (E\&E approach) as an alternative to solving
               the sequence segmentation problem. We show that, under certain
               conditions (on string lengths and regarding a possibility to accurately
               estimate the number of segments), which are satisfied for important
               NLP applications, such as phonological segmentation, syllabification,
               and morphological segmentation, the E\&E approach is feasible
               and promises superior results than the standard sequence labeling
               approach to sequence segmentation.},
  pdf       = {http://ufal.mff.cuni.cz/pbml/100/art-eger.pdf},
  year      = {2013}
}

Steffen Eger. 2013. A Contribution to the Theory of Word Length Distribution Based on a Stochastic Word Length Distribution Model. Journal of Quantitative Linguistics, 20(3):252–265.

BibTeX

@article{Eger:2013:b,
  author    = {Eger, Steffen},
  title     = {A Contribution to the Theory of Word Length Distribution Based
               on a Stochastic Word Length Distribution Model},
  journal   = {Journal of Quantitative Linguistics},
  volume    = {20},
  number    = {3},
  pages     = {252--265},
  abstract  = {We derive a stochastic word length distribution model based on
               the concept of compound distributions and show its relationships
               with and implications for Wimmer et al.’s (1994) synergetic word
               length distribution model.},
  year      = {2013}
}

Steffen Eger. 2013. Sequence alignment with arbitrary steps and further generalizations, with applications to alignments in linguistics. Information Sciences, 237:287–304.

BibTeX

@article{Eger:2013:c,
  author    = {Eger, Steffen},
  title     = {Sequence alignment with arbitrary steps and further generalizations,
               with applications to alignments in linguistics},
  journal   = {Information Sciences},
  volume    = {237},
  pages     = {287--304},
  abstract  = {We provide simple generalizations of the classical Needleman–Wunsch
               algorithm for aligning two sequences. First, we let both sequences
               be defined over arbitrary, potentially different alphabets. Secondly,
               we consider similarity functions between elements of both sequences
               with ranges in a semiring. Thirdly, instead of considering only
               ‘match’, ‘mismatch’ and ‘skip’ operations, we allow arbitrary
               non-negative alignment ‘steps’ S. Next, we present novel combinatorial
               formulas for the number of monotone alignments between two sequences
               for selected steps S. Finally, we illustrate sample applications
               in natural language processing that require larger steps than
               available in the original Needleman–Wunsch sequence alignment
               procedure such that our generalizations can be fruitfully adopted.},
  website   = {http://www.sciencedirect.com/science/article/pii/S0020025513001485},
  year      = {2013}
}

Steffen Eger. 2013. Restricted weighted integer compositions and extended binomial coefficients. Journal of Integer Sequences (electronic only), 16(1).

BibTeX

@article{Eger:2013:d,
  author    = {Eger, Steffen},
  title     = {Restricted weighted integer compositions and extended binomial coefficients},
  journal   = {Journal of Integer Sequences (electronic only)},
  volume    = {16},
  number    = {1},
  abstract  = {We prove a simple relationship between extended binomial coefficients
               — natural extensions of the well-known binomial coefficients —
               and weighted restricted integer compositions. Moreover, we give a very useful
               interpretation of extended binomial coefficients as representing
               distributions of sums of independent discrete random variables.
               We apply our results, e.g., to determine the distribution of the
               sum of k logarithmically distributed random variables, and to
               determining the distribution, specifying all moments, of the random
               variable whose values are part-products of random restricted integer
               compositions. Based on our findings and using the central limit
               theorem, we also give generalized Stirling formulae for central
               extended binomial coefficients. We enlarge the list of known properties
               of extended binomial coefficients.},
  issn      = {1530-7638},
  pdf       = {https://cs.uwaterloo.ca/journals/JIS/VOL16/Eger/eger6.pdf},
  publisher = {School of Computer Science, University of Waterloo, Waterloo, ON},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.397.3745},
  year      = {2013}
}

Alexander Mehler, Roman Schneider and Angelika Storrer. 2013. Webkorpora in Computerlinguistik und Sprachforschung. Ed. by Roman Schneider, Angelika Storrer and Alexander Mehler. Journal for Language Technology and Computational Linguistics (JLCL), 28(2). JLCL.

BibTeX

@book{Schneider:Storrer:Mehler:2013,
  title     = {Webkorpora in Computerlinguistik und Sprachforschung},
  author    = {Mehler, Alexander and Schneider, Roman and Storrer, Angelika},
  editor    = {Schneider, Roman and Storrer, Angelika and Mehler, Alexander},
  series    = {Journal for Language Technology and Computational Linguistics (JLCL)},
  volume    = {28},
  number    = {2},
  publisher = {JLCL},
  year      = {2013},
  issn      = {2190-6858},
  pagetotal = {107},
  pdf       = {http://www.jlcl.org/2013_Heft2/H2013-2.pdf},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/Webkorpora-300-20.png}
}

Alexander Mehler, Andy Lücking, Tim vor der Brück and Giuseppe Abrami. November, 2013. WikiNect - A Kinetic Artwork Wiki for Exhibition Visitors.

BibTeX

@misc{Mehler:Luecking:vor:der:Brueck:2013:a,
  title     = {WikiNect - A Kinetic Artwork Wiki for Exhibition Visitors},
  author    = {Mehler, Alexander and
               Lücking, Andy and
               vor der Brück, Tim and
               Abrami, Giuseppe},
  howpublished = {Poster Presentation at the Scientific Computing and Cultural Heritage 2013 Conference, Heidelberg},
  year      = {2013},
  month     = {11},
  keywords  = {wikinect},
  url       = {http://scch2013.wordpress.com/},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/SCCHPoster2013.pdf}
}

Andy Lücking. May, 2013. Theoretische Bausteine für einen semiotischen Ansatz zum Einsatz von Gestik in der Aphasietherapie.

BibTeX

@misc{Luecking:2013:c,
  title     = {Theoretische Bausteine für einen semiotischen Ansatz zum Einsatz von Gestik in der Aphasietherapie},
  author    = {Lücking, Andy},
  year      = {2013},
  month     = {05},
  howpublished = {Talk at the BKL workshop 2013, Bochum},
  url       = {http://www.bkl-ev.de/bkl_workshop/archiv/workshop13_programm.php}
}

Andy Lücking. October, 2013. Eclectic Semantics for Non-Verbal Signs.

BibTeX

@misc{Luecking:2013:d,
  title     = {Eclectic Semantics for Non-Verbal Signs},
  author    = {Lücking, Andy},
  year      = {2013},
  month     = {10},
  howpublished = {Talk at the Conference on Investigating semantics: Empirical and philosophical approaches, Bochum},
  url       = {http://www.ruhr-uni-bochum.de/phil-lang/investigating/index.html}
}

Andy Lücking. December, 2013. Multimodal Propositions? From Semiotic to Semantic Considerations in the Case of Gestural Deictics. Poster Abstracts of the Proceedings of the 17th Workshop on the Semantics and Pragmatics of Dialogue, 221–223.

BibTeX

@inproceedings{Luecking:2013:e,
  author    = {Lücking, Andy},
  title     = {Multimodal Propositions? From Semiotic to Semantic Considerations
               in the Case of Gestural Deictics},
  booktitle = {Poster Abstracts of the Proceedings of the 17th Workshop on the
               Semantics and Pragmatics of Dialogue},
  editor    = {Fernandez, Raquel and Isard, Amy},
  series    = {SemDial 2013},
  pages     = {221--223},
  address   = {Amsterdam},
  month     = {12},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/dialdam2013.pdf},
  year      = {2013}
}

Md. Zahurul Islam and Armin Hoenen. 2013. Source and Translation Classification using Most Frequent Words. Proceedings of the 6th International Joint Conference on Natural Language Processing (IJCNLP).

BibTeX

@inproceedings{Islam:Hoenen:2013,
  author    = {Islam, Md. Zahurul and Hoenen, Armin},
  title     = {Source and Translation Classification using Most Frequent Words},
  booktitle = {Proceedings of the 6th International Joint Conference on Natural
               Language Processing (IJCNLP)},
  abstract  = {Recently, translation scholars have made some general claims about
               translation properties. Some of these are source language independent
               while others are not. Koppel and Ordan (2011) performed empirical
               studies to validate both types of properties using English source
               texts and other texts translated into English. Obviously, corpora
               of this sort, which focus on a single language, are not adequate
               for claiming universality of translation properties. In this
               paper, we are validating both types of translation properties
               using original and translated texts from six European languages.},
  pdf       = {http://www.aclweb.org/anthology/I/I13/I13-1185.pdf},
  website   = {http://aclanthology.info/papers/source-and-translation-classification-using-most-frequent-words},
  year      = {2013}
}

Andy Lücking and Alexander Mehler. 2013. On Three Notions of Grounding of Artificial Dialog Companions. Science, Technology & Innovation Studies, 10(1):31–36.

BibTeX

@article{Luecking:Mehler:2013:a,
  author    = {Lücking, Andy and Mehler, Alexander},
  title     = {On Three Notions of Grounding of Artificial Dialog Companions},
  journal   = {Science, Technology \& Innovation Studies},
  volume    = {10},
  number    = {1},
  pages     = {31--36},
  abstract  = {We provide a new, theoretically motivated evaluation grid for
               assessing the conversational achievements of Artificial Dialog
               Companions (ADCs). The grid is spanned along three grounding problems.
               Firstly, it is argued that symbol grounding in general has to
               be intrinsic. Current approaches in this context, however, are
               limited to a certain kind of expression that can be grounded in
               this way. Secondly, we identify three requirements for conversational
               grounding, the process leading to mutual understanding. Finally,
               we sketch a test case for symbol grounding in the form of the
               philosophical grounding problem that involves the use of modal
               language. Together, the three grounding problems provide a grid
               that allows us to assess ADCs’ dialogical performances and to
               pinpoint future developments on these grounds.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/STI-final-badge.pdf},
  website   = {http://www.sti-studies.de/ojs/index.php/sti/article/view/143},
  year      = {2013}
}

Barbara Frank-Job, Alexander Mehler and Tilmann Sutter, eds. 2013. Die Dynamik sozialer und sprachlicher Netzwerke: Konzepte, Methoden und empirische Untersuchungen an Beispielen des WWW. Springer VS.

BibTeX

@book{FrankJob:Mehler:Sutter:2013,
  title     = {Die Dynamik sozialer und sprachlicher Netzwerke: Konzepte, Methoden
               und empirische Untersuchungen an Beispielen des WWW},
  editor    = {Frank-Job, Barbara and Mehler, Alexander and Sutter, Tilmann},
  publisher = {Springer VS},
  address   = {Wiesbaden},
  year      = {2013},
  pagetotal = {240},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/DieDynamikSozialerUndSprachlicherNetzwerke.jpg},
  abstract  = {In diesem Band pr{\"a}sentieren Medien- und Informationswissenschaftler,
               Netzwerkforscher aus Informatik, Texttechnologie und Physik, Soziologen
               und Linguisten interdisziplin{\"a}r Aspekte der Erforschung komplexer
               Mehrebenen-Netzwerke. Im Zentrum ihres Interesses stehen Untersuchungen
               zum Zusammenhang zwischen sozialen und sprachlichen Netzwerken
               und ihrer Dynamiken, aufgezeigt an empirischen Beispielen aus
               dem Bereich des Web 2.0, aber auch an historischen Dokumentenkorpora
               sowie an Rezeptions-Netzwerken aus Kunst- und Literaturwissenschaft.}
}

Andy Lücking. 2013. Interfacing Speech and Co-Verbal Gesture: Exemplification. Proceedings of the 35th Annual Conference of the German Linguistic Society, 284–286.

BibTeX

@inproceedings{Luecking:2013:b,
  author    = {Lücking, Andy},
  title     = {Interfacing Speech and Co-Verbal Gesture: Exemplification},
  booktitle = {Proceedings of the 35th Annual Conference of the German Linguistic Society},
  series    = {DGfS 2013},
  pages     = {284--286},
  address   = {Potsdam, Germany},
  year      = {2013}
}

Andy Lücking. 2013. Ikonische Gesten. Grundzüge einer linguistischen Theorie. De Gruyter. Zugl. Diss. Univ. Bielefeld (2011).

BibTeX

@book{Luecking:2013,
  title     = {Ikonische Gesten. Grundzüge einer linguistischen Theorie},
  author    = {Lücking, Andy},
  address   = {Berlin and Boston},
  publisher = {De Gruyter},
  year      = {2013},
  note      = {Zugl. Diss. Univ. Bielefeld (2011)},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/ikonischeGesten.jpg},
  abstract  = {Nicht-verbale Zeichen, insbesondere sprachbegleitende Gesten,
               spielen eine herausragende Rolle in der menschlichen Kommunikation.
               Um eine Analyse von Gestik innerhalb derjenigen Disziplinen, die
               sich mit der Erforschung und Modellierung von Dialogen besch{\"a}ftigen,
               zu ermöglichen, bedarf es einer entsprechenden linguistischen
               Rahmentheorie. „Ikonische Gesten“ bietet einen ersten zeichen-
               und wahrnehmungstheoretisch motivierten Rahmen an, in dem eine
               grammatische Analyse der Integration von Sprache und Gestik möglich
               ist. Ausgehend von einem Abriss semiotischer Zug{\"a}nge zu ikonischen
               Zeichen wird der vorherrschende {\"A}hnlichkeitsansatz unter Rückgriff
               auf Wahrnehmungstheorien zugunsten eines Exemplifikationsansatzes
               verworfen. Exemplifikation wird im Rahmen einer unifikationsbasierten
               Grammatik umgesetzt. Dort werden u.a. multimodale Wohlgeformtheit,
               Synchronie und multimodale Subkategorisierung als neue Gegenst{\"a}nde
               linguistischer Forschung eingeführt und im Rahmen einer integrativen
               Analyse von Sprache und Gestik modelliert.}
}

Md. Zahurul Islam and Alexander Mehler. 2013. Automatic Readability Classification of Crowd-Sourced Data based on Linguistic and Information-Theoretic Features. 14th International Conference on Intelligent Text Processing and Computational Linguistics.

BibTeX

@inproceedings{Islam:Mehler:2013:a,
  title     = {Automatic Readability Classification of Crowd-Sourced Data based
               on Linguistic and Information-Theoretic Features},
  author    = {Islam, Md. Zahurul and
               Mehler, Alexander},
  booktitle = {14th International Conference on Intelligent Text Processing and
               Computational Linguistics},
  year      = {2013},
  pdf       = {http://www.cys.cic.ipn.mx/ojs/index.php/CyS/article/download/1516/1497},
  website   = {http://www.redalyc.org/articulo.oa?id=61527437002},
  owner     = {zahurul},
  timestamp = {2013.01.22},
  abstract  = {This paper presents a classifier of text readability based on
               information-theoretic features. The classifier was developed based
               on a linguistic approach to readability that explores lexical,
               syntactic and semantic features. For this evaluation we extracted
               a corpus of 645 articles from Wikipedia together with their quality
               judgments. We show that information-theoretic features perform
               as well as their linguistic counterparts even if we explore several
               linguistic levels at once.}
}

Md. Zahurul Islam and Rashedur Rahman. 2013. English to Bangla Name Transliteration System (Abstract). The 23rd Meeting of Computational Linguistics in the Netherlands (CLIN 2013).

BibTeX

@inproceedings{Islam:Rahman:2013,
  author    = {Islam, Md. Zahurul and Rahman, Rashedur},
  title     = {English to Bangla Name Transliteration System (Abstract)},
  booktitle = {The 23rd Meeting of Computational Linguistics in the Netherlands (CLIN 2013)},
  abstract  = {Machine translation systems always struggle transliterating names
               and unknown words during the translation process. It becomes more
               problematic when the source and the target language use different
               scripts for writing. To handle this problem, transliteration systems
               are becoming popular as additional modules of the MT systems.
               In this abstract, we are presenting an English to Bangla name
               transliteration system that outperforms Google’s transliteration
               system. The transliteration system is the same as the phrase based
               statistical machine translation system, but it works on character
               level rather than on phrase level. The performance of a statistical
               system is directly correlated with the size of the training corpus.
               In this work, 2200 names are extracted from the Wikipedia cross
               lingual links and from Geonames. Also 3694 names are manually
               transliterated and added to the data. 4716 names are used for
               training, 590 for tuning and 588 names are used for testing. If
               we consider only the candidate transliterations, the system gives
               64.28\% accuracy. The performance increases to more than 90\%,
               if we consider only the top 5 transliterations. To compare with
               the Google’s English to Bangla transliteration system, a list
               of 100 names are randomly selected from the test data and translated
               by both systems. Our system gives 63\% accuracy where the Google’s
               transliteration system does not transliterate a single name correctly.
               We have found significant improvement in terms of BLEU and TER
               score when we add the transliteration module with an English to
               Bangla machine transliteration system.},
  owner     = {zahurul},
  timestamp = {2013.01.22},
  website   = {https://www.academia.edu/3955036/English_to_Bangla_Name_Transliteration_System},
  year      = {2013}
}

Alexander Mehler, Christian Stegbauer and Rüdiger Gleim. 2013. Zur Struktur und Dynamik der kollaborativen Plagiatsdokumentation am Beispiel des GuttenPlag Wiki: eine Vorstudie. Die Dynamik sozialer und sprachlicher Netzwerke: Konzepte, Methoden und empirische Untersuchungen an Beispielen des WWW.

BibTeX

@incollection{Mehler:Stegbauer:Gleim:2013,
  author    = {Mehler, Alexander and Stegbauer, Christian and Gleim, Rüdiger},
  title     = {Zur Struktur und Dynamik der kollaborativen Plagiatsdokumentation
               am Beispiel des GuttenPlag Wiki: eine Vorstudie},
  booktitle = {Die Dynamik sozialer und sprachlicher Netzwerke: Konzepte, Methoden
               und empirische Untersuchungen an Beispielen des WWW},
  publisher = {Springer VS},
  editor    = {Frank-Job, Barbara and Mehler, Alexander and Sutter, Tilmann},
  address   = {Wiesbaden},
  year      = {2013}
}

Andy Lücking, Kirsten Bergmann, Florian Hahn, Stefan Kopp and Hannes Rieser. 2013. Data-based Analysis of Speech and Gesture: The Bielefeld Speech and Gesture Alignment Corpus (SaGA) and its Applications. Journal of Multimodal User Interfaces, 7(1-2):5–18.

BibTeX

@article{Luecking:Bergmann:Hahn:Kopp:Rieser:2012,
  author    = {Lücking, Andy and Bergmann, Kirsten and Hahn, Florian and Kopp, Stefan
               and Rieser, Hannes},
  title     = {Data-based Analysis of Speech and Gesture: The Bielefeld Speech
               and Gesture Alignment Corpus (SaGA) and its Applications},
  journal   = {Journal of Multimodal User Interfaces},
  volume    = {7},
  number    = {1-2},
  pages     = {5--18},
  abstract  = {Communicating face-to-face, interlocutors frequently produce multimodal
               meaning packages consisting of speech and accompanying gestures.
               We discuss a systematically annotated speech and gesture corpus
               consisting of 25 route-and-landmark-description dialogues, the
               Bielefeld Speech and Gesture Alignment corpus (SaGA), collected
               in experimental face-to-face settings. We first describe the primary
               and secondary data of the corpus and its reliability assessment.
               Then we go into some of the projects carried out using SaGA demonstrating
               the wide range of its usability: on the empirical side, there
               is work on gesture typology, individual and contextual parameters
               influencing gesture production and gestures’ functions for dialogue
               structure. Speech-gesture interfaces have been established extending
               unification-based grammars. In addition, the development of a
               computational model of speech-gesture alignment and its implementation
               constitutes a research line we focus on.},
  doi       = {10.1007/s12193-012-0106-8},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/MMUI-SaGA-revision2.pdf},
  website   = {http://www.springerlink.com/content/a547448u86h3116x/?MUD=MP},
  year      = {2013}
}

Nicole Beckage, Michael S. Vitevitch, Alexander Mehler and Eliana Colunga. 2013. Using Complex Network Analysis in the Cognitive Sciences. Proceedings of the 35th Annual Meeting of the Cognitive Science Society, CogSci 2013, Berlin, Germany, July 31 - August 3, 2013.

BibTeX

@inproceedings{Beckage:et:al:2013,
  title     = {Using Complex Network Analysis in the Cognitive Sciences},
  author    = {Beckage, Nicole and
               Vitevitch, Michael S. and
               Mehler, Alexander and
               Colunga, Eliana},
  editor    = {Knauff, Markus and
               Pauen, Michael and
               Sebanz, Natalie and
               Wachsmuth, Ipke},
  booktitle = {Proceedings of the 35th Annual Meeting of the Cognitive Science
               Society, CogSci 2013, Berlin, Germany, July 31 - August 3, 2013},
  publisher = {cognitivesciencesociety.org},
  year      = {2013}
}

2012

Alexander Mehler and Laurent Romary. 2012. Handbook of Technical Communication. De Gruyter Mouton.

BibTeX

@book{Mehler:Romary:2012,
  title     = {Handbook of Technical Communication},
  author    = {Mehler, Alexander and
               Romary, Laurent},
  address   = {Berlin},
  publisher = {De Gruyter Mouton},
  year      = {2012},
  pagetotal = {839},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/HandbookTechnicalCommunication.jpg}
}

Olga Abramov. 2012. PhD Thesis: Network theory applied to linguistics: new advances in language classification and typology.

BibTeX

@phdthesis{Abramov:2012,
  title     = {Network theory applied to linguistics: new advances in language
               classification and typology},
  author    = {Abramov, Olga},
  school    = {Bielefeld University, Germany},
  year      = {2012},
  website   = {http://pub.uni-bielefeld.de/publication/2538828},
  pdf       = {https://pub.uni-bielefeld.de/download/2538828/2542368},
  abstract  = {This thesis bridges between two scientific fields -- linguistics
               and computer science -- in terms of Linguistic Networks. From
               the linguistic point of view we examine whether languages can
               be distinguished when looking at network topology of different
               linguistic networks. We deal with up to 17 languages and ask how
               far the methods of network theory reveal the peculiarities of
               single languages. We present and apply network models from different
               levels of linguistic representation: syntactic, phonological and
               morphological. The network models presented here allow to integrate
               various linguistic features at once, which enables a more abstract,
               holistic view at the particular language. From the point of view
               of computer science we elaborate the instrumentarium of network
               theory applying it to a new field. We study the expressiveness
               of different network features and their ability to characterize
               language structure. We evaluate the interplay of these features
               and their goodness in the task of classifying languages genealogically.
               Among others we compare network features related to: average degree,
               average geodesic distance, clustering, entropy-based indices,
               assortativity, centrality, compactness etc. We also propose some
               new indices that can serve as additional characteristics of networks.
               The results obtained show that network models succeed in classifying
               related languages, and allow to study language structure in general.
               The mathematical analysis of the particular network indices brings
               new insights into the nature of these indices and their potential
               when applied to different networks.}
}

Armin Hoenen. 2012. Measuring Repetitiveness in Texts, a Preliminary Investigation. Sprache und Datenverarbeitung. International Journal for Language Data Processing, 36(2):93–104.

BibTeX

@article{Hoenen:2012:a,
  author    = {Hoenen, Armin},
  title     = {Measuring Repetitiveness in Texts, a Preliminary Investigation},
  journal   = {Sprache und Datenverarbeitung. International Journal for Language Data Processing},
  volume    = {36},
  number    = {2},
  pages     = {93--104},
  abstract  = {In this paper, a model is presented for the automatic measurement
               that can systematically describe the usage and function of the
               phenomenon of repetition in written text. The motivating hypothesis
               for this study is that the more repetitive a text is, the easier
               it is to memorize. Therefore, an automated measurement index can
               provide feedback to writers and for those who design texts that
               are often memorized including songs, holy texts, theatrical plays,
               and advertising slogans. The potential benefits of this kind of
               systematic feedback are numerous, the main one being that content
               creators would be able to employ a standard threshold of memorizability.
               This study explores multiple ways of implementing and calculating
               repetitiveness across levels of analysis (such as paragraph-level
               or sub-word level) genres (such as songs, holy texts, and other
               genres) and languages, integrating these into a model for
               the automatic measurement of repetitiveness. The Avestan language
               and some of its idiosyncratic features are explored in order to
               illuminate how the proposed index is applied in the ranking of
               texts according to their repetitiveness.},
  website   = {http://www.linse.uni-due.de/jahrgang-36-2012/articles/measuring-repetitiveness-in-texts-a-preliminary-investigation.html},
  year      = {2012}
}

Steffen Eger. 2012. The Combinatorics of String Alignments: Reconsidering the Problem. Journal of Quantitative Linguistics, 19(1):32–53.

BibTeX

@article{Eger:2012:a,
  author    = {Eger, Steffen},
  title     = {The Combinatorics of String Alignments: Reconsidering the Problem},
  journal   = {Journal of Quantitative Linguistics},
  volume    = {19},
  number    = {1},
  pages     = {32--53},
  abstract  = {In recent work, Covington discusses the number of alignments of
               two strings. Thereby, Covington defines an alignment as “a way
               of pairing up elements of two strings, optionally skipping some
               but preserving the order”. This definition has drawbacks as it
               excludes many relevant situations. In this work, we specify the
               notion of an alignment so that many linguistically interesting
               situations are covered. To this end, we define an alignment in
               an abstract manner as a set of pairs and then define three properties
               on such sets. Secondly, we specify the numbers of possibilities
               of aligning two strings in each case.},
  website   = {http://www.tandfonline.com/doi/full/10.1080/09296174.2011.638792#tabModule},
  year      = {2012}
}

Steffen Eger. 2012. S-Restricted Monotone Alignments: Algorithm, Search Space, and Applications. Proceedings of COLING 2012, 781–798.

BibTeX

@inproceedings{Eger:2012:b,
  author    = {Eger, Steffen},
  title     = {S-Restricted Monotone Alignments: Algorithm, Search Space, and Applications},
  booktitle = {Proceedings of COLING 2012},
  pages     = {781--798},
  address   = {Mumbai, India},
  publisher = {The COLING 2012 Organizing Committee},
  abstract  = {We present a simple and straightforward alignment algorithm for
               monotone many-to-many alignments in grapheme-to-phoneme conversion
               and related fields such as morphology, and discuss a few noteworthy
               extensions. Moreover, we specify combinatorial formulas for monotone
               many-to-many alignments and decoding in G2P which indicate that
               exhaustive enumeration is generally possible, so that some limitations
               of our approach can easily be overcome. Finally, we present a
               decoding scheme, within the monotone many-to-many alignment paradigm,
               that relates the decoding problem to restricted integer compositions
               and that is, putatively, superior to alternatives suggested in
               the literature.},
  pdf       = {http://aclweb.org/anthology/C/C12/C12-1048.pdf},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.370.5941},
  year      = {2012}
}

Steffen Eger. 2012. Lexical semantic typologies from bilingual corpora - A framework. SEM 2012: The First Joint Conference on Lexical and Computational Semantics – Volume 1: Proceedings of the main conference and the shared task, and Volume 2: Proceedings of the Sixth International Workshop on Semantic Evaluation (SemEval 2012), 90–94.

BibTeX

@inproceedings{Eger:2012:c,
  author    = {Eger, Steffen},
  title     = {Lexical semantic typologies from bilingual corpora - A framework},
  booktitle = {SEM 2012: The First Joint Conference on Lexical and Computational
               Semantics -- Volume 1: Proceedings of the main conference and
               the shared task, and Volume 2: Proceedings of the Sixth International
               Workshop on Semantic Evaluation (SemEval 2012)},
  pages     = {90--94},
  address   = {Montreal, Canada},
  publisher = {Association for Computational Linguistics},
  abstract  = {We present a framework, based on Sejane and Eger (2012), for inducing
               lexical semantic typologies for groups of languages. Our framework
               rests on lexical semantic association networks derived from encoding,
               via bilingual corpora, each language in a common reference language,
               the tertium comparationis, so that distances between languages
               can easily be determined.},
  pdf       = {http://www.aclweb.org/anthology/S12-1015},
  website   = {http://dl.acm.org/citation.cfm?id=2387653},
  year      = {2012}
}

Alexander Mehler, Christian Stegbauer and Rüdiger Gleim. July, 2012. Latent Barriers in Wiki-based Collaborative Writing. Proceedings of the Wikipedia Academy: Research and Free Knowledge. June 29 - July 1 2012.

BibTeX

@inproceedings{Mehler:Stegbauer:Gleim:2012:b,
  author    = {Mehler, Alexander and Stegbauer, Christian and Gleim, Rüdiger},
  title     = {Latent Barriers in Wiki-based Collaborative Writing},
  booktitle = {Proceedings of the Wikipedia Academy: Research and Free Knowledge.
               June 29 - July 1 2012},
  address   = {Berlin},
  month     = jul,
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/12_Paper_Alexander_Mehler_Christian_Stegbauer_Ruediger_Gleim.pdf},
  year      = {2012}
}

Armin Hoenen and Thomas Jügel. 2012. Altüberlieferte Sprachen als Gegenstand der Texttechnologie – Ancient Languages as the Object of Text Technology. Ed. by Armin Hoenen and Thomas Jügel. 27(2). JLCL.

BibTeX

@book{Hoenen:Jügel:2012,
  author    = {Hoenen, Armin and Jügel, Thomas},
  editor    = {Armin Hoenen and Thomas Jügel},
  title     = {Altüberlieferte Sprachen als Gegenstand der Texttechnologie --
               Ancient Languages as the Object of Text Technology},
  publisher = {JLCL},
  volume    = {27},
  number    = {2},
  abstract  = {‘Avestan’ is the name of the ritual language of Zoroastrianism,
               which was the state religion of the Iranian empire in Achaemenid,
               Arsacid and Sasanid times, covering a time span of more than 1200
               years. [1] It is named after the ‘Avesta’, i.e., the collection
               of holy scriptures that form the basis of the religion which was
               allegedly founded by Zarathushtra, also known as Zoroaster, by
               about the beginning of the first millennium B.C. Together with
               Vedic Sanskrit, Avestan represents one of the most archaic witnesses
               of the Indo-Iranian branch of the Indo-European languages, which
               makes it especially interesting for historical-comparative linguistics.
               This is why the texts of the Avesta were among the first objects
               of electronic corpus building that were undertaken in the framework
               of Indo-European studies, leading to the establishment of the
               TITUS database (‘Thesaurus indogermanischer Text- und Sprachmaterialien’).
               [2] Today, the complete Avestan corpus is available, together
               with elaborate search functions [3] and an extended version of
               the subcorpus of the so-called ‘Yasna’, which covers a great deal
               of the attestation of variant readings. [4] Right from the beginning
               of their computational work concerning the Avesta, the compilers
               [5] had to cope with the fact that the texts contained in it have
               been transmitted in a special script written from right to left,
               which was also used for printing them in the scholarly editions
               used until today. [6] It goes without saying that there was no
               way in the middle of the 1980s to encode the Avestan scriptures
               exactly as they are found in the manuscripts. Instead, we had
               to rely upon transcriptional devices that were dictated by the
               restrictions of character encoding as provided by the computer
               systems used. As the problems we had to face in this respect and
               the solutions we could apply are typical for the development of
               computational work on ancient languages, it seems worthwhile to
               sketch them out here.},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/AltueberlieferteSprachen-300-20.png},
  issn      = {2190-6858},
  pdf       = {http://www.jlcl.org/2012_Heft2/H2012-2.pdf},
  year      = {2012}
}

Tim vor der Brück. 2012. Wissensakquisition mithilfe maschineller Lernverfahren auf tiefen semantischen Repräsentationen. Springer.

BibTeX

@book{vor:der:Brueck:2012:a,
  author    = {vor der Brück, Tim},
  title     = {Wissensakquisition mithilfe maschineller Lernverfahren auf tiefen
               semantischen Repräsentationen},
  publisher = {Springer},
  address   = {Heidelberg, Germany},
  abstract  = {Eine große Wissensbasis ist eine Voraussetzung für eine Vielzahl
               von Anwendungen im Bereich der automatischen Sprachverarbeitung,
               wie Frage-Antwort- oder Information-Retrieval-Systeme. Ein Mensch
               hat sich das erforderliche Wissen, um Informationen zu suchen
               oder Fragen zu beantworten, im Laufe seines Lebens angeeignet.
               Einem Computer muss dieses Wissen explizit mitgeteilt werden.
               Tim vor der Brück beschreibt einen Ansatz, wie ein Computer dieses
               Wissen ähnlich wie ein Mensch durch die Lektüre von Texten
               erwerben kann. Dabei kommen Methoden der Logik und des maschinellen
               Lernens zum Einsatz.},
  school    = {FernUniversität in Hagen},
  year      = {2012}
}

Tim vor der Brück and Yu-Fang Wang. 2012. Synonymy Extraction from Semantic Networks Using String and Graph Kernel Methods. Proceedings of the 20th European Conference on Artificial Intelligence (ECAI), 822–827.

BibTeX

@inproceedings{vor:der:Brueck:Wang:2012,
  author    = {vor der Brück, Tim and Wang, Yu-Fang},
  title     = {Synonymy Extraction from Semantic Networks Using String and Graph Kernel Methods},
  booktitle = {Proceedings of the 20th European Conference on Artificial Intelligence (ECAI)},
  pages     = {822--827},
  address   = {Montpellier, France},
  abstract  = {Synonyms are a highly relevant information source for natural
               language processing. Automatic synonym extraction methods have
               in common that they are either applied on the surface representation
               of the text or on a syntactical structure derived from it. In
               this paper, however, we present a semantic synonym extraction
               approach that operates directly on semantic networks (SNs), which
               were derived from text by a deep syntactico-semantic analysis.
               Synonymy hypotheses are extracted from the SNs by graph matching.
               These hypotheses are then validated by a support vector machine
               (SVM) employing a combined graph and string kernel. Our method
               was compared to several other approaches and the evaluation has
               shown that our results are considerably superior.},
  pdf       = {http://www.vdb1.de/papers/ECAI_535.pdf},
  website   = {http://ebooks.iospress.nl/publication/7076},
  year      = {2012}
}

Tim vor der Brück. 2012. Hyponym Extraction Employing a Weighted Graph Kernel. Statistical and Machine Learning Approaches for Network Analysis.

BibTeX

@incollection{vor:der:Brueck:2012:b,
  author    = {vor der Brück, Tim},
  editor    = {Matthias Dehmer and Subhash C. Basak},
  title     = {Hyponym Extraction Employing a Weighted Graph Kernel},
  booktitle = {Statistical and Machine Learning Approaches for Network Analysis},
  address   = {Hoboken, New Jersey},
  publisher = {Wiley},
  year      = {2012}
}

Md. Zahurul Islam, Alexander Mehler and Rashedur Rahman. 2012. Text Readability Classification of Textbooks of a Low-Resource Language. Proceedings of the 26th Pacific Asia Conference on Language, Information, and Computation (PACLIC 26).

BibTeX

@inproceedings{Islam:Mehler:Rahman:2012,
  author    = {Islam, Md. Zahurul and Mehler, Alexander and Rahman, Rashedur},
  title     = {Text Readability Classification of Textbooks of a Low-Resource Language},
  booktitle = {Proceedings of the 26th Pacific Asia Conference on Language, Information,
               and Computation (PACLIC 26)},
  abstract  = {There are many languages considered to be low-density languages,
               either because the population speaking the language is not very
               large, or because insufficient digitized text material is available
               in the language even though millions of people speak the language.
               Bangla is one of the latter ones. Readability classification is
               an important Natural Language Processing (NLP) application that
               can be used to judge the quality of documents and assist writers
               to locate possible problems. This paper presents a readability
               classifier of Bangla textbook documents based on information-theoretic
               and lexical features. The features proposed in this paper result
               in an F-score that is 50\% higher than that for traditional readability
               formulas.},
  owner     = {zahurul},
  pdf       = {http://www.aclweb.org/anthology/Y12-1059},
  timestamp = {2012.08.14},
  website   = {http://www.researchgate.net/publication/256648250_Text_Readability_Classification_of_Textbooks_of_a_Low-Resource_Language},
  year      = {2012}
}

Alexander Mehler, Laurent Romary and Dafydd Gibbon. 2012. Introduction: Framing Technical Communication. Handbook of Technical Communication, 8:1–26.

BibTeX

@incollection{Mehler:Romary:Gibbon:2012,
  author    = {Mehler, Alexander and Romary, Laurent and Gibbon, Dafydd},
  title     = {Introduction: Framing Technical Communication},
  booktitle = {Handbook of Technical Communication},
  publisher = {De Gruyter Mouton},
  editor    = {Alexander Mehler and Laurent Romary and Dafydd Gibbon},
  volume    = {8},
  series    = {Handbooks of Applied Linguistics},
  pages     = {1--26},
  address   = {Berlin and Boston},
  year      = {2012}
}

Alexander Mehler and Andy Lücking. 2012. Pathways of Alignment between Gesture and Speech: Assessing Information Transmission in Multimodal Ensembles. Proceedings of the International Workshop on Formal and Computational Approaches to Multimodal Communication under the auspices of ESSLLI 2012, Opole, Poland, 6-10 August.

BibTeX

@inproceedings{Mehler:Luecking:2012:d,
  author    = {Mehler, Alexander and Lücking, Andy},
  editor    = {Gianluca Giorgolo and Katya Alahverdzhieva},
  title     = {Pathways of Alignment between Gesture and Speech: Assessing Information
               Transmission in Multimodal Ensembles},
  booktitle = {Proceedings of the International Workshop on Formal and Computational
               Approaches to Multimodal Communication under the auspices of ESSLLI
               2012, Opole, Poland, 6-10 August},
  year      = {2012},
  keywords  = {wikinect},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2016/06/Mehler_Luecking_FoCoMC2012-2.pdf},
  website   = {http://www.researchgate.net/publication/268368670_Pathways_of_Alignment_between_Gesture_and_Speech_Assessing_Information_Transmission_in_Multimodal_Ensembles},
  abstract  = {We present an empirical account of multimodal ensembles based
               on Hjelmslev’s notion of selection. This is done to get measurable
               evidence for the existence of speech-and-gesture ensembles. Utilizing
               information theory, we show that there is an information transmission
               that makes a gestures’ representation technique predictable when
               merely knowing its lexical affiliate – in line with the notion
               of the primacy of language. Thus, there is evidence for a one-way
               coupling – going from words to gestures – that leads to speech-and-gesture
               alignment and underlies the constitution of multimodal ensembles.}
}

Andy Lücking. 2012. Towards a Conceptual, Unification-based Speech-Gesture Interface. Proceedings of the International Workshop on Formal and Computational Approaches to Multimodal Communication under the auspices of ESSLLI 2012, Opole, Poland, 6-10 August.

BibTeX

@inproceedings{Luecking:2012,
  author    = {Lücking, Andy},
  editor    = {Gianluca Giorgolo and Katya Alahverdzhieva},
  title     = {Towards a Conceptual, Unification-based Speech-Gesture Interface},
  booktitle = {Proceedings of the International Workshop on Formal and Computational
               Approaches to Multimodal Communication under the auspices of ESSLLI
               2012, Opole, Poland, 6-10 August},
  year      = {2012},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/FoCoMoC2012-1.pdf},
  abstract  = {A framework for grounding the semantics of co-verbal iconic gestures
               is presented. A resemblance account to iconicity is discarded
               in favor of an exemplification approach. It is sketched how exemplification
               can be captured within a unification-based grammar that provides
               a conceptual interface. Gestures modeled as vector sequences are
               the exemplificational base. Some hypotheses that follow from the
               general account are pointed at and remaining challenges are discussed.}
}

Alexander Mehler and Andy Lücking. 2012. WikiNect: Towards a Gestural Writing System for Kinetic Museum Wikis. Proceedings of the International Workshop On User Experience in e-Learning and Augmented Technologies in Education (UXeLATE 2012) in Conjunction with ACM Multimedia 2012, 29 October – 2 November, Nara, Japan, 7–12.

BibTeX

@inproceedings{Mehler:Luecking:2012:c,
  author    = {Mehler, Alexander and Lücking, Andy},
  title     = {{WikiNect}: Towards a Gestural Writing System for Kinetic Museum Wikis},
  booktitle = {Proceedings of the International Workshop On User Experience in
               e-Learning and Augmented Technologies in Education (UXeLATE 2012)
               in Conjunction with ACM Multimedia 2012, 29 October -- 2 November,
               Nara, Japan},
  pages     = {7--12},
  abstract  = {We introduce WikiNect as a kinetic museum information system that
               allows museum visitors to give on-site feedback about exhibitions.
               To this end, WikiNect integrates three approaches to Human-Computer
               Interaction (HCI): games with a purpose, wiki-based collaborative
               writing and kinetic text-technologies. Our aim is to develop kinetic
               technologies as a new paradigm of HCI. They dispense with classical
               interfaces (e.g., keyboards) in that they build on non-contact
               modes of communication like gestures or facial expressions as
               input displays. In this paper, we introduce the notion of gestural
               writing as a kinetic text-technology that underlies WikiNect to
               enable museum visitors to communicate their feedback. The basic
               idea is to explore sequences of gestures that share the semantic
               expressivity of verbally manifested speech acts. Our task is to
               identify such gestures that are learnable on-site in the usage
               scenario of WikiNect. This is done by referring to so-called transient
               gestures as part of multimodal ensembles, which are candidate
               gestures of the desired functionality.},
  keywords  = {wikinect},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/UXeLATE2012-copyright.pdf},
  website   = {http://www.researchgate.net/publication/262319200_WikiNect_towards_a_gestural_writing_system_for_kinetic_museum_wikis},
  year      = {2012}
}

Rüdiger Gleim, Alexander Mehler and Alexandra Ernst. 2012. SOA implementation of the eHumanities Desktop. Proceedings of the Workshop on Service-oriented Architectures (SOAs) for the Humanities: Solutions and Impacts, Digital Humanities 2012, Hamburg, Germany.

BibTeX

@inproceedings{Gleim:Mehler:Ernst:2012,
  author    = {Gleim, Rüdiger and Mehler, Alexander and Ernst, Alexandra},
  title     = {{SOA} implementation of the {eHumanities Desktop}},
  booktitle = {Proceedings of the Workshop on Service-oriented Architectures
               (SOAs) for the Humanities: Solutions and Impacts, Digital Humanities
               2012, Hamburg, Germany},
  abstract  = {The eHumanities Desktop is a system which allows users to upload,
               organize and share resources using a web interface. Furthermore
               resources can be processed, annotated and analyzed in various
               ways. Registered users can organize themselves in groups and collaboratively
               work on their data. The eHumanities Desktop is platform independent
               and runs in a web browser. This paper presents the system focusing
               on its service orientation and process management.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/dhc2012.pdf},
  year      = {2012}
}

Alexander Mehler and Christian Stegbauer. 2012. On the Self-similarity of Intertextual Structures in Wikipedia. Proceedings of the HotSocial '12: The First ACM International Workshop on Hot Topics on Interdisciplinary Social Networks Research, 65–68.

BibTeX

@inproceedings{Mehler:Stegbauer:2012,
  author    = {Mehler, Alexander and Stegbauer, Christian},
  title     = {On the Self-similarity of Intertextual Structures in {Wikipedia}},
  booktitle = {Proceedings of the HotSocial '12: The First ACM International
               Workshop on Hot Topics on Interdisciplinary Social Networks Research},
  editor    = {Xiaoming Fu and Peter Gloor and Jie Tang},
  pages     = {65--68},
  address   = {Beijing, China},
  pdf       = {http://wan.poly.edu/KDD2012/forms/workshop/HotSocial12/doc/p64_mehler.pdf},
  website   = {http://dl.acm.org/citation.cfm?id=2392633&bnc=1},
  year      = {2012}
}

Alexander Mehler, Silke Schwandt, Rüdiger Gleim and Alexandra Ernst. 2012. Inducing Linguistic Networks from Historical Corpora: Towards a New Method in Historical Semantics. Proceedings of the Conference on New Methods in Historical Corpora, 3:257–274.

BibTeX

@incollection{Mehler:Schwandt:Gleim:Ernst:2012,
  author    = {Mehler, Alexander and Schwandt, Silke and Gleim, Rüdiger and Ernst, Alexandra},
  editor    = {Paul Bennett and Martin Durrell and Silke Scheible and Richard J. Whitt},
  title     = {Inducing Linguistic Networks from Historical Corpora: Towards
               a New Method in Historical Semantics},
  booktitle = {Proceedings of the Conference on New Methods in Historical Corpora},
  series    = {Corpus linguistics and Interdisciplinary perspectives
               on language (CLIP)},
  volume    = {3},
  publisher = {Narr},
  address   = {Tübingen},
  pages     = {257--274},
  year      = {2012}
}

Andy Lücking, Sebastian Ptock and Kirsten Bergmann. 2012. Assessing Agreement on Segmentations by Means of Staccato, the Segmentation Agreement Calculator according to Thomann. Gesture and Sign Language in Human-Computer Interaction and Embodied Communication, 7206:129–138.

BibTeX

@incollection{Luecking:Ptock:Bergmann:2012,
  author    = {Lücking, Andy and Ptock, Sebastian and Bergmann, Kirsten},
  title     = {Assessing Agreement on Segmentations by Means of {Staccato}, the
               Segmentation Agreement Calculator according to {Thomann}},
  booktitle = {Gesture and Sign Language in Human-Computer Interaction and Embodied
               Communication},
  publisher = {Springer},
  editor    = {Eleni Efthimiou and Georgios Kouroupetroglou and Stavroula-Evita Fotinea},
  volume    = {7206},
  series    = {Lecture Notes in Artificial Intelligence},
  pages     = {129--138},
  address   = {Berlin and Heidelberg},
  abstract  = {Staccato, the Segmentation Agreement Calculator According to Thomann,
               is a software tool for assessing the degree of agreement of
               multiple segmentations of some time-related data (e.g., gesture
               phases or sign language constituents). The software implements
               an assessment procedure developed by Bruno Thomann and will be
               made publicly available. The article discusses the rationale of
               the agreement assessment procedure and points at future extensions
               of Staccato.},
  booksubtitle = {9th International Gesture Workshop, GW 2011, Athens,
                   Greece, May 2011, Revised Selected Papers},
  website   = {http://link.springer.com/chapter/10.1007/978-3-642-34182-3_12},
  year      = {2012}
}

Alexander Mehler, Andy Lücking and Peter Menke. 2012. Assessing Cognitive Alignment in Different Types of Dialog by means of a Network Model. Neural Networks, 32:159–164.

BibTeX

@article{Mehler:Luecking:Menke:2012,
  author    = {Mehler, Alexander and Lücking, Andy and Menke, Peter},
  title     = {Assessing Cognitive Alignment in Different Types of Dialog by
               means of a Network Model},
  journal   = {Neural Networks},
  volume    = {32},
  pages     = {159--164},
  abstract  = {We present a network model of dialog lexica, called TiTAN (Two-layer
               Time-Aligned Network) series. TiTAN series capture the formation
               and structure of dialog lexica in terms of serialized graph representations.
               The dynamic update of TiTAN series is driven by the dialog-inherent
               timing of turn-taking. The model provides a link between neural,
               connectionist underpinnings of dialog lexica on the one hand and
               observable symbolic behavior on the other. On the neural side,
               priming and spreading activation are modeled in terms of TiTAN
               networking. On the symbolic side, TiTAN series account for cognitive
               alignment in terms of the structural coupling of the linguistic
               representations of dialog partners. This structural stance allows
               us to apply TiTAN in machine learning of data of dialogical alignment.
               In previous studies, it has been shown that aligned dialogs can
               be distinguished from non-aligned ones by means of TiTAN-based
               modeling. Now, we simultaneously apply this model to two types
               of dialog: task-oriented, experimentally controlled dialogs on
               the one hand and more spontaneous, direction giving dialogs on
               the other. We ask whether it is possible to separate aligned dialogs
               from non-aligned ones in a type-crossing way. Starting from a
               recent experiment (Mehler, Lücking, \& Menke, 2011a), we show
               that such a type-crossing classification is indeed possible. This
               hints at a structural fingerprint left by alignment in networks
               of linguistic items that are routinely co-activated during conversation.},
  doi       = {10.1016/j.neunet.2012.02.013},
  website   = {http://www.sciencedirect.com/science/article/pii/S0893608012000421},
  year      = {2012}
}

Md. Zahurul Islam and Alexander Mehler. 2012. Customization of the Europarl Corpus for Translation Studies. Proceedings of the 8th International Conference on Language Resources and Evaluation (LREC).

BibTeX

@inproceedings{Islam:Mehler:2012:a,
  author    = {Islam, Md. Zahurul and Mehler, Alexander},
  title     = {Customization of the {Europarl} Corpus for Translation Studies},
  booktitle = {Proceedings of the 8th International Conference on Language Resources
               and Evaluation (LREC)},
  abstract  = {Currently, the area of translation studies lacks corpora by which
               translation scholars can validate their theoretical claims, for
               example, regarding the scope of the characteristics of the translation
               relation. In this paper, we describe a customized resource in
               the area of translation studies that mainly addresses research
               on the properties of the translation relation. Our experimental
               results show that the Type-Token-Ratio (TTR) is not a universally
               valid indicator of the simplification of translation.},
  owner     = {zahurul},
  pdf       = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/729_Paper.pdf},
  timestamp = {2012.02.02},
  year      = {2012}
}

Andy Lücking and Thies Pfeiffer. 2012. Framing Multimodal Technical Communication. With Focal Points in Speech-Gesture-Integration and Gaze Recognition. Handbook of Technical Communication, 8:591–644.

BibTeX

@incollection{Luecking:Pfeiffer:2012,
  author    = {Lücking, Andy and Pfeiffer, Thies},
  title     = {Framing Multimodal Technical Communication. With Focal Points
               in Speech-Gesture-Integration and Gaze Recognition},
  booktitle = {Handbook of Technical Communication},
  publisher = {De Gruyter Mouton},
  editor    = {Alexander Mehler and Laurent Romary and Dafydd Gibbon},
  volume    = {8},
  series    = {Handbooks of Applied Linguistics},
  chapter   = {18},
  pages     = {591--644},
  address   = {Berlin and Boston},
  website   = {http://www.degruyter.com/view/books/9783110224948/9783110224948.591/9783110224948.591.xml},
  year      = {2012}
}

Petra Kubina, Olga Abramov and Andy Lücking. 2012. Barrier-free Communication. Handbook of Technical Communication, 8:645–706.

BibTeX

@incollection{Kubina:Abramov:Luecking:2012,
  author    = {Kubina, Petra and Abramov, Olga and Lücking, Andy},
  title     = {Barrier-free Communication},
  booktitle = {Handbook of Technical Communication},
  publisher = {De Gruyter Mouton},
  editor    = {Alexander Mehler and Laurent Romary},
  volume    = {8},
  series    = {Handbooks of Applied Linguistics},
  chapter   = {19},
  pages     = {645--706},
  address   = {Berlin and Boston},
  editora   = {Dafydd Gibbon},
  editoratype = {collaborator},
  website   = {http://www.degruyter.com/view/books/9783110224948/9783110224948.645/9783110224948.645.xml},
  year      = {2012}
}

Andy Lücking and Alexander Mehler. 2012. What's the Scope of the Naming Game? Constraints on Semantic Categorization. Proceedings of the 9th International Conference on the Evolution of Language, 196–203.

BibTeX

@inproceedings{Luecking:Mehler:2012,
  author    = {Lücking, Andy and Mehler, Alexander},
  title     = {What's the Scope of the Naming Game? Constraints on Semantic Categorization},
  booktitle = {Proceedings of the 9th International Conference on the Evolution of Language},
  pages     = {196--203},
  address   = {Kyoto, Japan},
  abstract  = {The Naming Game (NG) has become a vivid research paradigm for
               simulation studies on language evolution and the establishment
               of naming conventions. Recently, NGs were used for reconstructing
               the creation of linguistic categories, most notably for color
               terms. We recap the functional principle of NGs and the latter
               Categorization Games (CGs) and evaluate them in the light of semantic
               data of linguistic categorization outside the domain of colors.
               This comparison reveals two specifics of the CG paradigm: Firstly,
               the emerging categories draw basically on the predefined topology
               of the learning domain. Secondly, the kind of categories that
               can be learnt in CGs is bound to context-independent intersective
               categories. This suggests that the NG and the CG focus on a special
               aspect of natural language categorization, which disregards context-sensitive
               categories used in a non-compositional manner.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Evolang2012-AL_AM.pdf},
  url       = {http://kyoto.evolang.org/},
  website   = {https://www.researchgate.net/publication/267858061_WHAT'S_THE_SCOPE_OF_THE_NAMING_GAME_CONSTRAINTS_ON_SEMANTIC_CATEGORIZATION},
  year      = {2012}
}

Maria Sukhareva, Md. Zahurul Islam, Armin Hoenen and Alexander Mehler. 2012. A Three-step Model of Language Detection in Multilingual Ancient Texts. Proceedings of Workshop on Annotation of Corpora for Research in the Humanities.

BibTeX

@inproceedings{Sukhareva:Islam:Hoenen:Mehler:2012,
  author    = {Sukhareva, Maria and Islam, Md. Zahurul and Hoenen, Armin and Mehler, Alexander},
  title     = {A Three-step Model of Language Detection in Multilingual Ancient Texts},
  booktitle = {Proceedings of Workshop on Annotation of Corpora for Research in the Humanities},
  address   = {Heidelberg, Germany},
  year      = {2012},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/sukhareva_islam_hoenen_mehler_2011.pdf},
  website   = {https://www.academia.edu/2236625/A_Three-step_Model_of_Language_Detection_in_Multilingual_Ancient_Texts},
  abstract  = {Ancient corpora contain various multilingual patterns. This imposes
               numerous problems on their manual annotation and automatic processing.
               We introduce a lexicon building system, called Lexicon Expander,
               that has an integrated language detection module, Language Detection
               (LD) Toolkit. The Lexicon Expander post-processes the output of
               the LD Toolkit which leads to the improvement of f-score and accuracy
               values. Furthermore, the functionality of the Lexicon Expander
               also includes manual editing of lexical entries and automatic
               morphological expansion by means of a morphological grammar.}
}

2011

Andy Lücking and Alexander Mehler. 2011. A Model of Complexity Levels of Meaning Constitution in Simulation Models of Language Evolution. International Journal of Signs and Semiotic Systems, 1(1):18–38.

BibTeX

@article{Luecking:Mehler:2011,
  author    = {Lücking, Andy and Mehler, Alexander},
  title     = {A Model of Complexity Levels of Meaning Constitution in Simulation
               Models of Language Evolution},
  journal   = {International Journal of Signs and Semiotic Systems},
  volume    = {1},
  number    = {1},
  pages     = {18--38},
  abstract  = {Currently, some simulative accounts exist within dynamic or evolutionary
               frameworks that are concerned with the development of linguistic
               categories within a population of language users. Although these
               studies mostly emphasize that their models are abstract, the paradigm
               categorization domain is preferably that of colors. In this paper,
               the authors argue that color adjectives are special predicates
               in both linguistic and metaphysical terms: semantically, they
               are intersective predicates, metaphysically, color properties
               can be empirically reduced onto purely physical properties. The
               restriction of categorization simulations to the color paradigm
               systematically leads to ignoring two ubiquitous features of natural
               language predicates, namely relativity and context-dependency.
               Therefore, the models for simulation models of linguistic categories
               are not able to capture the formation of categories like perspective-dependent
               predicates ‘left’ and ‘right’, subsective predicates like ‘small’
               and ‘big’, or predicates that make reference to abstract objects
               like ‘I prefer this kind of situation’. The authors develop a
               three-dimensional grid of ascending complexity that is partitioned
               according to the semiotic triangle. They also develop a conceptual
               model in the form of a decision grid by means of which the complexity
               level of simulation models of linguistic categorization can be
               assessed in linguistic terms.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/luecking_mehler_article_IJSSS.pdf},
  year      = {2011}
}

Alexander Mehler, Olga Abramov and Nils Diewald. 2011. Geography of Social Ontologies: Testing a Variant of the Sapir-Whorf Hypothesis in the Context of Wikipedia. Computer Speech and Language, 25(3):716–740.

BibTeX

@article{Mehler:Abramov:Diewald:2011:a,
  author    = {Mehler, Alexander and Abramov, Olga and Diewald, Nils},
  title     = {Geography of Social Ontologies: Testing a Variant of the Sapir-Whorf
               Hypothesis in the Context of Wikipedia},
  journal   = {Computer Speech and Language},
  volume    = {25},
  number    = {3},
  pages     = {716--740},
  abstract  = {In this article, we test a variant of the Sapir-Whorf Hypothesis
               in the area of complex network theory. This is done by analyzing
               social ontologies as a new resource for automatic language classification.
               Our method is to solely explore structural features of social
               ontologies in order to predict family resemblances of languages
               used by the corresponding communities to build these ontologies.
               This approach is based on a reformulation of the Sapir-Whorf Hypothesis
               in terms of distributed cognition. Starting from a corpus of 160
               Wikipedia-based social ontologies, we test our variant of the
               Sapir-Whorf Hypothesis by several experiments, and find out that
               we outperform the corresponding baselines. All in all, the article
               develops an approach to classify linguistic networks of tens of
               thousands of vertices by exploring a small range of mathematically
               well-established topological indices.},
  doi       = {10.1016/j.csl.2010.05.006},
  website   = {http://www.sciencedirect.com/science/article/pii/S0885230810000434},
  year      = {2011}
}

Alexander Mehler. 2011. Social Ontologies as Generalized Nearly Acyclic Directed Graphs: A Quantitative Graph Model of Social Ontologies by Example of Wikipedia. Towards an Information Theory of Complex Networks: Statistical Methods and Applications, 259–319.

BibTeX

@incollection{Mehler:2011:c,
  author    = {Mehler, Alexander},
  title     = {Social Ontologies as Generalized Nearly Acyclic Directed Graphs:
               A Quantitative Graph Model of Social Ontologies by Example of
               Wikipedia},
  booktitle = {Towards an Information Theory of Complex Networks: Statistical
               Methods and Applications},
  publisher = {Birkh{\"a}user},
  editor    = {Dehmer, Matthias and Emmert-Streib, Frank and Mehler, Alexander},
  pages     = {259--319},
  address   = {Boston/Basel},
  year      = {2011}
}

Andy Lücking, Sebastian Ptock and Kirsten Bergmann. May, 2011. Staccato: Segmentation Agreement Calculator. Gesture in Embodied Communication and Human-Computer Interaction. Proceedings of the 9th International Gesture Workshop, 50–53.

BibTeX

@inproceedings{Luecking:Ptock:Bergmann:2011,
  author    = {Lücking, Andy and Ptock, Sebastian and Bergmann, Kirsten},
  title     = {Staccato: Segmentation Agreement Calculator},
  booktitle = {Gesture in Embodied Communication and Human-Computer Interaction.
               Proceedings of the 9th International Gesture Workshop},
  editor    = {Eleni Efthimiou and Georgios Kouroupetroglou},
  series    = {GW 2011},
  pages     = {50--53},
  address   = {Athens, Greece},
  publisher = {National and Kapodistrian University of Athens},
  month     = may,
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/LueckingEA_final.pdf},
  year      = {2011}
}

Alexander Mehler and Andy Lücking. September, 2011. A Graph Model of Alignment in Multilog. Proceedings of IEEE Africon 2011.

BibTeX

@inproceedings{Mehler:Luecking:2011,
  author    = {Mehler, Alexander and Lücking, Andy},
  title     = {A Graph Model of Alignment in Multilog},
  booktitle = {Proceedings of IEEE Africon 2011},
  series    = {IEEE Africon},
  address   = {Zambia},
  organization = {IEEE},
  month     = sep,
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/africon2011-paper-Alexander_Mehler_Andy_Luecking.pdf},
  website   = {https://www.researchgate.net/publication/267941012_A_Graph_Model_of_Alignment_in_Multilog},
  year      = {2011}
}

Christian Stegbauer and Alexander Mehler. 2011. Positionssensitive Dekomposition von Potenzgesetzen am Beispiel von Wikipedia-basierten Kollaborationsnetzwerken. Proceedings of the 4th Workshop Digital Social Networks at INFORMATIK 2011: Informatik schafft Communities, Oct 4-7, 2011, Berlin.

BibTeX

@inproceedings{Stegbauer:Mehler:2011,
  author    = {Stegbauer, Christian and Mehler, Alexander},
  title     = {Positionssensitive Dekomposition von Potenzgesetzen am Beispiel
               von Wikipedia-basierten Kollaborationsnetzwerken},
  booktitle = {Proceedings of the 4th Workshop Digital Social Networks at INFORMATIK
               2011: Informatik schafft Communities, Oct 4-7, 2011, Berlin},
  year      = {2011},
  specialnote = {Best Paper Award},
  specialnotewebsite = {http://www.digitale-soziale-netze.de/gi-workshop/index.php?site=review2011},
  pdf       = {http://www.user.tu-berlin.de/komm/CD/paper/090423.pdf}
}

Mathias Lösch, Ulli Waltinger, Wolfram Horstmann and Alexander Mehler. 2011. Building a DDC-annotated Corpus from OAI Metadata. Journal of Digital Information, 12(2).

BibTeX

@comment{NOTE(review): the abstract in the following entry discusses text
readability scoring with deep syntactic and semantic indicators, which does
not appear to match the entry's title (a DDC-annotated corpus built from OAI
metadata). It looks like an abstract copied from a different publication;
verify against the published article and replace it.}
@article{Loesch:Waltinger:Horstmann:Mehler:2011,
  author    = {Lösch, Mathias and Waltinger, Ulli and Horstmann, Wolfram and Mehler, Alexander},
  title     = {Building a DDC-annotated Corpus from OAI Metadata},
  journal   = {Journal of Digital Information},
  volume    = {12},
  number    = {2},
  abstract  = {Checking for readability or simplicity of texts is important for
               many institutional and individual users. Formulas for approximately
               measuring text readability have a long tradition. Usually, they
               exploit surface-oriented indicators like sentence length, word
               length, word frequency, etc. However, in many cases, this information
               is not adequate to realistically approximate the cognitive difficulties
               a person can have to understand a text. Therefore we use deep
               syntactic and semantic indicators in addition. The syntactic information
               is represented by a dependency tree, the semantic information
               by a semantic network. Both representations are automatically
               generated by a deep syntactico-semantic analysis. A global readability
               score is determined by applying a nearest neighbor algorithm on
               3,000 ratings of 300 test persons. The evaluation showed that
               the deep syntactic and semantic indicators lead to promising results
               comparable to the best surface-based indicators. The combination
               of deep and shallow indicators leads to an improvement over shallow
               indicators alone. Finally, a graphical user interface was developed
               which highlights difficult passages, depending on the individual
               indicator values, and displays a global readability score.},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  pdf       = {https://journals.tdl.org/jodi/index.php/jodi/article/download/1765/1767},
  website   = {http://journals.tdl.org/jodi/article/view/1765},
  year      = {2011}
}

Markus Lux, Jan Laußmann, Alexander Mehler and Christian Menßen. 2011. An Online Platform for Visualizing Time Series in Linguistic Networks. Proceedings of the Demonstrations Session of the 2011 IEEE / WIC / ACM International Conferences on Web Intelligence and Intelligent Agent Technology, 22 - 27 August 2011, Lyon, France.

BibTeX

@inproceedings{Lux:Laussmann:Mehler:Menssen:2011,
  author    = {Lux, Markus and Lau{\ss}mann, Jan and Mehler, Alexander and Men{\ss}en, Christian},
  title     = {An Online Platform for Visualizing Time Series in Linguistic Networks},
  booktitle = {Proceedings of the Demonstrations Session of the 2011 IEEE / WIC
               / ACM International Conferences on Web Intelligence and Intelligent
               Agent Technology, 22 - 27 August 2011, Lyon, France},
  year      = {2011},
  website   = {http://dl.acm.org/citation.cfm?id=2052396},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/wi-iat-poster-2011.pdf}
}

Alexander Mehler, Nils Diewald, Ulli Waltinger, Rüdiger Gleim, Dietmar Esch, Barbara Job, Thomas Küchelmann, Olga Abramov and Philippe Blanchard. 2011. Evolution of Romance Language in Written Communication: Network Analysis of Late Latin and Early Romance Corpora. Leonardo, 44(3).

BibTeX

@article{Mehler:Diewald:Waltinger:et:al:2010,
  author    = {Mehler, Alexander and Diewald, Nils and Waltinger, Ulli and Gleim, Rüdiger
               and Esch, Dietmar and Job, Barbara and Küchelmann, Thomas and Abramov, Olga
               and Blanchard, Philippe},
  title     = {Evolution of Romance Language in Written Communication: Network
               Analysis of Late Latin and Early Romance Corpora},
  journal   = {Leonardo},
  year      = {2011},
  volume    = {44},
  number    = {3},
  publisher = {MIT Press},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_diewald_waltinger_gleim_esch_job_kuechelmann_pustylnikov_blanchard_2010.pdf}
}

Alexander Mehler, Andy Lücking and Peter Menke. 2011. From Neural Activation to Symbolic Alignment: A Network-Based Approach to the Formation of Dialogue Lexica. Proceedings of the International Joint Conference on Neural Networks (IJCNN 2011), San Jose, California, July 31 – August 5.

BibTeX

@inproceedings{Mehler:Luecking:Menke:2011,
  author    = {Mehler, Alexander and Lücking, Andy and Menke, Peter},
  title     = {From Neural Activation to Symbolic Alignment: A Network-Based
               Approach to the Formation of Dialogue Lexica},
  booktitle = {Proceedings of the International Joint Conference on Neural Networks
               (IJCNN 2011), San Jose, California, July 31 -- August 5},
  doi       = {10.1109/IJCNN.2011.6033266},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/neural-align-final.pdf},
  website   = {http://dx.doi.org/10.1109/IJCNN.2011.6033266},
  year      = {2011}
}

Andy Lücking, Olga Abramov, Alexander Mehler and Peter Menke. 2011. The Bielefeld Jigsaw Map Game (JMG) Corpus. Abstracts of the Corpus Linguistics Conference 2011.

BibTeX

@inproceedings{Luecking:Abramov:Mehler:Menke:2011,
  author    = {Lücking, Andy and Abramov, Olga and Mehler, Alexander and Menke, Peter},
  title     = {The Bielefeld Jigsaw Map Game (JMG) Corpus},
  booktitle = {Abstracts of the Corpus Linguistics Conference 2011},
  year      = {2011},
  series    = {CL2011},
  address   = {Birmingham},
  website   = {http://www.birmingham.ac.uk/research/activity/corpus/publications/conference-archives/2011-birmingham.aspx},
  pdf       = {http://www.birmingham.ac.uk/documents/college-artslaw/corpus/conference-archives/2011/Paper-137.pdf}
}

Rüdiger Gleim, Armin Hoenen, Nils Diewald, Alexander Mehler and Alexandra Ernst. 2011. Modeling, Building and Maintaining Lexica for Corpus Linguistic Studies by Example of Late Latin. Corpus Linguistics 2011, 20-22 July, Birmingham.

BibTeX

@inproceedings{Gleim:Hoenen:Diewald:Mehler:Ernst:2011,
  author    = {Gleim, Rüdiger and Hoenen, Armin and Diewald, Nils and Mehler, Alexander
               and Ernst, Alexandra},
  title     = {Modeling, Building and Maintaining Lexica for Corpus Linguistic
               Studies by Example of Late Latin},
  year      = {2011},
  booktitle = {Corpus Linguistics 2011, 20-22 July, Birmingham},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Paper-48.pdf}
}

Peter Menke and Alexander Mehler. 2011. From experiments to corpora: The Ariadne Corpus Management System. Corpus Linguistics 2011, 20-22 July, Birmingham.

BibTeX

@inproceedings{Menke:Mehler:2011,
  author    = {Menke, Peter and Mehler, Alexander},
  title     = {From experiments to corpora: The Ariadne Corpus Management System},
  year      = {2011},
  booktitle = {Corpus Linguistics 2011, 20-22 July, Birmingham},
  website   = {https://www.researchgate.net/publication/260186214_From_Experiments_to_Corpora_The_Ariadne_Corpus_Management_System}
}

Matthias Dehmer, Frank Emmert-Streib and Alexander Mehler, eds. 2011. Towards an Information Theory of Complex Networks: Statistical Methods and Applications. Birkhäuser.

BibTeX

@book{Dehmer:EmmertStreib:Mehler:2009:a,
  editor    = {Dehmer, Matthias and Emmert-Streib, Frank and Mehler, Alexander},
  title     = {Towards an Information Theory of Complex Networks: Statistical
               Methods and Applications},
  year      = {2011},
  publisher = {Birkh{\"a}user},
  address   = {Boston/Basel},
  pagetotal = {395},
  website   = {http://link.springer.com/book/10.1007/978-0-8176-4904-3/page/1},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/InformationTheoryComplexNetworks.jpg}
}

Alexander Mehler, Andy Lücking and Peter Menke. 2011. Assessing Lexical Alignment in Spontaneous Direction Dialogue Data by Means of a Lexicon Network Model. Proceedings of 12th International Conference on Intelligent Text Processing and Computational Linguistics (CICLing), February 20–26, Tokyo, 368–379.

BibTeX

@inproceedings{Mehler:Luecking:Menke:2011:a,
  author    = {Mehler, Alexander and Lücking, Andy and Menke, Peter},
  title     = {Assessing Lexical Alignment in Spontaneous Direction Dialogue
               Data by Means of a Lexicon Network Model},
  booktitle = {Proceedings of 12th International Conference on Intelligent Text
               Processing and Computational Linguistics (CICLing), February 20--26,
               Tokyo},
  series    = {CICLing'11},
  pages     = {368--379},
  address   = {Berlin/New York},
  publisher = {Springer},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/titan-cicling-camera-ready.pdf},
  website   = {http://www.springerlink.com/content/g7p2250025u20010/},
  year      = {2011}
}

Peter Geibel, Alexander Mehler and Kai-Uwe Kühnberger. 2011. Learning Methods for Graph Models of Document Structure. Modeling, Learning and Processing of Text Technological Data Structures.

BibTeX

@incollection{Geibel:Mehler:Kuehnberger:2011:a,
  author    = {Geibel, Peter and Mehler, Alexander and Kühnberger, Kai-Uwe},
  title     = {Learning Methods for Graph Models of Document Structure},
  booktitle = {Modeling, Learning and Processing of Text Technological Data Structures},
  editor    = {Mehler, Alexander and Kühnberger, Kai-Uwe and Lobin, Henning and Lüngen, Harald
               and Storrer, Angelika and Witt, Andreas},
  year      = {2011},
  series    = {Studies in Computational Intelligence},
  publisher = {Springer},
  address   = {Berlin/New York},
  website   = {http://www.springerlink.com/content/p095331472h76v56/}
}

Alexander Mehler and Ulli Waltinger. 2011. Integrating Content and Structure Learning: A Model of Hypertext Zoning and Sounding. Modeling, Learning and Processing of Text Technological Data Structures.

BibTeX

@incollection{Mehler:Waltinger:2011:a,
  author    = {Mehler, Alexander and Waltinger, Ulli},
  title     = {Integrating Content and Structure Learning: A Model of Hypertext
               Zoning and Sounding},
  booktitle = {Modeling, Learning and Processing of Text Technological Data Structures},
  editor    = {Mehler, Alexander and Kühnberger, Kai-Uwe and Lobin, Henning and Lüngen, Harald
               and Storrer, Angelika and Witt, Andreas},
  year      = {2011},
  series    = {Studies in Computational Intelligence},
  publisher = {Springer},
  address   = {Berlin/New York},
  website   = {http://rd.springer.com/chapter/10.1007/978-3-642-22613-7_15}
}

Olga Abramov and Alexander Mehler. 2011. Automatic Language Classification by Means of Syntactic Dependency Networks. Journal of Quantitative Linguistics, 18(4):291–336.

BibTeX

@article{Abramov:Mehler:2011:a,
  author    = {Abramov, Olga and Mehler, Alexander},
  title     = {Automatic Language Classification by Means of Syntactic Dependency Networks},
  journal   = {Journal of Quantitative Linguistics},
  volume    = {18},
  number    = {4},
  pages     = {291--336},
  abstract  = {This article presents an approach to automatic language classification
               by means of linguistic networks. Networks of 11 languages were
               constructed from dependency treebanks, and the topology of these
               networks serves as input to the classification algorithm. The
               results match the genealogical similarities of these languages.
               In addition, we test two alternative approaches to automatic language
               classification – one based on n-grams and the other on quantitative
               typological indices. All three methods show good results in identifying
               genealogical groups. Beyond genetic similarities, network features
               (and feature combinations) offer a new source of typological information
               about languages. This information can contribute to a better understanding
               of the interplay of single linguistic phenomena observed in language.},
  website   = {http://www.researchgate.net/publication/220469321_Automatic_Language_Classification_by_means_of_Syntactic_Dependency_Networks},
  year      = {2011}
}

Alexander Mehler, Kai-Uwe Kühnberger, Henning Lobin, Harald Lüngen, Angelika Storrer and Andreas Witt. 2011. Modeling, Learning and Processing of Text Technological Data Structures. Ed. by Alexander Mehler, Kai-Uwe Kühnberger, Henning Lobin, Harald Lüngen, Angelika Storrer and Andreas Witt. Studies in Computational Intelligence. Springer.

BibTeX

@book{Mehler:Kuehnberger:Lobin:Luengen:Storrer:Witt:2011,
  editor    = {Mehler, Alexander and Kühnberger, Kai-Uwe and Lobin, Henning and Lüngen, Harald
               and Storrer, Angelika and Witt, Andreas},
  title     = {Modeling, Learning and Processing of Text Technological Data Structures},
  publisher = {Springer},
  series    = {Studies in Computational Intelligence},
  address   = {Berlin/New York},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/modelingLearningAndProcessing-medium.jpg},
  pagetotal = {400},
  website   = {/books/texttechnologybook/},
  year      = {2011}
}

Ulli Waltinger. 2011. On Social Semantics in Information Retrieval. Südwestdeutscher Verlag für Hochschulschriften. Zugl. Diss Univ. Bielefeld (2010).

BibTeX

@book{Waltinger:2011,
  author    = {Waltinger, Ulli},
  title     = {On Social Semantics in Information Retrieval},
  year      = {2011},
  publisher = {Südwestdeutscher Verlag für Hochschulschriften},
  address   = {Saarbrücken},
  note      = {Zugl. Diss Univ. Bielefeld (2010)},
  pdf       = {https://pub.uni-bielefeld.de/download/2302025/2302028},
  website   = {http://www.ulliwaltinger.de/on-social-semantics-in-information-retrieval/},
  abstract  = {In this thesis we analyze the performance of social semantics
               in textual information retrieval. By means of collaboratively
               constructed knowledge derived from web-based social networks,
               inducing both common-sense and domain-specific knowledge as constructed
               by a multitude of users, we will establish an improvement in performance
               of selected tasks within different areas of information retrieval.
               This work connects the concepts and the methods of social networks
               and the semantic web to support the analysis of a social semantic
               web that combines human intelligence with machine learning and
               natural language processing. In this context, social networks,
               as instances of the social web, are capable in delivering social
               network data and document collections on a tremendous scale, inducing
               thematic dynamics that cannot be achieved by traditional expert
               resources. The question of an automatic conversion, annotation
               and processing, however, is central to the debate of the benefits
               of the social semantic web. Which kind of technologies and methods
               are available, adequate and contribute to the processing of this
               rapidly rising flood of information and at the same time being
               capable of using the wealth of information in this large, but
               more importantly decentralized internet. The present work researches
               the performance of social semantic-induced categorization by means
               of different document models. We will shed light on the question,
               to which level social networks and social ontologies contribute
               to selected areas within the information retrieval area, such
               as automatically determining term and text associations, identifying
               topics, text and web genre categorization, and also the domain
               of sentiment analysis. We will show in extensive evaluations,
               comparing the classical apparatus of text categorization -- Vector
               Space Model, Latent Semantic Analysis and Support Vector Maschine
               -- that significant improvements can be obtained by considering
               the collaborative knowledge derived from the social web.}
}

Gerd Doeben-Henisch, Giuseppe Abrami, Marcus Pfaff and Marvin Struwe. Sept., 2011. Conscious learning semiotics systems to assist human persons (CLS2H). AFRICON, 2011, 1–7.

BibTeX

@inproceedings{Doebenhenisch:Abrami:Pfaff:Struwe:2011,
  author    = {Doeben-Henisch, Gerd and Abrami, Giuseppe and Pfaff, Marcus and Struwe, Marvin},
  title     = {Conscious learning semiotics systems to assist human persons (CLS2H)},
  booktitle = {AFRICON, 2011},
  pages     = {1--7},
  abstract  = {Challenged by the growing societal demand for Ambient Assistive
               Living (AAL) technologies, we are dedicated to develop intelligent
               technical devices which are able to communicate with human persons
               in a truly human-like manner. The core of the project is a simulation
               environment which enables the development of conscious learning
               semiotic agents which will be able to assist human persons in
               their daily life. We are reporting first results and future perspectives.},
  doi       = {10.1109/AFRCON.2011.6072043},
  issn      = {2153-0025},
  keywords  = {ambient assistive living;conscious learning semiotic
                   agents;conscious learning semiotics systems;human
                   persons;intelligent technical devices;simulation
                   environment;learning (artificial
                   intelligence);multi-agent systems;},
  month     = sep,
  pdf       = {http://www.doeben-henisch.de/gdhnp/csg/africon2011.pdf},
  website   = {http://www.researchgate.net/publication/261451874_Conscious_Learning_Semiotics_Systems_to_Assist_Human_Persons_(CLS(2)H)},
  year      = {2011}
}

Ulli Waltinger, Alexander Mehler, Mathias Lösch and Wolfram Horstmann. 2011. Hierarchical Classification of OAI Metadata Using the DDC Taxonomy. Advanced Language Technologies for Digital Libraries (ALT4DL), 29–40.

BibTeX

@incollection{Waltinger:Mehler:Loesch:Horstmann:2011,
  author    = {Waltinger, Ulli and Mehler, Alexander and Lösch, Mathias and Horstmann, Wolfram},
  title     = {Hierarchical Classification of OAI Metadata Using the DDC Taxonomy},
  booktitle = {Advanced Language Technologies for Digital Libraries (ALT4DL)},
  publisher = {Springer},
  editor    = {Raffaella Bernardi and Sally Chambers and Bjoern Gottfried and Frederique Segond
               and Ilya Zaihrayeu},
  series    = {LNCS},
  pages     = {29--40},
  address   = {Berlin},
  abstract  = {In the area of digital library services, the access to subject-specific
               metadata of scholarly publications is of utmost interest. One
               of the most prevalent approaches for metadata exchange is the
               XML-based Open Archive Initiative (OAI) Protocol for Metadata
               Harvesting (OAI-PMH). However, due to its loose requirements regarding
               metadata content there is no strict standard for consistent subject
               indexing specified, which is furthermore needed in the digital
               library domain. This contribution addresses the problem of automatic
               enhancement of OAI metadata by means of the most widely used universal
               classification schemes in libraries—the Dewey Decimal Classification
               (DDC). To be more specific, we automatically classify scientific
               documents according to the DDC taxonomy within three levels using
               a machine learning-based classifier that relies solely on OAI
               metadata records as the document representation. The results show
               an asymmetric distribution of documents across the hierarchical
               structure of the DDC taxonomy and issues of data sparseness. However,
               the performance of the classifier shows promising results on all
               three levels of the DDC.},
  website   = {http://www.springerlink.com/content/x20257512g818377/},
  year      = {2011}
}

Alexander Mehler, Silke Schwandt, Rüdiger Gleim and Bernhard Jussen. 2011. Der eHumanities Desktop als Werkzeug in der historischen Semantik: Funktionsspektrum und Einsatzszenarien. Journal for Language Technology and Computational Linguistics (JLCL), 26(1):97–117.

BibTeX

@article{Mehler:Schwandt:Gleim:Jussen:2011,
  author    = {Mehler, Alexander and Schwandt, Silke and Gleim, Rüdiger and Jussen, Bernhard},
  title     = {Der eHumanities Desktop als Werkzeug in der historischen Semantik:
               Funktionsspektrum und Einsatzszenarien},
  journal   = {Journal for Language Technology and Computational
                   Linguistics (JLCL)},
  volume    = {26},
  number    = {1},
  pages     = {97--117},
  abstract  = {Die Digital Humanities bzw. die Computational Humanities entwickeln
               sich zu eigenst{\"a}ndigen Disziplinen an der Nahtstelle von Geisteswissenschaft
               und Informatik. Diese Entwicklung betrifft zunehmend auch die
               Lehre im Bereich der geisteswissenschaftlichen Fachinformatik.
               In diesem Beitrag thematisieren wir den eHumanities Desktop als
               ein Werkzeug für diesen Bereich der Lehre. Dabei geht es genauer
               um einen Brückenschlag zwischen Geschichtswissenschaft und Informatik:
               Am Beispiel der historischen Semantik stellen wir drei Lehrszenarien
               vor, in denen der eHumanities Desktop in der geschichtswissenschaftlichen
               Lehre zum Einsatz kommt. Der Beitrag schliesst mit einer Anforderungsanalyse
               an zukünftige Entwicklungen in diesem Bereich.},
  pdf       = {http://media.dwds.de/jlcl/2011_Heft1/8.pdf},
  year      = {2011}
}

Tiansi Dong and Tim vor der Brück. 2011. Qualitative Spatial Knowledge Acquisition Based on the Connection Relation. Proceedings of the 3rd International Conference on Advanced Cognitive Technologies and Applications (COGNITIVE), 70–75.

BibTeX

@inproceedings{Dong:vor:der:Brueck:2011,
  author    = {Dong, Tiansi and vor der Brück, Tim},
  title     = {Qualitative Spatial Knowledge Acquisition Based on the Connection Relation},
  booktitle = {Proceedings of the 3rd International Conference on Advanced Cognitive
               Technologies and Applications (COGNITIVE)},
  year      = {2011},
  editor    = {Terry Bossomaier and Pascal Lorenz},
  address   = {Rome, Italy},
  pages     = {70--75},
  pdf       = {http://www.thinkmind.org/download.php?articleid=cognitive_2011_3_40_40123},
  website   = {http://www.thinkmind.org/index.php?view=article&articleid=cognitive_2011_3_40_40123},
  abstract  = {Research in cognitive psychology shows that the connection relation
               is the primitive spatial relation. This paper proposes a novel
               spatial knowledge representation of indoor environments based
               on the connection relation, and demonstrates how deictic orientation
               relations can be acquired from a map, which is constructed purely
               on connection relations between extended objects. Without loss
               of generality, we restrict indoor environments to be constructed
               by a set of rectangles, each representing either a room or a corridor.
               The term fiat cell is coined to represent a subjective partition
               along a corridor. Spatial knowledge includes rectangles, sides
               information of rectangles, connection relations among rectangles,
               and fiat cells of rectangles. Efficient algorithms are given for
               identifying one shortest path between two locations, transforming
               paths into fiat paths, and acquiring deictic orientations.}
}

Md. Zahurul Islam, Roland Mittmann and Alexander Mehler. 2011. Multilingualism in Ancient Texts: Language Detection by Example of Old High German and Old Saxon. GSCL conference on Multilingual Resources and Multilingual Applications (GSCL 2011), 28-30 September, Hamburg, Germany.

BibTeX

@inproceedings{Zahurul:Mittmann:Mehler:2011,
  author    = {Islam, Md. Zahurul and Mittmann, Roland and Mehler, Alexander},
  title     = {Multilingualism in Ancient Texts: Language Detection by Example
               of Old High German and Old Saxon},
  booktitle = {GSCL conference on Multilingual Resources and Multilingual Applications
               (GSCL 2011), 28-30 September, Hamburg, Germany},
  abstract  = {In this paper, we present an approach to language detection in
               streams of multilingual ancient texts. We introduce a supervised
               classifier that detects, amongst others, Old High German (OHG)
               and Old Saxon (OS). We evaluate our model by means of three experiments
               that show that language detection is possible even for dead languages.
               Finally, we present an experiment in unsupervised language detection
               as a tertium comparationis for our supervised classifier.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Multilingualism_in_Ancient_Texts_Language_Detection_by_Example_of_Old_High_German_and_Old_Saxon.pdf},
  timestamp = {2011.08.25},
  year      = {2011}
}

Veronika Ries and Andy Lücking. 2011. Multilingual Resources and Multilingual Applications: Proceedings of the German Society for Computational Linguistics 2011, 207–210.

BibTeX

@inproceedings{Ries:Luecking:2011,
  author    = {Ries, Veronika and Lücking, Andy},
  booktitle = {Multilingual Resources and Multilingual Applications: Proceedings
               of the German Society for Computational Linguistics 2011},
  pages     = {207--210},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Ries_Luecking.pdf},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/SoSaBiEC-poster.pdf},
  year      = {2011}
}

2010

Alexander Mehler. 2010. Minimum Spanning Markovian Trees: Introducing Context-Sensitivity into the Generation of Spanning Trees. Structural Analysis of Complex Networks, 381–401.

BibTeX

@incollection{Mehler:2010:a,
  author    = {Mehler, Alexander},
  title     = {Minimum Spanning Markovian Trees: Introducing Context-Sensitivity
               into the Generation of Spanning Trees},
  booktitle = {Structural Analysis of Complex Networks},
  publisher = {Birkh{\"a}user Publishing},
  editor    = {Dehmer, Matthias},
  pages     = {381--401},
  address   = {Basel},
  abstract  = {This chapter introduces a novel class of graphs: Minimum Spanning
               Markovian Trees (MSMTs). The idea behind MSMTs is to provide spanning
               trees that minimize the costs of edge traversals in a Markovian
               manner, that is, in terms of the path starting with the root of
               the tree and ending at the vertex under consideration. In a second
               part, the chapter generalizes this class of spanning trees in
               order to allow for damped Markovian effects in the course of spanning.
               These two effects, (1) the sensitivity to the contexts generated
               by consecutive edges and (2) the decreasing impact of more antecedent
               (or 'weakly remembered') vertices, are well known in cognitive
               modeling [6, 10, 21, 23]. In this sense, the chapter can also
               be read as an effort to introduce a graph model to support the
               simulation of cognitive systems. Note that MSMTs are not to be
               confused with branching Markov chains or Markov trees [20] as
               we focus on generating spanning trees from given weighted undirected
               networks.},
  website   = {https://www.researchgate.net/publication/226700676_Minimum_Spanning_Markovian_Trees_Introducing_Context-Sensitivity_into_the_Generation_of_Spanning_Trees},
  year      = {2010}
}

Rüdiger Gleim and Alexander Mehler. 2010. Computational Linguistics for Mere Mortals – Powerful but Easy-to-use Linguistic Processing for Scientists in the Humanities. Proceedings of LREC 2010.

BibTeX

@inproceedings{Gleim:Mehler:2010:b,
  author    = {Gleim, Rüdiger and Mehler, Alexander},
  title     = {Computational Linguistics for Mere Mortals -- Powerful but Easy-to-use
               Linguistic Processing for Scientists in the Humanities},
  booktitle = {Proceedings of LREC 2010},
  address   = {Malta},
  publisher = {ELDA},
  abstract  = {Delivering linguistic resources and easy-to-use methods to a broad
               public in the humanities is a challenging task. On the one hand
               users rightly demand easy to use interfaces but on the other hand
               want to have access to the full flexibility and power of the functions
               being offered. Even though a growing number of excellent systems
               exist which offer convenient means to use linguistic resources
               and methods, they usually focus on a specific domain, as for example
               corpus exploration or text categorization. Architectures which
               address a broad scope of applications are still rare. This article
               introduces the eHumanities Desktop, an online system for corpus
               management, processing and analysis which aims at bridging the
               gap between powerful command line tools and intuitive user interfaces.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/gleim_mehler_2010.pdf},
  year      = {2010}
}

Alexander Mehler, Andy Lücking and Petra Weiß. 2010. A Network Model of Interpersonal Alignment. Entropy, 12(6):1440–1483.

BibTeX

@article{Mehler:Weiss:Luecking:2010:a,
  author    = {Mehler, Alexander and Lücking, Andy and Wei{\ss}, Petra},
  title     = {A Network Model of Interpersonal Alignment},
  journal   = {Entropy},
  volume    = {12},
  number    = {6},
  pages     = {1440--1483},
  abstract  = {In dyadic communication, both interlocutors adapt to each other
               linguistically, that is, they align interpersonally. In this article,
               we develop a framework for modeling interpersonal alignment in
               terms of the structural similarity of the interlocutors’ dialog
               lexica. This is done by means of so-called two-layer time-aligned
               network series, that is, a time-adjusted graph model. The graph
               model is partitioned into two layers, so that the interlocutors’
               lexica are captured as subgraphs of an encompassing dialog graph.
               Each constituent network of the series is updated utterance-wise.
               Thus, both the inherent bipartition of dyadic conversations and
               their gradual development are modeled. The notion of alignment
               is then operationalized within a quantitative model of structure
               formation based on the mutual information of the subgraphs that
               represent the interlocutor’s dialog lexica. By adapting and further
               developing several models of complex network theory, we show that
               dialog lexica evolve as a novel class of graphs that have not
               been considered before in the area of complex (linguistic) networks.
               Additionally, we show that our framework allows for classifying
               dialogs according to their alignment status. To the best of our
               knowledge, this is the first approach to measuring alignment in
               communication that explores the similarities of graph-like cognitive
               representations.},
  doi       = {10.3390/e12061440},
  pdf       = {http://www.mdpi.com/1099-4300/12/6/1440/pdf},
  website   = {http://www.mdpi.com/1099-4300/12/6/1440/},
  year      = {2010}
}

Alexander Mehler, Serge Sharoff and Marina Santini. 2010. Genres on the Web: Computational Models and Empirical Studies. Ed. by Alexander Mehler, Serge Sharoff and Marina Santini. Springer.

BibTeX

@book{Mehler:Sharoff:Santini:2010:a,
  author    = {Mehler, Alexander and Sharoff, Serge and Santini, Marina},
  editor    = {Mehler, Alexander and Sharoff, Serge and Santini, Marina},
  title     = {Genres on the Web: Computational Models and Empirical Studies},
  publisher = {Springer},
  address   = {Dordrecht},
  abstract  = {The volume 'Genres on the Web' has been designed for a wide audience,
               from the expert to the novice. It is a required book for scholars,
               researchers and students who want to become acquainted with the
               latest theoretical, empirical and computational advances in the
               expanding field of web genre research. The study of web genre
               is an overarching and interdisciplinary novel area of research
               that spans from corpus linguistics, computational linguistics,
               NLP, and text-technology, to web mining, webometrics, social network
               analysis and information studies. This book gives readers a thorough
               grounding in the latest research on web genres and emerging document
               types. The book covers a wide range of web-genre focussed subjects,
               such as: the identification of the sources of web genres; automatic
               web genre identification; the presentation of structure-oriented
               models; and empirical case studies. One of the driving forces behind
               genre research is the idea of a genre-sensitive information system,
               which incorporates genre cues complementing the current keyword-based
               search and retrieval applications.},
  booktitle = {Genres on the Web: Computational Models and Empirical Studies},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/GenresOnTheWeb.jpg},
  pagetotal = {376},
  review    = {http://www.springerlink.com/content/ym07440380524721/},
  website   = {http://www.springer.com/computer/ai/book/978-90-481-9177-2},
  year      = {2010}
}

Tilmann Sutter and Alexander Mehler. 2010. Medienwandel als Wandel von Interaktionsformen – von frühen Medienkulturen zum Web 2.0. Ed. by Tilmann Sutter and Alexander Mehler. Verlag für Sozialwissenschaften.

BibTeX

@book{Sutter:Mehler:2010,
  author    = {Sutter, Tilmann and Mehler, Alexander},
  editor    = {Sutter, Tilmann and Mehler, Alexander},
  title     = {Medienwandel als Wandel von Interaktionsformen -- von frühen Medienkulturen
               zum Web 2.0},
  publisher = {Verlag für Sozialwissenschaften},
  address   = {Wiesbaden},
  abstract  = {Die Beitr{\"a}ge des Bandes untersuchen den Medienwandel von frühen
               europ{\"a}ischen Medienkulturen bis zu aktuellen Formen der Internetkommunikation
               unter soziologischer, kulturwissenschaftlicher und linguistischer
               Perspektive. Zwar haben sich die Massenmedien von den Beschr{\"a}nkungen
               sozialer Interaktionen gelöst, sie weisen dem Publikum aber eine
               distanzierte, blo{\ss} rezipierende Rolle zu. Dagegen eröffnen
               neue Formen 'interaktiver' Medien gesteigerte Möglichkeiten der
               Rückmeldung und der Mitgestaltung für die Nutzer. Der vorliegende
               Band fragt nach der Qualit{\"a}t dieses Medienwandels: Werden
               Medien tats{\"a}chlich interaktiv? Was bedeutet die Interaktivit{\"a}t
               neuer Medien? Werden die durch neue Medien eröffneten Beteiligungsmöglichkeiten
               realisiert?},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/Medienwandel.jpg},
  pagetotal = {289},
  website   = {http://www.springer.com/de/book/9783531156422},
  year      = {2010}
}

Tim vor der Brück and Holger Stenzhorn. 2010. Logical Ontology Validation Using an Automatic Theorem Prover. Proceedings of the 19th European Conference on Artificial Intelligence (ECAI), 491–496.

BibTeX

@inproceedings{vor:der:Brueck:Stenzhorn:2010,
  author    = {vor der Brück, Tim and Stenzhorn, Holger},
  title     = {Logical Ontology Validation Using an Automatic Theorem Prover},
  booktitle = {Proceedings of the 19th European Conference on Artificial Intelligence (ECAI)},
  pages     = {491--496},
  address   = {Lisbon, Portugal},
  abstract  = {Ontologies are utilized for a wide range of tasks, like information
               retrieval/extraction or text generation, and in a multitude of
               domains, such as biology, medicine or business and commerce. To
               be actually usable in such real-world scenarios, ontologies usually
               have to encompass a large number of factual statements. However,
               with increasing size, it becomes very difficult to ensure their
               complete correctness. This is particularly true in the case when
               an ontology is not hand-crafted but constructed (semi)automatically
               through text mining, for example. As a consequence, when inference
               mechanisms are applied on these ontologies, even minimal inconsistencies
               oftentimes lead to serious errors and are hard to trace back
               and find. This paper addresses this issue and describes a method
               to validate ontologies using an automatic theorem prover and MultiNet
               axioms. This logic-based approach allows to detect many inconsistencies,
               which are difficult or even impossible to identify through statistical
               methods or by manual investigation in reasonable time. To make
               this approach accessible for ontology developers, a graphical
               user interface is provided that highlights erroneous axioms directly
               in the ontology for quicker fixing.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/ECAI-216.pdf},
  year      = {2010}
}

Tim vor der Brück. 2010. Hypernymy Extraction Using a Semantic Network Representation. International Journal of Computational Linguistics and Applications, 1(1):105–119.

BibTeX

@article{vor:der:Brueck:2010,
  author    = {vor der Brück, Tim},
  title     = {Hypernymy Extraction Using a Semantic Network Representation},
  journal   = {International Journal of Computational Linguistics and
               Applications},
  volume    = {1},
  number    = {1},
  pages     = {105--119},
  abstract  = {There are several approaches to detect hypernymy relations from
               texts by text mining. Usually these approaches are based on supervised
               learning and in a first step are extracting several patterns.
               These patterns are then applied to previously unseen texts and
               used to recognize hypernym/hyponym pairs. Normally these approaches
               are only based on a surface representation or a syntactical tree
               structure, i.e., constituency or dependency trees derived by a
               syntactical parser. In this work, however, we present an approach
               that operates directly on a semantic network (SN), which is generated
               by a deep syntactico-semantic analysis. Hyponym/hypernym pairs
               are then extracted by the application of graph matching. This
               algorithm is combined with a shallow approach enriched with semantic
               information.},
  pdf       = {http://www.gelbukh.com/ijcla/2010-1-2/Hypernymy Extraction Using.pdf},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.358.533},
  year      = {2010}
}

Tim vor der Brück. 2010. Learning Deep Semantic Patterns for Hypernymy Extraction Following the Minimum Description Length Principle. Proceedings of the 29th International Conference on Lexis and Grammar (LGC), 39–49.

BibTeX

@inproceedings{vor:der:Brueck:2010:a,
  author    = {vor der Brück, Tim},
  title     = {Learning Deep Semantic Patterns for Hypernymy Extraction Following
               the Minimum Description Length Principle},
  booktitle = {Proceedings of the 29th International Conference on Lexis and Grammar (LGC)},
  address   = {Belgrade, Serbia},
  pages     = {39--49},
  year      = {2010},
  abstract  = {Current approaches of hypernymy acquisition are mostly based on
               syntactic or surface representations and extract hypernymy relations
               between surface word forms and not word readings. In this paper
               we present a purely semantic approach for hypernymy extraction
               based on semantic networks (SNs). This approach employs a set
               of patterns sub0 (a1,a2) <-- premise where the premise part of
               a pattern is given by a SN. Furthermore this paper describes how
               the patterns can be derived by relational statistical learning
               following the Minimum Description Length principle (MDL). The
               evaluation demonstrates the usefulness of the learned patterns
               and also of the entire hypernymy extraction system.}
}

Tim vor der Brück. 2010. Learning Semantic Network Patterns for Hypernymy Extraction. Proceedings of the 6th Workshop on Ontologies and Lexical Resources (OntoLex), 38–47.

BibTeX

@inproceedings{vor:der:Brueck:2010:b,
  author    = {vor der Brück, Tim},
  title     = {Learning Semantic Network Patterns for Hypernymy Extraction},
  booktitle = {Proceedings of the 6th Workshop on Ontologies and Lexical Resources (OntoLex)},
  address   = {Beijing, China},
  pages     = {38--47},
  year      = {2010},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/ontolex_brueck_13_2010.pdf},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.358.533}
}

Sven Hartrumpf, Tim vor der Brück and Christian Eichhorn. 2010. Detecting Duplicates with Shallow and Parser-based Methods. Proceedings of the 6th International Conference on Natural Language Processing and Knowledge Engineering (NLPKE), 142–149.

BibTeX

@inproceedings{vor:der:Brueck:Hartrumpf:Eichhorn:2010:a,
  author    = {Hartrumpf, Sven and vor der Brück, Tim and Eichhorn, Christian},
  title     = {Detecting Duplicates with Shallow and Parser-based Methods},
  booktitle = {Proceedings of the 6th International Conference on Natural Language
               Processing and Knowledge Engineering (NLPKE)},
  address   = {Beijing, China},
  pages     = {142--149},
  year      = {2010},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/semdupl-ieee.pdf},
  website   = {http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=5587838&abstractAccess=no&userType=inst},
  abstract  = {Identifying duplicate texts is important in many areas like plagiarism
               detection, information retrieval, text summarization, and question
               answering. Current approaches are mostly surface-oriented (or
               use only shallow syntactic representations) and see each text
               only as a token list. In this work however, we describe a deep,
               semantically oriented method based on semantic networks which
               are derived by a syntactico-semantic parser. Semantically identical
               or similar semantic networks for each sentence of a given base
               text are efficiently retrieved by using a specialized semantic
               network index. In order to detect many kinds of paraphrases the
               current base semantic network is varied by applying inferences:
               lexico-semantic relations, relation axioms, and meaning postulates.
               Some important phenomena occurring in difficult-to-detect duplicates
               are discussed. The deep approach profits from background knowledge,
               whose acquisition from corpora like Wikipedia is explained briefly.
               This deep duplicate recognizer is combined with two shallow duplicate
               recognizers in order to guarantee high recall for texts which
               are not fully parsable. The evaluation shows that the combined
               approach preserves recall and increases precision considerably,
               in comparison to traditional shallow methods. For the evaluation,
               a standard corpus of German plagiarisms was extended by four diverse
               components with an emphasis on duplicates (and not just plagiarisms),
               e.g., news feed articles from different web sources and two translations
               of the same short story.}
}

Sven Hartrumpf, Tim vor der Brück and Christian Eichhorn. September, 2010. Semantic Duplicate Identification with Parsing and Machine Learning. Proceedings of the 13th International Conference on Text, Speech and Dialogue (TSD 2010), 6231:84–92.

BibTeX

@inproceedings{vor:der:Brueck:Hartrumpf:Eichhorn:2010:b,
  author    = {Hartrumpf, Sven and vor der Brück, Tim and Eichhorn, Christian},
  title     = {Semantic Duplicate Identification with Parsing and Machine Learning},
  booktitle = {Proceedings of the 13th International Conference on Text, Speech
               and Dialogue (TSD 2010)},
  editor    = {Petr Sojka and Aleš Horák and Ivan Kopeček and Karel Pala},
  volume    = {6231},
  series    = {Lecture Notes in Artificial Intelligence},
  pages     = {84--92},
  address   = {Brno, Czech Republic},
  abstract  = {Identifying duplicate texts is important in many areas like plagiarism
               detection, information retrieval, text summarization, and question
               answering. Current approaches are mostly surface-oriented (or
               use only shallow syntactic representations) and see each text
               only as a token list. In this work however, we describe a deep,
               semantically oriented method based on semantic networks which
               are derived by a syntactico-semantic parser. Semantically identical
               or similar semantic networks for each sentence of a given base
               text are efficiently retrieved by using a specialized index. In
               order to detect many kinds of paraphrases the semantic networks
               of a candidate text are varied by applying inferences: lexico-semantic
               relations, relation axioms, and meaning postulates. Important
               phenomena occurring in difficult duplicates are discussed. The
               deep approach profits from background knowledge, whose acquisition
               from corpora is explained briefly. The deep duplicate recognizer
               is combined with two shallow duplicate recognizers in order to
               guarantee a high recall for texts which are not fully parsable.
               The evaluation shows that the combined approach preserves recall
               and increases precision considerably in comparison to traditional
               shallow methods.},
  month     = {sep},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/semdupl-paper.pdf},
  website   = {http://link.springer.com/chapter/10.1007/978-3-642-15760-8_12},
  year      = {2010}
}

Tim vor der Brück and Hermann Helbig. 2010. Retrieving Meronyms from Texts Using An Automated Theorem Prover. Journal for Language Technology and Computational Linguistics (JLCL), 25(1):57–81.

BibTeX

@article{vor:der:Brueck:Helbig:2010:b,
  author    = {vor der Brück, Tim and Helbig, Hermann},
  title     = {Retrieving Meronyms from Texts Using An Automated Theorem Prover},
  journal   = {Journal for Language Technology and Computational
               Linguistics (JLCL)},
  volume    = {25},
  number    = {1},
  pages     = {57--81},
  abstract  = {In this paper we present a truly semantic-oriented approach for
               meronymy relation extraction. It directly operates, instead of
               syntactic trees or surface representations, on semantic networks
               (SNs). These SNs are derived from texts (in our case, the German
               Wikipedia) by a deep linguistic syntactico-semantic analysis.
               The extraction of meronym/holonym pairs is carried out by using,
               among other components, an automated theorem prover, whose work
               is based on a set of logical axioms. The corresponding algorithm
               is combined with a shallow approach enriched with semantic information.
               Through the employment of logical methods, the recall and precision
               of the semantic patterns pertinent to the extracted relations
               can be increased considerably.},
  pdf       = {http://www.jlcl.org/2010_Heft1/tim_vorderbrueck.pdf},
  year      = {2010}
}

Andy Lücking and Kirsten Bergmann. July, 2010. Introducing the Bielefeld SaGA Corpus.

BibTeX

@misc{Luecking:Bergmann:2010,
  author    = {L{\"u}cking, Andy and Bergmann, Kirsten},
  title     = {Introducing the {B}ielefeld {SaGA} Corpus},
  howpublished = {Talk given at \textit{Gesture: Evolution, Brain, and
                   Linguistic Structures.} 4th Conference of the
                   International Society for Gesture Studies (ISGS).
                   Europa Universit{\"a}t Viadrina Frankfurt/Oder},
  abstract  = {People communicate multimodally. Most prominently, they co-produce
               speech and gesture. How do they do that? Studying the interplay
               of both modalities has to be informed by empirically observed
               communication behavior. We present a corpus built of speech and
               gesture data gained in a controlled study. We describe 1) the
               setting underlying the data; 2) annotation of the data; 3) reliability
               evaluation methods and results; and 4) applications of the corpus
               in the research domain of speech and gesture alignment.},
  address   = {Europa Universit{\"a}t Viadrina Frankfurt/Oder},
  day       = {28},
  month     = {jul},
  year      = {2010}
}

Andy Lücking. July, 2010. A Semantic Account for Iconic Gestures. Gesture: Evolution, Brain, and Linguistic Structures, 210.

BibTeX

@inproceedings{Luecking:2010,
  author    = {Lücking, Andy},
  title     = {A Semantic Account for Iconic Gestures},
  booktitle = {Gesture: Evolution, Brain, and Linguistic Structures},
  pages     = {210},
  address   = {Europa Universit{\"a}t Viadrina Frankfurt/Oder},
  organization = {4th Conference of the International Society for
                   Gesture Studies (ISGS)},
  keywords  = {own},
  month     = {jul},
  pdf       = {https://pub.uni-bielefeld.de/download/2318565/2319962},
  website   = {http://pub.uni-bielefeld.de/publication/2318565},
  year      = {2010}
}

Andy Lücking, Kirsten Bergmann, Florian Hahn, Stefan Kopp and Hannes Rieser. May, 2010. The Bielefeld Speech and Gesture Alignment Corpus (SaGA). Multimodal Corpora: Advances in Capturing, Coding and Analyzing Multimodality, 92–98.

BibTeX

@inproceedings{Luecking:et:al:2010,
  author    = {Lücking, Andy and Bergmann, Kirsten and Hahn, Florian and Kopp, Stefan
               and Rieser, Hannes},
  title     = {The Bielefeld Speech and Gesture Alignment Corpus (SaGA)},
  booktitle = {Multimodal Corpora: Advances in Capturing, Coding and Analyzing Multimodality},
  pages     = {92--98},
  address   = {Malta},
  organization = {7th International Conference for Language Resources
                   and Evaluation (LREC 2010)},
  abstract  = {People communicate multimodally. Most prominently, they co-produce
               speech and gesture. How do they do that? Studying the interplay
               of both modalities has to be informed by empirically observed
               communication behavior. We present a corpus built of speech and
               gesture data gained in a controlled study. We describe 1) the
               setting underlying the data; 2) annotation of the data; 3) reliability
               evaluation methods and results; and 4) applications of the corpus
               in the research domain of speech and gesture alignment.},
  keywords  = {own},
  month     = {may},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/saga-corpus.pdf},
  website   = {http://pub.uni-bielefeld.de/publication/2001935},
  year      = {2010}
}

Md. Zahurul Islam, Jörg Tiedemann and Andreas Eisele. 2010. English to Bangla Phrase-Based Machine Translation. The 14th Annual Conference of The European Association for Machine Translation. Saint-Raphaël, France, 27-28 May.

BibTeX

@inproceedings{Zahurul:Tiedemann:Eisele:2010,
  author    = {Islam, Md. Zahurul and Tiedemann, Jörg and Eisele, Andreas},
  title     = {English to Bangla Phrase-Based Machine Translation},
  booktitle = {The 14th Annual Conference of The European Association for Machine
               Translation. Saint-Raphaël, France, 27-28 May},
  owner     = {zahurul},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/English_to_Bangla_Phrase–Based_Machine_Translation.pdf},
  timestamp = {2011.08.02},
  year      = {2010}
}

Ulli Waltinger. May, 2010. GermanPolarityClues: A Lexical Resource for German Sentiment Analysis. Proceedings of the Seventh conference on International Language Resources and Evaluation (LREC '10).

BibTeX

@inproceedings{Waltinger:2010:a,
  author    = {Waltinger, Ulli},
  title     = {GermanPolarityClues: A Lexical Resource for German Sentiment Analysis},
  booktitle = {Proceedings of the Seventh conference on International Language
               Resources and Evaluation (LREC '10)},
  editor    = {Nicoletta Calzolari and Khalid Choukri and Bente Maegaard
               and Joseph Mariani and Jan Odijk and Stelios Piperidis and Mike Rosner
               and Daniel Tapias},
  address   = {Valletta, Malta},
  publisher = {European Language Resources Association (ELRA)},
  date_0    = {2010-05},
  isbn      = {2-9517408-6-7},
  language  = {english},
  month     = {may},
  pdf       = {http://www.ulliwaltinger.de/pdf/91_Paper.pdf},
  website   = {http://www.ulliwaltinger.de/sentiment/},
  year      = {2010}
}

Alexander Mehler, Petra Weiß, Peter Menke and Andy Lücking. 2010. Towards a Simulation Model of Dialogical Alignment. Proceedings of the 8th International Conference on the Evolution of Language (Evolang8), 14-17 April 2010, Utrecht, 238–245.

BibTeX

@inproceedings{Mehler:Weiss:Menke:Luecking:2010,
  author    = {Mehler, Alexander and Wei{\ss}, Petra and Menke, Peter and Lücking, Andy},
  title     = {Towards a Simulation Model of Dialogical Alignment},
  booktitle = {Proceedings of the 8th International Conference on the Evolution
               of Language (Evolang8), 14-17 April 2010, Utrecht},
  pages     = {238--245},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Alexander_Mehler_Petra_Weiss_Peter_Menke_Andy_Luecking.pdf},
  website   = {http://www.let.uu.nl/evolang2010.nl/},
  year      = {2010}
}

Fiorella Foscarini, Yunhyong Kim, Christopher A. Lee, Alexander Mehler, Gillian Oliver and Seamus Ross. 2010. On the Notion of Genre in Digital Preservation. Automation in Digital Preservation.

BibTeX

@inproceedings{Foscarini:Kim:Lee:Mehler:Oliver:Ross:2010,
  author    = {Foscarini, Fiorella and Kim, Yunhyong and Lee, Christopher A.
               and Mehler, Alexander and Oliver, Gillian and Ross, Seamus},
  title     = {On the Notion of Genre in Digital Preservation},
  booktitle = {Automation in Digital Preservation},
  editor    = {Chanod, Jean-Pierre and Dobreva, Milena and Rauber, Andreas and Ross, Seamus},
  series    = {Dagstuhl Seminar Proceedings},
  number    = {10291},
  publisher = {Schloss Dagstuhl - Leibniz-Zentrum fuer Informatik,
                   Germany},
  address   = {Dagstuhl, Germany},
  year      = {2010},
  issn      = {1862-4405},
  annote    = {Keywords: Digital preservation, genre analysis,
                   context modeling, diplomatics, information retrieval},
  pdf       = {http://drops.dagstuhl.de/opus/volltexte/2010/2763/pdf/10291.MehlerAlexander.Paper.2763.pdf},
  website   = {http://drops.dagstuhl.de/opus/volltexte/2010/2763}
}

Alexander Mehler, Rüdiger Gleim, Ulli Waltinger and Nils Diewald. 2010. Time Series of Linguistic Networks by Example of the Patrologia Latina. Proceedings of INFORMATIK 2010: Service Science, September 27 - October 01, 2010, Leipzig, 2:609–616.

BibTeX

@inproceedings{Mehler:Gleim:Waltinger:Diewald:2010,
  author    = {Mehler, Alexander and Gleim, Rüdiger and Waltinger, Ulli and Diewald, Nils},
  title     = {Time Series of Linguistic Networks by Example of the Patrologia Latina},
  booktitle = {Proceedings of INFORMATIK 2010: Service Science, September 27
               - October 01, 2010, Leipzig},
  editor    = {F{\"a}hnrich, Klaus-Peter and Franczyk, Bogdan},
  volume    = {2},
  series    = {Lecture Notes in Informatics},
  pages     = {609--616},
  publisher = {GI},
  pdf       = {http://subs.emis.de/LNI/Proceedings/Proceedings176/586.pdf},
  year      = {2010}
}

Rüdiger Gleim, Paul Warner and Alexander Mehler. 2010. eHumanities Desktop - An Architecture for Flexible Annotation in Iconographic Research. Proceedings of the 6th International Conference on Web Information Systems and Technologies (WEBIST '10), April 7-10, 2010, Valencia.

BibTeX

@inproceedings{Gleim:Warner:Mehler:2010,
  author    = {Gleim, Rüdiger and Warner, Paul and Mehler, Alexander},
  year      = {2010},
  title     = {eHumanities Desktop - An Architecture for Flexible Annotation
               in Iconographic Research},
  booktitle = {Proceedings of the 6th International Conference on Web Information
               Systems and Technologies (WEBIST '10), April 7-10, 2010, Valencia},
  website   = {https://www.researchgate.net/publication/220724277_eHumanities_Desktop_-_An_Architecture_for_Flexible_Annotation_in_Iconographic_Research},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/gleim_warner_mehler_2010.pdf}
}

Peter Menke and Alexander Mehler. 2010. The Ariadne System: A flexible and extensible framework for the modeling and storage of experimental data in the humanities. Proceedings of LREC 2010.

BibTeX

@inproceedings{Menke:Mehler:2010,
  author    = {Menke, Peter and Mehler, Alexander},
  title     = {The Ariadne System: A flexible and extensible framework for the
               modeling and storage of experimental data in the humanities},
  booktitle = {Proceedings of LREC 2010},
  publisher = {ELDA},
  address   = {Malta},
  year      = {2010},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/menke_mehler_2010.pdf},
  website   = {http://arnetminer.org/publication/the-ariadne-system-a-flexible-and-extensible-framework-for-the-modeling-and-storage-of-experimental-data-in-the-humanities-2839925.html},
  abstract  = {This paper introduces the Ariadne Corpus Management System. First,
               the underlying data model is presented which enables users to
               represent and process heterogeneous data sets within a single,
               consistent framework. Secondly, a set of automatized procedures
               is described that offers assistance to researchers in various
               data-related use cases. Finally, an approach to easy yet powerful
               data retrieval is introduced in form of a specialised querying
               language for multimodal data.}
}

Tilmann Sutter and Alexander Mehler. 2010. Einleitung: Der aktuelle Medienwandel im Blick einer interdisziplinären Medienwissenschaft. In: Medienwandel als Wandel von Interaktionsformen, 7–16. Ed. by Tilmann Sutter and Alexander Mehler. VS Verlag für Sozialwissenschaften.

BibTeX

@incollection{Sutter2010,
  author    = {Sutter, Tilmann and Mehler, Alexander},
  editor    = {Sutter, Tilmann and Mehler, Alexander},
  title     = {Einleitung: Der aktuelle Medienwandel im Blick einer interdisziplin{\"a}ren
               Medienwissenschaft},
  pages     = {7--16},
  publisher = {VS Verlag f{\"u}r Sozialwissenschaften},
  address   = {Wiesbaden},
  abstract  = {Die Herausforderung, die der Wandel von Kommunikationsmedien f{\"u}r
               die Medienwissenschaft darstellt, resultiert nicht nur aus der
               ungeheuren Beschleunigung des Medienwandels. Die Herausforderung
               stellt sich auch mit der Frage, welches die neuen Formen und Strukturen
               sind, die aus dem Wandel der Medien hervorgehen. R{\"u}ckt man
               diese Frage in den Fokus der {\"U}berlegungen, kommen erstens
               Entwicklungen im Wechsel von Massenmedien zu neuen, „interaktiven``
               Medien in den Blick. Dies betrifft den Wandel von den alten Medien
               in Form von Einwegkommunikation zu den neuen Medien in Form von
               Netzkommunikation. Dieser Wandel wurde in zahlreichen Analysen
               als eine Revolution beschrieben: Im Unterschied zur einseitigen,
               r{\"u}ckkopplungsarmen Kommunikationsform der Massenmedien sollen
               neue, computergest{\"u}tzte Formen der Medienkommunikation „interaktiv``
               sein, d.h. gesteigerte R{\"u}ckkopplungs- und Eingriffsm{\"o}glichkeiten
               f{\"u}r die Adressaten und Nutzer bieten. Sozialwissenschaftlich
               bedeutsam ist dabei die Einsch{\"a}tzung der Qualit{\"a}t und
               des Umfangs dieser neuen M{\"o}glichkeiten und Leistungen. Denn
               bislang bedeutete Medienwandel im Kern eine zunehmende Ausdifferenzierung
               alter und neuer Medien mit je spezifischen Leistungen, d.h. neue
               Medien ersetzen die {\"a}lteren nicht, sondern sie erg{\"a}nzen
               und erweitern sie. Allerdings wird im Zuge des aktuellen Medienwandels
               immer deutlicher, dass die neuen Medien durchaus imstande sind,
               die Leistungen massenmedialer Verbreitung von Kommunikation zu
               {\"u}bernehmen. Stehen wir also, wie das schon seit l{\"a}ngerem
               k{\"u}hn vorhergesagt wird, vor der Etablierung eines Universalmediums,
               das in der Lage ist, die Formen und Funktionen anderer Medien
               zu {\"u}bernehmen?},
  booktitle = {Medienwandel als Wandel von Interaktionsformen},
  doi       = {10.1007/978-3-531-92292-8_1},
  isbn      = {978-3-531-92292-8},
  url       = {https://doi.org/10.1007/978-3-531-92292-8_1},
  year      = {2010}
}

Steffen Eger and Ineta Sejane. 2010. Computing Semantic Similarity from Bilingual Dictionaries. Proceedings of the 10th International Conference on the Statistical Analysis of Textual Data (JADT-2010), 1217–1225.

BibTeX

@inproceedings{Eger:Sejane:2010,
  author    = {Eger, Steffen and Sejane, Ineta},
  title     = {Computing Semantic Similarity from Bilingual Dictionaries},
  booktitle = {Proceedings of the 10th International Conference on the Statistical
               Analysis of Textual Data (JADT-2010)},
  pages     = {1217--1225},
  address   = {Rome, Italy},
  publisher = {JADT-2010},
  pdf       = {http://www.ledonline.it/ledonline/JADT-2010/allegati/JADT-2010-1217-1226_167-Eger.pdf},
  year      = {2010}
}

Tim vor der Brück and Hermann Helbig. 2010. Validating Meronymy Hypotheses with Support Vector Machines and Graph Kernels. Proceedings of the Ninth International Conference on Machine Learning and Applications (ICMLA), 243–250.

BibTeX

@inproceedings{vor:der:Brueck:Helbig:2010:a,
  author    = {vor der Brück, Tim and Helbig, Hermann},
  title     = {Validating Meronymy Hypotheses with Support Vector Machines and Graph Kernels},
  booktitle = {Proceedings of the Ninth International Conference on Machine Learning
               and Applications (ICMLA)},
  pages     = {243--250},
  publisher = {IEEE Press},
  address   = {Washington, D.C.},
  year      = {2010},
  website   = {http://www.computer.org/csdl/proceedings/icmla/2010/4300/00/4300a243-abs.html},
  abstract  = {There is a substantial body of work on the extraction of relations
               from texts, most of which is based on pattern matching or on applying
               tree kernel functions to syntactic structures. Whereas pattern
               application is usually more efficient, tree kernels can be superior
               when assessed by the F-measure. In this paper, we introduce a
               hybrid approach to extracting meronymy relations, which is based
               on both patterns and kernel functions. In a first step, meronymy
               relation hypotheses are extracted from a text corpus by applying
               patterns. In a second step these relation hypotheses are validated
               by using several shallow features and a graph kernel approach.
               In contrast to other meronymy extraction and validation methods
               which are based on surface or syntactic representations we use
               a purely semantic approach based on semantic networks. This involves
               analyzing each sentence of the Wikipedia corpus by a deep syntactico-semantic
               parser and converting it into a semantic network. Meronymy relation
               hypotheses are extracted from the semantic networks by means of
               an automated theorem prover, which employs a set of logical axioms
               and patterns in the form of semantic networks. The meronymy candidates
               are then validated by means of a graph kernel approach based on
               common walks. The evaluation shows that this method achieves considerably
               higher accuracy, recall, and F-measure than a method using purely
               shallow validation.}
}

2009

Marina Santini, Alexander Mehler and Serge Sharoff. 2009. Riding the Rough Waves of Genre on the Web: Concepts and Research Questions. Genres on the Web: Computational Models and Empirical Studies, 3–32.

BibTeX

@incollection{Santini:Mehler:Sharoff:2009,
  author    = {Santini, Marina and Mehler, Alexander and Sharoff, Serge},
  title     = {Riding the Rough Waves of Genre on the Web: Concepts and Research Questions},
  booktitle = {Genres on the Web: Computational Models and Empirical Studies},
  publisher = {Springer},
  editor    = {Mehler, Alexander and Sharoff, Serge and Santini, Marina},
  pages     = {3--32},
  address   = {Berlin/New York},
  abstract  = {This chapter outlines the state of the art of empirical and computational
               webgenre research. First, it highlights why the concept of genre
               is profitable for a range of disciplines. At the same time, it
               lists a number of recent interpretations that can inform and influence
               present and future genre research. Last but not least, it breaks
               down a series of open issues that relate to the modelling of the
               concept of webgenre in empirical and computational studies.},
  year      = {2009}
}

Alexander Mehler, Rüdiger Gleim, Ulli Waltinger, Alexandra Ernst, Dietmar Esch and Tobias Feith. 2009. eHumanities Desktop – eine webbasierte Arbeitsumgebung für die geisteswissenschaftliche Fachinformatik. Proceedings of the Symposium "Sprachtechnologie und eHumanities", 26.–27. Februar, Duisburg-Essen University.

BibTeX

@inproceedings{Mehler:Gleim:Waltinger:Ernst:Esch:Feith:2009,
  author    = {Mehler, Alexander and Gleim, Rüdiger and Waltinger, Ulli and Ernst, Alexandra
               and Esch, Dietmar and Feith, Tobias},
  year      = {2009},
  title     = {eHumanities Desktop – eine webbasierte Arbeitsumgebung für die
               geisteswissenschaftliche Fachinformatik},
  booktitle = {Proceedings of the Symposium "Sprachtechnologie und eHumanities",
               26.–27. Februar, Duisburg-Essen University},
  website   = {http://duepublico.uni-duisburg-essen.de/servlets/DocumentServlet?id=37041},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_gleim_waltinger_ernst_esch_feith_2009.pdf}
}

Benno Wagner, Alexander Mehler, Christian Wolff and Bernhard Dotzler. 2009. Bausteine eines Literary Memory Information System (LiMeS) am Beispiel der Kafka-Forschung. Proceedings of the Symposium "Sprachtechnologie und eHumanities", 26.–27. Februar, Duisburg-Essen University.

BibTeX

@inproceedings{Wagner:Mehler:Wolff:Dotzler:2009,
  author    = {Wagner, Benno and Mehler, Alexander and Wolff, Christian and Dotzler, Bernhard},
  title     = {Bausteine eines Literary Memory Information System (LiMeS) am
               Beispiel der Kafka-Forschung},
  booktitle = {Proceedings of the Symposium "Sprachtechnologie und eHumanities",
               26.–27. Februar, Duisburg-Essen University},
  year      = {2009},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/wagner_mehler_wolff_dotzler_2009.pdf},
  website   = {http://epub.uni-regensburg.de/6795/},
  abstract  = {In dem Paper beschreiben wir Bausteine eines Literary Memory Information
               System (LiMeS), das die literaturwissenschaftliche Erforschung
               von so genannten Matrixtexten – das sind Prim{\"a}rtexte eines
               bestimmten literarischen Gesamtwerks – unter dem Blickwinkel gro{\ss}er
               Mengen so genannter Echotexte (Topia 1984; Wagner/Reinhard 2007)
               – das sind Subtexte im Sinne eines literaturwissenschaftlichen
               Intertextualit{\"a}tsbegriffs – ermöglicht. Den Ausgangspunkt
               dieses computerphilologischen Informationssystems bildet ein Text-Mining-Modell
               basierend auf dem Intertextualit{\"a}tsbegriff in Verbindung mit
               dem Begriff des Semantic Web (Mehler, 2004b, 2005a, b, Wolff 2005).
               Wir zeigen, inwiefern dieses Modell über bestehende Informationssystemarchitekturen
               hinausgeht und schlie{\ss}en einen Brückenschlag zur derzeitigen
               Entwicklung von Arbeitsumgebungen in der geisteswissenschaftlichen
               Fachinformatik in Form eines eHumanities Desktop.}
}

Ulli Waltinger, Alexander Mehler and Armin Wegner. 2009. A Two-Level Approach to Web Genre Classification. Proceedings of the 5th International Conference on Web Information Systems and Technologies (WEBIST '09), March 23-26, 2009, Lisboa.

BibTeX

@inproceedings{Waltinger:Mehler:Wegner:2009,
  author    = {Waltinger, Ulli and Mehler, Alexander and Wegner, Armin},
  title     = {A Two-Level Approach to Web Genre Classification},
  booktitle = {Proceedings of the 5th International Conference on Web Information
               Systems and Technologies (WEBIST '09), March 23-26, 2009, Lisboa},
  year      = {2009},
  pdf       = {http://www.ulliwaltinger.de/pdf/Webist_2009_TwoLevel_Genre_Classification_WaltingerMehlerWegner.pdf},
  abstract  = {This paper presents an approach of two-level categorization of
               web pages. In contrast to related approaches the model additionally
               explores and categorizes functionally and thematically demarcated
               segments of the hypertext types to be categorized. By classifying
               these segments conclusions can be drawn about the type of the
               corresponding compound web document.}
}

Alexander Mehler. 2009. Structure Formation in the Web. A Graph-Theoretical Model of Hypertext Types. Linguistic Modeling of Information and Markup Languages. Contributions to Language Technology.

BibTeX

@incollection{Mehler:2009:b,
  author    = {Mehler, Alexander},
  title     = {Structure Formation in the Web. A Graph-Theoretical Model of Hypertext Types},
  booktitle = {Linguistic Modeling of Information and Markup Languages. Contributions
               to Language Technology},
  editor    = {Witt, Andreas and Metzing, Dieter},
  series    = {Text, Speech and Language Technology},
  publisher = {Springer},
  address   = {Dordrecht},
  year      = {2009},
  website   = {http://www.springerlink.com/content/t27782w8j2125112/},
  abstract  = {In this chapter we develop a representation model of web document
               networks. Based on the notion of uncertain web document structures,
               the model is defined as a template which grasps nested manifestation
               levels of hypertext types. Further, we specify the model on the
               conceptual, formal and physical level and exemplify it by reconstructing
               competing web document models.}
}

Rüdiger Gleim, Alexander Mehler, Ulli Waltinger and Peter Menke. 2009. eHumanities Desktop – An extensible Online System for Corpus Management and Analysis. 5th Corpus Linguistics Conference, University of Liverpool.

BibTeX

@inproceedings{Gleim:Mehler:Waltinger:Menke:2009,
  author    = {Gleim, Rüdiger and Mehler, Alexander and Waltinger, Ulli and Menke, Peter},
  title     = {eHumanities Desktop – An extensible Online System for Corpus Management
               and Analysis},
  booktitle = {5th Corpus Linguistics Conference, University of Liverpool},
  year      = {2009},
  pdf       = {http://www.ulliwaltinger.de/pdf/eHumanitiesDesktop-AnExtensibleOnlineSystem-CL2009.pdf},
  website   = {http://www.ulliwaltinger.de/ehumanities-desktop-an-extensible-online-system-for-corpus-management-and-analysis/},
  abstract  = {This paper presents the eHumanities Desktop - an online system
               for corpus management and analysis in support of computing in
               the humanities. Design issues and the overall architecture are
               described, as well as an outline of the applications offered by
               the system.}
}

Alexander Mehler and Andy Lücking. 2009. A Structural Model of Semiotic Alignment: The Classification of Multimodal Ensembles as a Novel Machine Learning Task. Proceedings of IEEE Africon 2009, September 23-25, Nairobi, Kenya.

BibTeX

@inproceedings{Mehler:Luecking:2009,
  author    = {Mehler, Alexander and Lücking, Andy},
  title     = {A Structural Model of Semiotic Alignment: The Classification of
               Multimodal Ensembles as a Novel Machine Learning Task},
  booktitle = {Proceedings of IEEE Africon 2009, September 23-25, Nairobi, Kenya},
  publisher = {IEEE},
  year      = {2009},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_luecking_2009.pdf},
  website   = {http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?reload=true&arnumber=5308098},
  abstract  = {In addition to the well-known linguistic alignment processes in
               dyadic communication – e.g., phonetic, syntactic, semantic alignment
               – we provide evidence for a genuine multimodal alignment process,
               namely semiotic alignment. Communicative elements from different
               modalities 'routinize into' cross-modal 'super-signs', which we
               call multimodal ensembles. Computational models of human communication
               are in need of expressive models of multimodal ensembles. In this
               paper, we exemplify semiotic alignment by means of empirical examples
               of the building of multimodal ensembles. We then propose a graph
               model of multimodal dialogue that is expressive enough to capture
               multimodal ensembles. In line with this model, we define a novel
               task in machine learning with the aim of training classifiers
               that can detect semiotic alignment in dialogue. This model is
               in support of approaches which need to gain insights into realistic
               human-machine communication.}
}

Alexander Mehler. 2009. Generalized Shortest Paths Trees: A Novel Graph Class Applied to Semiotic Networks. Analysis of Complex Networks: From Biology to Linguistics, 175–220.

BibTeX

@incollection{Mehler:2009:c,
  author    = {Mehler, Alexander},
  title     = {Generalized Shortest Paths Trees: A Novel Graph Class Applied
               to Semiotic Networks},
  booktitle = {Analysis of Complex Networks: From Biology to Linguistics},
  publisher = {Wiley-VCH},
  editor    = {Dehmer, Matthias and Emmert-Streib, Frank},
  pages     = {175--220},
  address   = {Weinheim},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_2009_b.pdf},
  website   = {https://www.researchgate.net/publication/255666602_1_Generalised_Shortest_Paths_Trees_A_Novel_Graph_Class_Applied_to_Semiotic_Networks},
  year      = {2009}
}

Tim vor der Brück and Sven Hartrumpf. 2009. A Readability Checker Based on Deep Semantic Indicators. Human Language Technology. Challenges of the Information Society, 5603:232–244.

BibTeX

@incollection{vor:der:Brueck:Hartrumpf:2009,
  author    = {vor der Brück, Tim and Hartrumpf, Sven},
  title     = {A Readability Checker Based on Deep Semantic Indicators},
  booktitle = {Human Language Technology. Challenges of the Information Society},
  publisher = {Springer},
  editor    = {Vetulani, Zygmunt and Uszkoreit, Hans},
  volume    = {5603},
  series    = {Lecture Notes in Computer Science (LNCS)},
  pages     = {232--244},
  address   = {Berlin, Germany},
  abstract  = {One major reason that readability checkers are still far away
               from judging the understandability of texts consists in the fact
               that no semantic information is used. Syntactic, lexical, or morphological
               information can only give limited access for estimating the cognitive
               difficulties for a human being to comprehend a text. In this paper
               however, we present a readability checker which uses semantic
               information in addition. This information is represented as semantic
               networks and is derived by a deep syntactico-semantic analysis.
               We investigate in which situations a semantic readability indicator
               can lead to superior results in comparison with ordinary surface
               indicators like sentence length. Finally, we compute the weights
               of our semantic indicators in the readability function based on
               the user ratings collected in an online evaluation.},
  website   = {http://rd.springer.com/chapter/10.1007/978-3-642-04235-5_20},
  year      = {2009}
}

Tim vor der Brück. 2009. Hypernymy Extraction Based on Shallow and Deep Patterns. From Form To Meaning: Processing Texts Automatically, Proceedings of the Biennial GSCL Conference 2009, 41–52.

BibTeX

@inproceedings{vor:der:Brueck:2009:b,
  author    = {vor der Brück, Tim},
  title     = {Hypernymy Extraction Based on Shallow and Deep Patterns},
  booktitle = {From Form To Meaning: Processing Texts Automatically, Proceedings
               of the Biennial GSCL Conference 2009},
  editor    = {Chiarcos, Christian and {Eckart de Castilho}, Richard},
  pages     = {41--52},
  address   = {Potsdam, Germany},
  abstract  = {There exist various approaches to construct taxonomies by text
               mining. Usually these approaches are based on supervised learning
               and extract in a first step several patterns. These patterns are
               then applied to previously unseen texts and used to recognize
               hypernym/hyponym pairs. Normally these approaches are only based
               on a surface representation or a syntactic tree structure, i.e.,
               a constituency or dependency tree derived by a syntactical parser.
               In this work we present an approach which, additionally to shallow
               patterns, directly operates on semantic networks which are derived
               by a deep linguistic syntacticosemantic analysis. Furthermore,
               the shallow approach heavily depends on semantic information,
               too. It is shown that recall and precision can be improved considerably
               than by relying on shallow patterns alone.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/gscl09_12_brueck.pdf},
  year      = {2009}
}

Gosse Bouma, Sergio Duarte and Md. Zahurul Islam. 2009. Cross-lingual Alignment and Completion of Wikipedia Templates. Third International Workshop on Cross Lingual Information Access: Addressing the Information Need of Multilingual Societies (CLIAWS3), Boulder, Colorado, USA, June 4.

BibTeX

@inproceedings{Bouma:Duarte:Zahurul:2009,
  author    = {Bouma, Gosse and Duarte, Sergio and Islam, Md. Zahurul},
  title     = {Cross-lingual Alignment and Completion of Wikipedia Templates},
  booktitle = {Third International Workshop on Cross Lingual Information Access:
               Addressing the Information Need of Multilingual Societies (CLIAWS3),
               Boulder, Colorado, USA, June 4},
  year      = {2009},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Cross-lingual_Alignment_and_Completion_of_Wikipedia_Templates.pdf},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.148.1418},
  abstract  = {For many languages, the size of Wikipedia is an order of magnitude
               smaller than the English Wikipedia. We present a method for cross-lingual
               alignment of template and infobox attributes in Wikipedia. The
               alignment is used to add and complete templates and infoboxes
               in one language with information derived from Wikipedia in another
               language. We show that alignment between English and Dutch Wikipedia
               is accurate and that the result can be used to expand the number
               of template attribute-value pairs in Dutch Wikipedia by 50\%.
               Furthermore, the alignment provides valuable information for normalization
               of template and attribute names and can be used to detect potential
               inconsistencies},
  owner     = {zahurul},
  timestamp = {2011.08.02}
}

Ulli Waltinger. 2009. Polarity Reinforcement: Sentiment Polarity Identification By Means Of Social Semantics. Proceedings of the IEEE Africon 2009, September 23-25, Nairobi, Kenya.

BibTeX

@inproceedings{Waltinger:2009:a,
  author    = {Waltinger, Ulli},
  title     = {Polarity Reinforcement: Sentiment Polarity Identification By Means
               Of Social Semantics},
  booktitle = {Proceedings of the IEEE Africon 2009, September 23-25, Nairobi, Kenya},
  pdf       = {http://www.ulliwaltinger.de/pdf/AfriconIEEE_2009_SentimentPolarity_Waltinger.pdf},
  website   = {http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=5308104},
  year      = {2009}
}

Ulli Waltinger, Irene Cramer and Tonio Wandmacher. 2009. From Social Networks To Distributional Properties: A Comparative Study On Computing Semantic Relatedness. Proceedings of the 31st Annual Conference of the Cognitive Science Society, 3016–3021.

BibTeX

@inproceedings{Waltinger:Cramer:Wandmacher:2009:a,
  author    = {Waltinger, Ulli and Cramer, Irene and Wandmacher, Tonio},
  title     = {From Social Networks To Distributional Properties: A Comparative
               Study On Computing Semantic Relatedness},
  booktitle = {Proceedings of the 31st Annual Conference of the Cognitive Science Society},
  editor    = {Taatgen, N. A. and van Rijn, H.},
  pages     = {3016--3021},
  address   = {Austin, TX},
  publisher = {Cognitive Science Society},
  pdf       = {http://csjarchive.cogsci.rpi.edu/proceedings/2009/papers/661/paper661.pdf},
  year      = {2009}
}

Ulli Waltinger. 2009. Polarity Reinforcement: Sentiment Polarity Identification By Means Of Social Semantics. Proceedings of the IEEE Africon 2009, September 23-25, Nairobi, Kenya.

BibTeX

@inproceedings{Waltinger:2009:b,
  author    = {Waltinger, Ulli},
  title     = {Polarity Reinforcement: Sentiment Polarity Identification By Means
               Of Social Semantics},
  booktitle = {Proceedings of the IEEE Africon 2009, September 23-25, Nairobi, Kenya},
  year      = {2009}
}

BibTeX

@inproceedings{Waltinger:Cramer:Wandmacher:2009:b,
  author    = {Waltinger, Ulli and Cramer, Irene and Wandmacher, Tonio},
  title     = {From Social Networks To Distributional Properties: A Comparative
               Study On Computing Semantic Relatedness},
  booktitle = {Proceedings of the 31st Annual Conference of the Cognitive Science Society},
  editor    = {Taatgen, N. A. and van Rijn, H.},
  pages     = {3016--3021},
  address   = {Austin, TX},
  publisher = {Cognitive Science Society},
  year      = {2009}
}

Alexander Mehler and Ulli Waltinger. 2009. Enhancing Document Modeling by Means of Open Topic Models: Crossing the Frontier of Classification Schemes in Digital Libraries by Example of the DDC. Library Hi Tech, 27(4):520–539.

BibTeX

@article{Mehler:Waltinger:2009:b,
  author    = {Mehler, Alexander and Waltinger, Ulli},
  title     = {Enhancing Document Modeling by Means of Open Topic Models: Crossing
               the Frontier of Classification Schemes in Digital Libraries by
               Example of the DDC},
  journal   = {Library Hi Tech},
  volume    = {27},
  number    = {4},
  pages     = {520--539},
  abstract  = {Purpose: We present a topic classification model using the Dewey
               Decimal Classification (DDC) as the target scheme. This is done
               by exploring metadata as provided by the Open Archives Initiative
               (OAI) to derive document snippets as minimal document representations.
               The reason is to reduce the effort of document processing in digital
               libraries. Further, we perform feature selection and extension
               by means of social ontologies and related web-based lexical resources.
               This is done to provide reliable topic-related classifications
               while circumventing the problem of data sparseness. Finally, we
               evaluate our model by means of two language-specific corpora.
               This paper bridges digital libraries on the one hand and computational
               linguistics on the other. The aim is to make accessible computational
               linguistic methods to provide thematic classifications in digital
               libraries based on closed topic models as the DDC. Design/methodology/approach:
               text classification, text-technology, computational linguistics,
               computational semantics, social semantics. Findings: We show that
               SVM-based classifiers perform best by exploring certain selections
               of OAI document metadata. Research limitations/implications: The
               findings show that it is necessary to further develop SVM-based
               DDC-classifiers by using larger training sets possibly for more
               than two languages in order to get better F-measure values. Practical
               implications: We can show that DDC-classifications come into reach
               which primarily explore OAI metadata. Originality/value: We provide
               algorithmic and formal-mathematical information how to build DDC-classifiers
               for digital libraries.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_waltinger_2009_b.pdf},
  website   = {http://biecoll.ub.uni-bielefeld.de/frontdoor.php?source_opus=5001&la=de},
  year      = {2009}
}

Rüdiger Gleim, Ulli Waltinger, Alexandra Ernst, Alexander Mehler, Dietmar Esch and Tobias Feith. 2009. The eHumanities Desktop – An Online System for Corpus Management and Analysis in Support of Computing in the Humanities. Proceedings of the Demonstrations Session of the 12th Conference of the European Chapter of the Association for Computational Linguistics EACL 2009, 30 March – 3 April, Athens.

BibTeX

@inproceedings{Gleim:Waltinger:Ernst:Mehler:Esch:Feith:2009,
  author    = {Gleim, Rüdiger and Waltinger, Ulli and Ernst, Alexandra and Mehler, Alexander
               and Esch, Dietmar and Feith, Tobias},
  year      = {2009},
  title     = {The eHumanities Desktop – An Online System for Corpus Management
               and Analysis in Support of Computing in the Humanities},
  booktitle = {Proceedings of the Demonstrations Session of the 12th Conference
               of the European Chapter of the Association for Computational Linguistics
               EACL 2009, 30 March – 3 April, Athens},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/gleim_waltinger_ernst_mehler_esch_feith_2009.pdf}
}

Alexander Mehler. 2009. Artifizielle Interaktivität. Eine semiotische Betrachtung. Medienwandel als Wandel von Interaktionsformen – von frühen Medienkulturen zum Web 2.0.

BibTeX

@incollection{Mehler:2009:d,
  author    = {Mehler, Alexander},
  title     = {Artifizielle Interaktivit{\"a}t. Eine semiotische Betrachtung},
  year      = {2009},
  editor    = {Sutter, Tilmann and Mehler, Alexander},
  booktitle = {Medienwandel als Wandel von Interaktionsformen – von frühen Medienkulturen
               zum Web 2.0},
  publisher = {VS},
  address   = {Wiesbaden}
}

Ulli Waltinger and Alexander Mehler. 2009. The Feature Difference Coefficient: Classification by Means of Feature Distributions. Proceedings of the Conference on Text Mining Services (TMS 2009), 159–168.

BibTeX

@inproceedings{Waltinger:Mehler:2009:a,
  author    = {Waltinger, Ulli and Mehler, Alexander},
  title     = {The Feature Difference Coefficient: Classification by Means of
               Feature Distributions},
  booktitle = {Proceedings of the Conference on Text Mining Services (TMS 2009)},
  series    = {Leipziger Beitr{\"a}ge zur Informatik: Band XIV},
  pages     = {159--168},
  address   = {Leipzig},
  publisher = {Leipzig University},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/waltinger_mehler_2009_a.pdf},
  year      = {2009}
}

Marina Santini, Georg Rehm, Serge Sharoff and Alexander Mehler. 2009. Automatic Genre Identification: Issues and Prospects. Ed. by Marina Santini, Georg Rehm, Serge Sharoff and Alexander Mehler. Journal for Language Technology and Computational Linguistics (JLCL), 24(1). GSCL.

BibTeX

@book{Santini:Rehm:Sharoff:Mehler:2009,
  author    = {Santini, Marina and Rehm, Georg and Sharoff, Serge and Mehler, Alexander},
  editor    = {Santini, Marina and Rehm, Georg and Sharoff, Serge and Mehler, Alexander},
  title     = {Automatic Genre Identification: Issues and Prospects},
  year      = {2009},
  series    = {Journal for Language Technology and Computational
                   Linguistics (JLCL)},
  volume    = {24(1)},
  publisher = {GSCL},
  pagetotal = {148},
  pdf       = {http://www.jlcl.org/2009_Heft1/JLCL24(1).pdf},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/AutomaticGenreIdentification.png}
}

Ulli Waltinger, Alexander Mehler and Rüdiger Gleim. 2009. Social Semantics And Its Evaluation By Means of Closed Topic Models: An SVM-Classification Approach Using Semantic Feature Replacement By Topic Generalization. Proceedings of the Biennial GSCL Conference 2009, September 30 – October 2, Universität Potsdam.

BibTeX

@inproceedings{Waltinger:Mehler:Gleim:2009:a,
  author    = {Waltinger, Ulli and Mehler, Alexander and Gleim, Rüdiger},
  title     = {Social Semantics And Its Evaluation By Means of Closed Topic Models:
               An {SVM}-Classification Approach Using Semantic Feature Replacement
               By Topic Generalization},
  booktitle = {Proceedings of the Biennial GSCL Conference 2009, September 30
               – October 2, Universit{\"a}t Potsdam},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/GSCL_2009_WaltingerMehlerGleim_camera_ready.pdf},
  year      = {2009}
}

Ulli Waltinger and Alexander Mehler. 2009. Social Semantics and Its Evaluation By Means Of Semantic Relatedness And Open Topic Models. IEEE/WIC/ACM International Conference on Web Intelligence, September 15–18, Milano.

BibTeX

@inproceedings{Waltinger:Mehler:2009:c,
  author    = {Waltinger, Ulli and Mehler, Alexander},
  title     = {Social Semantics and Its Evaluation By Means Of Semantic Relatedness
               And Open Topic Models},
  year      = {2009},
  booktitle = {IEEE/WIC/ACM International Conference on Web Intelligence, September
               15–18, Milano},
  website   = {http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=5284920&abstractAccess=no&userType=inst},
  abstract  = {This paper presents an approach using social semantics for the
               task of topic labelling by means of Open Topic Models. Our approach
               utilizes a social ontology to create an alignment of documents
               within a social network. Comprised category information is used
               to compute a topic generalization. We propose a feature-frequency-based
               method for measuring semantic relatedness which is needed in order
               to reduce the number of document features for the task of topic
               labelling. This method is evaluated against multiple human judgement
               experiments comprising two languages and three different resources.
               Overall the results show that social ontologies provide a rich
               source of terminological knowledge. The performance of the semantic
               relatedness measure with correlation values of up to .77 are quite
               promising. Results on the topic labelling experiment show, with
               an accuracy of up to .79, that our approach can be a valuable
               method for various NLP applications.}
}

Tim vor der Brück. 2009. Approximation of the Parameters of a Readability Formula by Robust Regression. Machine Learning and Data Mining in Pattern Recognition: Poster Proceedings of the International Conference on Machine Learning and Data Mining (MLDM), 115–125.

BibTeX

@inproceedings{vor:der:Brueck:2009,
  author    = {vor der Brück, Tim},
  title     = {Approximation of the Parameters of a Readability Formula by Robust Regression},
  booktitle = {Machine Learning and Data Mining in Pattern Recognition: Poster
               Proceedings of the International Conference on Machine Learning
               and Data Mining (MLDM)},
  pages     = {115--125},
  address   = {Leipzig, Germany},
  abstract  = {Most readability formulas calculate a global readability score
               by combining several indicator values by a linear combination.
               Typical indicators are Average sentence length, Average number
               of syllables per word, etc. Usually the parameters of the linear
               combination are determined by a linear OLS (ordinary least square
               estimation) minimizing the sum of the squared residuals in comparison
               with human ratings for a given set of texts. The usage of OLS
               leads to several drawbacks. First, the parameters are not constraint
               in any way and are therefore not intuitive and difficult to interpret.
               Second, if the number of parameters become large, the effect of
               overfitting easily occurs. Finally, OLS is quite sensitive to
               outliers. Therefore, an alternative method is presented which
               avoids these drawbacks and is based on robust regression.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mldm_2009_brueck_142.pdf},
  year      = {2009}
}

2008

Maik Stührenberg, Michael Beißwenger, Kai-Uwe Kühnberger, Alexander Mehler, Harald Lüngen, Dieter Metzing and Uwe Mönnich. 2008. Sustainability of Text-Technological Resources. Proceedings of the Post LREC-2008 Workshop: Sustainability of Language Resources and Tools for Natural Language Processing, Marrakech, Morocco.

BibTeX

@inproceedings{Stuehrenberg:Beisswenger:Kuehnberger:Mehler:Luengen:Metzing:Moennich:2008,
  author    = {Stührenberg, Maik and Bei{\ss}wenger, Michael and Kühnberger, Kai-Uwe
               and Mehler, Alexander and Lüngen, Harald and Metzing, Dieter and Mönnich, Uwe},
  title     = {Sustainability of Text-Technological Resources},
  booktitle = {Proceedings of the Post LREC-2008 Workshop: Sustainability of
               Language Resources and Tools for Natural Language Processing, Marrakech,
               Morocco},
  abstract  = {We consider that there are obvious relationships between research
               on sustainability of language and linguistic resources on the
               one hand and work undertaken in the Research Unit 'Text-Technological
               Modelling of Information' on the other. Currently the main focus
               in sustainability research is concerned with archiving methods
               of textual resources, i.e. methods for sustainability of primary
               and secondary data; these aspects are addressed in our work as
               well. However, we believe that there are additional certain aspects
               of sustainability on which new light is shed on by procedures,
               algorithms and dynamic processes undertaken in our Research Unit.},
  pdf       = {http://www.michael-beisswenger.de/pub/lrec-sustainability.pdf},
  year      = {2008}
}

Alexander Mehler, Barbara Job, Philippe Blanchard and Hans-Jürgen Eikmeyer. 2008. Sprachliche Netzwerke. Netzwerkanalyse und Netzwerktheorie, 413–427.

BibTeX

@incollection{Mehler:Job:Blanchard:Eikmeyer:2008,
  author    = {Mehler, Alexander and Job, Barbara and Blanchard, Philippe and Eikmeyer, Hans-Jürgen},
  title     = {Sprachliche Netzwerke},
  booktitle = {Netzwerkanalyse und Netzwerktheorie},
  publisher = {VS},
  editor    = {Stegbauer, Christian},
  pages     = {413--427},
  address   = {Wiesbaden},
  abstract  = {In diesem Kapitel beschreiben wir so genannte sprachliche Netzwerke.
               Dabei handelt es sich um Netzwerke sprachlicher Einheiten, die
               in Zusammenhang mit ihrer Einbettung in das Netzwerk jener Sprachgemeinschaft
               analysiert werden, welche diese Einheiten und deren Vernetzung
               hervorgebracht hat. Wir erörtern ein Dreistufenmodell zur Analyse
               solcher Netzwerke und exemplifizieren dieses Modell anhand mehrerer
               Spezialwikis. Ein Hauptaugenmerk des Kapitels liegt dabei auf
               einem Mehrebenennetzwerkmodell, und zwar in Abkehr von den unipartiten
               Graphmodellen der Theorie komplexer Netzwerke.},
  year      = {2008}
}

Olga Abramov, Alexander Mehler and Rüdiger Gleim. 2008. A Unified Database of Dependency Treebanks. Integrating, Quantifying and Evaluating Dependency Data. Proceedings of the 6th Language Resources and Evaluation Conference (LREC 2008), Marrakech (Morocco).

BibTeX

@inproceedings{Pustylnikov:Mehler:Gleim:2008,
  author    = {Abramov, Olga and Mehler, Alexander and Gleim, Rüdiger},
  title     = {A Unified Database of Dependency Treebanks. Integrating, Quantifying
               and Evaluating Dependency Data},
  year      = {2008},
  booktitle = {Proceedings of the 6th Language Resources and Evaluation Conference
               (LREC 2008), Marrakech (Morocco)},
  pdf       = {http://wwwhomes.uni-bielefeld.de/opustylnikov/pustylnikov/pdfs/LREC08_full.pdf},
  abstract  = {This paper describes a database of 11 dependency treebanks which
               were unified by means of a two-dimensional graph format. The format
               was evaluated with respect to storage-complexity on the one hand,
               and efficiency of data access on the other hand. An example of
               how the treebanks can be integrated within a unique interface
               is given by means of the DTDB interface.}
}

Alexander Mehler. 2008. Structural Similarities of Complex Networks: A Computational Model by Example of Wiki Graphs. Applied Artificial Intelligence, 22(7&8):619–683.

BibTeX

@article{Mehler:2008:a,
  author    = {Mehler, Alexander},
  title     = {Structural Similarities of Complex Networks: A Computational Model
               by Example of Wiki Graphs},
  journal   = {Applied Artificial Intelligence},
  volume    = {22},
  number    = {7\&8},
  pages     = {619--683},
  abstract  = {This article elaborates a framework for representing and classifying
               large complex networks by example of wiki graphs. By means of
               this framework we reliably measure the similarity of document,
               agent, and word networks by solely regarding their topology. In
               doing so, the article departs from classical approaches to complex
               network theory which focuses on topological characteristics in
               order to check their small world property. This does not only
               include characteristics that have been studied in complex network
               theory, but also some of those which were invented in social network
               analysis and hypertext theory. We show that network classifications
               come into reach which go beyond the hypertext structures traditionally
               analyzed in web mining. The reason is that we focus on networks
               as a whole as units to be classified—above the level of websites
               and their constitutive pages. As a consequence, we bridge classical
               approaches to text and web mining on the one hand and complex
               network theory on the other hand. Last but not least, this approach
               also provides a framework for quantifying the linguistic notion
               of intertextuality.},
  doi       = {10.1080/08839510802164085},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2016/10/mehler_2008_Structural_Similarities_of_Complex_Networks.pdf},
  website   = {https://www.researchgate.net/publication/200772675_Structural_similarities_of_complex_networks_A_computational_model_by_example_of_wiki_graphs},
  year      = {2008}
}

Alexander Mehler. 2008. Lexical-Semantic Resources in Automated Discourse Analysis. Ed. by Harald Lüngen, Alexander Mehler and Angelika Storrer. Journal for Language Technology and Computational Linguistics (JLCL), 23(2). GSCL.

BibTeX

@book{Luengen:Mehler:Storrer:2008:a,
  author    = {Mehler, Alexander},
  editor    = {Lüngen, Harald and Mehler, Alexander and Storrer, Angelika},
  title     = {Lexical-Semantic Resources in Automated Discourse Analysis},
  publisher = {GSCL},
  volume    = {23(2)},
  series    = {Journal for Language Technology and Computational
                   Linguistics (JLCL)},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/LexicalSemanticResources-300-20.png},
  pagetotal = {111},
  pdf       = {http://www.jlcl.org/2008_Heft2/JLCL23(2).pdf},
  website   = {https://www.researchgate.net/publication/228956889_Lexical-Semantic_Resources_in_Automated_Discourse_Analysis},
  year      = {2008}
}

Alexander Mehler. 2008. Large Text Networks as an Object of Corpus Linguistic Studies. Corpus Linguistics. An International Handbook of the Science of Language and Society, 328–382.

BibTeX

@incollection{Mehler:2008:b,
  author    = {Mehler, Alexander},
  title     = {Large Text Networks as an Object of Corpus Linguistic Studies},
  booktitle = {Corpus Linguistics. An International Handbook of the Science of
               Language and Society},
  publisher = {De Gruyter},
  editor    = {Lüdeling, Anke and Kytö, Merja},
  pages     = {328--382},
  address   = {Berlin/New York},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_2007_a.pdf},
  year      = {2008}
}

Tim vor der Brück and Holger Stenzhorn. July, 2008. A Dynamic Approach for Automatic Error Detection in Generation Grammars. Proceedings of the 18th European Conference on Artificial Intelligence (ECAI).

BibTeX

@inproceedings{vor:der:Brueck:Stenzhorn:2008,
  author    = {vor der Brück, Tim and Stenzhorn, Holger},
  title     = {A Dynamic Approach for Automatic Error Detection in Generation Grammars},
  booktitle = {Proceedings of the 18th European Conference on Artificial Intelligence (ECAI)},
  address   = {Patras, Greece},
  abstract  = {In any real world application scenario, natural language generation
               (NLG) systems have to employ grammars consisting of tremendous
               amounts of rules. Detecting and fixing errors in such grammars
               is therefore a highly tedious task. In this work we present a
               data mining algorithm which deduces incorrect grammar rules by
               abductive reasoning out of positive and negative training examples.
               More specifically, the constituency trees belonging to successful
               generation processes and the incomplete trees of failed ones are
               analyzed. From this a quality score is derived for each grammar
               rule by analyzing the occurrences of the rules in the trees and
               by spotting the exact error locations in the incomplete trees.
               In prior work on automatic error detection v.d.Brück et al. [5]
               proposed a static error detection algorithm for generation grammars.
               The approach of Cussens et al. creates missing grammar rules for
               parsing using abduction [1]. Zeller introduced a dynamic approach
               in the related area of detecting errors in computer programs [6].},
  isbn      = {978-1-58603-891-5},
  month     = {jul},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/xtragen_egram.pdf},
  year      = {2008}
}

Tim vor der Brück, Sven Hartrumpf and Hermann Helbig. October, 2008. A Readability Checker with Supervised Learning using Deep Syntactic and Semantic Indicators. Proceedings of the 11th International Multiconference: Information Society - IS 2008 - Language Technologies, 92–97.

BibTeX

@inproceedings{vor:der:Brueck:Hartrumpf:Helbig:2008:a,
  author    = {vor der Brück, Tim and Hartrumpf, Sven and Helbig, Hermann},
  title     = {A Readability Checker with Supervised Learning using Deep Syntactic
               and Semantic Indicators},
  booktitle = {Proceedings of the 11th International Multiconference: Information
               Society - IS 2008 - Language Technologies},
  editor    = {Erjavec, Tomaž and {Žganec Gros}, Jerneja},
  pages     = {92--97},
  address   = {Ljubljana, Slovenia},
  abstract  = {Checking for readability or simplicity of texts is important for
               many institutional and individual users. Formulas for approximately
               measuring text readability have a long tradition. Usually, they
               exploit surface-oriented indicators like sentence length, word
               length, word frequency, etc. However, in many cases, this information
               is not adequate to realistically approximate the cognitive difficulties
               a person can have to understand a text. Therefore we use deep
               syntactic and semantic indicators in addition. The syntactic information
               is represented by a dependency tree, the semantic information
               by a semantic network. Both representations are automatically
               generated by a deep syntactico-semantic analysis. A global readability
               score is determined by applying a nearest neighbor algorithm on
               3,000 ratings of 300 test persons. The evaluation showed that
               the deep syntactic and semantic indicators lead to promising results
               comparable to the best surface-based indicators. The combination
               of deep and shallow indicators leads to an improvement over shallow
               indicators alone. Finally, a graphical user interface was developed
               which highlights difficult passages, depending on the individual
               indicator values, and displays a global readability score. Povzetek:
               Strojno učenje z odvisnostnimi drevesi je uporabljeno za ugotavljanje
               berljivosti besedil.},
  isbn      = {978-961-264-006-4},
  month     = {oct},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/brueck_hartrumpf_helbig08.pdf},
  url       = {http://pi7.fernuni-hagen.de/brueck/papers/brueck_hartrumpf_helbig08.pdf},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.160.5878},
  year      = {2008}
}

Tim vor der Brück, Sven Hartrumpf and Hermann Helbig. 2008. A Readability Checker with Supervised Learning using Deep Indicators. Informatica, 32(4):429–435.

BibTeX

@article{vor:der:Brueck:Hartrumpf:Helbig:2008:b,
  author    = {vor der Brück, Tim and Hartrumpf, Sven and Helbig, Hermann},
  title     = {A Readability Checker with Supervised Learning using Deep Indicators},
  year      = {2008},
  journal   = {Informatica},
  volume    = {32},
  number    = {4},
  pages     = {429--435},
  website   = {http://connection.ebscohost.com/c/articles/36288796/readability-checker-supervised-learning-using-deep-indicators},
  abstract  = {Checking for readability or simplicity of texts is important for
               many institutional and individual users. Formulas for approximately
               measuring text readability have a long tradition. Usually, they
               exploit surface-oriented indicators like sentence length, word
               length, word frequency, etc. However, in many cases, this information
               is not adequate to realistically approximate the cognitive difficulties
               a person can have to understand a text. Therefore we use deep
               syntactic and semantic indicators in addition. The syntactic information
               is represented by a dependency tree, the semantic information
               by a semantic network. Both representations are automatically
               generated by a deep syntactico-semantic analysis. A global readability
               score is determined by applying a nearest neighbor algorithm on
               3,000 ratings of 300 test persons. The evaluation showed that
               the deep syntactic and semantic indicators lead to promising results
               comparable to the best surface-based indicators. The combination
               of deep and shallow indicators leads to an improvement over shallow
               indicators alone. Finally, a graphical user interface was developed
               which highlights difficult passages, depending on the individual
               indicator values, and displays a global readability score.}
}

Olga Pustylnikov and Alexander Mehler. 2008. Text classification by means of structural features. What kind of information about texts is captured by their structure? Proceedings of RUSSIR '08, September 1–5, Taganrog, Russia.

BibTeX

@inproceedings{Pustylnikov:Mehler:2008:c,
  author    = {Pustylnikov, Olga and Mehler, Alexander},
  title     = {Text classification by means of structural features. What kind
               of information about texts is captured by their structure?},
  booktitle = {Proceedings of RUSSIR '08, September 1-5, Taganrog, Russia},
  pdf       = {http://www.texttechnologylab.org/data/pdf/mehler_geibel_pustylnikov_2007.pdf},
  year      = {2008}
}

Ulli Waltinger, Alexander Mehler and Maik Stührenberg. 2008. An Integrated Model of Lexical Chaining: Applications, Resources and their Format. Proceedings of KONVENS 2008 – Ergänzungsband Textressourcen und lexikalisches Wissen, 59–70.

BibTeX

@inproceedings{Waltinger:Mehler:Stuehrenberg:2008,
  author    = {Waltinger, Ulli and Mehler, Alexander and Stührenberg, Maik},
  title     = {An Integrated Model of Lexical Chaining: Applications, Resources
               and their Format},
  booktitle = {Proceedings of KONVENS 2008 – Erg{\"a}nzungsband Textressourcen
               und lexikalisches Wissen},
  editor    = {Storrer, Angelika and Geyken, Alexander and Siebert, Alexander
               and Würzner, Kay-Michael},
  pages     = {59--70},
  pdf       = {http://www.ulliwaltinger.de/pdf/Konvens_2008_Integrated_Model_of_Lexical_Chaining_WaltingerMehlerStuehrenberg.pdf},
  year      = {2008}
}

Alexander Mehler. 2008. A Model of the Distribution of the Distances of Alike Elements in Dialogical Communication. Proceedings of the International Conference on Information Theory and Statistical Learning (ITSL '08), July 14-15, 2008, Las Vegas, 45–50.

BibTeX

@inproceedings{Mehler:2008:c,
  author    = {Mehler, Alexander},
  title     = {A Model of the Distribution of the Distances of Alike Elements
               in Dialogical Communication},
  booktitle = {Proceedings of the International Conference on Information Theory
               and Statistical Learning (ITSL '08), July 14-15, 2008, Las Vegas},
  pages     = {45--50},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_2008_c.pdf},
  year      = {2008}
}

Ulli Waltinger, Alexander Mehler and Gerhard Heyer. 2008. Towards Automatic Content Tagging: Enhanced Web Services in Digital Libraries Using Lexical Chaining. 4th Int. Conf. on Web Information Systems and Technologies (WEBIST '08), 4-7 May, Funchal, Portugal, 231–236.

BibTeX

@inproceedings{Waltinger:Mehler:Heyer:2008,
  author    = {Waltinger, Ulli and Mehler, Alexander and Heyer, Gerhard},
  title     = {Towards Automatic Content Tagging: Enhanced Web Services in Digital
               Libraries Using Lexical Chaining},
  booktitle = {4th Int. Conf. on Web Information Systems and Technologies (WEBIST
               '08), 4-7 May, Funchal, Portugal},
  editor    = {Cordeiro, José and Filipe, Joaquim and Hammoudi, Slimane},
  pages     = {231--236},
  address   = {Barcelona},
  publisher = {INSTICC Press},
  pdf       = {http://www.ulliwaltinger.de/pdf/Webist_2008_Towards_Automatic_Content_Tagging_WaltingerMehlerHeyer.pdf},
  url       = {http://dblp.uni-trier.de/db/conf/webist/webist2008-2.html#WaltingerMH08},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.463.3097},
  year      = {2008}
}

Alexander Mehler. 2008. A Short Note on Social-Semiotic Networks from the Point of View of Quantitative Semantics. Proceedings of the Dagstuhl Seminar on Social Web Communities, September 21-26, Dagstuhl.

BibTeX

@inproceedings{Mehler:2008:f,
  author    = {Mehler, Alexander},
  title     = {A Short Note on Social-Semiotic Networks from the Point of View
               of Quantitative Semantics},
  year      = {2008},
  editor    = {Alani, Harith and Staab, Steffen and Stumme, Gerd},
  booktitle = {Proceedings of the Dagstuhl Seminar on Social Web Communities,
               September 21-26, Dagstuhl},
  pdf       = {http://drops.dagstuhl.de/opus/volltexte/2008/1788/pdf/08391.MehlerAlexander.ExtAbstract.1788.pdf}
}

Alexander Mehler, Rüdiger Gleim, Alexandra Ernst and Ulli Waltinger. 2008. WikiDB: Building Interoperable Wiki-Based Knowledge Resources for Semantic Databases. Sprache und Datenverarbeitung. International Journal for Language Data Processing, 32(1):47–70.

BibTeX

@article{Mehler:Gleim:Ernst:Waltinger:2008,
  author    = {Mehler, Alexander and Gleim, Rüdiger and Ernst, Alexandra and Waltinger, Ulli},
  title     = {{WikiDB}: Building Interoperable Wiki-Based Knowledge Resources
               for Semantic Databases},
  journal   = {Sprache und Datenverarbeitung. International Journal
                   for Language Data Processing},
  volume    = {32},
  number    = {1},
  pages     = {47--70},
  abstract  = {This article describes an API for exploring the logical document
               and the logical network structure of wikis. It introduces an algorithm
               for the semantic preprocessing, filtering and typing of these
               building blocks. Further, this article models the process of wiki
               generation based on a unified format of syntactic, semantic and
               pragmatic representations. This three-level approach to make accessible
               syntactic, semantic and pragmatic aspects of wiki-based structure
               formation is complemented by a corresponding database model –
               called WikiDB – and an API operating thereon. Finally, the article
               provides an empirical study of using the three-fold representation
               format in conjunction with WikiDB.},
  pdf       = {http://www.ulliwaltinger.de/pdf/Konvens_2008_WikiDB_Building_Semantic_Databases_MehlerGleimErnstWaltinger.pdf},
  year      = {2008}
}

Ulli Waltinger and Alexander Mehler. 2008. Who is it? Context sensitive named entity and instance recognition by means of Wikipedia. Proceedings of the 2008 IEEE/WIC/ACM International Conference on Web Intelligence (WI-2008), 381–384.

BibTeX

@inproceedings{Waltinger:Mehler:2008:a,
  author    = {Waltinger, Ulli and Mehler, Alexander},
  title     = {Who is it? Context sensitive named entity and instance recognition
               by means of {Wikipedia}},
  booktitle = {Proceedings of the 2008 IEEE/WIC/ACM International Conference
               on Web Intelligence (WI-2008)},
  pages     = {381--384},
  publisher = {IEEE Computer Society},
  pdf       = {http://www.ulliwaltinger.de/pdf/WI_2008_Context_Sensitive_Instance_Recognition_WaltingerMehler.pdf},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.324.5881},
  year      = {2008}
}

Andy Lücking, Alexander Mehler and Peter Menke. June 2–4, 2008. Taking Fingerprints of Speech-and-Gesture Ensembles: Approaching Empirical Evidence of Intrapersonal Alignment in Multimodal Communication. LONDIAL 2008: Proceedings of the 12th Workshop on the Semantics and Pragmatics of Dialogue (SEMDIAL), 157–164.

BibTeX

@inproceedings{Luecking:Mehler:Menke:2008,
  author    = {Lücking, Andy and Mehler, Alexander and Menke, Peter},
  title     = {Taking Fingerprints of Speech-and-Gesture Ensembles: Approaching
               Empirical Evidence of Intrapersonal Alignment in Multimodal Communication},
  booktitle = {LONDIAL 2008: Proceedings of the 12th Workshop on the Semantics
               and Pragmatics of Dialogue (SEMDIAL)},
  pages     = {157--164},
  address   = {King's College London},
  month     = {June 2–4},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/luecking_mehler_menke_2008.pdf},
  website   = {https://www.researchgate.net/publication/237305375_Taking_Fingerprints_of_Speech-and-Gesture_Ensembles_Approaching_Empirical_Evidence_of_Intrapersonal_Alignment_in_Multimodal_Communication},
  year      = {2008}
}

Alexander Mehler and Tilmann Sutter. 2008. Interaktive Textproduktion in Wiki-basierten Kommunikationssystemen. Kommunikation, Partizipation und Wirkungen im Social Web – Weblogs, Wikis, Podcasts und Communities aus interdisziplinärer Sicht, 267–300.

BibTeX

@incollection{Mehler:Sutter:2008,
  author    = {Mehler, Alexander and Sutter, Tilmann},
  title     = {Interaktive Textproduktion in Wiki-basierten Kommunikationssystemen},
  booktitle = {Kommunikation, Partizipation und Wirkungen im Social Web – Weblogs,
               Wikis, Podcasts und Communities aus interdisziplin{\"a}rer Sicht},
  publisher = {Herbert von Halem},
  editor    = {Zerfa{\ss}, Ansgar and Welker, Martin and Schmidt, Jan},
  pages     = {267--300},
  address   = {Köln},
  year      = {2008}
}

Alexander Mehler. 2008. On the Impact of Community Structure on Self-Organizing Lexical Networks. Proceedings of the 7th Evolution of Language Conference (Evolang 2008), March 11-15, 2008, Barcelona, 227–234.

BibTeX

@inproceedings{Mehler:2008:e,
  author    = {Mehler, Alexander},
  title     = {On the Impact of Community Structure on Self-Organizing Lexical Networks},
  booktitle = {Proceedings of the 7th Evolution of Language Conference (Evolang
               2008), March 11-15, 2008, Barcelona},
  editor    = {Smith, Andrew D. M. and Smith, Kenny and {Ferrer i Cancho}, Ramon},
  pages     = {227--234},
  publisher = {World Scientific},
  abstract  = {This paper presents a simulation model of self-organizing lexical
               networks. Its starting point is the notion of an association game
               in which the impact of varying community models is studied on
               the emergence of lexical networks. The paper reports on experiments
               whose results are in accordance with findings in the framework
               of the naming game. This is done by means of a multilevel network
               model in which the correlation of social and of linguistic networks
               is studied.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_2008_b.pdf},
  website   = {http://stel.ub.edu/evolang2008/evo10.htm},
  year      = {2008}
}

Olga Abramov and Alexander Mehler. 2008. Towards a Uniform Representation of Treebanks: Providing Interoperability for Dependency Tree Data. Proceedings of First International Conference on Global Interoperability for Language Resources (ICGL 2008), Hong Kong SAR, January 9-11.

BibTeX

@inproceedings{Pustylnikov:Mehler:2008:a,
  author    = {Abramov, Olga and Mehler, Alexander},
  title     = {Towards a Uniform Representation of Treebanks: Providing Interoperability
               for Dependency Tree Data},
  booktitle = {Proceedings of First International Conference on Global Interoperability
               for Language Resources (ICGL 2008), Hong Kong SAR, January 9-11},
  year      = {2008},
  pdf       = {http://wwwhomes.uni-bielefeld.de/opustylnikov/pustylnikov/pdfs/acl07.1.0.pdf},
  website   = {https://www.researchgate.net/publication/242681771_Towards_a_Uniform_Representation_of_Treebanks_Providing_Interoperability_for_Dependency_Tree_Data},
  abstract  = {In this paper we present a corpus representation format which
               unifies the representation of a wide range of dependency treebanks
               within a single model. This approach provides interoperability
               and reusability of annotated syntactic data which in turn extends
               its applicability within various research contexts. We demonstrate
               our approach by means of dependency treebanks of 11 languages.
               Further, we perform a comparative quantitative analysis of these
               treebanks in order to demonstrate the interoperability of our
               approach.},
}

Georg Rehm, Marina Santini, Alexander Mehler, Pavel Braslavski, Rüdiger Gleim, Andrea Stubbe, Svetlana Symonenko, Mirko Tavosanis and Vedrana Vidulin. 2008. Towards a Reference Corpus of Web Genres for the Evaluation of Genre Identification Systems. Proceedings of the 6th Language Resources and Evaluation Conference (LREC 2008), Marrakech (Morocco).

BibTeX

@inproceedings{Rehm:Santini:Mehler:Braslavski:Gleim:Stubbe:Symonenko:Tavosanis:Vidulin:2008,
  author    = {Rehm, Georg and Santini, Marina and Mehler, Alexander and Braslavski, Pavel
               and Gleim, Rüdiger and Stubbe, Andrea and Symonenko, Svetlana and Tavosanis, Mirko
               and Vidulin, Vedrana},
  title     = {Towards a Reference Corpus of Web Genres for the Evaluation of
               Genre Identification Systems},
  booktitle = {Proceedings of the 6th Language Resources and Evaluation Conference
               (LREC 2008), Marrakech (Morocco)},
  year      = {2008},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/rehm_santini_mehler_braslavski_gleim_stubbe_symonenko_tavosanis_vidulin_2008.pdf},
  website   = {http://www.lrec-conf.org/proceedings/lrec2008/summaries/94.html},
  abstract  = {We present initial results from an international and multi-disciplinary
               research collaboration that aims at the construction of a reference
               corpus of web genres. The primary application scenario for which
               we plan to build this resource is the automatic identification
               of web genres. Web genres are rather difficult to capture and
               to describe in their entirety, but we plan for the finished reference
               corpus to contain multi-level tags of the respective genre or
               genres a web document or a website instantiates. As the construction
               of such a corpus is by no means a trivial task, we discuss several
               alternatives that are, for the time being, mostly based on existing
               collections. Furthermore, we discuss a shared set of genre categories
               and a multi-purpose tool as two additional prerequisites for a
               reference corpus of web genres.},
}

2007

Rüdiger Gleim, Alexander Mehler, Matthias Dehmer and Olga Abramov. 2007. Aisles through the Category Forest – Utilising the Wikipedia Category System for Corpus Building in Machine Learning. 3rd International Conference on Web Information Systems and Technologies (WEBIST '07), March 3-6, 2007, Barcelona, 142–149.

BibTeX

@inproceedings{Gleim:Mehler:Dehmer:Abramov:2007,
  author    = {Gleim, Rüdiger and Mehler, Alexander and Dehmer, Matthias and Abramov, Olga},
  title     = {Aisles through the Category Forest – Utilising the Wikipedia Category
               System for Corpus Building in Machine Learning},
  booktitle = {3rd International Conference on Web Information Systems and Technologies
               (WEBIST '07), March 3-6, 2007, Barcelona},
  editor    = {Filipe, Joaquim and Cordeiro, José and Encarnação, Bruno and Pedrosa, Vitor},
  pages     = {142--149},
  address   = {Barcelona},
  abstract  = {The World Wide Web is a continuous challenge to machine learning.
               Established approaches have to be enhanced and new methods be
               developed in order to tackle the problem of finding and organising
               relevant information. It has often been motivated that semantic
               classifications of input documents help solving this task. But
               while approaches of supervised text categorisation perform quite
               well on genres found in written text, newly evolved genres on
               the web are much more demanding. In order to successfully develop
               approaches to web mining, respective corpora are needed. However,
               the composition of genre- or domain-specific web corpora is still
               an unsolved problem. It is time consuming to build large corpora
               of good quality because web pages typically lack reliable meta
               information. Wikipedia along with similar approaches of collaborative
               text production offers a way out of this dilemma. We examine how
               social tagging, as supported by the MediaWiki software, can be
               utilised as a source of corpus building. Further, we describe
               a representation format for social ontologies and present the
               Wikipedia Category Explorer, a tool which supports categorical
               views to browse through the Wikipedia and to construct domain
               specific corpora for machine learning.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2016/10/webist_2007-gleim_mehler_dehmer_pustylnikov.pdf},
  year      = {2007}
}

Alexander Mehler, Rüdiger Gleim and Armin Wegner. 2007. Structural Uncertainty of Hypertext Types. An Empirical Study. Proceedings of the Workshop "Towards Genre-Enabled Search Engines: The Impact of NLP", September, 30, 2007, in conjunction with RANLP 2007, Borovets, Bulgaria, 13–19.

BibTeX

@inproceedings{Mehler:Gleim:Wegner:2007,
  author    = {Mehler, Alexander and Gleim, Rüdiger and Wegner, Armin},
  title     = {Structural Uncertainty of Hypertext Types. An Empirical Study},
  booktitle = {Proceedings of the Workshop "Towards Genre-Enabled Search Engines:
               The Impact of NLP", September, 30, 2007, in conjunction with RANLP
               2007, Borovets, Bulgaria},
  editor    = {Rehm, Georg and Santini, Marina},
  pages     = {13--19},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/RANLP.pdf},
  year      = {2007}
}

Alexander Mehler. 2007. Evolving Lexical Networks. A Simulation Model of Terminological Alignment. Proceedings of the Workshop on Language, Games, and Evolution at the 9th European Summer School in Logic, Language and Information (ESSLLI 2007), Trinity College, Dublin, 6-17 August, 57–67.

BibTeX

@inproceedings{Mehler:2007:d,
  author    = {Mehler, Alexander},
  title     = {Evolving Lexical Networks. A Simulation Model of Terminological Alignment},
  booktitle = {Proceedings of the Workshop on Language, Games, and Evolution
               at the 9th European Summer School in Logic, Language and Information
               (ESSLLI 2007), Trinity College, Dublin, 6-17 August},
  editor    = {Benz, Anton and Ebert, Christian and van Rooij, Robert},
  pages     = {57--67},
  abstract  = {In this paper we describe a simulation model of terminological
               alignment in a multiagent community. It is based on the notion
               of an association game which is used instead of the classical
               notion of a naming game (Steels, 1996). The simulation model integrates
               a small world-like agent community which restricts agent communication.
               We hypothesize that this restriction is decisive when it comes
               to simulate terminological alignment based on lexical priming.
               The paper presents preliminary experimental results in support
               of this hypothesis.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_2007_d.pdf},
  year      = {2007}
}

Alexander Mehler, Peter Geibel, Rüdiger Gleim, Sebastian Herold, Brijnesh-Johannes Jain and Olga Abramov. 2007. Much Ado About Text Content. Learning Text Types Solely by Structural Differentiae. Proceedings of OTT '06 – Ontologies in Text Technology: Approaches to Extract Semantic Knowledge from Structured Information, 63–71.

BibTeX

@inproceedings{Mehler:Geibel:Gleim:Herold:Jain:Pustylnikov:2007,
  author    = {Mehler, Alexander and Geibel, Peter and Gleim, Rüdiger and Herold, Sebastian
               and Jain, Brijnesh-Johannes and Abramov, Olga},
  title     = {Much Ado About Text Content. Learning Text Types Solely by Structural
               Differentiae},
  booktitle = {Proceedings of OTT '06 – Ontologies in Text Technology: Approaches
               to Extract Semantic Knowledge from Structured Information},
  editor    = {Mönnich, Uwe and Kühnberger, Kai-Uwe},
  series    = {Publications of the Institute of Cognitive Science
               (PICS)},
  pages     = {63--71},
  address   = {Osnabrück},
  abstract  = {In this paper, we deal with classifying texts into classes which
               denote text types whose textual instances serve more or less homogeneous
               functions. Other than mainstream approaches to text classification,
               which rely on the vector space model [30] or some of its descendants
               [2] and, thus, on content-related lexical features, we solely
               refer to structural differentiae, that is, to patterns of text
               structure as determinants of class membership. Further, we suppose
               that text types span a type hierarchy based on the type-subtype
               relation [31]. Thus, although we admit that class membership is
               fuzzy so that overlapping classes are inevitable, we suppose a
               non-overlapping type system structured into a rooted tree – whether
               solely based on functional or additional on, e.g., content- or
               media-based criteria [1]. What regards criteria of goodness of
               classification, we perform a classical supervised categorization
               experiment [30] based on cross-validation as a method of model
               selection [11]. That is, we perform a categorization experiment
               in which for all training and test cases class membership is known
               ex ante. In summary, we perform a supervised experiment of text
               classification in order to learn functionally grounded text types
               where membership to these types is solely based on structural
               criteria.},
  pdf       = {http://ikw.uni-osnabrueck.de/~ott06/ott06-abstracts/Mehler_Geibel_abstract.pdf},
  year      = {2007}
}

Matthias Dehmer, Alexander Mehler and Frank Emmert-Streib. 2007. Graph-theoretical Characterizations of Generalized Trees. Proceedings of the 2007 International Conference on Machine Learning: Models, Technologies & Applications (MLMTA '07), June 25-28, 2007, Las Vegas, 113–117.

BibTeX

@inproceedings{Dehmer:Mehler:Emmert-Streib:2007:a,
  author    = {Dehmer, Matthias and Mehler, Alexander and Emmert-Streib, Frank},
  title     = {Graph-theoretical Characterizations of Generalized Trees},
  booktitle = {Proceedings of the 2007 International Conference on Machine Learning:
               Models, Technologies \& Applications (MLMTA '07), June 25-28,
               2007, Las Vegas},
  pages     = {113--117},
  website   = {https://www.researchgate.net/publication/221188591_Graph-theoretical_Characterizations_of_Generalized_Trees},
  year      = {2007}
}

Rüdiger Gleim, Alexander Mehler and Hans-Jürgen Eikmeyer. 2007. Representing and Maintaining Large Corpora. Proceedings of the Corpus Linguistics 2007 Conference, Birmingham (UK).

BibTeX

@inproceedings{Gleim:Mehler:Eikmeyer:2007:a,
  author    = {Gleim, Rüdiger and Mehler, Alexander and Eikmeyer, Hans-Jürgen},
  title     = {Representing and Maintaining Large Corpora},
  booktitle = {Proceedings of the Corpus Linguistics 2007 Conference, Birmingham (UK)},
  year      = {2007},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/gleim_mehler_eikmeyer_2007_a.pdf},
}

Peter Geibel, Olga Abramov, Alexander Mehler, Helmar Gust and Kai-Uwe Kühnberger. 2007. Classification of Documents Based on the Structure of Their DOM Trees. Proceedings of ICONIP 2007 (14th International Conference on Neural Information Processing), 779–788.

BibTeX

@inproceedings{Geibel:Pustylnikov:Mehler:Gust:Kuehnberger:2007,
  author    = {Geibel, Peter and Abramov, Olga and Mehler, Alexander and Gust, Helmar
               and Kühnberger, Kai-Uwe},
  title     = {Classification of Documents Based on the Structure of Their DOM Trees},
  booktitle = {Proceedings of ICONIP 2007 (14th International Conference on Neural
               Information Processing)},
  series    = {Lecture Notes in Computer Science},
  volume    = {4985},
  pages     = {779--788},
  publisher = {Springer},
  abstract  = {In this paper, we discuss kernels that can be applied for the
               classification of XML documents based on their DOM trees. DOM
               trees are ordered trees in which every node might be labeled by
               a vector of attributes including its XML tag and the textual content.
               We describe five new kernels suitable for such structures: a kernel
               based on predefined structural features, a tree kernel derived
               from the well-known parse tree kernel, the set tree kernel that
               allows permutations of children, the string tree kernel being
               an extension of the so-called partial tree kernel, and the soft
               tree kernel as a more efficient alternative. We evaluate the kernels
               experimentally on a corpus containing the DOM trees of newspaper
               articles and on the well-known SUSANNE corpus.},
  website   = {http://www.springerlink.com/content/x414002113425742/},
  year      = {2007}
}

Bernhard Jussen, Alexander Mehler and Alexandra Ernst. 2007. A Corpus Management System for Historical Semantics. Sprache und Datenverarbeitung. International Journal for Language Data Processing, 31(1-2):81–89.

BibTeX

@article{Jussen:Mehler:Ernst:2007,
  author    = {Jussen, Bernhard and Mehler, Alexander and Ernst, Alexandra},
  title     = {A Corpus Management System for Historical Semantics},
  journal   = {Sprache und Datenverarbeitung. International Journal
               for Language Data Processing},
  volume    = {31},
  number    = {1-2},
  pages     = {81--89},
  abstract  = {Der Beitrag beschreibt ein Korpusmanagementsystem für die historische
               Semantik. Die Grundlage hierfür bildet ein Bedeutungsbegriff,
               der – methodologisch gesprochen – auf der Analyse diachroner Korpora
               beruht. Das Ziel der Analyse dieser Korpora besteht darin, Bedeutungswandel
               als eine Bezugsgrö{\ss}e für den Wandel sozialer Systeme zu untersuchen.
               Das vorgestellte Korpusmanagementsystem unterstützt diese Art
               der korpusbasierten historischen Semantik.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/jussen_mehler_ernst_2007.pdf},
  year      = {2007}
}

Alexander Mehler and Reinhard Köhler. 2007. Machine Learning in a Semiotic Perspective. Aspects of Automatic Text Analysis, 1–29.

BibTeX

@incollection{Mehler:Koehler:2007:b,
  author    = {Mehler, Alexander and Köhler, Reinhard},
  title     = {Machine Learning in a Semiotic Perspective},
  booktitle = {Aspects of Automatic Text Analysis},
  publisher = {Springer},
  editor    = {Mehler, Alexander and Köhler, Reinhard},
  series    = {Studies in Fuzziness and Soft Computing},
  pages     = {1--29},
  address   = {Berlin/New York},
  abstract  = {Gegenstand des folgenden Aufsatzes ist der konnotative Aspekt
               der Bedeutungen von Texten. Den Ausgangspunkt der {\"U}berlegungen
               zur Konnotation des Textes bildet die Auffassung, wonach Wort-
               und Textbedeutungskonstitution Ergebnis eines zirkul{\"a}ren Prozesses
               sind, der für die Emergenz einer Hierarchie ineinander geschachtelter
               Spracheinheiten verantwortlich zeichnet. Der Proze{\ss} der Zeichenartikulation
               erfolgt entlang dieser Ebenen und erzeugt durch Verbindung von
               (konnotativer) Inhalts- und Ausdrucksseite auf Textebene das Textzeichen.
               Im Gegensatz zu einer strikten Interpretation des Fregeschen Kompositionalit{\"a}tsprinzips,
               derzufolge die Bedeutungen sprachlicher Einheiten als fixierte,
               kontextfreie Grö{\ss}en vorauszusetzen sind, behandelt der vorliegende
               Ansatz bereits die lexikalische Bedeutung als Grö{\ss}e, die in
               Abh{\"a}ngigkeit von ihrem Kontext variieren kann. Aus semiotischer
               Perspektive ist es vor allem der Gestaltcharakter, welcher die
               konnotative Textbedeutung einer Anwendung des Frege-Prinzips entzieht.
               Anders ausgedrückt: Die konnotative Bedeutung eines Textes ist
               keineswegs in eine Struktur 'atomarer' Repr{\"a}sentationen zerlegbar.
               Die hierarchische Organisation von Texten erweist sich insofern
               als komplex, als ihre Bedeutungen aus einem zirkul{\"a}ren Proze{\ss}
               resultieren, der best{\"a}tigend und/oder ver{\"a}ndernd auf die
               Bedeutungen der Textkonstituenten einwirkt. Diese Zirkularit{\"a}t
               bedingt, da{\ss} Texte nicht nur als Orte der Manifestation von
               Wortbedeutungsstrukturen anzusehen sind, sondern zugleich als
               Ausgangspunkte für die Modifikation und Emergenz solcher Strukturen
               dienen. Im folgenden wird unter Rekurs auf den Kopenhagener Strukturalismus
               ein Modell der konnotativen Bedeutung von Texten entwickelt, das
               sich unter anderem an dem glossematischen Begriff der Konstante
               orientiert. Die Formalisierung des Modells erfolgt mit Hilfe des
               Konzeptes der unscharfen Menge. Zu diesem Zweck werden die unscharfen
               Verwendungsregularit{\"a}ten von Wörtern auf der Basis eines zweistufigen
               Verfahrens analysiert, welches die syntagmatischen und paradigmatischen
               Regularit{\"a}ten des Wortgebrauches berücksichtigt. Die Rolle
               der Satzebene innerhalb des Prozesses der konnotativen Textbedeutungskonstitution
               wird angedeutet. Abschlie{\ss}end erfolgt eine Exemplifizierung
               des Algorithmus anhand der automatischen Analyse eines Textcorpus.},
  reviewnote = {NOTE(review): the German abstract on the connotative meaning of texts
               does not appear to match this English-language chapter -- confirm
               and replace with the correct abstract.},
  website   = {http://rd.springer.com/chapter/10.1007/978-3-540-37522-7_1},
  year      = {2007}
}

Alexander Mehler, Ulli Waltinger and Armin Wegner. 2007. A Formal Text Representation Model Based on Lexical Chaining. Proceedings of the KI 2007 Workshop on Learning from Non-Vectorial Data (LNVD 2007) September 10, Osnabrück, 17–26.

BibTeX

@inproceedings{Mehler:Waltinger:Wegner:2007:a,
  author    = {Mehler, Alexander and Waltinger, Ulli and Wegner, Armin},
  title     = {A Formal Text Representation Model Based on Lexical Chaining},
  booktitle = {Proceedings of the KI 2007 Workshop on Learning from Non-Vectorial
               Data (LNVD 2007) September 10, Osnabrück},
  editor    = {Geibel, Peter and Jain, Brijnesh J.},
  pages     = {17--26},
  address   = {Osnabrück},
  publisher = {Universit{\"a}t Osnabrück},
  abstract  = {This paper presents a formal text representation model as an alternative
               to the vector space model. It combines a tree-like model with
               graph-inducing lexical relations. The paper aims at formalizing
               two yet unrelated approaches, i.e. lexical chaining [3] and quantitative
               structure analysis [9], in order to combine content and structure
               modeling.},
  pdf       = {http://www.ulliwaltinger.de/pdf/LNVD07MehlerWaltingerWegner.pdf},
  year      = {2007}
}

Tim vor der Brück and Sven Hartrumpf. October, 2007. A Semantically Oriented Readability Checker for German. Proceedings of the 3rd Language & Technology Conference, 270–274.

BibTeX

@incollection{vor:der:Brueck:Hartrumpf:2007,
  author    = {vor der Brück, Tim and Hartrumpf, Sven},
  title     = {A Semantically Oriented Readability Checker for German},
  booktitle = {Proceedings of the 3rd Language \& Technology Conference},
  publisher = {Wydawnictwo Poznańskie},
  editor    = {Vetulani, Zygmunt},
  pages     = {270--274},
  address   = {Poznań, Poland},
  abstract  = {One major reason that readability checkers are still far away
               from judging the understandability of texts consists in the fact
               that no semantic information is used. Syntactic, lexical, or morphological
               information can only give limited access for estimating the cognitive
               difficulties for a human being to comprehend a text. In this paper
               however, we present a readability checker which uses semantic
               information in addition. This information is represented as semantic
               networks and is derived by a deep syntactico-semantic analysis.
               We investigate in which situations a semantic readability indicator
               can lead to superior results in comparison with ordinary surface
               indicators like sentence length. Finally, we compute the correlations
               and absolute errors for our semantic indicators related to user
               ratings collected in an online evaluation.},
  isbn      = {978-83-7177-407-2},
  month     = {oct},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/brueck_hartrumpf07_online.pdf},
  url       = {http://pi7.fernuni-hagen.de/papers/brueck_hartrumpf07_online.pdf},
  year      = {2007}
}

Tim vor der Brück and Stephan Busemann. 2007. Suggesting Error Corrections of Path Expressions and Categories for Tree-Mapping Grammars. Zeitschrift für Sprachwissenschaft, 26(2).

BibTeX

@article{vor:der:Brueck:Busemann:2007,
  author    = {vor der Brück, Tim and Busemann, Stephan},
  title     = {Suggesting Error Corrections of Path Expressions and Categories
               for Tree-Mapping Grammars},
  journal   = {Zeitschrift für Sprachwissenschaft},
  volume    = {26},
  number    = {2},
  abstract  = {Tree mapping grammars are used in natural language generation
               (NLG) to map non-linguistic input onto a derivation tree from
               which the target text can be trivially read off as the terminal
               yield. Such grammars may consist of a large number of rules. Finding
               errors is quite tedious and sometimes very time-consuming. Often
               the generation fails because the relevant input subtree is not
               specified correctly. This work describes a method to detect and
               correct wrong assignments of input subtrees to grammar categories
               by cross-validating grammar rules with the given input structures.
               The method also detects and corrects the usage of a category in
               a grammar rule. The result is implemented in a grammar development
               workbench and accelerates the grammar writer's work considerably.
               The paper suggests the algorithms can be ported to other areas
               in which tree mapping is required.},
  doi       = {10.1515/ZFS.2007.021},
  url       = {http://www.reference-global.com/doi/pdfplus/10.1515/ZFS.2007.021},
  year      = {2007}
}

Tim vor der Brück and Johannes Leveling. 2007. Parameter Learning for a Readability Checking Tool. Proceedings of the LWA 2007 (Lernen-Wissen-Adaption), Workshop KDML.

BibTeX

@incollection{vor:der:Brueck:Leveling:2007,
  author    = {vor der Brück, Tim and Leveling, Johannes},
  title     = {Parameter Learning for a Readability Checking Tool},
  booktitle = {Proceedings of the LWA 2007 (Lernen-Wissen-Adaption), Workshop KDML},
  publisher = {Gesellschaft für Informatik},
  editor    = {Hinneburg, Alexander},
  address   = {Halle/Saale, Germany},
  abstract  = {This paper describes the application of machine learning methods
               to determine parameters for DeLite, a readability checking tool.
               DeLite pinpoints text segments that are difficult to understand
               and computes for a given text a global readability score, which
               is a weighted sum of normalized indicator values. Indicator values
               are numeric properties derived from linguistic units in the text,
               such as the distance between a verb and its complements or the
               number of possible antecedents for a pronoun. Indicators are normalized
               by means of a derivation of the Fermi function with two parameters.
               DeLite requires individual parameters for this normalization function
               and a weight for each indicator to compute the global readability
               score. Several experiments to determine these parameters were
               conducted, using different machine learning approaches. The training
               data consists of more than 300 user ratings of texts from the
               municipality domain. The weights for the indicators are learned
               using two approaches: i) robust regression with linear optimization
               and ii) an approximative iterative linear regression algorithm.
               For evaluation, the computed readability scores are compared to
               user ratings. The evaluation showed that iterative linear regression
               yields a smaller square error than robust regression although
               this method is only approximative. Both methods yield results
               outperforming a first manual setting, and for both methods, basically
               the same set of non-zero weights remain.},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.69.6079},
  year      = {2007}
}

Christiane Borr, Martina Hielscher-Fastabend and Andy Lücking. 2007. Reliability and Validity of Cervical Auscultation. Dysphagia, 22(3):225–234.

BibTeX

@article{Borr:Luecking:Hierlscher:2007,
  author    = {Borr, Christiane and Hielscher-Fastabend, Martina and Lücking, Andy},
  title     = {Reliability and Validity of Cervical Auscultation},
  journal   = {Dysphagia},
  volume    = {22},
  number    = {3},
  pages     = {225--234},
  abstract  = {We conducted a two-part study that contributes to the discussion
               about cervical auscultation (CA) as a scientifically justifiable
               and medically useful tool to identify patients with a high risk
               of aspiration/penetration. We sought to determine (1) acoustic
               features that mark a deglutition act as dysphagic; (2) acoustic
               changes in healthy older deglutition profiles compared with those
               of younger adults; (3) the correctness and concordance of rater
               judgments based on CA; and (4) if education in CA improves individual
               reliability. The first part of the study focused on a comparison
               of the swallow morphology of dysphagic as opposed to healthy subjects'
               deglutition in terms of structure properties of the pharyngeal
               phase of deglutition. We obtained the following results. The duration
               of deglutition apnea is significantly higher in the older group
               than in the younger one. Comparing the younger group and the dysphagic
               group we found significant differences in duration of deglutition
               apnea, onset time, and number of gulps. Just one parameter, number
               of gulps, distinguishes significantly between the older and the
               dysphagic groups. The second part of the study aimed at evaluating
               the reliability of CA in detecting dysphagia measured as the concordance
               and the correctness of CA experts in classifying swallowing sounds.
               The interrater reliability coefficient AC1 resulted in a value
               of 0.46, which is to be interpreted as fair agreement. Furthermore,
               we found that comparison with radiologically defined aspiration/penetration
               for the group of experts (speech and language therapists) yielded
               70\% specificity and 94\% sensitivity. We conclude that the swallowing
               sounds contain audible cues that should, in principle, permit
               reliable classification and view CA as an early warning system
               for identifying patients with a high risk of aspiration/penetration;
               however, it is not appropriate as a stand-alone tool.},
  doi       = {10.1007/s00455-007-9078-3},
  pdf       = {http://www.shkim.eu/cborr/ca5manuscript.pdf},
  publisher = {Springer New York},
  url       = {http://dx.doi.org/10.1007/s00455-007-9078-3},
  website   = {http://www.springerlink.com/content/c45578u74r38m4v7/},
  year      = {2007}
}

Alfred Kranstedt, Andy Lücking, Thies Pfeiffer, Hannes Rieser and Marc Staudacher. June, 2007. Locating Objects by Pointing.

BibTeX

@misc{Kranstedt:et:al:2007,
  author    = {Kranstedt, Alfred and Lücking, Andy and Pfeiffer, Thies and Rieser, Hannes
               and Staudacher, Marc},
  title     = {Locating Objects by Pointing},
  howpublished = {3rd International Conference of the International
                   Society for Gesture Studies. Evanston, IL, USA},
  month     = {6},
  year      = {2007},
  keywords  = {own},
}

Munshi Asadullah, Md. Zahurul Islam and Mumit Khan. 2007. Error-tolerant Finite-state Recognizer and String Pattern Similarity Based Spell-Checker for Bengali. 5th International Conference on Natural Language Processing (ICON) as a poster, Hyderabad, India, January 2007.

BibTeX

@inproceedings{Asadullah:Zahurul:Khan:2007,
  author    = {Asadullah, Munshi and Islam, Md. Zahurul and Khan, Mumit},
  title     = {Error-tolerant Finite-state Recognizer and String Pattern Similarity
               Based Spell-Checker for Bengali},
  booktitle = {5th International Conference on Natural Language Processing (ICON)
               as a poster, Hyderabad, India, January 2007},
  abstract  = {A crucial figure of merit for a spelling checker is not just whether
               it can detect misspelled words, but also in how it ranks the suggestions
               for the word. Spelling checker algorithms using edit distance
               methods tend to produce a large number of possibilities for misspelled
               words. We propose an alternative approach to checking the spelling
               of Bangla text that uses a finite state automaton (FSA) to probabilistically
               create the suggestion list for a misspelled word. FSA has proven
               to be an effective method for problems requiring probabilistic
               solution and high error tolerance. We start by using a finite
               state representation for all the words in the Bangla dictionary;
               the algorithm then uses the state tables to test a string, and
               in case of an erroneous string, try to find all possible solutions
               by attempting singular and multi-step transitions to consume
               one or more characters and using the subsequent characters as
               look-ahead; and finally, we use backtracking to add each possible
               solution to the suggestion list. The use of finite state representation
               for the word implies that the algorithm is much more efficient
               in the case of non-inflected forms; in case of nouns, it is
               even more significant as Bangla nouns are heavily used in the
               non-inflected form. In terms of error detection and correction,
               the algorithm uses the statistics of Bangla error pattern and
               thus produces a small number of significant suggestions. One
               notable limitation is the inability to handle transposition errors
               as a single edit distance errors. This is not as significant as
               it may seem since the number of transposition errors are not as
               common as other errors in Bangla. This paper presents the structure
               and the algorithm to implement a Practical Bangla spell-checker,
               and discusses the results obtained from the prototype implementation.},
  owner     = {zahurul},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Error-tolerant_Finite-state_Recognizer_and_String_Pattern_Similarity_Based_Spell-Checker_for_Bengali.pdf},
  timestamp = {2011.08.02},
  year      = {2007}
}

Md. Zahurul Islam, Md. Nizam Uddin and Mumit Khan. 2007. A Light Weight Stemmer for Bengali and Its Use in Spelling Checker. 1st International Conference on Digital Communications and Computer Applications (DCCA2007).

BibTeX

@inproceedings{Zahurul:Uddin:Khan:2007,
  author    = {Islam, Md. Zahurul and Uddin, Md. Nizam and Khan, Mumit},
  title     = {A Light Weight Stemmer for Bengali and Its Use in Spelling Checker},
  booktitle = {1st International Conference on Digital Communications and Computer
               Applications (DCCA2007)},
  abstract  = {Stemming is an operation that splits a word into the constituent
               root part and affix without doing complete morphological analysis.
               It is used to improve the performance of spelling checkers and
               information retrieval applications, where morphological analysis
               would be too computationally expensive. For spelling checkers
               specifically, using stemming may drastically reduce the dictionary
               size, often a bottleneck for mobile and embedded devices. This
               paper presents a computationally inexpensive stemming algorithm
               for Bengali, which handles suffix removal in a domain independent
               way. The evaluation of the proposed algorithm in a Bengali spelling
               checker indicates that it can be effectively used in information
               retrieval applications in general.},
  owner     = {zahurul},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/A_Light_Weight_Stemmer_for_Bengali_and_Its_Use_in_Spelling_Checker.pdf},
  timestamp = {2011.08.02},
  year      = {2007}
}

Md. Zahurul Islam and Mumit Khan. 2007. Bangla Verb Morphology and a Multilingual Computational Morphology FrameWork for PC-KIMMO. The Proceedings of Workshop on Morpho-Syntactic Analysis by the School of Asian Applied Natural Language Processing for Language Diversity and Language Resource Development (ADD), Bangkok, Thailand.

BibTeX

@inproceedings{Zahurul:Khan:2007,
  author    = {Islam, Md. Zahurul and Khan, Mumit},
  title     = {Bangla Verb Morphology and a Multilingual Computational Morphology
               FrameWork for PC-KIMMO},
  booktitle = {The Proceedings of Workshop on Morpho-Syntactic Analysis by
               the School of Asian Applied Natural Language Processing for Language
               Diversity and Language Resource Development (ADD), Bangkok, Thailand},
  owner     = {zahurul},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Bangla_Verb_Morphology_and_a_Multilingual_Computational_Morphology_FrameWork_for_PC-KIMMO-talk.pdf},
  timestamp = {2011.08.02},
  year      = {2007}
}

Alexander Mehler, Peter Geibel and Olga Abramov. 2007. Structural Classifiers of Text Types: Towards a Novel Model of Text Representation. Journal for Language Technology and Computational Linguistics (JLCL), 22(2):51–66.

BibTeX

@article{Mehler:Geibel:Pustylnikov:2007,
  author    = {Mehler, Alexander and Geibel, Peter and Abramov, Olga},
  title     = {Structural Classifiers of Text Types: Towards a Novel Model of
               Text Representation},
  journal   = {Journal for Language Technology and Computational
               Linguistics (JLCL)},
  volume    = {22},
  number    = {2},
  pages     = {51--66},
  abstract  = {Texts can be distinguished in terms of their content, function,
               structure or layout (Brinker, 1992; Bateman et al., 2001; Joachims,
               2002; Power et al., 2003). These reference points do not open
               necessarily orthogonal perspectives on text classification. As
               part of explorative data analysis, text classification aims at
               automatically dividing sets of textual objects into classes of
               maximum internal homogeneity and external heterogeneity. This
               paper deals with classifying texts into text types whose instances
               serve more or less homogeneous functions. Other than mainstream
               approaches, which rely on the vector space model (Sebastiani,
               2002) or some of its descendants (Baeza-Yates and Ribeiro-Neto,
               1999) and, thus, on content-related lexical features, we solely
               refer to structural differentiae. That is, we explore patterns
               of text structure as determinants of class membership. Our starting
               point are tree-like text representations which induce feature
               vectors and tree kernels. These kernels are utilized in supervised
               learning based on cross-validation as a method of model selection
               (Hastie et al., 2001) by example of a corpus of press communication.
               For a subset of categories we show that classification can be
               performed very well by structural differentia only.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_geibel_pustylnikov_2007.pdf},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.154.604},
  year      = {2007}
}

Olga Abramov and Alexander Mehler. 2007. Structural Differentiae of Text Types. A Quantitative Model. Proceedings of the 31st Annual Conference of the German Classification Society on Data Analysis, Machine Learning, and Applications (GfKl), 655–662.

BibTeX

@inproceedings{Abramov:Mehler:2007:b,
  author    = {Abramov, Olga and Mehler, Alexander},
  title     = {Structural Differentiae of Text Types. A Quantitative Model},
  booktitle = {Proceedings of the 31st Annual Conference of the German Classification
               Society on Data Analysis, Machine Learning, and Applications (GfKl)},
  pages     = {655--662},
  pdf       = {http://wwwhomes.uni-bielefeld.de/opustylnikov/pustylnikov/pdfs/gfkl.pdf},
  website   = {http://www.springerprofessional.de/077---structural-differentiae-of-text-types--a-quantitative-model/1957362.html},
  year      = {2007}
}

Alexander Mehler and Reinhard Köhler. 2007. Aspects of Automatic Text Analysis: Festschrift in Honor of Burghard Rieger. Ed. by Alexander Mehler and Reinhard Köhler. Studies in Fuzziness and Soft Computing. Springer.

BibTeX

@book{Mehler:Koehler:2007:a,
  author    = {Mehler, Alexander and Köhler, Reinhard},
  editor    = {Mehler, Alexander and Köhler, Reinhard},
  title     = {Aspects of Automatic Text Analysis: Festschrift in Honor of Burghard Rieger},
  series    = {Studies in Fuzziness and Soft Computing},
  publisher = {Springer},
  address   = {Berlin/New York},
  year      = {2007},
  pagetotal = {464},
  website   = {http://www.springer.com/de/book/9783540375203},
  review    = {http://www.degruyter.com/view/j/zrs.2011.3.issue-2/zrs.2011.050/zrs.2011.050.xml},
  review2   = {http://irsg.bcs.org/informer/Informer27.pdf},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/AspectsOfAutomaticTextAnalysis.jpg}
}

Alexander Mehler and Angelika Storrer. 2007. What are Ontologies Good For? Evaluating Terminological Ontologies in the Framework of Text Graph Classification. Proceedings of OTT '06 – Ontologies in Text Technology: Approaches to Extract Semantic Knowledge from Structured Information, 11–18.

BibTeX

@inproceedings{Mehler:Storrer:2007,
  author    = {Mehler, Alexander and Storrer, Angelika},
  title     = {What are Ontologies Good For? Evaluating Terminological Ontologies
               in the Framework of Text Graph Classification},
  booktitle = {Proceedings of OTT '06 -- Ontologies in Text Technology: Approaches
               to Extract Semantic Knowledge from Structured Information},
  editor    = {Mönnich, Uwe and Kühnberger, Kai-Uwe},
  series    = {Publications of the Institute of Cognitive Science
               (PICS)},
  pages     = {11--18},
  address   = {Osnabrück},
  pdf       = {http://cogsci.uni-osnabrueck.de/~ott06/ott06-abstracts/Mehler_Storrer_abstract.pdf},
  website   = {http://citeseer.uark.edu:8080/citeseerx/viewdoc/summary?doi=10.1.1.91.2979},
  year      = {2007}
}

Maik Stührenberg, Daniela Goecke, Nils Diewald, Alexander Mehler and Irene Cramer. 2007. Web-based Annotation of Anaphoric Relations and Lexical Chains. Proceedings of the Linguistic Annotation Workshop, ACL 2007, 140–147.

BibTeX

@inproceedings{Stuehrenberg:Goecke:Diewald:Mehler:Cramer:2007:a,
  author    = {Stührenberg, Maik and Goecke, Daniela and Diewald, Nils and Mehler, Alexander
               and Cramer, Irene},
  title     = {Web-based Annotation of Anaphoric Relations and Lexical Chains},
  booktitle = {Proceedings of the Linguistic Annotation Workshop, ACL 2007},
  pages     = {140--147},
  pdf       = {http://www.aclweb.org/anthology/W07-1523},
  website   = {https://www.researchgate.net/publication/234800610_Web-based_annotation_of_anaphoric_relations_and_lexical_chains},
  year      = {2007}
}

Ramon Ferrer i Cancho, Alexander Mehler, Olga Abramov and Albert Díaz-Guilera. 2007. Correlations in the organization of large-scale syntactic dependency networks. Proceedings of Graph-based Methods for Natural Language Processing (TextGraphs-2) at the Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL-HLT 2007), Rochester, New York, 65–72.

BibTeX

@inproceedings{Ferrer:i:Cancho:Mehler:Pustylnikov:Diaz-Guilera:2007:a,
  author    = {Ferrer i Cancho, Ramon and Mehler, Alexander and Abramov, Olga
               and Díaz-Guilera, Albert},
  title     = {Correlations in the organization of large-scale syntactic dependency networks},
  booktitle = {Proceedings of Graph-based Methods for Natural Language Processing
               (TextGraphs-2) at the Annual Conference of the North American
               Chapter of the Association for Computational Linguistics (NAACL-HLT
               2007), Rochester, New York},
  pages     = {65--72},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/ferrer-i-cancho_mehler_pustylnikov_diaz-guilera_2007_a.pdf},
  year      = {2007}
}

Rüdiger Gleim, Alexander Mehler, Hans-Jürgen Eikmeyer and Hannes Rieser. 2007. Ein Ansatz zur Repräsentation und Verarbeitung großer Korpora multimodaler Daten. Data Structures for Linguistic Resources and Applications. Proceedings of the Biennial GLDV Conference 2007, 11.–13. April, Universität Tübingen, 275–284.

BibTeX

@inproceedings{Gleim:Mehler:Eikmeyer:Rieser:2007,
  author    = {Gleim, Rüdiger and Mehler, Alexander and Eikmeyer, Hans-Jürgen
               and Rieser, Hannes},
  title     = {Ein Ansatz zur Repr{\"a}sentation und Verarbeitung gro{\ss}er
               Korpora multimodaler Daten},
  booktitle = {Data Structures for Linguistic Resources and Applications. Proceedings
               of the Biennial GLDV Conference 2007, 11.--13. April, Universit{\"a}t
               Tübingen},
  editor    = {Rehm, Georg and Witt, Andreas and Lemnitzer, Lothar},
  pages     = {275--284},
  address   = {Tübingen},
  publisher = {Narr},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/gleim_mehler_eikmeyer_rieser_2007.pdf},
  year      = {2007}
}

Alexander Mehler. 2007. Aspectos Metodológicos da Semiótica Computacional. Computação, Cognição e Semiose, 145–157.

BibTeX

@incollection{Mehler:2004:2007,
  author    = {Mehler, Alexander},
  title     = {Aspectos Metodológicos da Semiótica Computacional},
  booktitle = {Computação, Cognição e Semiose},
  publisher = {EDUFBA},
  editor    = {Queiroz, João and Gudwin, Ricardo and Loula, Angelo},
  pages     = {145--157},
  address   = {Federal University of Bahia},
  year      = {2007}
}

Alexander Mehler. 2007. Compositionality in Quantitative Semantics. A Theoretical Perspective on Text Mining. Aspects of Automatic Text Analysis, 139–167.

BibTeX

@incollection{Mehler:2007:b,
  author    = {Mehler, Alexander},
  title     = {Compositionality in Quantitative Semantics. A Theoretical Perspective
               on Text Mining},
  booktitle = {Aspects of Automatic Text Analysis},
  publisher = {Springer},
  editor    = {Mehler, Alexander and Köhler, Reinhard},
  series    = {Studies in Fuzziness and Soft Computing},
  pages     = {139--167},
  address   = {Berlin/New York},
  abstract  = {This chapter introduces a variant of the principle of compositionality
               in quantitative text semantics as an alternative to the bag-of-features
               approach. The variant includes effects of context-sensitive interpretation
               as well as processes of meaning constitution and change in the
               sense of usage-based semantics. Its starting point is a combination
               of semantic space modeling and text structure analysis. The principle
               is implemented by means of a hierarchical constraint satisfaction
               process which utilizes the notion of hierarchical text structure
               superimposed by graph-inducing coherence relations. The major
               contribution of the chapter is a conceptualization and formalization
               of the principle of compositionality in terms of semantic spaces
               which tackles some well known deficits of existing approaches.
               In particular this relates to the missing linguistic interpretability
               of statistical meaning representations.},
  website   = {http://www.springerlink.com/content/x214w527g42x0116/},
  year      = {2007}
}

Matthias Dehmer and Alexander Mehler. 2007. A New Method of Measuring the Similarity for a Special Class of Directed Graphs. Tatra Mountains Mathematical Publications, 36:39–59.

BibTeX

@article{Dehmer:Mehler:2007:a,
  author    = {Dehmer, Matthias and Mehler, Alexander},
  title     = {A New Method of Measuring the Similarity for a Special Class of Directed Graphs},
  journal   = {Tatra Mountains Mathematical Publications},
  volume    = {36},
  pages     = {39--59},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/dehmer_mehler_2004_a.pdf},
  website   = {https://www.researchgate.net/publication/228905939_A_new_method_of_measuring_similarity_for_a_special_class_of_directed_graphs},
  year      = {2007}
}

Peter Geibel, Ulf Krumnack, Olga Abramov, Alexander Mehler, Helmar Gust and Kai-Uwe Kühnberger. 2007. Structure-Sensitive Learning of Text Types. Proceedings of AI 2007: Advances in Artificial Intelligence, 20th Australian Joint Conference on Artificial Intelligence, Gold Coast, Australia, December 2-6, 2007, 4830:642–646.

BibTeX

@inproceedings{Geibel:Krumnack:Pustylnikov:Mehler:Gust:Kuehnberger:2007,
  author    = {Geibel, Peter and Krumnack, Ulf and Abramov, Olga and Mehler, Alexander
               and Gust, Helmar and Kühnberger, Kai-Uwe},
  title     = {Structure-Sensitive Learning of Text Types},
  booktitle = {Proceedings of AI 2007: Advances in Artificial Intelligence, 20th
               Australian Joint Conference on Artificial Intelligence, Gold Coast,
               Australia, December 2-6, 2007},
  editor    = {Orgun, Mehmet A. and Thornton, John},
  volume    = {4830},
  series    = {Lecture Notes in Computer Science},
  pages     = {642--646},
  publisher = {Springer},
  abstract  = {In this paper, we discuss the structure based classification of
               documents based on their logical document structure, i.e., their
               DOM trees. We describe a method using predefined structural features
               and also four tree kernels suitable for such structures. We evaluate
               the methods experimentally on a corpus containing the DOM trees
               of newspaper articles, and on the well-known SUSANNE corpus. We
               will demonstrate that, for the two corpora, many text types can
               be learned based on structural features only.},
  website   = {http://www.springerlink.com/content/w574377ww1h6m212/},
  year      = {2007}
}

2006

Alexander Mehler, Rüdiger Gleim and Matthias Dehmer. 2006. Towards Structure-Sensitive Hypertext Categorization. Proceedings of the 29th Annual Conference of the German Classification Society, March 9-11, 2005, Universität Magdeburg, 406–413.

BibTeX

@inproceedings{Mehler:Gleim:Dehmer:2006,
  author    = {Mehler, Alexander and Gleim, Rüdiger and Dehmer, Matthias},
  title     = {Towards Structure-Sensitive Hypertext Categorization},
  booktitle = {Proceedings of the 29th Annual Conference of the German Classification
               Society, March 9-11, 2005, Universit{\"a}t Magdeburg},
  editor    = {Spiliopoulou, Myra and Kruse, Rudolf and Borgelt, Christian and Nürnberger, Andreas
               and Gaul, Wolfgang},
  pages     = {406--413},
  address   = {Berlin/New York},
  publisher = {Springer},
  abstract  = {Hypertext categorization is the task of automatically assigning
               category labels to hypertext units. Comparable to text categorization
               it stays in the area of function learning based on the bag-of-features
               approach. This scenario faces the problem of a many-to-many relation
               between websites and their hidden logical document structure.
               The paper argues that this relation is a prevalent characteristic
               which interferes any effort of applying the classical apparatus
               of categorization to web genres. This is confirmed by a threefold
               experiment in hypertext categorization. In order to outline a
               solution to this problem, the paper sketches an alternative method
               of unsupervised learning which aims at bridging the gap between
               statistical and structural pattern recognition (Bunke et al. 2001)
               in the area of web mining.},
  website   = {http://www.springerlink.com/content/l7665tm3u241317l/},
  year      = {2006}
}

Alexander Mehler. 2006. A Network Perspective on Intertextuality. Exact Methods in the Study of Language and Text, 437–446.

BibTeX

@incollection{Mehler:2006:d,
  author    = {Mehler, Alexander},
  title     = {A Network Perspective on Intertextuality},
  booktitle = {Exact Methods in the Study of Language and Text},
  publisher = {De Gruyter},
  editor    = {Grzybek, Peter and Köhler, Reinhard},
  series    = {Quantitative Linguistics},
  pages     = {437--446},
  address   = {Berlin/New York},
  year      = {2006}
}

Matthias Dehmer, Frank Emmert-Streib, Alexander Mehler and Jürgen Kilian. 2006. Measuring the Structural Similarity of Web-based Documents: A Novel Approach. International Journal of Computational Intelligence, 3(1):1–7.

BibTeX

@article{Dehmer:Emmert:Streib:Mehler:Kilian:2006,
  author    = {Dehmer, Matthias and Emmert-Streib, Frank and Mehler, Alexander
               and Kilian, Jürgen},
  title     = {Measuring the Structural Similarity of Web-based Documents: A Novel Approach},
  journal   = {International Journal of Computational Intelligence},
  volume    = {3},
  number    = {1},
  pages     = {1--7},
  abstract  = {Most known methods for measuring the structural similarity of
               document structures are based on, e.g., tag measures, path metrics
               and tree measures in terms of their DOM-Trees. Other methods measures
               the similarity in the framework of the well known vector space
               model. In contrast to these we present a new approach to measuring
               the structural similarity of web-based documents represented by
               so called generalized trees which are more general than DOM-Trees
               which represent only directed rooted trees. We will design a new
               similarity measure for graphs representing web-based hypertext
               structures. Our similarity measure is mainly based on a novel
               representation of a graph as strings of linear integers, whose
               components represent structural properties of the graph. The similarity
               of two graphs is then defined as the optimal alignment of the
               underlying property strings. In this paper we apply the well known
               technique of sequence alignments to solve a novel and challenging
               problem: Measuring the structural similarity of generalized trees.
               More precisely, we first transform our graphs considered as high
               dimensional objects in linear structures. Then we derive similarity
               values from the alignments of the property strings in order to
               measure the structural similarity of generalized trees. Hence,
               we transform a graph similarity problem to a string similarity
               problem. We demonstrate that our similarity measure captures important
               structural information by applying it to two different test sets
               consisting of graphs representing web-based documents.},
  pdf       = {http://waset.org/publications/15928/measuring-the-structural-similarity-of-web-based-documents-a-novel-approach},
  website   = {http://connection.ebscohost.com/c/articles/24839145/measuring-structural-similarity-web-based-documents-novel-approach},
  year      = {2006}
}

Alexander Mehler and Rüdiger Gleim. 2006. The Net for the Graphs – Towards Webgenre Representation for Corpus Linguistic Studies. WaCky! Working Papers on the Web as Corpus, 191–224.

BibTeX

@incollection{Mehler:Gleim:2006:b,
  author    = {Mehler, Alexander and Gleim, Rüdiger},
  title     = {The Net for the Graphs -- Towards Webgenre Representation for Corpus
               Linguistic Studies},
  booktitle = {WaCky! Working Papers on the Web as Corpus},
  publisher = {Gedit},
  editor    = {Baroni, Marco and Bernardini, Silvia},
  pages     = {191--224},
  address   = {Bologna},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.510.4125},
  year      = {2006}
}

Rüdiger Gleim, Alexander Mehler and Matthias Dehmer. 2006. Web Corpus Mining by Instance of Wikipedia. Proceedings of the EACL 2006 Workshop on Web as Corpus, April 3-7, 2006, Trento, Italy, 67–74.

BibTeX

@inproceedings{Gleim:Mehler:Dehmer:2006:a,
  author    = {Gleim, Rüdiger and Mehler, Alexander and Dehmer, Matthias},
  title     = {Web Corpus Mining by Instance of Wikipedia},
  booktitle = {Proceedings of the EACL 2006 Workshop on Web as Corpus, April
               3-7, 2006, Trento, Italy},
  editor    = {Kilgarriff, Adam and Baroni, Marco},
  pages     = {67--74},
  abstract  = {Workshop organizer: Adam Kilgarriff},
  pdf       = {http://www.aclweb.org/anthology/W06-1710},
  website   = {http://pub.uni-bielefeld.de/publication/1773538},
  year      = {2006}
}

Alexander Mehler. 2006. In Search of a Bridge Between Network Analysis in Computational Linguistics and Computational Biology – A Conceptual Note. BIOCOMP, 496–502.

BibTeX

@inproceedings{mehler:2006,
  author    = {Mehler, Alexander},
  title     = {In Search of a Bridge Between Network Analysis in Computational
               Linguistics and Computational Biology -- A Conceptual Note},
  booktitle = {BIOCOMP},
  pages     = {496--502},
  pdf       = {https://pdfs.semanticscholar.org/81aa/0b840ed413089d69908cff60628a92609ccd.pdf},
  year      = {2006}
}

Tim vor der Brück and Stephan Busemann. October, 2006. Automatic Error Correction for Tree-Mapping Grammars. Proceedings of KONVENS 2006, 1–8.

BibTeX

@inproceedings{vor:der:Brueck:Busemann:2006,
  author    = {vor der Brück, Tim and Busemann, Stephan},
  title     = {Automatic Error Correction for Tree-Mapping Grammars},
  booktitle = {Proceedings of KONVENS 2006},
  editor    = {Butt, Miriam},
  pages     = {1--8},
  address   = {Konstanz, Germany},
  abstract  = {Tree mapping grammars are used in natural language generation
               (NLG) to map non-linguistic input onto a derivation tree from
               which the target text can be trivially read off as the terminal
               yield. Such grammars may consist of a large number of rules. Finding
               errors is quite tedious and sometimes very time-consuming. Often
               the generation fails because the relevant input subtree is not
               specified correctly. This work describes a method to detect and
               correct wrong assignments of input subtrees to grammar categories
               by cross-validating grammar rules with the given input structures.
               The result is implemented in a grammar development workbench and
               helps accelerating the grammar writer's work considerably.},
  isbn      = {3-89318-050-8},
  month     = oct,
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/brueck-busemann-konvens06.pdf},
  url       = {http://pi7.fernuni-hagen.de/brueck/papers/brueck-busemann-konvens06.pdf},
  website   = {http://www.dfki.de/lt/publication_show.php?id=3602},
  year      = {2006}
}

Alfred Kranstedt, Andy Lücking, Thies Pfeiffer, Hannes Rieser and Marc Staudacher. September, 2006. Measuring and Reconstructing Pointing in Visual Contexts. brandial '06 – Proceedings of the 10th Workshop on the Semantics and Pragmatics of Dialogue, 82–89.

BibTeX

@inproceedings{Kranstedt:et:al:2006:c,
  author    = {Kranstedt, Alfred and Lücking, Andy and Pfeiffer, Thies and Rieser, Hannes
               and Staudacher, Marc},
  title     = {Measuring and Reconstructing Pointing in Visual Contexts},
  booktitle = {brandial '06 -- Proceedings of the 10th Workshop on the Semantics
               and Pragmatics of Dialogue},
  editor    = {Schlangen, David and Fernández, Raquel},
  pages     = {82--89},
  address   = {Potsdam},
  publisher = {Universit{\"a}tsverlag Potsdam},
  abstract  = {We describe an experiment to gather original data on geometrical
               aspects of pointing. In particular, we are focusing upon the concept
               of the pointing cone, a geometrical model of a pointing’s extension.
               In our setting we employed methodological and technical procedures
               of a new type to integrate data from annotations as well as from
               tracker recordings. We combined exact information on position
               and orientation with rater’s classifications. Our first results
               seem to challenge classical linguistic and philosophical theories
               of demonstration in that they advise to separate pointings from
               reference.},
  keywords  = {own},
  month     = sep,
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/measure.pdf},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.144.8472},
  year      = {2006}
}

Andy Lücking, Hannes Rieser and Marc Staudacher. September, 2006. Multi-modal Integration for Gesture and Speech. brandial '06 – Proceedings of the 10th Workshop on the Semantics and Pragmatics of Dialogue, 106–113.

BibTeX

@inproceedings{Luecking:Rieser:Staudacher:2006:a,
  author    = {Lücking, Andy and Rieser, Hannes and Staudacher, Marc},
  title     = {Multi-modal Integration for Gesture and Speech},
  booktitle = {brandial '06 -- Proceedings of the 10th Workshop on the Semantics
               and Pragmatics of Dialogue},
  editor    = {Schlangen, David and Fernández, Raquel},
  pages     = {106--113},
  address   = {Potsdam},
  publisher = {Universit{\"a}tsverlag Potsdam},
  abstract  = {Demonstratives, in particular gestures that 'only' accompany speech,
               are not a big issue in current theories of grammar. If we deal
               with gestures, fixing their function is one big problem, the other
               one is how to integrate the representations originating from different
               channels and, ultimately, how to determine their composite meanings.
               The growing interest in multi-modal settings, computer simulations,
               human-machine interfaces and VR-applications increases the need
               for theories of multi-modal structures and events. In our workshop-contribution
               we focus on the integration of multi-modal contents and investigate
               different approaches dealing with this problem such as Johnston
               et al. (1997) and Johnston (1998), Johnston and Bangalore (2000),
               Chierchia (1995), Asher (2005), and Rieser (2005).},
  keywords  = {own},
  month     = sep,
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mm-int-brandial-final.pdf},
  year      = {2006}
}

Alfred Kranstedt, Andy Lücking, Thies Pfeiffer, Hannes Rieser and Ipke Wachsmuth. 2006. Deictic Object Reference in Task-oriented Dialogue. Situated Communication, 155–207.

BibTeX

@incollection{Kranstedt:et:al:2006:b,
  author    = {Kranstedt, Alfred and Lücking, Andy and Pfeiffer, Thies and Rieser, Hannes
               and Wachsmuth, Ipke},
  title     = {Deictic Object Reference in Task-oriented Dialogue},
  editor    = {Gert Rickheit and Ipke Wachsmuth},
  booktitle = {Situated Communication},
  pages     = {155--207},
  publisher = {De Gruyter Mouton},
  address   = {Berlin},
  year      = {2006},
  keywords  = {own},
  website   = {http://pub.uni-bielefeld.de/publication/1894485},
  abstract  = {This chapter presents an original approach towards a detailed
               understanding of the usage of pointing gestures accompanying referring
               expressions. This effort is undertaken in the context of human-machine
               interaction integrating empirical studies, theory of grammar and
               logics, and simulation techniques. In particular, we take steps
               to classify the role of pointing in deictic expressions and to
               model the focussed area of pointing gestures, the so-called pointing
               cone. This pointing cone serves as a central concept in a formal
               account of multi-modal integration at the linguistic speech-gesture
               interface as well as in a computational model of processing multi-modal
               deictic expressions.}
}

Alfred Kranstedt, Andy Lücking, Thies Pfeiffer, Hannes Rieser and Ipke Wachsmuth. 2006. Deixis: How to Determine Demonstrated Objects Using a Pointing Cone. Gesture in Human-Computer Interaction and Simulation, 300–311.

BibTeX

@incollection{Kranstedt:et:al:2006:a,
  author    = {Kranstedt, Alfred and Lücking, Andy and Pfeiffer, Thies and Rieser, Hannes
               and Wachsmuth, Ipke},
  title     = {Deixis: How to Determine Demonstrated Objects Using a Pointing Cone},
  editor    = {Sylvie Gibet and Nicolas Courty and Jean-Francois Kamp},
  booktitle = {Gesture in Human-Computer Interaction and Simulation},
  pages     = {300--311},
  publisher = {Springer},
  address   = {Berlin},
  year      = {2006},
  anote     = {6th International Gesture Workshop, Berder Island,
                   France, 2005, Revised Selected Papers},
  keywords  = {own},
  website   = {http://www.springerlink.com/content/712036hp5v2q8408/},
  abstract  = {We present a collaborative approach towards a detailed understanding
               of the usage of pointing gestures accompanying referring expressions.
               This effort is undertaken in the context of human-machine interaction
               integrating empirical studies, theory of grammar and logics, and
               simulation techniques. In particular, we attempt to measure the
               precision of the focussed area of a pointing gesture, the so-called
               pointing cone. The pointing cone serves as a central concept in
               a formal account of multi-modal integration at the linguistic
               speech-gesture interface as well as in a computational model of
               processing multi-modal deictic expressions.}
}

Thies Pfeiffer, Alfred Kranstedt and Andy Lücking. 2006. Sprach-Gestik Experimente mit IADE, dem Interactive Augmented Data Explorer. Proceedings: Dritter Workshop Virtuelle und Erweiterte Realität der GI-Fachgruppe VR/AR.

BibTeX

@inproceedings{Pfeiffer:Kranstedt:Luecking:2006,
  author    = {Pfeiffer, Thies and Kranstedt, Alfred and Lücking, Andy},
  title     = {Sprach-Gestik Experimente mit IADE, dem Interactive Augmented Data Explorer},
  booktitle = {Proceedings: Dritter Workshop Virtuelle und Erweiterte Realit{\"a}t
               der GI-Fachgruppe VR/AR},
  address   = {Koblenz},
  year      = {2006},
  keywords  = {own},
  website   = {http://pub.uni-bielefeld.de/publication/2426853},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Pfeiffer-Kranstedt-Luecking-IADE.pdf},
  abstract  = {Für die empirische Erforschung natürlicher menschlicher Kommunikation
               sind wir auf die Akquise und Auswertung umfangreicher Daten angewiesen.
               Die Modalit{\"a}ten, über die sich Menschen ausdrücken können,
               sind sehr unterschiedlich - und genauso verschieden sind die Repr{\"a}sentationen,
               mit denen sie für die Empirie verfügbar gemacht werden können.
               Für eine Untersuchung des Zeigeverhaltens bei der Referenzierung
               von Objekten haben wir mit IADE ein Framework für die Aufzeichnung,
               Analyse und Resimulation von Sprach-Gestik Daten entwickelt. Mit
               dessen Hilfe können wir für unsere Forschung entscheidende Fortschritte
               in der linguistischen Experimentalmethodik machen.}
}

Andy Lücking, Hannes Rieser and Marc Staudacher. September, 2006. SDRT and Multi-modal Situated Communication. brandial '06 – Proceedings of the 10th Workshop on the Semantics and Pragmatics of Dialogue, 72–79.

BibTeX

@inproceedings{Luecking:Rieser:Stauchdacher:2006:b,
  author    = {Lücking, Andy and Rieser, Hannes and Staudacher, Marc},
  title     = {{SDRT} and Multi-modal Situated Communication},
  booktitle = {brandial '06 -- Proceedings of the 10th Workshop on the Semantics
               and Pragmatics of Dialogue},
  editor    = {David Schlangen and Raquel Fernández},
  pages     = {72--79},
  address   = {Potsdam},
  publisher = {Universit{\"a}tsverlag Potsdam},
  keywords  = {own},
  month     = {9},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/sdrt-sitcomm-brandial-final.pdf},
  year      = {2006}
}

Md. Zahurul Islam and Mumit Khan. 2006. JKimmo: A Multilingual Computational Morphology Framework for PC-KIMMO. 9th International Conference on Computer and Information Technology (ICCIT 2006), Dhaka, Bangladesh.

BibTeX

@inproceedings{Zahurul:Khan:2006,
  author    = {Islam, Md. Zahurul and Khan, Mumit},
  title     = {{JKimmo}: A Multilingual Computational Morphology Framework for {PC-KIMMO}},
  booktitle = {9th International Conference on Computer and Information Technology
               (ICCIT 2006), Dhaka, Bangladesh},
  abstract  = {Morphological analysis is of fundamental interest in computational
               linguistics and language processing. While there are established
               morphological analyzers for mostly Western and a few other languages
               using localized interfaces, the same cannot be said for Indic
               and other less-studied languages for which language processing
               is just beginning. There are three primary obstacles to computational
               morphological analysis of these less-studied languages: the generative
               rules that define the language morphology, the morphological processor,
               and the computational interface that a linguist can use to experiment
               with the generative rules. In this paper, we present JKimmo, a
               multilingual morphological open-source framework that uses the
               PC-KIMMO two-level morphological processor and provides a localized
               interface for Bangla morphological analysis. We then apply JKimmo
               to Bangla computational morphology, demonstrating both its recognition
               and generation capabilities. JKimmo’s internationalization (i18n)
               framework allows easy localization in other languages as well,
               using a property file for the interface definitions and a transliteration
               scheme for the analysis.},
  owner     = {zahurul},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/JKimmo_-A_Multilingual_Computational_Morphology_Framework_for_PC-KIMMO.pdf},
  timestamp = {2011.08.02},
  website   = {https://www.researchgate.net/publication/237728403_JKimmo_A_Multilingual_Computational_Morphology_Framework_for_PC-KIMMO},
  year      = {2006}
}

Tofazzal Rownok, Md. Zahurul Islam and Mumit Khan. 2006. Bangla Text Input and Rendering Support for Short Message Service on Mobile Devices. 9th International Conference on Computer and Information Technology (ICCIT 2006), Dhaka, Bangladesh.

BibTeX

@inproceedings{Rownok:Zahurul:Khan:2006,
  author    = {Rownok, Tofazzal and Islam, Md. Zahurul and Khan, Mumit},
  title     = {Bangla Text Input and Rendering Support for Short Message Service
               on Mobile Devices},
  booktitle = {9th International Conference on Computer and Information Technology
               (ICCIT 2006), Dhaka, Bangladesh},
  abstract  = {Technology is the most important thing that involve in our everyday
               life. It is involving in almost every aspect of life like communication,
               work, shopping, recreation etc. Communication through mobile devices
               is the most effective and easy way now a day. It is faster, easier
               and you can communicate whenever you want from anywhere. Mobile
               messaging or short message service is one of the popular ways
               to communicate using mobile devices. It is a big challenge to
               write and display Bangla characters on mobile devices. In this
               paper, we describe a Bangla text input method and rendering support
               on mobile devices for short message service.},
  owner     = {zahurul},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Bangla_Text_Input_and_Rendering_Support_for_Short_Message_Service_on_Mobile_Devices.pdf},
  timestamp = {2011.08.02},
  year      = {2006}
}

Yeasir Arafat, Md. Zahurul Islam and Mumit Khan. 2006. Analysis and Observations From a Bangla news corpus. 9th International Conference on Computer and Information Technology (ICCIT 2006), Dhaka, Bangladesh.

BibTeX

@inproceedings{Arafat:Zahurul:Khan:2006,
  author    = {Arafat, Yeasir and Islam, Md. Zahurul and Khan, Mumit},
  title     = {Analysis and Observations From a {Bangla} News Corpus},
  booktitle = {9th International Conference on Computer and Information Technology
               (ICCIT 2006), Dhaka, Bangladesh},
  owner     = {zahurul},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Analysis_and_Observations_From_a_Bangla_news_corpus.pdf},
  timestamp = {2011.08.02},
  year      = {2006}
}

Rüdiger Gleim. 2006. HyGraph - Ein Framework zur Extraktion, Repräsentation und Analyse webbasierter Hypertextstrukturen. Sprachtechnologie, mobile Kommunikation und linguistische Ressourcen. Beiträge zur GLDV-Tagung 2005, Universität Bonn, 42–53.

BibTeX

@inproceedings{Gleim:2006,
  author    = {Gleim, Rüdiger},
  title     = {HyGraph - Ein Framework zur Extraktion, Repr{\"a}sentation und
               Analyse webbasierter Hypertextstrukturen},
  booktitle = {Sprachtechnologie, mobile Kommunikation und linguistische Ressourcen.
               Beitr{\"a}ge zur GLDV-Tagung 2005, Universit{\"a}t Bonn},
  editor    = {Fisseni, Bernhard and Schmitz, Hans-Christian and Schröder, Bernhard
               and Wagner, Petra},
  pages     = {42--53},
  address   = {Frankfurt a. M.},
  publisher = {Lang},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2016/10/GLDV2005-HyGraph-Framework.pdf},
  website   = {https://www.researchgate.net/publication/268294000_HyGraph__Ein_Framework_zur_Extraktion_Reprsentation_und_Analyse_webbasierter_Hypertextstrukturen},
  year      = {2006}
}

Alexander Mehler. 2006. Text Linkage in the Wiki Medium – A Comparative Study. Proceedings of the EACL Workshop on New Text – Wikis and blogs and other dynamic text sources, April 3-7, 2006, Trento, Italy, 1–8.

BibTeX

@inproceedings{Mehler:2006:c,
  author    = {Mehler, Alexander},
  title     = {Text Linkage in the Wiki Medium – A Comparative Study},
  booktitle = {Proceedings of the EACL Workshop on New Text – Wikis and blogs
               and other dynamic text sources, April 3-7, 2006, Trento, Italy},
  editor    = {Karlgren, Jussi},
  pages     = {1--8},
  abstract  = {Workshop organizer: Jussi Karlgren},
  pdf       = {http://www.aclweb.org/anthology/W06-2801},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.6390},
  year      = {2006}
}

Alexander Mehler. 2006. Stratified Constraint Satisfaction Networks in Synergetic Multi-Agent Simulations of Language Evolution. Artificial Cognition Systems, 140–174.

BibTeX

@incollection{Mehler:2006:e,
  author    = {Mehler, Alexander},
  title     = {Stratified Constraint Satisfaction Networks in Synergetic Multi-Agent
               Simulations of Language Evolution},
  booktitle = {Artificial Cognition Systems},
  publisher = {Idea Group Inc.},
  editor    = {Loula, Angelo and Gudwin, Ricardo and Queiroz, João},
  pages     = {140--174},
  address   = {Hershey},
  abstract  = {Ehedem = Mehler:2005:e},
  year      = {2006}
}

Alexander Mehler and Lorenz Sichelschmidt. 2006. Reconceptualizing Latent Semantic Analysis in Terms of Complex Network Theory. A Corpus-Linguistic Approach. 2nd International Conference of the German Cognitive Linguistics Association – Theme Session: Cognitive-Linguistic Approaches: What can we gain by computational treatment of data? 5.-7. Oktober 2006, Ludwig-Maximilians-Universität München, 23–26.

BibTeX

@inproceedings{Mehler:Sichelschmidt:2006,
  author    = {Mehler, Alexander and Sichelschmidt, Lorenz},
  title     = {Reconceptualizing Latent Semantic Analysis in Terms of Complex
               Network Theory. A Corpus-Linguistic Approach},
  booktitle = {2nd International Conference of the German Cognitive Linguistics
               Association – Theme Session: Cognitive-Linguistic Approaches:
               What can we gain by computational treatment of data? 5.-7. Oktober
               2006, Ludwig-Maximilians-Universit{\"a}t München},
  pages     = {23--26},
  editor    = {Alonge, Antonietta and Lönneker-Rodman, Birte},
  pdf       = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.87.5069&rep=rep1&type=pdf},
  year      = {2006}
}

Alexander Mehler, Matthias Dehmer and Rüdiger Gleim. 2006. Towards Logical Hypertext Structure - A Graph-Theoretic Perspective. Proceedings of the Fourth International Workshop on Innovative Internet Computing Systems (I2CS '04), 136–150.

BibTeX

@inproceedings{Mehler:Dehmer:Gleim:2006,
  author    = {Mehler, Alexander and Dehmer, Matthias and Gleim, Rüdiger},
  title     = {Towards Logical Hypertext Structure - A Graph-Theoretic Perspective},
  booktitle = {Proceedings of the Fourth International Workshop on Innovative
               Internet Computing Systems (I2CS '04)},
  editor    = {Böhme, Thomas and Heyer, Gerhard},
  series    = {Lecture Notes in Computer Science},
  volume    = {3473},
  pages     = {136--150},
  address   = {Berlin/New York},
  publisher = {Springer},
  abstract  = {Facing the retrieval problem according to the overwhelming set
               of documents online the adaptation of text categorization to web
               units has recently been pushed. The aim is to utilize categories
               of web sites and pages as an additional retrieval criterion. In
               this context, the bag-of-words model has been utilized just as
               HTML tags and link structures. In spite of promising results this
               adaptation stays in the framework of IR specific models since
               it neglects the content-based structuring inherent to hypertext
               units. This paper approaches hypertext modelling from the perspective
               of graph-theory. It presents an XML-based format for representing
               websites as hypergraphs. These hypergraphs are used to shed light
               on the relation of hypertext structure types and their web-based
               instances. We place emphasis on two characteristics of this relation:
               In terms of realizational ambiguity we speak of functional equivalents
               to the manifestation of the same structure type. In terms of polymorphism
               we speak of a single web unit which manifests different structure
               types. It is shown that polymorphism is a prevalent characteristic
               of web-based units. This is done by means of a categorization
               experiment which analyses a corpus of hypergraphs representing
               the structure and content of pages of conference websites. On
               this background we plead for a revision of text representation
               models by means of hypergraphs which are sensitive to the manifold
               structuring of web documents.},
  website   = {http://rd.springer.com/chapter/10.1007/11553762_14},
  year      = {2006}
}

Alexander Mehler. 2006. In Search of a Bridge between Network Analysis in Computational Linguistics and Computational Biology – A Conceptual Note. Proceedings of the 2006 International Conference on Bioinformatics & Computational Biology (BIOCOMP '06), June 26, 2006, Las Vegas, USA, 496–500.

BibTeX

@inproceedings{Mehler:2006:a,
  author    = {Mehler, Alexander},
  title     = {In Search of a Bridge between Network Analysis in Computational
               Linguistics and Computational Biology – A Conceptual Note},
  booktitle = {Proceedings of the 2006 International Conference on Bioinformatics
               \& Computational Biology (BIOCOMP '06), June 26, 2006, Las Vegas,
               USA},
  editor    = {Arabnia, Hamid R. and Valafar, Homayoun},
  pages     = {496--500},
  pdf       = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.92.9842&rep=rep1&type=pdf},
  year      = {2006}
}

2005

Matthias Dehmer, Frank Emmert-Streib, Alexander Mehler, Jürgen Kilian and Max Mühlhäuser. 2005. Application of a similarity measure for graphs to web-based document structures. Proceedings of VI. International Conference on Enformatika, Systems Sciences and Engineering, Budapest, Hungary, October 2005, International Academy of Sciences: Enformatika 8 (2005), 77–81.

BibTeX

@inproceedings{Dehmer:Emmert:Streib:Mehler:Kilian:Muehlhaeuser:2005,
  author    = {Dehmer, Matthias and Emmert-Streib, Frank and Mehler, Alexander
               and Kilian, Jürgen and Mühlh{\"a}user, Max},
  title     = {Application of a similarity measure for graphs to web-based document structures},
  booktitle = {Proceedings of VI. International Conference on Enformatika, Systems
               Sciences and Engineering, Budapest, Hungary, October 2005, International
               Academy of Sciences: Enformatika 8 (2005)},
  pages     = {77--81},
  abstract  = {Due to the tremendous amount of information provided by the World
               Wide Web (WWW) developing methods for mining the structure of
               web-based documents is of considerable interest. In this paper
               we present a similarity measure for graphs representing web-based
               hypertext structures. Our similarity measure is mainly based on
               a novel representation of a graph as linear integer strings, whose
               components represent structural properties of the graph. The similarity
               of two graphs is then defined as the optimal alignment of the
               underlying property strings. In this paper we apply the well known
               technique of sequence alignments for solving a novel and challenging
               problem: Measuring the structural similarity of generalized trees.
               In other words: We first transform our graphs considered as high
               dimensional objects in linear structures. Then we derive similarity
               values from the alignments of the property strings in order to
               measure the structural similarity of generalized trees. Hence,
               we transform a graph similarity problem to a string similarity
               problem for developing a efficient graph similarity measure. We
               demonstrate that our similarity measure captures important structural
               information by applying it to two different test sets consisting
               of graphs representing web-based document structures.},
  pdf       = {http://waset.org/publications/15299/application-of-a-similarity-measure-for-graphs-to-web-based-document-structures},
  website   = {https://www.researchgate.net/publication/238687277_Application_of_a_Similarity_Measure_for_Graphs_to_Web-based_Document_Structures},
  year      = {2005}
}

Alexander Mehler. 2005. Preliminaries to an Algebraic Treatment of Lexical Associations. Learning and Extending Lexical Ontologies. Proceedings of the Workshop at the 22nd International Conference on Machine Learning (ICML '05), August 7-11, 2005, Universität Bonn, Germany, 41–47.

BibTeX

@inproceedings{Mehler:2005:c,
  author    = {Mehler, Alexander},
  title     = {Preliminaries to an Algebraic Treatment of Lexical Associations},
  booktitle = {Learning and Extending Lexical Ontologies. Proceedings of the
               Workshop at the 22nd International Conference on Machine Learning
               (ICML '05), August 7-11, 2005, Universit{\"a}t Bonn, Germany},
  editor    = {Biemann, Chris and Paa{\ss}, Gerhard},
  pages     = {41--47},
  year      = {2005}
}

Alexander Mehler and Rüdiger Gleim. 2005. Polymorphism in Generic Web Units. A corpus linguistic study. Proceedings of Corpus Linguistics '05, July 14-17, 2005, University of Birmingham, Great Britain, Corpus Linguistics Conference Series 1(1).

BibTeX

@inproceedings{Mehler:Gleim:2005:a,
  author    = {Mehler, Alexander and Gleim, Rüdiger},
  title     = {Polymorphism in Generic Web Units. A corpus linguistic study},
  booktitle = {Proceedings of Corpus Linguistics '05, July 14-17, 2005, University
               of Birmingham, Great Britain},
  volume    = {Corpus Linguistics Conference Series 1(1)},
  abstract  = {Corpus linguistics and related disciplines which focus on statistical
               analyses of textual units have substantial need for large corpora.
               More specifically, genre or register specific corpora are needed
               which allow studying variations in language use. Along with the
               incredible growth of the internet, the web became an important
               source of linguistic data. Of course, web corpora face the same
               problem of acquiring genre specific corpora. Amongst other things,
               web mining is a framework of methods for automatically assigning
               category labels to web units and thus may be seen as a solution
               to this corpus acquisition problem as far as genre categories
               are applied. The paper argues that this approach is faced with
               the problem of a many-to-many relation between expression units
               on the one hand and content or function units on the other hand.
               A quantitative study is performed which supports the argumentation
               that functions of web-based communication are very often concentrated
               on single web pages and thus interfere any effort of directly
               applying the classical apparatus of categorization on web page
               level. The paper outlines a two-level algorithm as an alternative
               approach to category assignment which is sensitive to genre specific
               structures and thus may be used to tackle the problem of acquiring
               genre specific corpora.},
  issn      = {1747-9398},
  pdf       = {http://www.birmingham.ac.uk/Documents/college-artslaw/corpus/conference-archives/2005-journal/Thewebasacorpus/AlexanderMehlerandRuedigerGleimCorpusLinguistics2005.pdf},
  year      = {2005}
}

Alexander Mehler and Christian Wolff. 2005. Einleitung: Perspektiven und Positionen des Text Mining. Journal for Language Technology and Computational Linguistics (JLCL), 20(1):1–18.

BibTeX

@article{Mehler:Wolff:2005:b,
  author    = {Mehler, Alexander and Wolff, Christian},
  title     = {Einleitung: Perspektiven und Positionen des Text Mining},
  journal   = {Journal for Language Technology and Computational
                   Linguistics (JLCL)},
  volume    = {20},
  number    = {1},
  pages     = {1--18},
  abstract  = {Beitr{\"a}ge zum Thema Text Mining beginnen vielfach mit dem Hinweis
               auf die enorme Zunahme online verfügbarer Dokumente, ob nun im
               Internet oder in Intranets (Losiewicz et al. 2000; Merkl 2000;
               Feldman 2001; Mehler 2001; Joachims \& Leopold 2002). Der hiermit
               einhergehenden „Informationsflut“ wird das Ungenügen des Information
               Retrieval (IR) bzw. seiner g{\"a}ngigen Verfahren der Informationsaufbereitung
               und Informationserschlie{\ss}ung gegenübergestellt. Es wird bem{\"a}ngelt,
               dass sich das IR weitgehend darin erschöpft, Teilmengen von Textkollektionen
               auf Suchanfragen hin aufzufinden und in der Regel blo{\ss} listenförmig
               anzuordnen. Das auf diese Weise dargestellte Spannungsverh{\"a}ltnis
               von Informationsexplosion und Defiziten bestehender IR-Verfahren
               bildet den Hintergrund für die Entwicklung von Verfahren zur automatischen
               Verarbeitung textueller Einheiten, die sich st{\"a}rker an den
               Anforderungen von Informationssuchenden orientieren. Anders ausgedrückt:
               Mit der Einführung der Neuen Medien w{\"a}chst die Bedeutung digitalisierter
               Dokumente als Prim{\"a}rmedium für die Verarbeitung, Verbreitung
               und Verwaltung von Information in öffentlichen und betrieblichen
               Organisationen. Dabei steht wegen der Menge zu verarbeitender
               Einheiten die Alternative einer intellektuellen Dokumenterschlie{\ss}ung
               nicht zur Verfügung. Andererseits wachsen die Anforderung an eine
               automatische Textanalyse, der das klassische IR nicht gerecht
               wird. Der Mehrzahl der hiervon betroffenen textuellen Einheiten
               fehlt die explizite Strukturiertheit formaler Datenstrukturen.
               Vielmehr weisen sie je nach Text- bzw. Dokumenttyp ganz unterschiedliche
               Strukturierungsgrade auf. Dabei korreliert die Flexibilit{\"a}t
               der Organisationsziele negativ mit dem Grad an explizierter Strukturiertheit
               und positiv mit der Anzahl jener Texte und Texttypen (E-Mails,
               Memos, Expertisen, technische Dokumentationen etc.), die im Zuge
               ihrer Realisierung produziert bzw. rezipiert werden. Vor diesem
               Hintergrund entsteht ein Bedarf an Texttechnologien, die ihren
               Benutzern nicht nur „intelligente“ Schnittstellen zur Textrezeption
               anbieten, sondern zugleich auf inhaltsorientierte Textanalysen
               zielen, um auf diese Weise aufgabenrelevante Daten explorieren
               und kontextsensitiv aufbereiten zu helfen. Das Text Mining ist
               mit dem Versprechen verbunden, eine solche Technologie darzustellen
               bzw. sich als solche zu entwickeln. Dieser einheitlichen Problembeschreibung
               stehen konkurrierende Textmining-Spezifikationen gegenüber, was
               bereits die Vielfalt der Namensgebungen verdeutlicht. So finden
               sich neben der Bezeichnung Text Mining (Joachims \& Leopold 2002;
               Tan 1999) die Alternativen • Text Data Mining (Hearst 1999b; Merkl
               2000), • Textual Data Mining (Losiewicz et al. 2000), • Text Knowledge
               Engineering (Hahn \& Schnattinger 1998), Knowledge Discovery in
               Texts (Kodratoff 1999) oder Knowledge Discovery in Textual Databases
               (Feldman \& Dagan 1995). Dabei l{\"a}sst bereits die Namensgebung
               erkennen, dass es sich um Analogiebildungen zu dem (nur unwesentlich
               {\"a}lteren) Forschungsgebiet des Data Mining (DM; als Bestandteil
               des Knowledge Discovery in Databases – KDD) handelt. Diese Namensvielfalt
               findet ihre Entsprechung in widerstreitenden Aufgabenzuweisungen.
               So setzt beispielsweise Sebastiani (2002) Informationsextraktion
               und Text Mining weitgehend gleich, wobei er eine Schnittmenge
               zwischen Text Mining und Textkategorisierung ausmacht (siehe auch
               Dörre et al. 1999). Demgegenüber betrachten Kosala \& Blockeel
               (2000) Informationsextraktion und Textkategorisierung lediglich
               als Teilbereiche des ihrer Ansicht nach umfassenderen Text Mining,
               w{\"a}hrend Hearst (1999a) im Gegensatz hierzu Informationsextraktion
               und Textkategorisierung explizit aus dem Bereich des explorativen
               Text Mining ausschlie{\ss}t.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_wolff_2005_b.pdf},
  website   = {http://epub.uni-regensburg.de/6844/},
  year      = {2005}
}

Alexander Mehler. 2005. Korpuslinguistik. Ed. by Alexander Mehler. Journal for Language Technology and Computational Linguistics (JLCL), 20(2).

BibTeX

@book{Mehler:2005:e,
  title     = {Korpuslinguistik},
  author    = {Mehler, Alexander},
  editor    = {Mehler, Alexander},
  series    = {Journal for Language Technology and Computational
                   Linguistics (JLCL)},
  volume    = {20(2)},
  year      = {2005},
  pagetotal = {97},
  website   = {http://www.jlcl.org/2005_Heft2/LDV_Forum_Band_20_Heft_2.pdf},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/Korpuslinguistik.png}
}

Alexander Mehler, Matthias Dehmer and Rüdiger Gleim. 2005. Zur Automatischen Klassifikation von Webgenres. Sprachtechnologie, mobile Kommunikation und linguistische Ressourcen. Beiträge zur GLDV-Frühjahrstagung '05, 10. März – 01. April 2005, Universität Bonn, 158–174.

BibTeX

@inproceedings{Mehler:Dehmer:Gleim:2005,
  author    = {Mehler, Alexander and Dehmer, Matthias and Gleim, Rüdiger},
  title     = {Zur Automatischen Klassifikation von Webgenres},
  booktitle = {Sprachtechnologie, mobile Kommunikation und linguistische Ressourcen.
               Beitr{\"a}ge zur GLDV-Frühjahrstagung '05, 10. M{\"a}rz – 01.
               April 2005, Universit{\"a}t Bonn},
  editor    = {Fisseni, Bernhard and Schmitz, Hans-Christian and Schröder, Bernhard
               and Wagner, Petra},
  pages     = {158--174},
  address   = {Frankfurt a. M.},
  publisher = {Lang},
  year      = {2005}
}

Alexander Mehler and Christian Wolff. 2005. Text Mining. Ed. by Alexander Mehler and Christian Wolff. Journal for Language Technology and Computational Linguistics (JLCL), 20(1). GSCL.

BibTeX

@book{Mehler:Wolff:2005:a,
  title     = {Text Mining},
  author    = {Mehler, Alexander and Wolff, Christian},
  editor    = {Mehler, Alexander and Wolff, Christian},
  series    = {Journal for Language Technology and Computational
                   Linguistics (JLCL)},
  volume    = {20(1)},
  publisher = {GSCL},
  year      = {2005},
  pagetotal = {143},
  website   = {http://www.jlcl.org/2005_Heft1/LDV-Forum1.2005.pdf},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/TextMining.png}
}

Md. Zahurul Islam and Mumit Khan. 2005. Teaching Compiler Development to Undergraduates using a Template Based Approach. 8th International Conference on Computer and Information Technology (ICCIT 2005), Dhaka, Bangladesh.

BibTeX

@inproceedings{Zahurul:Khan:2005,
  title     = {Teaching Compiler Development to Undergraduates using a Template Based Approach},
  author    = {Islam, Md. Zahurul and Khan, Mumit},
  year      = {2005},
  booktitle = {8th International Conference on Computer and Information Technology
               (ICCIT 2005), Dhaka, Bangladesh},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.173.1323},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Teaching_Compiler_Development_to_Undergraduates_using_a_Template_Based_Approach.pdf},
  owner     = {zahurul},
  timestamp = {2011.08.02},
  abstract  = {Compiler Design remains one of the most dreaded courses in any
               undergraduate Computer Science curriculum, due in part to the
               complexity and the breadth of the material covered in a typical
               14-15 week semester time frame. The situation is further complicated
               by the fact that most undergraduates have never implemented a
               large enough software package that is needed for a working compiler,
               and to do so in such a short time span is a challenge indeed.
               This necessitates changes in the way we teach compilers, and specifically
               in ways we set up the project for the Compiler Design course at
               the undergraduate level. We describe a template based method for
               teaching compiler design and implementation to the undergraduates,
               where the students fill in the blanks in a set of templates for
               each phase of the compiler, starting from the lexical scanner
               to the code generator. Compilers for new languages can be implemented
               by modifying only the parts necessary to implement the syntax
               and the semantics of the language, leaving much of the remaining
               environment as is. The students not only learn how to design the
               various phases of the compiler, but also learn the software design
               and engineering techniques for implementing large software systems.
               In this paper, we describe a compiler teaching methodology that
               implements a full working compiler for an imperative C-like programming
               language with backend code generators for MIPS, Java Virtual Machine
               (JVM) and Microsoft’s .NET Common Language Runtime (CLR).}
}

Alexander Mehler. 2005. Eigenschaften der textuellen Einheiten und Systeme / Properties of Textual Units and Systems. Quantitative Linguistik. Ein internationales Handbuch / Quantitative Linguistics. An International Handbook, 325–348.

BibTeX

@incollection{Mehler:2005:b,
  author    = {Mehler, Alexander},
  title     = {Eigenschaften der textuellen Einheiten und Systeme / Properties
               of Textual Units and Systems},
  booktitle = {Quantitative Linguistik. Ein internationales Handbuch / Quantitative
               Linguistics. An International Handbook},
  publisher = {De Gruyter},
  editor    = {Köhler, Reinhard and Altmann, Gabriel and Piotrowski, Rajmund G.},
  pages     = {325--348},
  address   = {Berlin/New York},
  year      = {2005}
}

Alexander Mehler. 2005. Lexical Chaining as a Source of Text Chaining. Proceedings of the 1st Computational Systemic Functional Grammar Conference, University of Sydney, Australia, 12–21.

BibTeX

@inproceedings{Mehler:2005:d,
  author    = {Mehler, Alexander},
  title     = {Lexical Chaining as a Source of Text Chaining},
  booktitle = {Proceedings of the 1st Computational Systemic Functional Grammar
               Conference, University of Sydney, Australia},
  editor    = {Patrick, Jon and Matthiessen, Christian},
  pages     = {12--21},
  eventdate = {2005-07-16},
  pdf       = {http://www.texttechnologylab.org/media/pdf/CohesionTrees1.pdf},
  year      = {2005}
}

Alexander Mehler. 2005. Zur textlinguistischen Fundierung der Text- und Korpuskonversion. Sprache und Datenverarbeitung. International Journal for Language Data Processing, 1:29–53.

BibTeX

@article{Mehler:2005:a,
  author    = {Mehler, Alexander},
  title     = {Zur textlinguistischen Fundierung der Text- und Korpuskonversion},
  journal   = {Sprache und Datenverarbeitung. International Journal
               for Language Data Processing},
  volume    = {1},
  pages     = {29--53},
  abstract  = {Die automatische Konversion von Texten in Hypertexte ist mit der
               Erwartung verbunden, computerbasierte Rezeptionshilfen zu gewinnen.
               Dies betrifft insbesondere die Bew{\"a}ltigung der ungeheuren
               Menge an Fachliteratur im Rahmen der Wissenschaftskommunikation.
               Von einem thematisch relevanten Text zu einem thematisch verwandten
               Text per Hyperlink direkt gelangen zu können, stellt einen Anspruch
               dar, dessen Erfüllung mittels digitaler Bibliotheken n{\"a}her
               gerückt zu sein scheint. Doch wie lassen sich die Kriterien, nach
               denen Texte automatisch verlinkt werden, genauer begründen? Dieser
               Beitrag geht dieser Frage aus der Sicht textlinguistischer Modellbildungen
               nach. Er zeigt, dass parallel zur Entwicklung der Textlinguistik,
               wenn auch mit einer gewissen Verzögerung, Konversionsans{\"a}tze
               entwickelt wurden, die sich jeweils an einer bestimmten Stufe
               des Textbegriffs orientieren. Der Beitrag weist nicht nur das
               diesen Ans{\"a}tzen gemeinsame Fundament in Form der so genannten
               Explikationshypothese nach, sondern verweist zugleich auf grundlegende
               Automatisierungsdefizite, die mit ihnen verbunden sind. Mit systemisch-funktionalen
               Hypertexten wird schlie{\ss}lich ein Ansatz skizziert, der darauf
               zielt, den Anspruch nach textlinguistischer Fundierung und Automatisierbarkeit
               zu vereinen.},
  publisher = {GSCL},
  year      = {2005}
}

Andy Lücking and Jens Stegmann. 2005. Assessing Reliability on Annotations (2): Statistical Results for the DeiKon Scheme.

BibTeX

@techreport{Luecking:Stegmann:2005,
  author    = {L{\"u}cking, Andy and Stegmann, Jens},
  title     = {Assessing Reliability on Annotations (2): Statistical Results
               for the \textsc{DeiKon} Scheme},
  institution = {SFB 360},
  year      = {2005},
  number    = {3},
  address   = {Universit{\"a}t Bielefeld},
  url       = {http://www.sfb360.uni-bielefeld.de/reports/2005/2005-03.html}
}

Jens Stegmann and Andy Lücking. 2005. Assessing Reliability on Annotations (1): Theoretical Considerations.

BibTeX

@techreport{Stegmann:Luecking:2005,
  author    = {Stegmann, Jens and L{\"u}cking, Andy},
  title     = {Assessing Reliability on Annotations (1): Theoretical Considerations},
  institution = {SFB 360},
  year      = {2005},
  number    = {2},
  address   = {Universit{\"a}t Bielefeld},
  url       = {http://www.sfb360.uni-bielefeld.de/reports/2005/2005-02.html}
}

2004

Alexander Mehler. 2004. Textmining. Texttechnologie. Perspektiven und Anwendungen, 329–352.

BibTeX

@incollection{Mehler:2004:h,
  author    = {Mehler, Alexander},
  title     = {Textmining},
  booktitle = {Texttechnologie. Perspektiven und Anwendungen},
  publisher = {Stauffenburg},
  editor    = {Lobin, Henning and Lemnitzer, Lothar},
  pages     = {329--352},
  address   = {Tübingen},
  year      = {2004}
}

Alexander Mehler and Henning Lobin. 2004. Aspekte der texttechnologischen Modellierung. Automatische Textanalyse: Systeme und Methoden zur Annotation und Analyse natürlichsprachlicher Texte, 1–21.

BibTeX

@incollection{Mehler:Lobin:2004:b,
  author    = {Mehler, Alexander and Lobin, Henning},
  title     = {Aspekte der texttechnologischen Modellierung},
  booktitle = {Automatische Textanalyse: Systeme und Methoden zur Annotation
               und Analyse natürlichsprachlicher Texte},
  publisher = {Verlag für Sozialwissenschaften},
  editor    = {Mehler, Alexander and Lobin, Henning},
  pages     = {1--21},
  address   = {Wiesbaden},
  year      = {2004}
}

Alexander Mehler and Henning Lobin. 2004. Automatische Textanalyse. Systeme und Methoden zur Annotation und Analyse natürlichsprachlicher Texte. Ed. by Alexander Mehler and Henning Lobin. Verlag für Sozialwissenschaften.

BibTeX

@book{Mehler:Lobin:2004:a,
  title     = {Automatische Textanalyse. Systeme und Methoden zur Annotation
               und Analyse natürlichsprachlicher Texte},
  author    = {Mehler, Alexander and Lobin, Henning},
  editor    = {Mehler, Alexander and Lobin, Henning},
  year      = {2004},
  address   = {Wiesbaden},
  publisher = {Verlag für Sozialwissenschaften},
  website   = {http://www.v-r.de/de/Mehler-Lobin-Automatische-Textanalyse/t/352526527/},
  pagetotal = {290}
}

Andreas Eisele and Tim vor der Brück. October, 2004. Error-Tolerant Finite-State Lookup for Trademark Search. 27th German Conference on Artificial Intelligence (KI). Springer Best Paper Award.

BibTeX

@inproceedings{Eisele:vor:der:Brueck:2004,
  author    = {Eisele, Andreas and vor der Brück, Tim},
  title     = {Error-Tolerant Finite-State Lookup for Trademark Search},
  booktitle = {27th German Conference on Artificial Intelligence (KI)},
  editor    = {Biundo, Susanne},
  address   = {Ulm, Germany},
  publisher = {Springer},
  note      = {Springer Best Paper Award},
  abstract  = {Error-tolerant lookup of words in large vocabularies has many potential
               uses, both within and beyond natural language processing (NLP).
               This work describes a generic library for finite-state-based lexical
               lookup, originally designed for NLP-related applications, that
               can be adapted to application-specific error metrics. We show
               how this tool can be used for searching existing trademarks in
               a database, using orthographic and phonetic similarity. We sketch
               a prototypical implementation of a trademark search engine and
               show results of a preliminary evaluation of this system.},
  month     = oct,
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/eisele_brueck_2004.pdf},
  specialnote = {Best Paper Award},
  specialnotewebsite = {http://www.springerlink.com/content/e98tbd0jv9clnh2m/},
  website   = {http://www.springerlink.com/content/e98tbd0jv9clnh2m/},
  year      = {2004}
}

Michael Rohn, Wolfgang Raatz and Tim vor der Brück. October, 2004. Objektive Optimierung der lokalen Wettervorhersage. DACH Meteorologenkonferenz.

BibTeX

@inproceedings{Rohn:Raatz:vor:der:Brueck:2004,
  author    = {Rohn, Michael and Raatz, Wolfgang and vor der Brück, Tim},
  title     = {Objektive Optimierung der lokalen Wettervorhersage},
  booktitle = {DACH Meteorologenkonferenz},
  address   = {Karlsruhe, Germany},
  abstract  = {Die lokale Wettervorhersage umfa{\ss}t einen Zeitraum von 0 bis
               178 Stunden und mu{\ss} daher die unterschiedlichsten Punktinformationen
               aus den Ergebnissen der numerischen Modellierung, konventioneller
               Beobachtungen von Bodenwetterelementen sowie Nowcasting-Produkten
               integrieren. Dabei liefern die Verfahren oft unterschiedliche
               Punktprognosen. Um eine Endvorhersage oder Guidance abzuleiten,
               müssen alle verfügbaren Informationen bezüglich ihrer Qualit{\"a}t
               bewertet werden, sodann eine Auswahl getroffen, und abschlie{\ss}end
               zu einer einzigen Aussage kombiniert werden. Dieses Problem von
               Selektion und Kombination verschiedener Vorhersageinformationen
               wird anschaulich von Winkler 1989 aus der Perspektive der Entscheidungstheorie
               beschrieben. In der t{\"a}glichen Routinearbeit des Vorhersagemeteorologen
               wird diese Integration 'intuitiv' vollzogen, basierend auf seiner
               meteorologischen Erfahrung über die synoptische Situation sowie
               seiner Kenntnisse der lokalen Charakteristika des Prognoseortes.
               Der DWD plant, den Vorhersageproze{\ss} durch ein Verfahren 'Objektive Optimierung'
               zu unterstützen, welches eine sog. Objektiv Optimierte Guidance
               OOG erzeugt. Das Verfahren umfa{\ss}t objektive Ans{\"a}tze zur
               Kombination verschiedener Vorhersagedaten sowie die kontinuierliche
               Aktualisierung durch Beobachtungs- und Nowcastingdaten.},
  month     = oct,
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/021_RoRaBr.pdf},
  url       = {http://pi7.fernuni-hagen.de/brueck/papers/021_RoRaBr.pdf},
  year      = {2004}
}

Andy Lücking, Hannes Rieser and Jens Stegmann. 2004. Statistical Support for the Study of Structures in Multi-Modal Dialogue: Inter-Rater Agreement and Synchronization. Catalog '04—Proceedings of the Eighth Workshop on the Semantics and Pragmatics of Dialogue, 56–63.

BibTeX

@inproceedings{Luecking:Rieser:Stegmann:2004,
  title     = {Statistical Support for the Study of Structures in Multi-Modal
               Dialogue: Inter-Rater Agreement and Synchronization},
  author    = {Lücking, Andy and Rieser, Hannes and Stegmann, Jens},
  year      = {2004},
  booktitle = {Catalog '04---Proceedings of the Eighth Workshop on the Semantics
               and Pragmatics of Dialogue},
  editor    = {Ginzburg, Jonathan and Vallduví, Enric},
  organization = {Department of Translation and Philology, Universitat Pompeu Fabra},
  address   = {Barcelona},
  pages     = {56--63},
  keywords  = {own},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/08-lucking-etal.pdf},
  abstract  = {We present a statistical approach to assess relations that hold
               among speech and pointing gestures in and between turns in task-oriented
               dialogue. The units quantified over are the time-stamps of the
               XML-based annotation of the digital video data. It was found that,
               on average, gesture strokes do not exceed, but are freely distributed
               over the time span of their linguistic affiliates. Further, the
               onset of the affiliate was observed to occur earlier than gesture
               initiation. Moreover, we found that gestures do obey certain appropriateness
               conditions and contribute semantic content ('gestures save words')
               as well. Gestures also seem to play a functional role wrt dialogue
               structure: There is evidence that gestures can contribute to the
               bundle of features making up a turn-taking signal. Some statistical
               results support a partitioning of the domain, which is also reflected
               in certain rating difficulties. However, our evaluation of the
               applied annotation scheme generally resulted in very good agreement}
}

Alexander Mehler. 2004. A Data-Oriented Model of Context in Hypertext Authoring. Proceedings of the 7th International Workshop on Organisational Semiotics (OS '04), July 19-20, 2004, Setúbal, Portugal, 24–45.

BibTeX

@inproceedings{Mehler:2004:c,
  author    = {Mehler, Alexander},
  title     = {A Data-Oriented Model of Context in Hypertext Authoring},
  booktitle = {Proceedings of the 7th International Workshop on Organisational
               Semiotics (OS '04), July 19-20, 2004, Setúbal, Portugal},
  editor    = {Filipe, Joaquim and Liu, Kecheng},
  pages     = {24--45},
  address   = {Setúbal},
  publisher = {INSTICC},
  pdf       = {http://www.orgsem.org/papers/02.pdf},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.121.7944},
  year      = {2004}
}

Alexander Mehler. 2004. Automatische Synthese Internet-basierter Links für digitale Bibliotheken. Osnabrücker Beiträge zur Sprachtheorie. Themenheft Internetbasierte Kommunikation, 68:31–53.

BibTeX

@article{Mehler:2004:b,
  author    = {Mehler, Alexander},
  title     = {Automatische Synthese Internet-basierter Links für digitale Bibliotheken},
  journal   = {Osnabrücker Beitr{\"a}ge zur Sprachtheorie.
               Themenheft Internetbasierte Kommunikation},
  volume    = {68},
  pages     = {31--53},
  abstract  = {Dieser Beitrag behandelt Verfahren zur automatischen Erzeugung
               von Hyperlinks, wie sie im WWW für die Informationssuche bereitstehen.
               Dabei steht die Frage im Vordergrund, auf welche Weise bestehende
               Verfahren suchrelevante Dokumente bestimmen und von diesen aus
               inhaltsverwandte Dokumente verlinken. Dieser Gegenstand verbindet
               den Bereich des klassischen Information Retrievals (IR) mit einem
               Anwendungsgebiet, das in der Wissenschaftskommunikation unter
               dem Stichwort der digitalen Bibliothek unter Nutzbarmachung des
               Hyperlink-basierten Browsings firmiert. Ein Beispiel hierfür bildet
               die digitale Bibliothek CiteSeer (Lawrence et al. 1999), welche
               das Boolesche Retrieval dadurch erweitert, dass ausgehend von
               Treffern einer Suche jene Dokumente per Link angesteuert werden
               können, welche die aufgefundenen Dokumente zitieren oder von diesen
               zitiert werden. CiteSeer ist also ein System, welches das Schlagwort-basierte
               Querying im Rahmen des klassischen IRs mit dem Hypertext-basierten
               Browsing von Zitaten verknüpft, und zwar zu dem Zweck, die Suche
               wissenschaftlicher Dokumente zu erleichtern. Darüber hinaus verwendet
               es die unter dem Stichwort des Vektorraummodells bekannt gewordene
               Technologie für den wortbasierten Vergleich von Texten. Der Beitrag
               setzt an dieser Stelle an. Er argumentiert, dass Verfahren bereitstehen,
               welche die Anforderung nach inhaltsorientiertem Retrieval mit
               dem inhaltsorientierten Browsing verbinden, mit der Forderung
               also, dass Hyperlinks, die E-Texte als digitalisierte Versionen
               von (wissenschaftlichen) Dokumenten verknüpfen (Storrer 2002),
               Inhalts- und nicht nur Zitat-basiert sind.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_2004_b.pdf},
  year      = {2004}
}

Matthias Dehmer, Alexander Mehler and Rüdiger Gleim. 2004. Aspekte der Kategorisierung von Webseiten. INFORMATIK 2004 – Informatik verbindet, Band 2, Beiträge der 34. Jahrestagung der Gesellschaft für Informatik e.V. (GI). Workshop Multimedia-Informationssysteme, 51:39–43.

BibTeX

@inproceedings{Dehmer:Mehler:Gleim:2004,
  author    = {Dehmer, Matthias and Mehler, Alexander and Gleim, Rüdiger},
  title     = {Aspekte der Kategorisierung von Webseiten},
  booktitle = {INFORMATIK 2004 – Informatik verbindet, Band 2, Beitr{\"a}ge der
               34. Jahrestagung der Gesellschaft für Informatik e.V. (GI). Workshop
               Multimedia-Informationssysteme},
  editor    = {Dadam, Peter and Reichert, Manfred},
  volume    = {51},
  series    = {Lecture Notes in Informatics},
  pages     = {39--43},
  publisher = {GI},
  abstract  = {Im Zuge der Web-basierten Kommunikation tritt die Frage auf, inwiefern
               Webpages zum Zwecke ihrer inhaltsorientierten Filterung kategorisiert
               werden können. Diese Studie untersucht zwei Ph{\"a}nomene, welche
               die Bedingung der Möglichkeit einer solchen Kategorisierung betreffen
               (siehe [6]): Mit dem Begriff der funktionalen {\"A}quivalenz beziehen
               wir uns auf das Ph{\"a}nomen, dass dieselbe Funktions- oder Inhaltskategorie
               durch völlig verschiedene Bausteine Web-basierter Dokumente manifestiert
               werden kann. Mit dem Begriff der Polymorphie beziehen wir uns
               auf das Ph{\"a}nomen, dass dasselbe Dokument zugleich mehrere
               Funktions- oder Inhaltskategorien manifestieren kann. Die zentrale
               Hypothese lautet, dass beide Ph{\"a}nomene für Web-basierte Hypertextstrukturen
               charakteristisch sind. Ist dies der Fall, so kann die automatische
               Kategorisierung von Hypertexten [2, 10] nicht mehr als eindeutige
               Zuordnung verstanden werden, bei der einem Dokument genau eine
               Kategorie zugeordnet wird. In diesem Sinne thematisiert das Papier
               die Frage nach der ad{\"a}quaten Modellierung multimedialer Dokumente.},
  pdf       = {http://subs.emis.de/LNI/Proceedings/Proceedings51/GI-Proceedings.51-11.pdf},
  website   = {https://www.researchgate.net/publication/221385316_Aspekte_der_Kategorisierung_von_Webseiten},
  year      = {2004}
}

Alexander Mehler. 2004. Textmodellierung: Mehrstufige Modellierung generischer Bausteine der Textähnlichkeitsmessung. Automatische Textanalyse: Systeme und Methoden zur Annotation und Analyse natürlichsprachlicher Texte, 101–120.

BibTeX

@incollection{Mehler:2003:d,
  author    = {Mehler, Alexander},
  title     = {Textmodellierung: Mehrstufige Modellierung generischer Bausteine
               der Text{\"a}hnlichkeitsmessung},
  booktitle = {Automatische Textanalyse: Systeme und Methoden zur Annotation
               und Analyse natürlichsprachlicher Texte},
  publisher = {Verlag für Sozialwissenschaften},
  editor    = {Mehler, Alexander and Lobin, Henning},
  pages     = {101--120},
  address   = {Wiesbaden},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/AutomatischeTextanalyse2.jpg},
  year      = {2004}
}

2003

Alexander Mehler. 2003. Methodological Aspects of Computational Semiotics. SEED Journal, 3(3):71–80.

BibTeX

@article{Mehler:2003:b,
  author    = {Mehler, Alexander},
  title     = {Methodological Aspects of Computational Semiotics},
  journal   = {SEED Journal},
  volume    = {3},
  number    = {3},
  pages     = {71--80},
  abstract  = {In the following, elementary constituents of models in computational
               semiotics are outlined. This is done by referring to computer
               simulations as a framework which neither aims to describe artificial
               sign systems (as done in computer semiotics), nor to realize semiotic
               functions in “artificial worlds” (as proposed in “artificial semiosis”).
               Rather, the framework referred to focuses on preconditions of
               computer-based simulations of semiotic processes. Following this
               approach, the paper focuses on methodological aspects of computational
               semiotics.},
  year      = {2003}
}

Alexander Mehler. 2003. Konnotative Textbedeutungen: zur Modellierung struktureller Aspekte der Bedeutungen von Texten. Korpuslinguistische Untersuchungen zur quantitativen und systemtheoretischen Linguistik, 320–347.

BibTeX

@incollection{Mehler:2003,
  author    = {Mehler, Alexander},
  title     = {Konnotative Textbedeutungen: zur Modellierung struktureller Aspekte
               der Bedeutungen von Texten},
  booktitle = {Korpuslinguistische Untersuchungen zur quantitativen und systemtheoretischen
               Linguistik},
  publisher = {Gardez! Verlag},
  editor    = {Köhler, Reinhard},
  pages     = {320--347},
  address   = {Sankt Augustin},
  pdf       = {http://ubt.opus.hbz-nrw.de/volltexte/2004/279/pdf/10_mehler.pdf},
  year      = {2003}
}

Alexander Mehler and Siegfried Reich. 2003. Guided Tours + Trails := Guided Trails. Poster at the 14th ACM Conference on Hypertext and Hypermedia (Hypertext '03), Nottingham, August 26-30, 1–2.

BibTeX

@inproceedings{Mehler:Reich:2003,
  author    = {Mehler, Alexander and Reich, Siegfried},
  title     = {Guided Tours + Trails := Guided Trails},
  booktitle = {Poster at the 14th ACM Conference on Hypertext and Hypermedia
               (Hypertext '03), Nottingham, August 26-30},
  pages     = {1--2},
  website   = {http://www.sigweb.org/Ht03posters},
  year      = {2003}
}

Alexander Mehler. 2003. Ein Kompositionalitätsprinzip für numerische Textsemantiken. Journal for Language Technology and Computational Linguistics (JLCL), 18(1-2):321–337.

BibTeX

@article{Mehler:2003:c,
  author    = {Mehler, Alexander},
  title     = {Ein Kompositionalit{\"a}tsprinzip für numerische Textsemantiken},
  journal   = {Journal for Language Technology and Computational
               Linguistics (JLCL)},
  volume    = {18},
  number    = {1-2},
  pages     = {321--337},
  abstract  = {Der Beitrag beschreibt eine Variante des Kompositionalit{\"a}tsprinzips
               der Bedeutung als Grundprinzip für die numerische Analyse unsystematischer
               Sinnrelationen komplexer Zeichen, das über das Ph{\"a}nomen der
               perspektivischen Interpretation hinaus gebrauchssemantische Bedeutungsaspekte
               berücksichtigt. Ziel ist es, ein theoretisches Fundament für korpusanalytische
               Ans{\"a}tze in der Semantik, die oftmals die linguistische Interpretierbarkeit
               ihrer Analyseergebnisse vermissen lassen, zu umrei{\ss}en. Die
               Spezifikation des Kompositionalit{\"a}tsprinzips erfolgt unter
               Rekurs auf das Modell eines hierarchisch geordneten Constraint-Satisfaction-Prozesses.
               Hiermit ist das l{\"a}ngerfristige Ziel verbunden, das Problem
               einer defizit{\"a}ren numerischen Textrepr{\"a}sentation sowie
               die mangelnde Integration von propositionaler und strukturaler
               bzw. korpusanalytischer Semantik anzugehen. Die Erörterungen dieses
               Beitrags sind prim{\"a}r konzeptioneller Natur; sie betreffen
               die Konzeption einer numerischen Textsemantik zur Vermeidung von
               Defiziten bestehender Ans{\"a}tze.},
  pdf       = {http://media.dwds.de/jlcl/2003_Doppelheft/321-337_Mehler.pdf},
  year      = {2003}
}

2002

Alexander Mehler. 2002. Components of a Model of Context-Sensitive Hypertexts. Journal of Universal Computer Science (J.UCS), 8(10):924–943.

BibTeX

@article{Mehler:2002:l,
  author    = {Mehler, Alexander},
  title     = {Components of a Model of Context-Sensitive Hypertexts},
  journal   = {Journal of Universal Computer Science (J.UCS)},
  volume    = {8},
  number    = {10},
  pages     = {924--943},
  abstract  = {On the background of rising Intranet applications the automatic
               generation of adaptable, context-sensitive hypertexts becomes
               more and more important [El-Beltagy et al., 2001]. This observation
               contradicts the literature on hypertext authoring, where Information
               Retrieval techniques prevail, which disregard any linguistic and
               context-theoretical underpinning. As a consequence, resulting
               hypertexts do not manifest those schematic structures, which are
               constitutive for the emergence of text types and the context-mediated
               understanding of their instances, i.e. natural language texts.
               This paper utilizes Systemic Functional Linguistics (SFL) and
               its context model as a theoretical basis of hypertext authoring.
               So called Systemic Functional Hypertexts (SFHT) are proposed,
               which refer to a stratified context layer as the proper source
               of text linkage. The purpose of this paper is twofold: First,
               hypertexts are reconstructed from a linguistic point of view as
               a kind of supersign, whose constituents are natural language texts
               and whose structuring is due to intra- and intertextual coherence
               relations and their context-sensitive interpretation. Second,
               the paper prepares a formal notion of SFHTs as a first step towards
               operationalization of fundamental text linguistic concepts. On
               this background, SFHTs serve to overcome the theoretical poverty
               of many approaches to link generation.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_components_2002.pdf},
  website   = {http://www.jucs.org/jucs_8_10/components_of_a_model},
  year      = {2002}
}

Alexander Mehler and Rodney Clarke. 2002. Systemic Functional Hypertexts. An Architecture for Socialsemiotic Hypertext Systems. New Directions in Humanities Computing. The 14th Joint International Conference of the Association for Literary and Linguistic Computing and the Association for Computers and the Humanities (ALLC/ACH '02), July 24-28, University of Tübingen, 68–69.

BibTeX

@inproceedings{Mehler:Clarke:2002,
  author    = {Mehler, Alexander and Clarke, Rodney},
  title     = {Systemic Functional Hypertexts. An Architecture for Socialsemiotic
               Hypertext Systems},
  booktitle = {New Directions in Humanities Computing. The 14th Joint International
               Conference of the Association for Literary and Linguistic Computing
               and the Association for Computers and the Humanities (ALLC/ACH
               '02), July 24-28, University of Tübingen},
  pages     = {68--69},
  year      = {2002}
}

Alexander Mehler. 2002. Text Mining with the Help of Cohesion Trees. Classification, Automation, and New Media. Proceedings of the 24th Annual Conference of the Gesellschaft für Klassifikation, March 15-17, 2000, Universität Passau, 199–206.

BibTeX

@inproceedings{Mehler:2002:e,
  author    = {Mehler, Alexander},
  title     = {Text Mining with the Help of Cohesion Trees},
  booktitle = {Classification, Automation, and New Media. Proceedings of the
               24th Annual Conference of the Gesellschaft für Klassifikation,
               March 15-17, 2000, Universit{\"a}t Passau},
  editor    = {Gaul, Wolfgang and Ritter, Gunter},
  pages     = {199--206},
  address   = {Berlin/New York},
  publisher = {Springer},
  abstract  = {In the framework of automatic text processing, semantic spaces
               are used as a format for modeling similarities of natural language
               texts represented as vectors. They prove to be efficient in divergent
               areas, as information retrieval (Dumais 1995), computational psychology
               (Landauer, Dumais 1997), and computational linguistics (Rieger
               1995; Mehler 1998). In order to group semantically similar texts,
               cluster analysis is used. A central problem of this method relates
               to the difficulty to name clusters, whereas lists neglect the
               polyhierarchical structure of semantic spaces. This paper introduces
               the concept of cohesion tree as an alternative tool for exploring
               similarity relations of texts represented in high dimensional
               spaces. Cohesion trees allow the perspective evaluation of numerically
               represented text similarities. They depart from minimal spanning
               trees (MST) by context-sensitively optimizing path costs. This
               central property underlies the linguistic interpretation of cohesion
               trees: instead of manifesting context-free associations, they
               model context priming effects.},
  website   = {http://www.springerlink.com/content/x484814744877078/},
  year      = {2002}
}

Alexander Mehler. 2002. Cohesive Paths: Applying the Concept of Cohesion to Hypertext. Sprachwissenschaft auf dem Weg in das dritte Jahrtausend. Proceedings of the 34th Linguistics Colloquium, September 7-10, 1999, Universität Mainz, 725–733.

BibTeX

@inproceedings{Mehler:2002:f,
  author    = {Mehler, Alexander},
  title     = {Cohesive Paths: Applying the Concept of Cohesion to Hypertext},
  booktitle = {Sprachwissenschaft auf dem Weg in das dritte Jahrtausend. Proceedings
               of the 34th Linguistics Colloquium, September 7-10, 1999, Universit{\"a}t
               Mainz},
  editor    = {Rapp, Reinhard},
  pages     = {725--733},
  address   = {Frankfurt a. M.},
  publisher = {Peter Lang},
  year      = {2002}
}

Alexander Mehler. 2002. Hierarchical Orderings of Textual Units. Proceedings of the 19th International Conference on Computational Linguistics (COLING '02), August 24 – September 1, 2002, Taipei, Taiwan, 646–652.

BibTeX

@inproceedings{Mehler:2002:k,
  author    = {Mehler, Alexander},
  title     = {Hierarchical Orderings of Textual Units},
  booktitle = {Proceedings of the 19th International Conference on Computational
               Linguistics (COLING '02), August 24 – September 1, 2002, Taipei,
               Taiwan},
  pages     = {646--652},
  address   = {San Francisco},
  publisher = {Morgan Kaufmann},
  abstract  = {Text representation is a central task for any approach to automatic
               learning from texts. It requires a format which allows to interrelate
               texts even if they do not share content words, but deal with similar
               topics. Furthermore, measuring text similarities raises the question
               of how to organize the resulting clusters. This paper presents
               cohesion trees (CT) as a data structure for the perspective, hierarchical
               organization of text corpora. CTs operate on alternative text
               representation models taking lexical organization, quantitative
               text characteristics, and text structure into account. It is shown
               that CTs realize text linkages which are lexically more homogeneous
               than those produced by minimal spanning trees.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_2002_k.pdf},
  year      = {2002}
}

Alexander Mehler. 2002. Hierarchical Analysis of Text Similarity Data. Künstliche Intelligenz (KI), 2:12–16.

BibTeX

@article{Mehler:2002:a,
  author    = {Mehler, Alexander},
  title     = {Hierarchical Analysis of Text Similarity Data},
  journal   = {Künstliche Intelligenz (KI)},
  volume    = {2},
  pages     = {12--16},
  abstract  = {Semantic spaces are used as a representational format for modeling
               similarities of signs. As a multidimensional data structure they
               are bound to the question of how to explore similarity relations
               of signs mapped onto them. This paper introduces an abstract data
               structure called dependency scheme as a formal format which encapsulates
               two types of order relations, whose variable instantiation allows
               to derive different classes of trees for the hierarchical analysis
               of text similarity data derived from semantic spaces.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_2002_a.pdf},
  year      = {2002}
}

Alexander Mehler. 2002. Textbedeutungsrekonstruktion. Grundzüge einer Architektur zur Modellierung der Bedeutungen von Texten. Prozesse der Bedeutungskonstruktion, 445–486.

BibTeX

@incollection{Mehler:2002:b,
  author    = {Mehler, Alexander},
  title     = {Textbedeutungsrekonstruktion. Grundzüge einer Architektur zur
               Modellierung der Bedeutungen von Texten},
  booktitle = {Prozesse der Bedeutungskonstruktion},
  publisher = {Peter Lang},
  editor    = {Pohl, Inge},
  pages     = {445--486},
  address   = {Frankfurt a. M.},
  year      = {2002}
}

2001

Alexander Mehler. 2001. Aspects of Text Mining. From Computational Semiotics to Systemic Functional Hypertexts. Australasian Journal of Information Systems (AJIS), 8(2):129–141.

BibTeX

@article{Mehler:2001:b,
  author    = {Mehler, Alexander},
  title     = {Aspects of Text Mining. From Computational Semiotics to Systemic
               Functional Hypertexts},
  journal   = {Australasian Journal of Information Systems (AJIS)},
  volume    = {8},
  number    = {2},
  pages     = {129--141},
  abstract  = {The significance of natural language texts as the prime information
               structure for the management and dissemination of knowledge in
               organisations is still increasing. Making relevant documents available
               depending on varying tasks in different contexts is of primary
               importance for any efficient task completion. Implementing this
               demand requires the content based processing of texts, which enables
               to reconstruct or, if necessary, to explore the relationship of
               task, context and document. Text mining is a technology that is
               suitable for solving problems of this kind. In the following,
               semiotic aspects of text mining are investigated. Based on the
               primary object of text mining -- natural language lexis -- the specific
               complexity of this class of signs is outlined and requirements
               for the implementation of text mining procedures are derived.
               This is done with reference to text linkage introduced as a special
               task in text mining. Text linkage refers to the exploration of
               implicit, content based relations of texts (and their annotation
               as typed links in corpora possibly organised as hypertexts). In
               this context, the term systemic functional hypertext is introduced,
               which distinguishes genre and register layers for the management
               of links in a poly-level hypertext system.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Mehler_AJIS-2001.pdf},
  website   = {http://journal.acs.org.au/index.php/ajis/article/view/249/220},
  year      = {2001}
}

Alexander Mehler. 2001. Textbedeutung. Zur prozeduralen Analyse und Repräsentation struktureller Ähnlichkeiten von Texten / Text Meaning – Procedural Analysis and Representation of Structural Similarities of Texts. Computer Studies in Language and Speech, 5. Peter Lang. Zugl. Diss. Univ. Trier.

BibTeX

@book{Mehler:2001:a,
  author    = {Mehler, Alexander},
  title     = {Textbedeutung. Zur prozeduralen Analyse und Repr{\"a}sentation struktureller
               {\"A}hnlichkeiten von Texten / Text Meaning – Procedural Analysis and
               Representation of Structural Similarities of Texts},
  series    = {Computer Studies in Language and Speech},
  volume    = {5},
  publisher = {Peter Lang},
  address   = {Frankfurt a. M.},
  year      = {2001},
  pagetotal = {401},
  note      = {Zugl. Diss. Univ. Trier},
  website   = {https://www.peterlang.com/view/product/39259?tab=toc&format=PBK},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/38648_cover_front.jpg}
}

Alexander Mehler and Rodney Clarke. 2001. Systemic Functional Hypertexts (SFHT): Modeling Contexts in Hypertexts. Organizational Semiotics. Evolving a Science of Information Systems, 153–170.

BibTeX

@incollection{Mehler:Clarke:2001,
  author    = {Mehler, Alexander and Clarke, Rodney},
  title     = {Systemic Functional Hypertexts (SFHT): Modeling Contexts in Hypertexts},
  booktitle = {Organizational Semiotics. Evolving a Science of Information Systems},
  publisher = {Kluwer},
  editor    = {Liu, Kecheng and Clarke, Rodney J. and Andersen, Peter B. and Stamper, Ronald K.},
  pages     = {153--170},
  address   = {Boston},
  abstract  = {IFIP TC8 / WG8.1 Working Conference on Organizational Semiotics.
               July 23-25, 2001, Montreal, Canada},
  website   = {http://link.springer.com/chapter/10.1007/978-0-387-35611-2_10},
  year      = {2001}
}

1999

Rodney Clarke and Alexander Mehler. 1999. Theorising Print Media in Contexts: A Systemic Semiotic Contribution to Computational Semiotics. Proceedings of the 7th International Congress of the IASS-AIS: International Association for Semiotic Studies – Sign Processes in Complex Systems, Dresden, University of Technology, October 6-11.

BibTeX

@inproceedings{Clarke:Mehler:1999,
  author    = {Clarke, Rodney and Mehler, Alexander},
  year      = {1999},
  title     = {Theorising Print Media in Contexts: A Systemic Semiotic
               Contribution to Computational Semiotics},
  booktitle = {Proceedings of the 7th International Congress of the IASS-AIS: International
               Association for Semiotic Studies – Sign Processes in Complex Systems,
               Dresden, University of Technology, October 6-11}
}

Alexander Mehler. 1999. Aspects of Text Semantics in Hypertext. Returning to our Diverse Roots. Proceedings of the 10th ACM Conference on Hypertext and Hypermedia (Hypertext '99), February 21-25, 1999, Technische Universität Darmstadt, 25–26.

BibTeX

@inproceedings{Mehler:1999,
  author    = {Mehler, Alexander},
  title     = {Aspects of Text Semantics in Hypertext},
  booktitle = {Returning to our Diverse Roots. Proceedings of the 10th ACM Conference
               on Hypertext and Hypermedia (Hypertext '99), February 21-25, 1999,
               Technische Universit{\"a}t Darmstadt},
  editor    = {Tochtermann, Klaus and Westbomke, Jörg and Wiil, Uffe K. and Leggett, John J.},
  pages     = {25--26},
  address   = {New York},
  publisher = {ACM Press},
  pdf       = {{http://dl.acm.org/ft_gateway.cfm?id=294477&ftid=30049&dwn=1&CFID=722943569&CFTOKEN=97409508}},
  website   = {http://dl.acm.org/citation.cfm?id=294477},
  year      = {1999}
}

1998

Alexander Mehler. 1998. Toward Computational Aspects of Text Semiotics. Proceedings of the 1998 Joint Conference of IEEE ISIC, IEEE CIRA, and ISAS on the Science and Technology of Intelligent Systems, September 14-17, 1998, NIST, Gaithersburg, USA, 807–813.

BibTeX

@inproceedings{Mehler:1998,
  author    = {Mehler, Alexander},
  title     = {Toward Computational Aspects of Text Semiotics},
  booktitle = {Proceedings of the 1998 Joint Conference of IEEE ISIC, IEEE CIRA,
               and ISAS on the Science and Technology of Intelligent Systems,
               September 14-17, 1998, NIST, Gaithersburg, USA},
  editor    = {Albus, James and Meystel, Alex},
  pages     = {807--813},
  address   = {Gaithersburg},
  publisher = {IEEE},
  website   = {http://www.researchgate.net/publication/3766784_Toward_computational_aspects_of_text_semiotics},
  year      = {1998}
}

1996

Alexander Mehler. 1996. A Multiresolutional Approach to Fuzzy Text Meaning. Journal of Quantitative Linguistics, 3(2):113–127.

BibTeX

@article{Mehler:1996:b,
  author    = {Mehler, Alexander},
  title     = {A Multiresolutional Approach to Fuzzy Text Meaning},
  journal   = {Journal of Quantitative Linguistics},
  volume    = {3},
  number    = {2},
  pages     = {113--127},
  year      = {1996}
}

Alexander Mehler. 1996. A Multiresolutional Approach to Fuzzy Text Meaning – a First Attempt. Proceedings of the 1996 International Multidisciplinary Conference on Intelligent Systems: A Semiotic Perspective, Gaithersburg, Maryland, October 20-23, I:261–273.

BibTeX

@inproceedings{Mehler:1996:a,
  author    = {Mehler, Alexander},
  title     = {A Multiresolutional Approach to Fuzzy Text Meaning -- a First Attempt},
  booktitle = {Proceedings of the 1996 International Multidisciplinary Conference
               on Intelligent Systems: A Semiotic Perspective, Gaithersburg,
               Maryland, October 20-23},
  editor    = {Albus, James and Meystel, Alex and Quintero, Richard},
  volume    = {I},
  pages     = {261--273},
  address   = {Gaithersburg},
  publisher = {National Institute of Standards and Technology (NIST)},
  year      = {1996}
}

Total: 505

Articles

BibTeX

@article{Chen:et:al:2026,
  doi       = {10.1371/journal.pone.0346096},
  author    = {Chen, Yanran and Zhao, Wei and Breitbarth, Anne and Stoeckel, Manuel
               and Mehler, Alexander and Schlechtweg, Dominik and Eger, Steffen},
  journal   = {PLOS ONE},
  publisher = {Public Library of Science},
  title     = {Syntactic language change in English and German: Metrics, parsers,
               and convergences},
  year      = {2026},
  month     = {apr},
  volume    = {21},
  url       = {https://doi.org/10.1371/journal.pone.0346096},
  pages     = {1--33},
  abstract  = {Syntactic language change has gained increasing attention in recent
               years. Previous computational work based on dependency relations
               has focused on diachronic trends in dependency distance, which
               measures the linear distance between dependent words, using dependency
               trees automatically predicted by a dependency parser (mostly the
               Stanford CoreNLP parser). In this work, we introduce a set of
               15 syntax metrics that extend the analysis beyond linear distance
               by incorporating both linear and tree graph properties of dependency
               trees, such as tree height and degree. Besides, we propose a multi-parser
               approach to reduce the impact of using specific parsers, thereby
               increasing the robustness of the detected language changes. Through
               a cross-lingual investigation of English and German in parliamentary
               debates from the last 160 years, using 6 different parsers (CoreNLP
               and five newer alternatives), we demonstrate that: (1) Relying
               on one single parser can be problematic, as the agreement on predicted
               trends can be low across parsers. (2) Our set of metrics can capture
               subtle patterns of syntactic changes. Our analysis shows that
               syntactic change over the time period inspected is largely similar
               between English and German, with only 2.2\% of cases yielding opposite
               trends in these metrics. (3) We also show that changes in syntactic
               metrics seem to be more frequent at the tails of sentence length
               distributions and often move in opposite directions for short
               and long sentences. To our best knowledge, ours is the most comprehensive
               computational analysis of syntactic language change using modern
               NLP technology in recent corpora of English and German.},
  number    = {4}
}

BibTeX

@article{hahn:etal:2026,
  title     = {Using Artificial Intelligence for Eliciting Diagnostic Evidence
               From Students’ Drawings: A Case Study From a Formative Mathematics
               Assessment},
  volume    = {7},
  issn      = {2698-1866},
  url       = {http://dx.doi.org/10.1027/2698-1866/a000123},
  doi       = {10.1027/2698-1866/a000123},
  journal   = {Psychological Test Adaptation and Development},
  publisher = {Hogrefe Publishing Group},
  author    = {Hahn, Sonja and Hammerla, Leon and Hankeln, Corinna and Gross, Sebastian
               and Steinke, Marie and R{\"o}per Korf, Christina M. and Kroehne, Ulf},
  year      = {2026},
  month     = {apr},
  pages     = {73--90}
}

Cedric Borkowski, Giuseppe Abrami, Dawit Terefe, Daniel Baumartz and Alexander Mehler. 2026. DUUIgateway: A Web Service for Platform-independent, Ubiquitous Big Data NLP. SoftwareX, 34:102549.

BibTeX

@article{Borkowski:et:al:2026,
  title     = {{DUUIgateway}: A Web Service for Platform-independent, Ubiquitous Big Data NLP},
  journal   = {SoftwareX},
  volume    = {34},
  pages     = {102549},
  year      = {2026},
  issn      = {2352-7110},
  doi       = {10.1016/j.softx.2026.102549},
  url       = {https://www.sciencedirect.com/science/article/pii/S2352711026000439},
  author    = {Borkowski, Cedric and Abrami, Giuseppe and Terefe, Dawit and Baumartz, Daniel
               and Mehler, Alexander},
  keywords  = {duui, neglab, core, core_b05, core_c08, new-data-spaces, circlet},
  abstract  = {Distributed processing of unstructured text data is a challenge
               in the rapidly changing and evolving natural language processing
               (NLP) landscape. This landscape is characterized by heterogeneous
               systems, models, and formats, and especially by the increasing
               influence of AI systems. While many of these systems handle text
               data, there are also unified systems that process multiple input
               and output formats, while allowing for distributed corpus processing.
               However, there are hardly any user-friendly interfaces that allow
               existing NLP frameworks to be used flexibly and extended in a
               user-controlled manner. Due to this gap and the increasing importance
               of NLP for various scientific disciplines, there has been a demand
               for a web and API based flexible software solution for deploying,
               managing and monitoring NLP systems. Such a solution is provided
               by Docker Unified UIMA-gateway. We introduce DUUIgateway and evaluate
               its API and user-driven approach to encapsulation. We also describe
               how these features improve the usability and accessibility of
               the NLP framework DUUI. We illustrate DUUIgateway in the field
               of process modeling in higher education and show how it closes
               the latter gap in NLP by making a variety of systems for processing
               text and multimodal data accessible to non-experts.}
}

BibTeX

@article{Mehler:et:al:2026:a,
  author    = {Mehler, Alexander and Bisang, Walter and Konca, Maxim
               and Czerwinski, Patryik and Graf, Jeremias Josef and Fritsch, Jana},
  title     = {Linguistic Features of Student Responses as Indicators of
               Performance in Critical Online Reasoning Tasks},
  journal   = {Zeitschrift für Erziehungswissenschaft},
  publisher = {Springer Science and Business Media LLC},
  year      = {2026},
  issn      = {1862-5215},
  doi       = {10.1007/s11618-026-01388-6},
  url       = {http://dx.doi.org/10.1007/s11618-026-01388-6},
  keywords  = {core,core_b05}
}

Mounika Marreddy, Subba Reddy Oota, Venkata Charan Chinni, Manish Gupta and Lucie Flek. 2025. USDC: A Dataset of User Stance and Dogmatism in Long Conversations. Findings of ACL.

BibTeX

@article{marreddy:et:al:2025,
  title     = {{USDC}: A Dataset of User Stance and Dogmatism in Long Conversations},
  author    = {Marreddy, Mounika and Oota, Subba Reddy and Chinni, Venkata Charan
               and Gupta, Manish and Flek, Lucie},
  journal   = {Findings of ACL},
  year      = {2025}
}

BibTeX

@article{Bagci:et:al:2025,
  author    = {Bagci, Mevl{\"u}t and Mehler, Alexander and Abrami, Giuseppe and Schrottenbacher, Patrick
               and Spiekermann, Christian and Konca, Maxim and Schreiber, Jakob and Saukel, Kevin
               and Quintino, Marc and Engel, Juliane},
  title     = {Simulation-Based Learning in Virtual Reality: Three Use Cases
               from Social Science and Technological Foundations in Terms of
               {Va.Si.Li}-Lab},
  journal   = {Technology, Knowledge and Learning},
  publisher = {Springer Nature},
  year      = {2025},
  month     = {apr},
  day       = {01},
  abstract  = {This article examines the predictability of communication scenarios
               within the context of simulation-based learning in virtual reality
               (VR). The aim is to investigate multimodal patterns of social
               interaction that accompany human communication in conflict situations.
               Understanding these patterns can ultimately enhance educational
               technologies' ability to address problematic learning situations
               and support learners in benefiting from VR-based learning. To
               achieve this, the system must accurately predict the task context.
               A central goal of this article is to shed light on this potential.
               Additionally, our research extends to visual communication beyond
               purely linguistic interactions, aiming to enhance VR immersion
               in communicative practices. To this end, the article examines
               the associations between multimodal information units generated
               by individuals interacting in three distinct learning scenarios:
               work organization, school pedagogy, and social life. Several experiments
               demonstrate that predictability exists when multimodal communication
               is analyzed at the level of eight coarse-grained modalities, including
               speech, head and body movements, and gestures. The interactions
               are observed in VR using Va.Si.Li-Lab, a simulation-based system
               that virtualizes learning scenarios, enabling participants to
               collaboratively manage potentially conflicting tasks through multimodal
               communication (Mehler et al. in: Duffy (ed) Digital human modeling
               and applications in health, safety, ergonomics and risk management,
               Springer Nature Switzerland, Cham, 2023). The article discusses
               the technology underlying Va.Si.Li-Lab, its database, and the
               post-processing of interaction data, including speech data. It
               provides theoretical motivation for the application scenarios
               and presents experimental data to illustrate the system's usefulness.
               Based on these data, the article details experiments on the multimodal
               detection of social scenarios, positioning Va.Si.Li-Lab as a use
               case in simulation-based learning.},
  issn      = {2211-1670},
  doi       = {10.1007/s10758-025-09837-7},
  url       = {https://doi.org/10.1007/s10758-025-09837-7}
}

Andy Lücking and Jonathan Ginzburg. 2025. Exceptions From Rules and Noteworthy Exceptions. Linguistics and Philosophy, 48:371–409.

BibTeX

@article{Luecking:Ginzburg:2025-exceptions,
  author    = {Lücking, Andy and Ginzburg, Jonathan},
  title     = {Exceptions From Rules and Noteworthy Exceptions},
  subtitle  = {The Balance Scale for Making Exceptions},
  journal   = {Linguistics and Philosophy},
  year      = {2025},
  volume    = {48},
  pages     = {371--409},
  url       = {https://doi.org/10.1007/s10988-024-09429-1},
  doi       = {10.1007/s10988-024-09429-1},
  keywords  = {gemdis,neglab}
}

Giuseppe Abrami, Markos Genios, Filip Fitzermann, Daniel Baumartz and Alexander Mehler. 2025. Docker Unified UIMA Interface: New perspectives for NLP on big data. SoftwareX, 29:102033.

BibTeX

@article{Abrami:et:al:2025:a,
  title     = {Docker Unified UIMA Interface: New perspectives for NLP on big data},
  journal   = {SoftwareX},
  volume    = {29},
  pages     = {102033},
  year      = {2025},
  issn      = {2352-7110},
  doi       = {10.1016/j.softx.2024.102033},
  url       = {https://www.sciencedirect.com/science/article/pii/S2352711024004047},
  author    = {Giuseppe Abrami and Markos Genios and Filip Fitzermann and Daniel Baumartz
               and Alexander Mehler},
  keywords  = {Docker, Kubernetes, UIMA, Distributed NLP, duui, biofid, neglab, new-data-spaces, circlet, core, core_c08},
  abstract  = {Processing large amounts of natural language text using machine
               learning-based models is becoming important in many disciplines.
               This demand is being met by a variety of approaches, resulting
               in the heterogeneous deployment of separate, partly incompatible,
               not natively scalable applications. To overcome the technological
               bottleneck involved, we have developed Docker Unified UIMA Interface,
               a system for the standardized, parallel, platform-independent,
               distributed and microservices-based solution for processing large
               and extensive text corpora with any NLP method. We present DUUI
               as a framework that enables automated orchestration of GPU-based
               NLP processes beyond the existing Docker Swarm cluster variant,
               and in addition to the adaptation to new runtime environments
               such as Kubernetes. Therefore, a new driver for DUUI is introduced,
               which enables the lightweight orchestration of DUUI processes
               within a Kubernetes environment in a scalable setup. In this way,
               the paper opens up novel text-technological perspectives for existing
               practices in disciplines that deal with the scientific analysis
               of large amounts of data based on NLP.}
}

BibTeX

@article{Schrottenbacher:et:al:2025,
  author    = {Schrottenbacher, Patrick and Mehler, Alexander and Berg, Theresa
               and Hustedt, Jasper and Gagel, Julian and Lüttig, Timo and Abrami, Giuseppe},
  title     = {Geo-spatial hypertext in virtual reality: mapping and navigating
               global news event spaces},
  journal   = {New Review of Hypermedia and Multimedia},
  volume    = {31},
  number    = {1-2},
  pages     = {76--105},
  year      = {2025},
  publisher = {Taylor \& Francis},
  doi       = {10.1080/13614568.2024.2383601},
  url       = {https://doi.org/10.1080/13614568.2024.2383601},
  abstract  = {Every day, a myriad of events take place that are documented and
               shared online through news articles from a variety of sources.
               As a result, as users navigate the Web, the volume of data can
               lead to information overload, making it difficult to find specific
               details about an event. We present News in Time and Space (NiTS)
               to address this issue: NiTS is a fully immersive system integrated
               into Va.Si.Li-Lab that organises textual information in a geospatial
               hypertext system in virtual reality. With NiTS, users can visualise,
               filter and interact with information currently based on GDELT
               on a virtual globe providing document networks to analyse global
               events and trends. The article describes NiTS, its event semantics
               and architecture. It evaluates NiTS in comparison to a classic
               search engine website, extended by NiTS's information filtering
               capabilities to make it comparable. Our comparison with this website
               technology, which is directly linked to the user's usage habits,
               shows that NiTS enables comparable information exploration even
               if the users have little or no experience with VR. That is, we
               observe an equivalent search result behaviour, but with the advantage
               that VR allows users to get their results with a higher level
               of usability without distracting them from their tasks. Through
               its integration with Va.Si.Li-Lab, a simulation-based learning
               environment, NiTS can be used in simulations of learning processes
               aimed at studying critical online reasoning, where Va.Si.Li-Lab
               guarantees that this can be done in relation to individual or
               groups of learners.}
}

BibTeX

@article{Boenisch:et:al:2025:b,
  author    = {B{\"o}nisch, Kevin and Mehler, Alexander and Babbili, Shaduan
               and Heinrich, Yannick and Stephan, Philipp and Abrami, Giuseppe},
  abstract  = {We present Viki LibraRy, a dynamically built library in virtual
               reality (VR) designed to visualize hypertext systems, with an
               emphasis on collaborative interaction and spatial immersion. Viki
               LibraRy goes beyond traditional methods of text distribution by
               providing a platform where users can share, process, and engage
               with textual information. It operates at the interface of VR,
               collaborative learning and spatial data processing to make reading
               tangible and memorable in a spatially mediated way. The article
               describes the building blocks of Viki LibraRy, its underlying
               architecture, and several use cases. It evaluates Viki LibraRy
               in comparison to a conventional web interface for text retrieval
               and reading. The article shows that Viki LibraRy provides users
               with spatial references for structuring their recall, so that
               they can better remember consulted texts and their meta-information
               (e.g. in terms of subject areas and content categories).},
  title     = {{Viki LibraRy}: Collaborative Hypertext Browsing and Navigation
               in Virtual Reality},
  journal   = {New Review of Hypermedia and Multimedia},
  volume    = {31},
  number    = {1-2},
  pages     = {45--75},
  year      = {2025},
  publisher = {Taylor \& Francis},
  doi       = {10.1080/13614568.2024.2383581},
  url       = {https://doi.org/10.1080/13614568.2024.2383581}
}

BibTeX

@article{Owoyele:et:al:2020,
  author    = {Owoyele, Babajide and Verma, Bhuvanesh and Omolaoye, Victor
               and Edelman, Jonathan Antonio and Loorbach, Derk and de Melo, Gerard},
  title     = {Socio-Semantic X-Ray of Multi-Actor Constellations using Topics and
               Interstitial Authors: A Toolkit for Augmenting Computational Literature
               Reviews},
  journal   = {Available at SSRN 4713155},
  year      = {2024},
  doi       = {10.2139/ssrn.4713155},
  url       = {https://dx.doi.org/10.2139/ssrn.4713155}
}

BibTeX

@article{Mattern:Hemati:Lücking:Mehler:2024,
  author    = {Mattern, Dominik and Hemati, Wahed and Lücking, Andy and Mehler, Alexander},
  title     = {On {German} verb sense disambiguation: A three-part approach based
               on linking a sense inventory ({GermaNet}) to a corpus through annotation
               ({TGVCorp}) and using the corpus to train a {VSD} classifier ({TTvSense})},
  journal   = {Journal of Language Modelling},
  volume    = {12},
  number    = {1},
  pages     = {155--212},
  year      = {2024},
  month     = sep,
  url       = {https://jlm.ipipan.waw.pl/index.php/JLM/article/view/356},
  abstract  = {We develop a three-part approach to Verb Sense Disambiguation (VSD) in German. After considering a set of lexical resources and corpora, we arrive at a statistically motivated selection of a subset of verbs and their senses from GermaNet. This sub-inventory is then used to disambiguate the occurrences of the corresponding verbs in a corpus resulting from the union of TüBa-D/Z, Salsa, and E-VALBU. The corpus annotated in this way is called TGVCorp. It is used in the third part of the paper for training a classifier for VSD and for its comparative evaluation with a state-of-the-art approach in this research area, namely EWISER. Our simple classifier outperforms the transformer-based approach on the same data in both accuracy and speed in German but not in English and we discuss possible reasons.}
}

BibTeX

@article{Marreddy:et:al:2023emnlp,
  author    = {Neerudu, Pavan Kalyan Reddy and Oota, Subba Reddy and Marreddy, Mounika
               and Kagita, Venkateswara Rao and Gupta, Manish},
  title     = {On Robustness of Finetuned Transformer-based {NLP} Models},
  journal   = {CoRR},
  volume    = {abs/2305.14453},
  year      = {2023},
  url       = {https://arxiv.org/abs/2305.14453},
  archiveprefix = {arXiv},
  eprint    = {2305.14453}
}

BibTeX

@article{Henlein:et:al:2023a,
  author    = {Henlein, Alexander and Gopinath, Anju and Krishnaswamy, Nikhil
               and Mehler, Alexander and Pustejovsky, James},
  title     = {Grounding human-object interaction to affordance behavior in multimodal datasets},
  journal   = {Frontiers in Artificial Intelligence},
  volume    = {6},
  year      = {2023},
  issn      = {2624-8212},
  doi       = {10.3389/frai.2023.1084740},
  url       = {https://www.frontiersin.org/articles/10.3389/frai.2023.1084740},
  keywords  = {gemdis}
}

BibTeX

@article{Oota:et:al:2022,
  author    = {Oota, Subba Reddy and Arora, Jashn and Agarwal, Veeral and Marreddy, Mounika
               and Gupta, Manish and Surampudi, Bapi Raju},
  title     = {Neural language taskonomy: Which {NLP} tasks are the most predictive
               of {fMRI} brain activity?},
  journal   = {CoRR},
  volume    = {abs/2205.01404},
  year      = {2022},
  url       = {https://arxiv.org/pdf/2205.01404},
  archiveprefix = {arXiv},
  eprint    = {2205.01404},
  abstract  = {Several popular Transformer based language models have been found
               to be successful for text-driven brain encoding. However, existing
               literature leverages only pretrained text Transformer models and
               has not explored the efficacy of task-specific learned Transformer
               representations. In this work, we explore transfer learning from
               representations learned for ten popular natural language processing
               tasks (two syntactic and eight semantic) for predicting brain
               responses from two diverse datasets: Pereira (subjects reading
               sentences from paragraphs) and Narratives (subjects listening
               to the spoken stories). Encoding models based on task features
               are used to predict activity in different regions across the whole
               brain. Features from coreference resolution, NER, and shallow
               syntax parsing explain greater variance for the reading activity.
               On the other hand, for the listening activity, tasks such as paraphrase
               generation, summarization, and natural language inference show
               better encoding performance. Experiments across all 10 task representations
               provide the following cognitive insights: (i) language left hemisphere
               has higher predictive brain activity versus language right hemisphere,
               (ii) posterior medial cortex, temporoparieto-occipital junction,
               dorsal frontal lobe have higher correlation versus early auditory
               and auditory association cortex, (iii) syntactic and semantic
               tasks display a good predictive performance across brain regions
               for reading and listening stimuli resp.},
  pdf       = {https://arxiv.org/pdf/2205.01404}
}

BibTeX

@article{Marreddy:et:al:2022,
  author    = {Marreddy, Mounika and Oota, Subba Reddy and Vakada, Lakshmi Sireesha
               and Chinni, Venkata Charan and Mamidi, Radhika},
  title     = {Am {I} a resource-poor language? {Data} sets, embeddings, models and
               analysis for four different {NLP} tasks in {Telugu} language},
  journal   = {ACM Transactions on Asian and Low-Resource Language Information Processing},
  volume    = {22},
  number    = {1},
  numpages  = {34},
  articleno = {18},
  year      = {2022},
  issn      = {2375-4699},
  url       = {https://doi.org/10.1145/3531535},
  doi       = {10.1145/3531535},
  publisher = {Association for Computing Machinery},
  abstract  = {Due to the lack of a large annotated corpus, many resource-poor
               Indian languages struggle to reap the benefits of recent deep
               feature representations in Natural Language Processing (NLP).
               Moreover, adopting existing language models trained on large English
               corpora for Indian languages is often limited by data availability,
               rich morphological variation, syntax, and semantic differences.
               In this paper, we explore the traditional to recent efficient
               representations to overcome the challenges of a low resource language,
               Telugu. In particular, our main objective is to mitigate the low-resource
               problem for Telugu. Overall, we present several contributions
               to a resource-poor language viz. Telugu. (i) a large annotated
               data (35,142 sentences in each task) for multiple NLP tasks such
               as sentiment analysis, emotion identification, hate-speech detection,
               and sarcasm detection, (ii) we create different lexicons for sentiment,
               emotion, and hate-speech for improving the efficiency of the models,
               (iii) pretrained word and sentence embeddings, and (iv) different
               pretrained language models for Telugu such as ELMo-Te, BERT-Te,
               RoBERTa-Te, ALBERT-Te, and DistilBERT-Te on a large Telugu corpus
               consisting of 8,015,588 sentences (1,637,408 sentences from Telugu
               Wikipedia and 6,378,180 sentences crawled from different Telugu
               websites). Further, we show that these representations significantly
               improve the performance of four NLP tasks and present the benchmark
               results for Telugu. We argue that our pretrained embeddings are
               competitive or better than the existing multilingual pretrained
               models: mBERT, XLM-R, and IndicBERT. Lastly, the fine-tuning of
               pretrained models show higher performance than linear probing
               results on four NLP tasks with the following F1-scores: Sentiment
               (68.72), Emotion (58.04), Hate-Speech (64.27), and Sarcasm (77.93).
               We also experiment on publicly available Telugu datasets (Named
               Entity Recognition, Article Genre Classification, and Sentiment
               Analysis) and find that our Telugu pretrained language models
               (BERT-Te and RoBERTa-Te) outperform the state-of-the-art system
               except for the sentiment task. We open-source our corpus, four
               different datasets, lexicons, embeddings, and code https://github.com/Cha14ran/DREAM-T.
               The pretrained Transformer models for Telugu are available at
               https://huggingface.co/ltrctelugu.},
  pdf       = {https://dl.acm.org/doi/pdf/10.1145/3531535}
}

Andy Lücking and Jonathan Ginzburg. 2022. Leading voices: Dialogue semantics, cognitive science, and the polyphonic structure of multimodal interaction. Language and Cognition.

BibTeX

@article{Luecking:Ginzburg:2022-lv,
  author    = {L{\"u}cking, Andy and Ginzburg, Jonathan},
  title     = {Leading voices: {Dialogue} semantics, cognitive science, and the
               polyphonic structure of multimodal interaction},
  journal   = {Language and Cognition},
  doi       = {10.1017/langcog.2022.30},
  keywords  = {gemdis},
  year      = {2022}
}

Andy Lücking and Jonathan Ginzburg. 2022. Referential transparency as the proper treatment of quantification. Semantics and Pragmatics, 15.

BibTeX

@article{Luecking:Ginzburg:2022,
  title     = {Referential transparency as the proper treatment of quantification},
  author    = {L{\"u}cking, Andy and Ginzburg, Jonathan},
  journal   = {Semantics and Pragmatics},
  volume    = {15},
  eid       = {4},
  year      = {2022},
  keywords  = {gemdis},
  doi       = {10.3765/sp.15.4}
}

BibTeX

@incollection{Konca:et:al:2021,
  author    = {Konca, Maxim and Mehler, Alexander and Baumartz, Daniel and Hemati, Wahed},
  title     = {From distinguishability to informativity. A quantitative text
               model for detecting random texts},
  booktitle = {Language and Text: Data, models, information and applications},
  editor    = {Paw{\l}owski, Adam and Ma{\v{c}}utek, Jan and Embleton, Sheila and Mikros, George},
  series    = {Current Issues in Linguistic Theory},
  volume    = {356},
  pages     = {145--162},
  year      = {2021},
  publisher = {John Benjamins Publishing Company},
  doi       = {10.1075/cilt.356.10kon}
}

BibTeX

@article{Lokot:Abramov:Mehler:2021,
  author    = {Lokot, Tatiana and Abramov, Olga and Mehler, Alexander},
  title     = {On the asymptotic behavior of the average geodesic distance {L}
               and the compactness {CB} of simple connected undirected graphs whose
               order approaches infinity},
  journal   = {PLOS ONE},
  publisher = {Public Library of Science},
  year      = {2021},
  month     = nov,
  volume    = {16},
  number    = {11},
  pages     = {1--13},
  doi       = {10.1371/journal.pone.0259776},
  url       = {https://doi.org/10.1371/journal.pone.0259776},
  abstract  = {The average geodesic distance L Newman (2003) and the compactness
               CB Botafogo (1992) are important graph indices in applications
               of complex network theory to real-world problems. Here, for simple
               connected undirected graphs G of order n, we study the behavior
               of L(G) and CB(G), subject to the condition that their order |V(G)|
               approaches infinity. We prove that the limit of L(G)/n and CB(G)
               lies within the interval [0;1/3] and [2/3;1], respectively. Moreover,
               for any not necessarily rational number β ∈ [0;1/3] (α ∈ [2/3;1])
               we show how to construct the sequence of graphs {G}, |V(G)| =
               n → ∞, for which the limit of L(G)/n (CB(G)) is exactly β (α)
               (Theorems 1 and 2). Based on these results, our work points to
               novel classification possibilities of graphs at the node level
               as well as to the information-theoretic classification of the
               structural complexity of graph indices.}
}

BibTeX

@article{Luecking:et:al:2021,
  author    = {Andy Lücking and Christine Driller and Manuel Stoeckel and Giuseppe Abrami
               and Adrian Pachzelt and Alexander Mehler},
  title     = {Multiple Annotation for Biodiversity: Developing an annotation
               framework among biology, linguistics and text technology},
  journal   = {Language Resources and Evaluation},
  editor    = {Nancy Ide and Nicoletta Calzolari},
  year      = {2021},
  doi       = {10.1007/s10579-021-09553-5},
  pdf       = {https://link.springer.com/content/pdf/10.1007/s10579-021-09553-5.pdf},
  keywords  = {biofid}
}

BibTeX

@article{Luecking:Brueckner:Abrami:Uslu:Mehler:2021,
  author    = {L{\"u}cking, Andy and Br{\"u}ckner, Sebastian and Abrami, Giuseppe
               and Uslu, Tolga and Mehler, Alexander},
  title     = {Computational linguistic assessment of textbooks and online texts
               by means of threshold concepts in economics},
  journal   = {Frontiers in Education},
  eid       = {578475},
  year      = {2021},
  doi       = {10.3389/feduc.2020.578475},
  url       = {https://www.frontiersin.org/articles/10.3389/feduc.2020.578475/}
}

BibTeX

@article{Paul:et:al:2020,
  author    = {Paul, Arati and Verma, Bhuvanesh and Chakraborty, Debasish},
  title     = {Estimating electrification using multi-temporal {DMSP/OLS} night
               imagery as proxy measure of human well-being in {India}},
  journal   = {Spatial Information Research},
  volume    = {28},
  pages     = {469--473},
  year      = {2020},
  publisher = {Springer},
  issn      = {2366-3294},
  doi       = {10.1007/s41324-019-00307-8},
  url       = {https://doi.org/10.1007/s41324-019-00307-8}
}

BibTeX

@article{Mehler:Hemati:Welke:Konca:Uslu:2020,
  author    = {Mehler, Alexander and Hemati, Wahed and Welke, Pascal and Konca, Maxim
               and Uslu, Tolga},
  title     = {Multiple Texts as a Limiting Factor in Online Learning: Quantifying
               (Dis-)similarities of Knowledge Networks},
  journal   = {Frontiers in Education},
  volume    = {5},
  pages     = {206},
  year      = {2020},
  issn      = {2504-284X},
  doi       = {10.3389/feduc.2020.562670},
  url       = {https://www.frontiersin.org/article/10.3389/feduc.2020.562670},
  pdf       = {https://www.frontiersin.org/articles/10.3389/feduc.2020.562670/pdf},
  abstract  = {We test the hypothesis that the extent to which one obtains information
               on a given topic through Wikipedia depends on the language in
               which it is consulted. Controlling the size factor, we investigate
               this hypothesis for a number of 25 subject areas. Since Wikipedia
               is a central part of the web-based information landscape, this
               indicates a language-related, linguistic bias. The article therefore
               deals with the question of whether Wikipedia exhibits this kind
               of linguistic relativity or not. From the perspective of educational
               science, the article develops a computational model of the information
               landscape from which multiple texts are drawn as typical input
               of web-based reading. For this purpose, it develops a hybrid model
               of intra- and intertextual similarity of different parts of the
               information landscape and tests this model on the example of 35
               languages and corresponding Wikipedias. In the way it measures
               the similarities of hypertexts, the article goes beyond existing
               approaches by examining their structural and semantic aspects
               intra- and intertextually. In this way it builds a bridge between
               reading research, educational science, Wikipedia research and
               computational linguistics.}
}

BibTeX

@article{Luecking:et:al:2020,
  author    = {Andy L{\"{u}}cking and Sebastian Br{\"{u}}ckner and Giuseppe Abrami
               and Tolga Uslu and Alexander Mehler},
  title     = {Computational linguistic assessment of textbook and online learning
               media by means of threshold concepts in business education},
  journal   = {CoRR},
  volume    = {abs/2008.02096},
  year      = {2020},
  archiveprefix = {arXiv},
  eprint    = {2008.02096},
  url       = {https://arxiv.org/abs/2008.02096},
  biburl    = {https://dblp.org/rec/journals/corr/abs-2008-02096.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Fri, 07 Aug 2020 15:07:21 +0200}
}

BibTeX

@article{Driller:et:al:2020,
  author    = {Christine Driller and Markus Koch and Giuseppe Abrami and Wahed Hemati
               and Andy Lücking and Alexander Mehler and Adrian Pachzelt and Gerwin Kasperek},
  title     = {Fast and Easy Access to {Central European} Biodiversity Data with {BIOfid}},
  journal   = {Biodiversity Information Science and Standards},
  volume    = {4},
  pages     = {e59157},
  year      = {2020},
  publisher = {Pensoft Publishers},
  doi       = {10.3897/biss.4.59157},
  url       = {https://doi.org/10.3897/biss.4.59157},
  eprint    = {https://doi.org/10.3897/biss.4.59157},
  keywords  = {biofid},
  abstract  = {The storage of data in public repositories such as the Global
               Biodiversity Information Facility (GBIF) or the National Center
               for Biotechnology Information (NCBI) is nowadays stipulated in
               the policies of many publishers in order to facilitate data replication
               or proliferation. Species occurrence records contained in legacy
               printed literature are no exception to this. The extent of their
               digital and machine-readable availability, however, is still far
               from matching the existing data volume (Thessen and Parr 2014).
               But precisely these data are becoming more and more relevant to
               the investigation of ongoing loss of biodiversity. In order to
               extract species occurrence records at a larger scale from available
               publications, one has to apply specialised text mining tools.
               However, such tools are in short supply especially for scientific
               literature in the German language. The Specialised Information
               Service Biodiversity Research BIOfid (Koch et al. 2017) aims
               at reducing this desideratum, inter alia, by preparing a searchable
               text corpus semantically enriched by a new kind of multi-label
               annotation. For this purpose, we feed manual annotations into
               automatic, machine-learning annotators. This mixture of automatic
               and manual methods is needed, because BIOfid approaches a new
               application area with respect to language (mainly German of the
               19th century), text type (biological reports), and linguistic
               focus (technical and everyday language). We will present current
               results of the performance of BIOfid’s semantic search engine
               and the application of independent natural language processing
               (NLP) tools. Most of these are freely available online, such as
               TextImager (Hemati et al. 2016). We will show how TextImager is
               tied into the BIOfid pipeline and how it is made scalable (e.g.
               extendible by further modules) and usable on different systems
               (docker containers). Further, we will provide a short introduction
               to generating machine-learning training data using TextAnnotator
               (Abrami et al. 2019) for multi-label annotation. Annotation reproducibility
               can be assessed by the implementation of inter-annotator agreement
               methods (Abrami et al. 2020). Beyond taxon recognition and entity
               linking, we place particular emphasis on location and time information.
               For this purpose, our annotation tag-set combines general categories
               and biology-specific categories (including taxonomic names) with
               location and time ontologies. The application of the annotation
               categories is regimented by annotation guidelines (Lücking et
               al. 2020). Within the next years, our work deliverable will be
               a semantically accessible and data-extractable text corpus of
               around two million pages. In this way, BIOfid is creating a new
               valuable resource that expands our knowledge of biodiversity and
               its determinants.}
}

BibTeX

@article{Mehler:et:al:2020b,
  author    = {Mehler, Alexander and Jussen, Bernhard and Geelhaar, Tim and Henlein, Alexander
               and Abrami, Giuseppe and Baumartz, Daniel and Uslu, Tolga and Hemati, Wahed},
  title     = {{The Frankfurt Latin Lexicon. From Morphological Expansion and
               Word Embeddings to SemioGraphs}},
  journal   = {Studi e Saggi Linguistici},
  volume    = {58},
  number    = {1},
  pages     = {121--155},
  year      = {2020},
  doi       = {10.4454/ssl.v58i1.276},
  url       = {https://www.studiesaggilinguistici.it/index.php/ssl/article/view/276},
  pdf       = {https://www.studiesaggilinguistici.it/index.php/ssl/article/download/276/219},
  abstract  = {In this article we present the Frankfurt Latin Lexicon (FLL),
               a lexical resource for Medieval Latin that is used both for the
               lemmatization of Latin texts and for the post-editing of lemmatizations.
               We describe recent advances in the development of lemmatizers
               and test them against the Capitularies corpus (comprising Frankish
               royal edicts, mid-6th to mid-9th century), a corpus created as
               a reference for processing Medieval Latin. We also consider the
               post-correction of lemmatizations using a limited crowdsourcing
               process aimed at continuous review and updating of the FLL. Starting
               from the texts resulting from this lemmatization process, we describe
               the extension of the FLL by means of word embeddings, whose interactive
               traversing by means of SemioGraphs completes the digital enhanced
               hermeneutic circle. In this way, the article argues for a more
               comprehensive understanding of lemmatization, encompassing classical
               machine learning as well as intellectual post-corrections and,
               in particular, human computation in the form of interpretation
               processes based on graph representations of the underlying lexical
               resources.}
}

BibTeX

@article{Mehler:Gleim:Gaitsch:Uslu:Hemati:2020,
  author    = {Alexander Mehler and R{\"{u}}diger Gleim and Regina Gaitsch and Tolga Uslu
               and Wahed Hemati},
  title     = {From Topic Networks to Distributed Cognitive Maps: {Zipfian} Topic
               Universes in the Area of Volunteered Geographic Information},
  journal   = {Complexity},
  issuetitle = {Cognitive Network Science: A New Frontier},
  volume    = {4},
  pages     = {1--47},
  year      = {2020},
  doi       = {10.1155/2020/4607025}
}

BibTeX

@article{Stegbauer:Mehler:2020,
  author    = {Christian Stegbauer and Alexander Mehler},
  title     = {Ursachen der {Entstehung} von ubiquit{\"{a}}ren {Zentrum-Peripheriestrukturen}
               und ihre {Folgen}},
  journal   = {Soziale Welt -- Zeitschrift f{\"u}r sozialwissenschaftliche Forschung und Praxis (SozW)},
  volume    = {Sonderband 23},
  year      = {2020},
  pages     = {265--284}
}

Wahed Hemati and Alexander Mehler. March, 2019. CRFVoter: gene and protein related object recognition using a conglomerate of CRF-based tools. Journal of Cheminformatics, 11(1):11.

BibTeX

@article{Hemati:Mehler:2019b,
  author    = {Hemati, Wahed and Mehler, Alexander},
  title     = {{{CRFVoter}: gene and protein related object recognition using
               a conglomerate of CRF-based tools}},
  journal   = {Journal of Cheminformatics},
  year      = {2019},
  month     = mar,
  day       = {14},
  volume    = {11},
  number    = {1},
  pages     = {11},
  abstract  = {Gene and protein related objects are an important class of entities
               in biomedical research, whose identification and extraction from
               scientific articles is attracting increasing interest. In this
               work, we describe an approach to the BioCreative V.5 challenge
               regarding the recognition and classification of gene and protein
               related objects. For this purpose, we transform the task as posed
               by BioCreative V.5 into a sequence labeling problem. We present
               a series of sequence labeling systems that we used and adapted
               in our experiments for solving this task. Our experiments show
               how to optimize the hyperparameters of the classifiers involved.
               To this end, we utilize various algorithms for hyperparameter
               optimization. Finally, we present CRFVoter, a two-stage application
               of Conditional Random Field (CRF) that integrates the optimized
               sequence labelers from our study into one ensemble classifier.},
  issn      = {1758-2946},
  doi       = {10.1186/s13321-019-0343-x},
  url       = {https://doi.org/10.1186/s13321-019-0343-x}
}

Wahed Hemati and Alexander Mehler. January, 2019. LSTMVoter: chemical named entity recognition using a conglomerate of sequence labeling tools. Journal of Cheminformatics, 11(1):7.

BibTeX

@article{Hemati:Mehler:2019a,
  author    = {Hemati, Wahed and Mehler, Alexander},
  title     = {{{LSTMVoter}: chemical named entity recognition using a conglomerate
               of sequence labeling tools}},
  journal   = {Journal of Cheminformatics},
  year      = {2019},
  month     = jan,
  day       = {10},
  volume    = {11},
  number    = {1},
  pages     = {7},
  abstract  = {Chemical and biomedical named entity recognition (NER) is an essential
               preprocessing task in natural language processing. The identification
               and extraction of named entities from scientific articles is also
               attracting increasing interest in many scientific disciplines.
               Locating chemical named entities in the literature is an essential
               step in chemical text mining pipelines for identifying chemical
               mentions, their properties, and relations as discussed in the
               literature. In this work, we describe an approach to the BioCreative
               V.5 challenge regarding the recognition and classification of
               chemical named entities. For this purpose, we transform the task
               of NER into a sequence labeling problem. We present a series of
               sequence labeling systems that we used, adapted and optimized
               in our experiments for solving this task. To this end, we experiment
               with hyperparameter optimization. Finally, we present LSTMVoter,
               a two-stage application of recurrent neural networks that integrates
               the optimized sequence labelers from our study into a single ensemble
               classifier.},
  issn      = {1758-2946},
  doi       = {10.1186/s13321-018-0327-2},
  url       = {https://doi.org/10.1186/s13321-018-0327-2}
}

BibTeX

@article{Gleim:Eger:Mehler:2019,
  author    = {Gleim, R{\"u}diger and Eger, Steffen and Mehler, Alexander and Uslu, Tolga
               and Hemati, Wahed and L{\"u}cking, Andy and Henlein, Alexander and Kahlsdorf, Sven
               and Hoenen, Armin},
  title     = {A practitioner's view: a survey and comparison of lemmatization
               and morphological tagging in {German} and {Latin}},
  journal   = {Journal of Language Modelling},
  volume    = {7},
  number    = {1},
  year      = {2019},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2019/07/jlm-tagging.pdf},
  doi       = {10.15398/jlm.v7i1.205},
  url       = {http://jlm.ipipan.waw.pl/index.php/JLM/article/view/205}
}

Tatiana Lokot, Alexander Mehler and Olga Abramov. November, 2018. On the limit value of compactness of some graph classes. PLOS ONE, 13(11):1–8.

BibTeX

@article{Lokot:Mehler:Abramov:2018,
  author    = {Lokot, Tatiana and Mehler, Alexander and Abramov, Olga},
  title     = {On the limit value of compactness of some graph classes},
  journal   = {PLOS ONE},
  publisher = {Public Library of Science},
  year      = {2018},
  month     = nov,
  volume    = {13},
  number    = {11},
  pages     = {1--8},
  doi       = {10.1371/journal.pone.0207536},
  url       = {https://doi.org/10.1371/journal.pone.0207536},
  abstract  = {In this paper, we study the limit of compactness which is a graph
               index originally introduced for measuring structural characteristics
               of hypermedia. Applying compactness to large scale small-world
               graphs (Mehler, 2008) observed its limit behaviour to be equal
               1. The striking question concerning this finding was whether this
               limit behaviour resulted from the specifics of small-world graphs
               or was simply an artefact. In this paper, we determine the necessary
               and sufficient conditions for any sequence of connected graphs
               resulting in a limit value of CB = 1 which can be generalized
               with some consideration for the case of disconnected graph classes
               (Theorem 3). This result can be applied to many well-known classes
               of connected graphs. Here, we illustrate it by considering four
               examples. In fact, our proof-theoretical approach allows for quickly
               obtaining the limit value of compactness for many graph classes
               sparing computational costs.}
}

BibTeX

@article{Driller:et:al:2018,
  author    = {Christine Driller and Markus Koch and Marco Schmidt and Claus Weiland
               and Thomas Hörnschemeyer and Thomas Hickler and Giuseppe Abrami and Sajawel Ahmed
               and Rüdiger Gleim and Wahed Hemati and Tolga Uslu and Alexander Mehler
               and Adrian Pachzelt and Jashar Rexhepi and Thomas Risse and Janina Schuster
               and Gerwin Kasperek and Angela Hausinger},
  title     = {Workflow and Current Achievements of {BIOfid}, an Information Service
               Mobilizing Biodiversity Data from Literature Sources},
  journal   = {Biodiversity Information Science and Standards},
  volume    = {2},
  pages     = {e25876},
  year      = {2018},
  publisher = {Pensoft Publishers},
  doi       = {10.3897/biss.2.25876},
  url       = {https://doi.org/10.3897/biss.2.25876},
  eprint    = {https://doi.org/10.3897/biss.2.25876},
  keywords  = {biofid},
  abstract  = {BIOfid is a specialized information service currently being developed
               to mobilize biodiversity data dormant in printed historical and
               modern literature and to offer a platform for open access journals
               on the science of biodiversity. Our team of librarians, computer
               scientists and biologists produce high-quality text digitizations,
               develop new text-mining tools and generate detailed ontologies
               enabling semantic text analysis and semantic search by means of
               user-specific queries. In a pilot project we focus on German publications
               on the distribution and ecology of vascular plants, birds, moths
               and butterflies extending back to the Linnaeus period about 250
               years ago. The three organism groups have been selected according
               to current demands of the relevant research community in Germany.
               The text corpus defined for this purpose comprises over 400 volumes
               with more than 100,000 pages to be digitized and will be complemented
               by journals from other digitization projects, copyright-free and
               project-related literature. With TextImager (Natural Language
               Processing \& Text Visualization) and TextAnnotator (Discourse
               Semantic Annotation) we have already extended and launched tools
               that focus on the text-analytical section of our project. Furthermore,
               taxonomic and anatomical ontologies elaborated by us for the taxa
               prioritized by the project’s target group - German institutions
               and scientists active in biodiversity research - are constantly
               improved and expanded to maximize scientific data output. Our
               poster describes the general workflow of our project ranging from
               literature acquisition via software development, to data availability
               on the BIOfid web portal (http://biofid.de/), and the implementation
               into existing platforms which serve to promote global accessibility
               of biodiversity data.}
}

BibTeX

@article{Mehler:Gleim:Luecking:Uslu:Stegbauer:2018,
  author    = {Alexander Mehler and Rüdiger Gleim and Andy Lücking and Tolga Uslu
               and Christian Stegbauer},
  title     = {On the Self-similarity of {Wikipedia} Talks: a Combined Discourse-analytical
               and Quantitative Approach},
  journal   = {Glottometrics},
  volume    = {40},
  pages     = {1--44},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/Glottometrics-Mehler.pdf},
  year      = {2018}
}

Alexander Mehler and Andy Lücking. 2017. Modelle sozialer Netzwerke und Natural Language Processing: eine methodologische Randnotiz. Soziologie, 46(1):43–47.

BibTeX

@article{Mehler:Luecking:2017,
  author    = {Alexander Mehler and Andy Lücking},
  title     = {Modelle sozialer {Netzwerke} und {Natural Language Processing}: eine
               methodologische {Randnotiz}},
  journal   = {Soziologie},
  volume    = {46},
  number    = {1},
  pages     = {43--47},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/Soziologe-NetzwerkeundNLP.pdf},
  year      = {2017}
}

BibTeX

@article{Eger:vorDerBrueck:Mehler:2016,
  author    = {Eger, Steffen and vor der Brück, Tim and Mehler, Alexander},
  title     = {A Comparison of Four Character-Level String-to-String Translation
               Models for ({OCR}) Spelling Error Correction},
  journal   = {The Prague Bulletin of Mathematical Linguistics},
  volume    = {105},
  pages     = {77--99},
  doi       = {10.1515/pralin-2016-0004},
  pdf       = {https://ufal.mff.cuni.cz/pbml/105/art-eger-vor-der-brueck.pdf},
  year      = {2016}
}

BibTeX

@article{Mehler:et:al:2016,
  author    = {Alexander Mehler and Rüdiger Gleim and Tim vor der Brück and Wahed Hemati
               and Tolga Uslu and Steffen Eger},
  title     = {{Wikidition}: Automatic Lexiconization and Linkification of Text Corpora},
  journal   = {Information Technology},
  volume    = {58},
  pages     = {70--79},
  abstract  = {We introduce a new text technology, called Wikidition, which automatically
               generates large scale editions of corpora of natural language
               texts. Wikidition combines a wide range of text mining tools for
               automatically linking lexical, sentential and textual units. This
               includes the extraction of corpus-specific lexica down to the
               level of syntactic words and their grammatical categories. To
               this end, we introduce a novel measure of text reuse and exemplify
               Wikidition by means of the capitularies, that is, a corpus of
               Medieval Latin texts.},
  doi       = {10.1515/itit-2015-0035},
  year      = {2016}
}

Armin Hoenen, Alexander Mehler and Jost Gippert. 2016. Editorial. JLCL, 31(2):iii–iv.

BibTeX

@article{Hoenen:Mehler:Gippert:2016,
  author    = {Armin Hoenen and Alexander Mehler and Jost Gippert},
  title     = {Editorial},
  journal   = {JLCL},
  volume    = {31},
  number    = {2},
  pages     = {iii--iv},
  pdf       = {http://www.jlcl.org/2016_Heft2/Heft2-2016.pdf},
  year      = {2016}
}

Armin Hoenen and Lela Samushia. 2016. Gepi: An Epigraphic Corpus for Old Georgian and a Tool Sketch for Aiding Reconstruction. JLCL, 31(2):25–38.

BibTeX

@article{Hoenen:Samushia:2016,
  author    = {Armin Hoenen and Lela Samushia},
  title     = {{Gepi}: An Epigraphic Corpus for {Old Georgian} and a Tool Sketch
               for Aiding Reconstruction},
  journal   = {JLCL},
  volume    = {31},
  number    = {2},
  pages     = {25--38},
  year      = {2016}
}

Natia Dundua, Armin Hoenen and Lela Samushia. 2015. A Parallel Corpus of the Old Georgian Gospel Manuscripts and their Stemmatology. The Georgian Journal for Language Logic Computation, IV:176–185.

BibTeX

@article{Dundua:Hoenen:Samushia:2015,
  author    = {Dundua, Natia and Hoenen, Armin and Samushia, Lela},
  title     = {A Parallel Corpus of the {Old Georgian} Gospel Manuscripts and
               their Stemmatology},
  journal   = {The Georgian Journal for Language Logic Computation},
  volume    = {IV},
  pages     = {176--185},
  publisher = {CLLS, Tbilisi State University and Kurt G{\"o}del
               Society},
  year      = {2015}
}

Steffen Eger. 2015. Identities for Partial Bell Polynomials Derived from Identities for Weighted Integer Compositions. Aequationes Mathematicae.

BibTeX

@article{Eger:2015b,
  author    = {Eger, Steffen},
  title     = {Identities for Partial {Bell} Polynomials Derived from Identities
               for Weighted Integer Compositions},
  journal   = {Aequationes Mathematicae},
  doi       = {10.1007/s00010-015-0338-2},
  year      = {2015}
}

Steffen Eger. 2015. Some Elementary Congruences for the Number of Weighted Integer Compositions. Journal of Integer Sequences (electronic only), 18(4).

BibTeX

@article{Eger:2015a,
  author    = {Eger, Steffen},
  title     = {Some Elementary Congruences for the Number of Weighted Integer Compositions},
  journal   = {Journal of Integer Sequences (electronic only)},
  volume    = {18},
  number    = {4},
  pdf       = {https://cs.uwaterloo.ca/journals/JIS/VOL18/Eger/eger11.pdf},
  publisher = {School of Computer Science, University of Waterloo,
               Waterloo, ON},
  year      = {2015}
}

Andy Lücking, Thies Pfeiffer and Hannes Rieser. 2015. Pointing and Reference Reconsidered. Journal of Pragmatics, 77:56–79.

BibTeX

@article{Luecking:Pfeiffer:Rieser:2015,
  author    = {Lücking, Andy and Pfeiffer, Thies and Rieser, Hannes},
  title     = {Pointing and Reference Reconsidered},
  journal   = {Journal of Pragmatics},
  volume    = {77},
  pages     = {56--79},
  abstract  = {Current semantic theory on indexical expressions claims that demonstratively
               used indexicals such as this lack a referent-determining meaning
               but instead rely on an accompanying demonstration act like a pointing
               gesture. While this view allows to set up a sound logic of demonstratives,
               the direct-referential role assigned to pointing gestures has
               never been scrutinized thoroughly in semantics or pragmatics.
               We investigate the semantics and pragmatics of co-verbal pointing
               from a foundational perspective combining experiments, statistical
               investigation, computer simulation and theoretical modeling techniques
               in a novel manner. We evaluate various referential hypotheses
               with a corpus of object identification games set up in experiments
               in which body movement tracking techniques have been extensively
               used to generate precise pointing measurements. Statistical investigation
               and computer simulations show that especially distal areas in
               the pointing domain falsify the semantic direct-referential hypotheses
               concerning pointing gestures. As an alternative, we propose that
               reference involving pointing rests on a default inference which
               we specify using the empirical data. These results raise numerous
               problems for classical semantics–pragmatics interfaces: we argue
               for pre-semantic pragmatics in order to account for inferential
               reference in addition to classical post-semantic Gricean pragmatics.},
  doi       = {10.1016/j.pragma.2014.12.013},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Luecking_Pfeiffer_Rieser_Pointing_and_Reference_Reconsiderd.pdf},
  website   = {http://www.sciencedirect.com/science/article/pii/S037821661500003X},
  year      = {2015}
}

BibTeX

@article{Chen:2014:a,
  author    = {Chen, Xinying},
  title     = {Language as a whole -- A new framework for linguistic knowledge
               integration: Comment on "Approaching human language with complex
               networks" by {Cong} and {Liu}},
  journal   = {Physics of Life Reviews},
  volume    = {11},
  number    = {4},
  pages     = {628--629},
  doi       = {10.1016/j.plrev.2014.07.011},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Language-as-a-whole-Chen.pdf},
  url       = {http://www.sciencedirect.com/science/article/pii/S1571064514001249},
  year      = {2014}
}

BibTeX

@article{Gong:Lam:Chen:Zhang:2014,
  author    = {Gong, Tao and Lam, Yau Wai and Chen, Xinying and Zhang, Menghan},
  title     = {Review: Evolutionary Linguistics in the Past Two Decades -- {EVOLANG10}:
               the 10th International Conference on Language Evolution},
  journal   = {Journal of Chinese Linguistics},
  volume    = {42},
  number    = {2},
  pages     = {499--530},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/JCL-EvolangReview.pdf},
  year      = {2014}
}

Giuseppe Abrami, Alexander Mehler, Dietmar Pravida and Susanne Zeunert. December, 2014. Rubrik: Neues aus dem Netz. Kunstchronik, 12:623.

BibTeX

@article{Abrami:Mehler:Pravida:Zeunert:2014,
  author    = {Abrami, Giuseppe and Mehler, Alexander and Pravida, Dietmar
               and Zeunert, Susanne},
  title     = {Rubrik: Neues aus dem Netz},
  journal   = {Kunstchronik},
  volume    = {12},
  pages     = {623},
  address   = {München},
  month     = dec,
  publisher = {Zentralinstitut für Kunstgeschichte},
  website   = {http://www.zikg.eu/publikationen/laufende-publikationen/kunstchronik},
  year      = {2014}
}

Steffen Eger. 2014. A proof of the Mann-Shanks primality criterion conjecture for extended binomial coefficients. Integers: The Electronic Journal of Combinatorial Number Theory, 14.

BibTeX

@article{Eger:2014:a,
  author    = {Eger, Steffen},
  title     = {A proof of the {Mann-Shanks} primality criterion conjecture for
               extended binomial coefficients},
  journal   = {Integers: The Electronic Journal of Combinatorial
               Number Theory},
  volume    = {14},
  abstract  = {We show that the Mann-Shanks primality criterion holds for weighted
               extended binomial coefficients (which count the number of weighted
               integer compositions), not only for the ordinary binomial coefficients.},
  pdf       = {http://www.emis.de/journals/INTEGERS/papers/o60/o60.pdf},
  website   = {http://www.emis.de/journals/INTEGERS/vol14.html},
  year      = {2014}
}

Steffen Eger. 2014. Stirling's approximation for central extended binomial coefficients. The American Mathematical Monthly, 121(4):344–349.

BibTeX

@article{Eger:2014:b,
  author    = {Eger, Steffen},
  title     = {{Stirling's} approximation for central extended binomial coefficients},
  journal   = {The American Mathematical Monthly},
  volume    = {121},
  number    = {4},
  pages     = {344--349},
  abstract  = {We derive asymptotic formulas for central extended binomial coefficients,
               which are generalizations of binomial coefficients, using the
               distribution of the sum of independent discrete uniform random
               variables with the Central Limit Theorem and a local limit variant.},
  website   = {http://www.jstor.org/stable/10.4169/amer.math.monthly.121.04.344},
  year      = {2014}
}

BibTeX

@article{Mehler:2014,
  author    = {Mehler, Alexander},
  title     = {On the Expressiveness, Validity and Reproducibility of Models
               of Language Evolution. Comment on 'Modelling language evolution:
               Examples and predictions' by {Tao Gong}, {Shuai Lan}, and {Menghan
               Zhang}},
  journal   = {Physics of Life Reviews},
  pdf       = {http://www.sciencedirect.com/science/article/pii/S1571064514000529/pdfft?md5=6a2cbbfc083d7bc3adfd26d431cc55d8&pid=1-s2.0-S1571064514000529-main.pdf},
  website   = {https://www.researchgate.net/publication/261290946_On_the_expressiveness_validity_and_reproducibility_of_models_of_language_evolution_Comment_on_Modelling_language_evolution_Examples_and_predictions_by_Tao_Gong_Shuai_Lan_and_Menghan_Zhang},
  year      = {2014}
}

BibTeX

@article{Biemann:Crane:Fellbaum:Mehler:2014,
  author    = {Chris Biemann and Gregory R. Crane and Christiane D. Fellbaum
               and Alexander Mehler},
  title     = {Computational Humanities -- bridging the gap between Computer Science
               and Digital Humanities ({Dagstuhl} Seminar 14301)},
  journal   = {Dagstuhl Reports},
  volume    = {4},
  number    = {7},
  pages     = {80--111},
  abstract  = {Research in the field of Digital Humanities, also known as Humanities
               Computing, has seen a steady increase over the past years. Situated
               at the intersection of computing science and the humanities, present
               efforts focus on making resources such as texts, images, musical
               pieces and other semiotic artifacts digitally available, searchable
               and analysable. To this end, computational tools enabling textual
               search, visual analytics, data mining, statistics and natural
               language processing are harnessed to support the humanities researcher.
               The processing of large data sets with appropriate software opens
               up novel and fruitful approaches to questions in the traditional
               humanities. This report summarizes the Dagstuhl seminar 14301
               on “Computational Humanities – bridging the gap between Computer
               Science and Digital Humanities”.},
  issn      = {2192-5283},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/dagrep_v004_i007_p080_s14301.pdf},
  publisher = {Schloss Dagstuhl--Leibniz-Zentrum für Informatik},
  year      = {2014}
}

Alexander Mehler, Andy Lücking and Giuseppe Abrami. 2014. WikiNect: Image Schemata as a Basis of Gestural Writing for Kinetic Museum Wikis. Universal Access in the Information Society, 1–17.

BibTeX

@article{Mehler:Luecking:Abrami:2014,
  author    = {Mehler, Alexander and Lücking, Andy and Abrami, Giuseppe},
  title     = {{WikiNect}: Image Schemata as a Basis of Gestural Writing for
               Kinetic Museum Wikis},
  journal   = {Universal Access in the Information Society},
  pages     = {1--17},
  abstract  = {This paper provides a theoretical assessment of gestures in the
               context of authoring image-related hypertexts by example of the
               museum information system WikiNect. To this end, a first implementation
               of gestural writing based on image schemata is provided (Lakoff
               in Women, fire, and dangerous things: what categories reveal about
               the mind. University of Chicago Press, Chicago, 1987). Gestural
               writing is defined as a sort of coding in which propositions are
               only expressed by means of gestures. In this respect, it is shown
               that image schemata allow for bridging between natural language
               predicates and gestural manifestations. Further, it is demonstrated
               that gestural writing primarily focuses on the perceptual level
               of image descriptions (Hollink et al. in Int J Hum Comput Stud
               61(5):601–626, 2004). By exploring the metaphorical potential
               of image schemata, it is finally illustrated how to extend the
               expressiveness of gestural writing in order to reach the conceptual
               level of image descriptions. In this context, the paper paves
               the way for implementing museum information systems like WikiNect
               as systems of kinetic hypertext authoring based on full-fledged
               gestural writing.},
  doi       = {10.1007/s10209-014-0386-8},
  issn      = {1615-5289},
  keywords  = {wikinect},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/art_10.1007_s10209-014-0386-8.pdf},
  website   = {http://dx.doi.org/10.1007/s10209-014-0386-8},
  year      = {2014}
}

Steffen Eger. 2013. Sequence Segmentation by Enumeration: An Exploration. Prague Bull. Math. Linguistics, 100:113–131.

BibTeX

@article{Eger:2013:a,
  author    = {Eger, Steffen},
  title     = {Sequence Segmentation by Enumeration: An Exploration},
  journal   = {The Prague Bulletin of Mathematical Linguistics},
  volume    = {100},
  pages     = {113--131},
  abstract  = {We investigate exhaustive enumeration and subsequent language
               model evaluation (E\&E approach) as an alternative to solving
               the sequence segmentation problem. We show that, under certain
               conditions (on string lengths and regarding a possibility to accurately
               estimate the number of segments), which are satisfied for important
               NLP applications, such as phonological segmentation, syllabification,
               and morphological segmentation, the E\&E approach is feasible
               and promises superior results than the standard sequence labeling
               approach to sequence segmentation.},
  pdf       = {http://ufal.mff.cuni.cz/pbml/100/art-eger.pdf},
  year      = {2013}
}

Steffen Eger. 2013. A Contribution to the Theory of Word Length Distribution Based on a Stochastic Word Length Distribution Model. Journal of Quantitative Linguistics, 20(3):252–265.

BibTeX

@article{Eger:2013:b,
  author    = {Eger, Steffen},
  title     = {A Contribution to the Theory of Word Length Distribution Based
               on a Stochastic Word Length Distribution Model},
  journal   = {Journal of Quantitative Linguistics},
  volume    = {20},
  number    = {3},
  pages     = {252--265},
  abstract  = {We derive a stochastic word length distribution model based on
               the concept of compound distributions and show its relationships
               with and implications for Wimmer et al.’s (1994) synergetic word
               length distribution model.},
  year      = {2013}
}

Steffen Eger. 2013. Sequence alignment with arbitrary steps and further generalizations, with applications to alignments in linguistics. Information Sciences, 237:287–304.

BibTeX

@article{Eger:2013:c,
  author    = {Eger, Steffen},
  title     = {Sequence alignment with arbitrary steps and further generalizations,
               with applications to alignments in linguistics},
  journal   = {Information Sciences},
  volume    = {237},
  pages     = {287--304},
  abstract  = {We provide simple generalizations of the classical Needleman–Wunsch
               algorithm for aligning two sequences. First, we let both sequences
               be defined over arbitrary, potentially different alphabets. Secondly,
               we consider similarity functions between elements of both sequences
               with ranges in a semiring. Thirdly, instead of considering only
               ‘match’, ‘mismatch’ and ‘skip’ operations, we allow arbitrary
               non-negative alignment ‘steps’ S. Next, we present novel combinatorial
               formulas for the number of monotone alignments between two sequences
               for selected steps S. Finally, we illustrate sample applications
               in natural language processing that require larger steps than
               available in the original Needleman–Wunsch sequence alignment
               procedure such that our generalizations can be fruitfully adopted.},
  website   = {http://www.sciencedirect.com/science/article/pii/S0020025513001485},
  year      = {2013}
}

Steffen Eger. 2013. Restricted weighted integer compositions and extended binomial coefficients. Journal of Integer Sequences (electronic only), 16(1).

BibTeX

@article{Eger:2013:d,
  author    = {Eger, Steffen},
  title     = {Restricted weighted integer compositions and extended binomial coefficients},
  journal   = {Journal of Integer Sequences (electronic only)},
  volume    = {16},
  number    = {1},
  abstract  = {We prove a simple relationship between extended binomial coefficients
               — natural extensions of the well-known binomial coefficients —
               and weighted restricted integer compositions. Moreover, we give a very useful
               interpretation of extended binomial coefficients as representing
               distributions of sums of independent discrete random variables.
               We apply our results, e.g., to determine the distribution of the
               sum of k logarithmically distributed random variables, and to
               determining the distribution, specifying all moments, of the random
               variable whose values are part-products of random restricted integer
               compositions. Based on our findings and using the central limit
               theorem, we also give generalized Stirling formulae for central
               extended binomial coefficients. We enlarge the list of known properties
               of extended binomial coefficients.},
  issn      = {1530-7638},
  pdf       = {https://cs.uwaterloo.ca/journals/JIS/VOL16/Eger/eger6.pdf},
  publisher = {School of Computer Science, University of Waterloo,
               Waterloo, ON},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.397.3745},
  year      = {2013}
}

Andy Lücking and Alexander Mehler. 2013. On Three Notions of Grounding of Artificial Dialog Companions. Science, Technology & Innovation Studies, 10(1):31–36.

BibTeX

@article{Luecking:Mehler:2013:a,
  author    = {Lücking, Andy and Mehler, Alexander},
  title     = {On Three Notions of Grounding of Artificial Dialog Companions},
  journal   = {Science, Technology \& Innovation Studies},
  volume    = {10},
  number    = {1},
  pages     = {31--36},
  abstract  = {We provide a new, theoretically motivated evaluation grid for
               assessing the conversational achievements of Artificial Dialog
               Companions (ADCs). The grid is spanned along three grounding problems.
               Firstly, it is argued that symbol grounding in general has to
               be intrinsic. Current approaches in this context, however, are
               limited to a certain kind of expression that can be grounded in
               this way. Secondly, we identify three requirements for conversational
               grounding, the process leading to mutual understanding. Finally,
               we sketch a test case for symbol grounding in the form of the
               philosophical grounding problem that involves the use of modal
               language. Together, the three grounding problems provide a grid
               that allows us to assess ADCs’ dialogical performances and to
               pinpoint future developments on these grounds.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/STI-final-badge.pdf},
  website   = {http://www.sti-studies.de/ojs/index.php/sti/article/view/143},
  year      = {2013}
}

BibTeX

@article{Luecking:Bergmann:Hahn:Kopp:Rieser:2012,
  author    = {Lücking, Andy and Bergmann, Kirsten and Hahn, Florian and Kopp, Stefan
               and Rieser, Hannes},
  title     = {Data-based Analysis of Speech and Gesture: The Bielefeld Speech
               and Gesture Alignment Corpus (SaGA) and its Applications},
  journal   = {Journal of Multimodal User Interfaces},
  volume    = {7},
  number    = {1--2},
  pages     = {5--18},
  abstract  = {Communicating face-to-face, interlocutors frequently produce multimodal
               meaning packages consisting of speech and accompanying gestures.
               We discuss a systematically annotated speech and gesture corpus
               consisting of 25 route-and-landmark-description dialogues, the
               Bielefeld Speech and Gesture Alignment corpus (SaGA), collected
               in experimental face-to-face settings. We first describe the primary
               and secondary data of the corpus and its reliability assessment.
               Then we go into some of the projects carried out using SaGA demonstrating
               the wide range of its usability: on the empirical side, there
               is work on gesture typology, individual and contextual parameters
               influencing gesture production and gestures’ functions for dialogue
               structure. Speech-gesture interfaces have been established extending
               unification-based grammars. In addition, the development of a
               computational model of speech-gesture alignment and its implementation
               constitutes a research line we focus on.},
  doi       = {10.1007/s12193-012-0106-8},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/MMUI-SaGA-revision2.pdf},
  website   = {http://www.springerlink.com/content/a547448u86h3116x/?MUD=MP},
  year      = {2013}
}

Armin Hoenen. 2012. Measuring Repetitiveness in Texts, a Preliminary Investigation. Sprache und Datenverarbeitung. International Journal for Language Data Processing, 36(2):93–104.

BibTeX

@article{Hoenen:2012:a,
  author    = {Hoenen, Armin},
  title     = {Measuring Repetitiveness in Texts, a Preliminary Investigation},
  journal   = {Sprache und Datenverarbeitung. International Journal
               for Language Data Processing},
  volume    = {36},
  number    = {2},
  pages     = {93--104},
  abstract  = {In this paper, a model is presented for the automatic measurement
               that can systematically describe the usage and function of the
               phenomenon of repetition in written text. The motivating hypothesis
               for this study is that the more repetitive a text is, the easier
               it is to memorize. Therefore, an automated measurement index can
               provide feedback to writers and for those who design texts that
               are often memorized including songs, holy texts, theatrical plays,
               and advertising slogans. The potential benefits of this kind of
               systematic feedback are numerous, the main one being that content
               creators would be able to employ a standard threshold of memorizability.
               This study explores multiple ways of implementing and calculating
               repetitiveness across levels of analysis (such as paragraph-level
               or sub-word level) genres (such as songs, holy texts, and other
               genres) and languages, integrating these into a model for
               the automatic measurement of repetitiveness. The Avestan language
               and some of its idiosyncratic features are explored in order to
               illuminate how the proposed index is applied in the ranking of
               texts according to their repetitiveness.},
  website   = {http://www.linse.uni-due.de/jahrgang-36-2012/articles/measuring-repetitiveness-in-texts-a-preliminary-investigation.html},
  year      = {2012}
}

Steffen Eger. 2012. The Combinatorics of String Alignments: Reconsidering the Problem. Journal of Quantitative Linguistics, 19(1):32–53.

BibTeX

@article{Eger:2012:a,
  author    = {Eger, Steffen},
  title     = {The Combinatorics of String Alignments: Reconsidering the Problem},
  journal   = {Journal of Quantitative Linguistics},
  volume    = {19},
  number    = {1},
  pages     = {32--53},
  abstract  = {In recent work, Covington discusses the number of alignments of
               two strings. Thereby, Covington defines an alignment as “a way
               of pairing up elements of two strings, optionally skipping some
               but preserving the order”. This definition has drawbacks as it
               excludes many relevant situations. In this work, we specify the
               notion of an alignment so that many linguistically interesting
               situations are covered. To this end, we define an alignment in
               an abstract manner as a set of pairs and then define three properties
               on such sets. Secondly, we specify the numbers of possibilities
               of aligning two strings in each case.},
  doi       = {10.1080/09296174.2011.638792},
  website   = {http://www.tandfonline.com/doi/full/10.1080/09296174.2011.638792#tabModule},
  year      = {2012}
}

Alexander Mehler, Andy Lücking and Peter Menke. 2012. Assessing Cognitive Alignment in Different Types of Dialog by means of a Network Model. Neural Networks, 32:159–164.

BibTeX

@article{Mehler:Luecking:Menke:2012,
  author    = {Mehler, Alexander and Lücking, Andy and Menke, Peter},
  title     = {Assessing Cognitive Alignment in Different Types of Dialog by
               means of a Network Model},
  journal   = {Neural Networks},
  volume    = {32},
  pages     = {159--164},
  abstract  = {We present a network model of dialog lexica, called TiTAN (Two-layer
               Time-Aligned Network) series. TiTAN series capture the formation
               and structure of dialog lexica in terms of serialized graph representations.
               The dynamic update of TiTAN series is driven by the dialog-inherent
               timing of turn-taking. The model provides a link between neural,
               connectionist underpinnings of dialog lexica on the one hand and
               observable symbolic behavior on the other. On the neural side,
               priming and spreading activation are modeled in terms of TiTAN
               networking. On the symbolic side, TiTAN series account for cognitive
               alignment in terms of the structural coupling of the linguistic
               representations of dialog partners. This structural stance allows
               us to apply TiTAN in machine learning of data of dialogical alignment.
               In previous studies, it has been shown that aligned dialogs can
               be distinguished from non-aligned ones by means of TiTAN-based
               modeling. Now, we simultaneously apply this model to two types
               of dialog: task-oriented, experimentally controlled dialogs on
               the one hand and more spontaneous, direction giving dialogs on
               the other. We ask whether it is possible to separate aligned dialogs
               from non-aligned ones in a type-crossing way. Starting from a
               recent experiment (Mehler, Lücking, \& Menke, 2011a), we show
               that such a type-crossing classification is indeed possible. This
               hints at a structural fingerprint left by alignment in networks
               of linguistic items that are routinely co-activated during conversation.},
  doi       = {10.1016/j.neunet.2012.02.013},
  website   = {http://www.sciencedirect.com/science/article/pii/S0893608012000421},
  year      = {2012}
}

BibTeX

@article{Luecking:Mehler:2011,
  author    = {Lücking, Andy and Mehler, Alexander},
  title     = {A Model of Complexity Levels of Meaning Constitution in Simulation
               Models of Language Evolution},
  journal   = {International Journal of Signs and Semiotic Systems},
  volume    = {1},
  number    = {1},
  pages     = {18--38},
  abstract  = {Currently, some simulative accounts exist within dynamic or evolutionary
               frameworks that are concerned with the development of linguistic
               categories within a population of language users. Although these
               studies mostly emphasize that their models are abstract, the paradigm
               categorization domain is preferably that of colors. In this paper,
               the authors argue that color adjectives are special predicates
               in both linguistic and metaphysical terms: semantically, they
               are intersective predicates, metaphysically, color properties
               can be empirically reduced onto purely physical properties. The
               restriction of categorization simulations to the color paradigm
               systematically leads to ignoring two ubiquitous features of natural
               language predicates, namely relativity and context-dependency.
               Therefore, the models for simulation models of linguistic categories
               are not able to capture the formation of categories like perspective-dependent
               predicates ‘left’ and ‘right’, subsective predicates like ‘small’
               and ‘big’, or predicates that make reference to abstract objects
               like ‘I prefer this kind of situation’. The authors develop a
               three-dimensional grid of ascending complexity that is partitioned
               according to the semiotic triangle. They also develop a conceptual
               model in the form of a decision grid by means of which the complexity
               level of simulation models of linguistic categorization can be
               assessed in linguistic terms.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/luecking_mehler_article_IJSSS.pdf},
  year      = {2011}
}

BibTeX

@article{Mehler:Abramov:Diewald:2011:a,
  author    = {Mehler, Alexander and Abramov, Olga and Diewald, Nils},
  title     = {Geography of Social Ontologies: Testing a Variant of the {Sapir-Whorf}
               Hypothesis in the Context of {Wikipedia}},
  journal   = {Computer Speech and Language},
  volume    = {25},
  number    = {3},
  pages     = {716--740},
  abstract  = {In this article, we test a variant of the Sapir-Whorf Hypothesis
               in the area of complex network theory. This is done by analyzing
               social ontologies as a new resource for automatic language classification.
               Our method is to solely explore structural features of social
               ontologies in order to predict family resemblances of languages
               used by the corresponding communities to build these ontologies.
               This approach is based on a reformulation of the Sapir-Whorf Hypothesis
               in terms of distributed cognition. Starting from a corpus of 160
               Wikipedia-based social ontologies, we test our variant of the
               Sapir-Whorf Hypothesis by several experiments, and find out that
               we outperform the corresponding baselines. All in all, the article
               develops an approach to classify linguistic networks of tens of
               thousands of vertices by exploring a small range of mathematically
               well-established topological indices.},
  doi       = {10.1016/j.csl.2010.05.006},
  website   = {http://www.sciencedirect.com/science/article/pii/S0885230810000434},
  year      = {2011}
}

Mathias Lösch, Ulli Waltinger, Wolfram Horstmann and Alexander Mehler. 2011. Building a DDC-annotated Corpus from OAI Metadata. Journal of Digital Information, 12(2).

BibTeX

@article{Loesch:Waltinger:Horstmann:Mehler:2011,
  author    = {Lösch, Mathias and Waltinger, Ulli and Horstmann, Wolfram and Mehler, Alexander},
  title     = {Building a {DDC}-annotated Corpus from {OAI} Metadata},
  journal   = {Journal of Digital Information},
  volume    = {12},
  number    = {2},
  internal-note = {NOTE(review): the abstract field of this entry is word-for-word
               the abstract of vor:der:Brueck:Hartrumpf:Helbig:2008:b (readability
               checker) and does not describe a DDC-annotated OAI corpus; it
               looks like a copy-and-paste error. Replace it with the abstract
               of this article.},
  abstract  = {Checking for readability or simplicity of texts is important for
               many institutional and individual users. Formulas for approximately
               measuring text readability have a long tradition. Usually, they
               exploit surface-oriented indicators like sentence length, word
               length, word frequency, etc. However, in many cases, this information
               is not adequate to realistically approximate the cognitive difficulties
               a person can have to understand a text. Therefore we use deep
               syntactic and semantic indicators in addition. The syntactic information
               is represented by a dependency tree, the semantic information
               by a semantic network. Both representations are automatically
               generated by a deep syntactico-semantic analysis. A global readability
               score is determined by applying a nearest neighbor algorithm on
               3,000 ratings of 300 test persons. The evaluation showed that
               the deep syntactic and semantic indicators lead to promising results
               comparable to the best surface-based indicators. The combination
               of deep and shallow indicators leads to an improvement over shallow
               indicators alone. Finally, a graphical user interface was developed
               which highlights difficult passages, depending on the individual
               indicator values, and displays a global readability score.},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  pdf       = {https://journals.tdl.org/jodi/index.php/jodi/article/download/1765/1767},
  website   = {http://journals.tdl.org/jodi/article/view/1765},
  year      = {2011}
}

BibTeX

@article{Mehler:Diewald:Waltinger:et:al:2010,
  author    = {Mehler, Alexander and Diewald, Nils and Waltinger, Ulli and Gleim, Rüdiger
               and Esch, Dietmar and Job, Barbara and Küchelmann, Thomas and Abramov, Olga
               and Blanchard, Philippe},
  title     = {Evolution of {Romance} Language in Written Communication: Network
               Analysis of Late {Latin} and Early {Romance} Corpora},
  journal   = {Leonardo},
  volume    = {44},
  number    = {3},
  publisher = {MIT Press},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_diewald_waltinger_gleim_esch_job_kuechelmann_pustylnikov_blanchard_2010.pdf},
  year      = {2011}
}

Olga Abramov and Alexander Mehler. 2011. Automatic Language Classification by Means of Syntactic Dependency Networks. Journal of Quantitative Linguistics, 18(4):291–336.

BibTeX

@article{Abramov:Mehler:2011:a,
  author    = {Abramov, Olga and Mehler, Alexander},
  title     = {Automatic Language Classification by Means of Syntactic Dependency Networks},
  journal   = {Journal of Quantitative Linguistics},
  volume    = {18},
  number    = {4},
  pages     = {291--336},
  abstract  = {This article presents an approach to automatic language classification
               by means of linguistic networks. Networks of 11 languages were
               constructed from dependency treebanks, and the topology of these
               networks serves as input to the classification algorithm. The
               results match the genealogical similarities of these languages.
               In addition, we test two alternative approaches to automatic language
               classification – one based on n-grams and the other on quantitative
               typological indices. All three methods show good results in identifying
               genealogical groups. Beyond genetic similarities, network features
               (and feature combinations) offer a new source of typological information
               about languages. This information can contribute to a better understanding
               of the interplay of single linguistic phenomena observed in language.},
  website   = {http://www.researchgate.net/publication/220469321_Automatic_Language_Classification_by_means_of_Syntactic_Dependency_Networks},
  year      = {2011}
}

BibTeX

@article{Mehler:Schwandt:Gleim:Jussen:2011,
  author    = {Mehler, Alexander and Schwandt, Silke and Gleim, Rüdiger and Jussen, Bernhard},
  title     = {Der {eHumanities} {Desktop} als {Werkzeug} in der historischen {Semantik}:
               {Funktionsspektrum} und {Einsatzszenarien}},
  journal   = {Journal for Language Technology and Computational Linguistics (JLCL)},
  volume    = {26},
  number    = {1},
  pages     = {97--117},
  abstract  = {Die Digital Humanities bzw. die Computational Humanities entwickeln
               sich zu eigenst{\"a}ndigen Disziplinen an der Nahtstelle von Geisteswissenschaft
               und Informatik. Diese Entwicklung betrifft zunehmend auch die
               Lehre im Bereich der geisteswissenschaftlichen Fachinformatik.
               In diesem Beitrag thematisieren wir den eHumanities Desktop als
               ein Werkzeug für diesen Bereich der Lehre. Dabei geht es genauer
               um einen Brückenschlag zwischen Geschichtswissenschaft und Informatik:
               Am Beispiel der historischen Semantik stellen wir drei Lehrszenarien
               vor, in denen der eHumanities Desktop in der geschichtswissenschaftlichen
               Lehre zum Einsatz kommt. Der Beitrag schliesst mit einer Anforderungsanalyse
               an zukünftige Entwicklungen in diesem Bereich.},
  pdf       = {http://media.dwds.de/jlcl/2011_Heft1/8.pdf},
  year      = {2011}
}

Alexander Mehler, Andy Lücking and Petra Weiß. 2010. A Network Model of Interpersonal Alignment. Entropy, 12(6):1440–1483.

BibTeX

@article{Mehler:Weiss:Luecking:2010:a,
  author    = {Mehler, Alexander and Lücking, Andy and Wei{\ss}, Petra},
  title     = {A Network Model of Interpersonal Alignment},
  journal   = {Entropy},
  volume    = {12},
  number    = {6},
  pages     = {1440--1483},
  abstract  = {In dyadic communication, both interlocutors adapt to each other
               linguistically, that is, they align interpersonally. In this article,
               we develop a framework for modeling interpersonal alignment in
               terms of the structural similarity of the interlocutors’ dialog
               lexica. This is done by means of so-called two-layer time-aligned
               network series, that is, a time-adjusted graph model. The graph
               model is partitioned into two layers, so that the interlocutors’
               lexica are captured as subgraphs of an encompassing dialog graph.
               Each constituent network of the series is updated utterance-wise.
               Thus, both the inherent bipartition of dyadic conversations and
               their gradual development are modeled. The notion of alignment
               is then operationalized within a quantitative model of structure
               formation based on the mutual information of the subgraphs that
               represent the interlocutor’s dialog lexica. By adapting and further
               developing several models of complex network theory, we show that
               dialog lexica evolve as a novel class of graphs that have not
               been considered before in the area of complex (linguistic) networks.
               Additionally, we show that our framework allows for classifying
               dialogs according to their alignment status. To the best of our
               knowledge, this is the first approach to measuring alignment in
               communication that explores the similarities of graph-like cognitive
               representations.},
  doi       = {10.3390/e12061440},
  pdf       = {http://www.mdpi.com/1099-4300/12/6/1440/pdf},
  website   = {http://www.mdpi.com/1099-4300/12/6/1440/},
  year      = {2010}
}

Tim vor der Brück. 2010. Hypernymy Extraction Using a Semantic Network Representation. International Journal of Computational Linguistics and Applications, 1(1):105–119.

BibTeX

@article{vor:der:Brueck:2010,
  author    = {vor der Brück, Tim},
  title     = {Hypernymy Extraction Using a Semantic Network Representation},
  journal   = {International Journal of Computational Linguistics and Applications},
  volume    = {1},
  number    = {1},
  pages     = {105--119},
  abstract  = {There are several approaches to detect hypernymy relations from
               texts by text mining. Usually these approaches are based on supervised
               learning and in a first step are extracting several patterns.
               These patterns are then applied to previously unseen texts and
               used to recognize hypernym/hyponym pairs. Normally these approaches
               are only based on a surface representation or a syntactical tree
               structure, i.e., constituency or dependency trees derived by a
               syntactical parser. In this work, however, we present an approach
               that operates directly on a semantic network (SN), which is generated
               by a deep syntactico-semantic analysis. Hyponym/hypernym pairs
               are then extracted by the application of graph matching. This
               algorithm is combined with a shallow approach enriched with semantic
               information.},
  pdf       = {http://www.gelbukh.com/ijcla/2010-1-2/Hypernymy Extraction Using.pdf},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.358.533},
  year      = {2010}
}

Tim vor der Brück and Hermann Helbig. 2010. Retrieving Meronyms from Texts Using An Automated Theorem Prover. Journal for Language Technology and Computational Linguistics (JLCL), 25(1):57–81.

BibTeX

@article{vor:der:Brueck:Helbig:2010:b,
  author    = {vor der Brück, Tim and Helbig, Hermann},
  title     = {Retrieving Meronyms from Texts Using An Automated Theorem Prover},
  journal   = {Journal for Language Technology and Computational Linguistics (JLCL)},
  volume    = {25},
  number    = {1},
  pages     = {57--81},
  abstract  = {In this paper we present a truly semantic-oriented approach for
               meronymy relation extraction. It directly operates, instead of
               syntactic trees or surface representations, on semantic networks
               (SNs). These SNs are derived from texts (in our case, the German
               Wikipedia) by a deep linguistic syntactico-semantic analysis.
               The extraction of meronym/holonym pairs is carried out by using,
               among other components, an automated theorem prover, whose work
               is based on a set of logical axioms. The corresponding algorithm
               is combined with a shallow approach enriched with semantic information.
               Through the employment of logical methods, the recall and precision
               of the semantic patterns pertinent to the extracted relations
               can be increased considerably.},
  pdf       = {http://www.jlcl.org/2010_Heft1/tim_vorderbrueck.pdf},
  year      = {2010}
}

BibTeX

@article{Mehler:Waltinger:2009:b,
  author    = {Mehler, Alexander and Waltinger, Ulli},
  title     = {Enhancing Document Modeling by Means of Open Topic Models: Crossing
               the Frontier of Classification Schemes in Digital Libraries by
               Example of the {DDC}},
  journal   = {Library Hi Tech},
  volume    = {27},
  number    = {4},
  pages     = {520--539},
  abstract  = {Purpose: We present a topic classification model using the Dewey
               Decimal Classification (DDC) as the target scheme. This is done
               by exploring metadata as provided by the Open Archives Initiative
               (OAI) to derive document snippets as minimal document representations.
               The reason is to reduce the effort of document processing in digital
               libraries. Further, we perform feature selection and extension
               by means of social ontologies and related web-based lexical resources.
               This is done to provide reliable topic-related classifications
               while circumventing the problem of data sparseness. Finally, we
               evaluate our model by means of two language-specific corpora.
               This paper bridges digital libraries on the one hand and computational
               linguistics on the other. The aim is to make accessible computational
               linguistic methods to provide thematic classifications in digital
               libraries based on closed topic models as the DDC. Design/methodology/approach:
               text classification, text-technology, computational linguistics,
               computational semantics, social semantics. Findings: We show that
               SVM-based classifiers perform best by exploring certain selections
               of OAI document metadata. Research limitations/implications: The
               findings show that it is necessary to further develop SVM-based
               DDC-classifiers by using larger training sets possibly for more
               than two languages in order to get better F-measure values. Practical
               implications: We can show that DDC-classifications come into reach
               which primarily explore OAI metadata. Originality/value: We provide
               algorithmic and formal-mathematical information how to build DDC-classifiers
               for digital libraries.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_waltinger_2009_b.pdf},
  website   = {http://biecoll.ub.uni-bielefeld.de/frontdoor.php?source_opus=5001&la=de},
  year      = {2009}
}

Alexander Mehler. 2008. Structural Similarities of Complex Networks: A Computational Model by Example of Wiki Graphs. Applied Artificial Intelligence, 22(7&8):619–683.

BibTeX

@article{Mehler:2008:a,
  author    = {Mehler, Alexander},
  title     = {Structural Similarities of Complex Networks: A Computational Model
               by Example of Wiki Graphs},
  journal   = {Applied Artificial Intelligence},
  volume    = {22},
  number    = {7\&8},
  pages     = {619--683},
  abstract  = {This article elaborates a framework for representing and classifying
               large complex networks by example of wiki graphs. By means of
               this framework we reliably measure the similarity of document,
               agent, and word networks by solely regarding their topology. In
               doing so, the article departs from classical approaches to complex
               network theory which focuses on topological characteristics in
               order to check their small world property. This does not only
               include characteristics that have been studied in complex network
               theory, but also some of those which were invented in social network
               analysis and hypertext theory. We show that network classifications
               come into reach which go beyond the hypertext structures traditionally
               analyzed in web mining. The reason is that we focus on networks
               as a whole as units to be classified—above the level of websites
               and their constitutive pages. As a consequence, we bridge classical
               approaches to text and web mining on the one hand and complex
               network theory on the other hand. Last but not least, this approach
               also provides a framework for quantifying the linguistic notion
               of intertextuality.},
  doi       = {10.1080/08839510802164085},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2016/10/mehler_2008_Structural_Similarities_of_Complex_Networks.pdf},
  website   = {https://www.researchgate.net/publication/200772675_Structural_similarities_of_complex_networks_A_computational_model_by_example_of_wiki_graphs},
  year      = {2008}
}

Tim vor der Brück, Sven Hartrumpf and Hermann Helbig. 2008. A Readability Checker with Supervised Learning using Deep Indicators. Informatica, 32(4):429–435.

BibTeX

@article{vor:der:Brueck:Hartrumpf:Helbig:2008:b,
  author    = {vor der Brück, Tim and Hartrumpf, Sven and Helbig, Hermann},
  title     = {A Readability Checker with Supervised Learning using Deep Indicators},
  journal   = {Informatica},
  year      = {2008},
  volume    = {32},
  number    = {4},
  pages     = {429--435},
  website   = {http://connection.ebscohost.com/c/articles/36288796/readability-checker-supervised-learning-using-deep-indicators},
  abstract  = {Checking for readability or simplicity of texts is important
               for many institutional and individual users.
               Formulas for approximately measuring text readability have a
               long tradition.
               Usually, they exploit surface-oriented indicators like sentence
               length, word length, word frequency, etc.
               However, in many cases, this information is not adequate to
               realistically approximate the cognitive difficulties a person
               can have to understand a text.
               Therefore we use deep syntactic and semantic indicators in addition.
               The syntactic information is represented by a dependency tree,
               the semantic information by a semantic network.
               Both representations are automatically generated by a deep
               syntactico-semantic analysis.
               A global readability score is determined by applying a nearest
               neighbor algorithm on 3,000 ratings of 300 test persons.
               The evaluation showed that the deep syntactic and semantic
               indicators lead to promising results comparable to the best
               surface-based indicators.
               The combination of deep and shallow indicators leads to an
               improvement over shallow indicators alone.
               Finally, a graphical user interface was developed which highlights
               difficult passages, depending on the individual indicator values,
               and displays a global readability score.}
}

BibTeX

@article{Mehler:Gleim:Ernst:Waltinger:2008,
  author    = {Mehler, Alexander and Gleim, Rüdiger and Ernst, Alexandra and Waltinger, Ulli},
  title     = {{WikiDB}: Building Interoperable Wiki-Based Knowledge Resources
               for Semantic Databases},
  journal   = {Sprache und Datenverarbeitung. International Journal for Language Data Processing},
  volume    = {32},
  number    = {1},
  pages     = {47--70},
  abstract  = {This article describes an API for exploring the logical document
               and the logical network structure of wikis. It introduces an algorithm
               for the semantic preprocessing, filtering and typing of these
               building blocks. Further, this article models the process of wiki
               generation based on a unified format of syntactic, semantic and
               pragmatic representations. This three-level approach to make accessible
               syntactic, semantic and pragmatic aspects of wiki-based structure
               formation is complemented by a corresponding database model –
               called WikiDB – and an API operating thereon. Finally, the article
               provides an empirical study of using the three-fold representation
               format in conjunction with WikiDB.},
  pdf       = {http://www.ulliwaltinger.de/pdf/Konvens_2008_WikiDB_Building_Semantic_Databases_MehlerGleimErnstWaltinger.pdf},
  year      = {2008}
}

BibTeX

@article{Jussen:Mehler:Ernst:2007,
  author    = {Jussen, Bernhard and Mehler, Alexander and Ernst, Alexandra},
  title     = {A Corpus Management System for Historical Semantics},
  journal   = {Sprache und Datenverarbeitung. International Journal for Language Data Processing},
  volume    = {31},
  number    = {1--2},
  pages     = {81--89},
  abstract  = {Der Beitrag beschreibt ein Korpusmanagementsystem für die historische
               Semantik. Die Grundlage hierfür bildet ein Bedeutungsbegriff,
               der – methodologisch gesprochen – auf der Analyse diachroner Korpora
               beruht. Das Ziel der Analyse dieser Korpora besteht darin, Bedeutungswandel
               als eine Bezugsgrö{\ss}e für den Wandel sozialer Systeme zu untersuchen.
               Das vorgestellte Korpusmanagementsystem unterstützt diese Art
               der korpusbasierten historischen Semantik.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/jussen_mehler_ernst_2007.pdf},
  year      = {2007}
}

Tim vor der Brück and Stephan Busemann. 2007. Suggesting Error Corrections of Path Expressions and Categories for Tree-Mapping Grammars. Zeitschrift für Sprachwissenschaft, 26(2).

BibTeX

@article{vor:der:Brueck:Busemann:2007,
  author    = {vor der Brück, Tim and Busemann, Stephan},
  title     = {Suggesting Error Corrections of Path Expressions and Categories
               for Tree-Mapping Grammars},
  journal   = {Zeitschrift für Sprachwissenschaft},
  volume    = {26},
  number    = {2},
  abstract  = {Tree mapping grammars are used in natural language generation
               (NLG) to map non-linguistic input onto a derivation tree from
               which the target text can be trivially read off as the terminal
               yield. Such grammars may consist of a large number of rules. Finding
               errors is quite tedious and sometimes very time-consuming. Often
               the generation fails because the relevant input subtree is not
               specified correctly. This work describes a method to detect and
               correct wrong assignments of input subtrees to grammar categories
               by cross-validating grammar rules with the given input structures.
               The method also detects and corrects the usage of a category in
               a grammar rule. The result is implemented in a grammar development
               workbench and accelerates the grammar writer's work considerably.
               The paper suggests the algorithms can be ported to other areas
               in which tree mapping is required.},
  doi       = {10.1515/ZFS.2007.021},
  url       = {http://www.reference-global.com/doi/pdfplus/10.1515/ZFS.2007.021},
  year      = {2007}
}

Christiane Borr, Martina Hielscher-Fastabend and Andy Lücking. 2007. Reliability and Validity of Cervical Auscultation. Dysphagia, 22:225–234.

BibTeX

@article{Borr:Luecking:Hierlscher:2007,
  author    = {Borr, Christiane and Hielscher-Fastabend, Martina and Lücking, Andy},
  title     = {Reliability and Validity of Cervical Auscultation},
  journal   = {Dysphagia},
  volume    = {22},
  number    = {3},
  pages     = {225--234},
  abstract  = {We conducted a two-part study that contributes to the discussion
               about cervical auscultation (CA) as a scientifically justifiable
               and medically useful tool to identify patients with a high risk
               of aspiration/penetration. We sought to determine (1) acoustic
               features that mark a deglutition act as dysphagic; (2) acoustic
               changes in healthy older deglutition profiles compared with those
               of younger adults; (3) the correctness and concordance of rater
               judgments based on CA; and (4) if education in CA improves individual
               reliability. The first part of the study focused on a comparison
               of the swallow morphology of dysphagic as opposed to healthy subjects
               deglutition in terms of structure properties of the pharyngeal
               phase of deglutition. We obtained the following results. The duration
               of deglutition apnea is significantly higher in the older group
               than in the younger one. Comparing the younger group and the dysphagic
               group we found significant differences in duration of deglutition
               apnea, onset time, and number of gulps. Just one parameter, number
               of gulps, distinguishes significantly between the older and the
               dysphagic groups. The second part of the study aimed at evaluating
               the reliability of CA in detecting dysphagia measured as the concordance
               and the correctness of CA experts in classifying swallowing sounds.
               The interrater reliability coefficient AC1 resulted in a value
               of 0.46, which is to be interpreted as fair agreement. Furthermore,
               we found that comparison with radiologically defined aspiration/penetration
               for the group of experts (speech and language therapists) yielded
               70\% specificity and 94\% sensitivity. We conclude that the swallowing
               sounds contain audible cues that should, in principle, permit
               reliable classification and view CA as an early warning system
               for identifying patients with a high risk of aspiration/penetration;
               however, it is not appropriate as a stand-alone tool.},
  doi       = {10.1007/s00455-007-9078-3},
  pdf       = {http://www.shkim.eu/cborr/ca5manuscript.pdf},
  publisher = {Springer New York},
  url       = {http://dx.doi.org/10.1007/s00455-007-9078-3},
  website   = {http://www.springerlink.com/content/c45578u74r38m4v7/},
  year      = {2007}
}

BibTeX

@article{Mehler:Geibel:Pustylnikov:2007,
  author    = {Mehler, Alexander and Geibel, Peter and Abramov, Olga},
  title     = {Structural Classifiers of Text Types: Towards a Novel Model of
               Text Representation},
  journal   = {Journal for Language Technology and Computational Linguistics (JLCL)},
  volume    = {22},
  number    = {2},
  pages     = {51--66},
  abstract  = {Texts can be distinguished in terms of their content, function,
               structure or layout (Brinker, 1992; Bateman et al., 2001; Joachims,
               2002; Power et al., 2003). These reference points do not open
               necessarily orthogonal perspectives on text classification. As
               part of explorative data analysis, text classification aims at
               automatically dividing sets of textual objects into classes of
               maximum internal homogeneity and external heterogeneity. This
               paper deals with classifying texts into text types whose instances
               serve more or less homogeneous functions. Other than mainstream
               approaches, which rely on the vector space model (Sebastiani,
               2002) or some of its descendants (Baeza-Yates and Ribeiro-Neto,
               1999) and, thus, on content-related lexical features, we solely
               refer to structural differentiae. That is, we explore patterns
               of text structure as determinants of class membership. Our starting
               point are tree-like text representations which induce feature
               vectors and tree kernels. These kernels are utilized in supervised
               learning based on cross-validation as a method of model selection
               (Hastie et al., 2001) by example of a corpus of press communication.
               For a subset of categories we show that classification can be
               performed very well by structural differentia only.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_geibel_pustylnikov_2007.pdf},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.154.604},
  year      = {2007}
}

Matthias Dehmer and Alexander Mehler. 2007. A New Method of Measuring the Similarity for a Special Class of Directed Graphs. Tatra Mountains Mathematical Publications, 36:39–59.

BibTeX

@article{Dehmer:Mehler:2007:a,
  author    = {Dehmer, Matthias and Mehler, Alexander},
  title     = {A New Method of Measuring the Similarity for a Special Class of Directed Graphs},
  journal   = {Tatra Mountains Mathematical Publications},
  volume    = {36},
  pages     = {39--59},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/dehmer_mehler_2004_a.pdf},
  website   = {https://www.researchgate.net/publication/228905939_A_new_method_of_measuring_similarity_for_a_special_class_of_directed_graphs},
  year      = {2007}
}

BibTeX

@article{Dehmer:Emmert:Streib:Mehler:Kilian:2006,
  author    = {Dehmer, Matthias and Emmert-Streib, Frank and Mehler, Alexander
               and Kilian, Jürgen},
  title     = {Measuring the Structural Similarity of Web-based Documents: A Novel Approach},
  journal   = {International Journal of Computational Intelligence},
  volume    = {3},
  number    = {1},
  pages     = {1--7},
  abstract  = {Most known methods for measuring the structural similarity of
               document structures are based on, e.g., tag measures, path metrics
               and tree measures in terms of their DOM-Trees. Other methods measures
               the similarity in the framework of the well known vector space
               model. In contrast to these we present a new approach to measuring
               the structural similarity of web-based documents represented by
               so called generalized trees which are more general than DOM-Trees
               which represent only directed rooted trees. We will design a new
               similarity measure for graphs representing web-based hypertext
               structures. Our similarity measure is mainly based on a novel
               representation of a graph as strings of linear integers, whose
               components represent structural properties of the graph. The similarity
               of two graphs is then defined as the optimal alignment of the
               underlying property strings. In this paper we apply the well known
               technique of sequence alignments to solve a novel and challenging
               problem: Measuring the structural similarity of generalized trees.
               More precisely, we first transform our graphs considered as high
               dimensional objects in linear structures. Then we derive similarity
               values from the alignments of the property strings in order to
               measure the structural similarity of generalized trees. Hence,
               we transform a graph similarity problem to a string similarity
               problem. We demonstrate that our similarity measure captures important
               structural information by applying it to two different test sets
               consisting of graphs representing web-based documents.},
  pdf       = {http://waset.org/publications/15928/measuring-the-structural-similarity-of-web-based-documents-a-novel-approach},
  website   = {http://connection.ebscohost.com/c/articles/24839145/measuring-structural-similarity-web-based-documents-novel-approach},
  year      = {2006}
}

Alexander Mehler and Christian Wolff. 2005. Einleitung: Perspektiven und Positionen des Text Mining. Journal for Language Technology and Computational Linguistics (JLCL), 20(1):1–18.

BibTeX

@article{Mehler:Wolff:2005:b,
  author    = {Mehler, Alexander and Wolff, Christian},
  title     = {Einleitung: Perspektiven und Positionen des Text Mining},
  journal   = {Journal for Language Technology and Computational
               Linguistics (JLCL)},
  volume    = {20},
  number    = {1},
  pages     = {1--18},
  abstract  = {Beitr{\"a}ge zum Thema Text Mining beginnen vielfach mit dem Hinweis
               auf die enorme Zunahme online verfügbarer Dokumente, ob nun im
               Internet oder in Intranets (Losiewicz et al. 2000; Merkl 2000;
               Feldman 2001; Mehler 2001; Joachims \& Leopold 2002). Der hiermit
               einhergehenden „Informationsflut“ wird das Ungenügen des Information
               Retrieval (IR) bzw. seiner g{\"a}ngigen Verfahren der Informationsaufbereitung
               und Informationserschlie{\ss}ung gegenübergestellt. Es wird bem{\"a}ngelt,
               dass sich das IR weitgehend darin erschöpft, Teilmengen von Textkollektionen
               auf Suchanfragen hin aufzufinden und in der Regel blo{\ss} listenförmig
               anzuordnen. Das auf diese Weise dargestellte Spannungsverh{\"a}ltnis
               von Informationsexplosion und Defiziten bestehender IR-Verfahren
               bildet den Hintergrund für die Entwicklung von Verfahren zur automatischen
               Verarbeitung textueller Einheiten, die sich st{\"a}rker an den
               Anforderungen von Informationssuchenden orientieren. Anders ausgedrückt:
               Mit der Einführung der Neuen Medien w{\"a}chst die Bedeutung digitalisierter
               Dokumente als Prim{\"a}rmedium für die Verarbeitung, Verbreitung
               und Verwaltung von Information in öffentlichen und betrieblichen
               Organisationen. Dabei steht wegen der Menge zu verarbeitender
               Einheiten die Alternative einer intellektuellen Dokumenterschlie{\ss}ung
               nicht zur Verfügung. Andererseits wachsen die Anforderung an eine
               automatische Textanalyse, der das klassische IR nicht gerecht
               wird. Der Mehrzahl der hiervon betroffenen textuellen Einheiten
               fehlt die explizite Strukturiertheit formaler Datenstrukturen.
               Vielmehr weisen sie je nach Text- bzw. Dokumenttyp ganz unterschiedliche
               Strukturierungsgrade auf. Dabei korreliert die Flexibilit{\"a}t
               der Organisationsziele negativ mit dem Grad an explizierter Strukturiertheit
               und positiv mit der Anzahl jener Texte und Texttypen (E-Mails,
               Memos, Expertisen, technische Dokumentationen etc.), die im Zuge
               ihrer Realisierung produziert bzw. rezipiert werden. Vor diesem
               Hintergrund entsteht ein Bedarf an Texttechnologien, die ihren
               Benutzern nicht nur „intelligente“ Schnittstellen zur Textrezeption
               anbieten, sondern zugleich auf inhaltsorientierte Textanalysen
               zielen, um auf diese Weise aufgabenrelevante Daten explorieren
               und kontextsensitiv aufbereiten zu helfen. Das Text Mining ist
               mit dem Versprechen verbunden, eine solche Technologie darzustellen
               bzw. sich als solche zu entwickeln. Dieser einheitlichen Problembeschreibung
               stehen konkurrierende Textmining-Spezifikationen gegenüber, was
               bereits die Vielfalt der Namensgebungen verdeutlicht. So finden
               sich neben der Bezeichnung Text Mining (Joachims \& Leopold 2002;
               Tan 1999) die Alternativen • Text Data Mining (Hearst 1999b; Merkl
               2000), • Textual Data Mining (Losiewicz et al. 2000), • Text Knowledge
               Engineering (Hahn \& Schnattinger 1998), Knowledge Discovery in
               Texts (Kodratoff 1999) oder Knowledge Discovery in Textual Databases
               (Feldman \& Dagan 1995). Dabei l{\"a}sst bereits die Namensgebung
               erkennen, dass es sich um Analogiebildungen zu dem (nur unwesentlich
               {\"a}lteren) Forschungsgebiet des Data Mining (DM; als Bestandteil
               des Knowledge Discovery in Databases – KDD) handelt. Diese Namensvielfalt
               findet ihre Entsprechung in widerstreitenden Aufgabenzuweisungen.
               So setzt beispielsweise Sebastiani (2002) Informationsextraktion
               und Text Mining weitgehend gleich, wobei er eine Schnittmenge
               zwischen Text Mining und Textkategorisierung ausmacht (siehe auch
               Dörre et al. 1999). Demgegenüber betrachten Kosala \& Blockeel
               (2000) Informationsextraktion und Textkategorisierung lediglich
               als Teilbereiche des ihrer Ansicht nach umfassenderen Text Mining,
               w{\"a}hrend Hearst (1999a) im Gegensatz hierzu Informationsextraktion
               und Textkategorisierung explizit aus dem Bereich des explorativen
               Text Mining ausschlie{\ss}t.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_wolff_2005_b.pdf},
  website   = {http://epub.uni-regensburg.de/6844/},
  year      = {2005}
}

Alexander Mehler. 2005. Zur textlinguistischen Fundierung der Text- und Korpuskonversion. Sprache und Datenverarbeitung. International Journal for Language Data Processing, 1:29–53.

BibTeX

@article{Mehler:2005:a,
  author    = {Mehler, Alexander},
  title     = {Zur textlinguistischen Fundierung der Text- und Korpuskonversion},
  journal   = {Sprache und Datenverarbeitung. International Journal
               for Language Data Processing},
  volume    = {1},
  pages     = {29--53},
  abstract  = {Die automatische Konversion von Texten in Hypertexte ist mit der
               Erwartung verbunden, computerbasierte Rezeptionshilfen zu gewinnen.
               Dies betrifft insbesondere die Bew{\"a}ltigung der ungeheuren
               Menge an Fachliteratur im Rahmen der Wissenschaftskommunikation.
               Von einem thematisch relevanten Text zu einem thematisch verwandten
               Text per Hyperlink direkt gelangen zu können, stellt einen Anspruch
               dar, dessen Erfüllung mittels digitaler Bibliotheken n{\"a}her
               gerückt zu sein scheint. Doch wie lassen sich die Kriterien, nach
               denen Texte automatisch verlinkt werden, genauer begründen? Dieser
               Beitrag geht dieser Frage aus der Sicht textlinguistischer Modellbildungen
               nach. Er zeigt, dass parallel zur Entwicklung der Textlinguistik,
               wenn auch mit einer gewissen Verzögerung, Konversionsans{\"a}tze
               entwickelt wurden, die sich jeweils an einer bestimmten Stufe
               des Textbegriffs orientieren. Der Beitrag weist nicht nur das
               diesen Ans{\"a}tzen gemeinsame Fundament in Form der so genannten
               Explikationshypothese nach, sondern verweist zugleich auf grundlegende
               Automatisierungsdefizite, die mit ihnen verbunden sind. Mit systemisch-funktionalen
               Hypertexten wird schlie{\ss}lich ein Ansatz skizziert, der darauf
               zielt, den Anspruch nach textlinguistischer Fundierung und Automatisierbarkeit
               zu vereinen.},
  publisher = {GSCL},
  year      = {2005}
}

Alexander Mehler. 2004. Automatische Synthese Internet-basierter Links für digitale Bibliotheken. Osnabrücker Beiträge zur Sprachtheorie. Themenheft Internetbasierte Kommunikation, 68:31–53.

BibTeX

@article{Mehler:2004:b,
  author    = {Mehler, Alexander},
  title     = {Automatische Synthese Internet-basierter Links für digitale Bibliotheken},
  journal   = {Osnabrücker Beitr{\"a}ge zur Sprachtheorie.
               Themenheft Internetbasierte Kommunikation},
  volume    = {68},
  pages     = {31--53},
  abstract  = {Dieser Beitrag behandelt Verfahren zur automatischen Erzeugung
               von Hyperlinks, wie sie im WWW für die Informationssuche bereitstehen.
               Dabei steht die Frage im Vordergrund, auf welche Weise bestehende
               Verfahren suchrelevante Dokumente bestimmen und von diesen aus
               inhaltsverwandte Dokumente verlinken. Dieser Gegenstand verbindet
               den Bereich des klassischen Information Retrievals (IR) mit einem
               Anwendungsgebiet, das in der Wissenschaftskommunikation unter
               dem Stichwort der digitalen Bibliothek unter Nutzbarmachung des
               Hyperlink-basierten Browsings firmiert. Ein Beispiel hierfür bildet
               die digitale Bibliothek CiteSeer (Lawrence et al. 1999), welche
               das Boolesche Retrieval dadurch erweitert, dass ausgehend von
               Treffern einer Suche jene Dokumente per Link angesteuert werden
               können, welche die aufgefundenen Dokumente zitieren oder von diesen
               zitiert werden. CiteSeer ist also ein System, welches das Schlagwort-basierte
               Querying im Rahmen des klassischen IRs mit dem Hypertext-basierten
               Browsing von Zitaten verknüpft, und zwar zu dem Zweck, die Suche
               wissenschaftlicher Dokumente zu erleichtern. Darüber hinaus verwendet
               es die unter dem Stichwort des Vektorraummodells bekannt gewordene
               Technologie für den wortbasierten Vergleich von Texten. Der Beitrag
               setzt an dieser Stelle an. Er argumentiert, dass Verfahren bereitstehen,
               welche die Anforderung nach inhaltsorientiertem Retrieval mit
               dem inhaltsorientierten Browsing verbinden, mit der Forderung
               also, dass Hyperlinks, die E-Texte als digitalisierte Versionen
               von (wissenschaftlichen) Dokumenten verknüpfen (Storrer 2002),
               Inhalts- und nicht nur Zitat-basiert sind.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_2004_b.pdf},
  year      = {2004}
}

Alexander Mehler. 2003. Methodological Aspects of Computational Semiotics. SEED Journal, 3(3):71–80.

BibTeX

@article{Mehler:2003:b,
  author    = {Mehler, Alexander},
  title     = {Methodological Aspects of Computational Semiotics},
  journal   = {SEED Journal},
  volume    = {3},
  number    = {3},
  pages     = {71--80},
  abstract  = {In the following, elementary constituents of models in computational
               semiotics are outlined. This is done by referring to computer
               simulations as a framework which neither aims to describe artificial
               sign systems (as done in computer semiotics), nor to realize semiotic
               functions in “artificial worlds” (as proposed in “artificial semiosis”).
               Rather, the framework referred to focuses on preconditions of
               computer-based simulations of semiotic processes. Following this
               approach, the paper focuses on methodological aspects of computational
               semiotics.},
  year      = {2003}
}

Alexander Mehler. 2003. Ein Kompositionalitätsprinzip für numerische Textsemantiken. Journal for Language Technology and Computational Linguistics (JLCL), 18(1-2):321–337.

BibTeX

@article{Mehler:2003:c,
  author    = {Mehler, Alexander},
  title     = {Ein Kompositionalit{\"a}tsprinzip für numerische Textsemantiken},
  journal   = {Journal for Language Technology and Computational
               Linguistics (JLCL)},
  volume    = {18},
  number    = {1-2},
  pages     = {321--337},
  abstract  = {Der Beitrag beschreibt eine Variante des Kompositionalit{\"a}tsprinzips
               der Bedeutung als Grundprinzip für die numerische Analyse unsystematischer
               Sinnrelationen komplexer Zeichen, das über das Ph{\"a}nomen der
               perspektivischen Interpretation hinaus gebrauchssemantische Bedeutungsaspekte
               berücksichtigt. Ziel ist es, ein theoretisches Fundament für korpusanalytische
               Ans{\"a}tze in der Semantik, die oftmals die linguistische Interpretierbarkeit
               ihrer Analyseergebnisse vermissen lassen, zu umrei{\ss}en. Die
               Spezifikation des Kompositionalit{\"a}tsprinzips erfolgt unter
               Rekurs auf das Modell eines hierarchisch geordneten Constraint-Satisfaction-Prozesses.
               Hiermit ist das l{\"a}ngerfristige Ziel verbunden, das Problem
               einer defizit{\"a}ren numerischen Textrepr{\"a}sentation sowie
               die mangelnde Integration von propositionaler und strukturaler
               bzw. korpusanalytischer Semantik anzugehen. Die Erörterungen dieses
               Beitrags sind prim{\"a}r konzeptioneller Natur; sie betreffen
               die Konzeption einer numerischen Textsemantik zur Vermeidung von
               Defiziten bestehender Ans{\"a}tze.},
  pdf       = {http://media.dwds.de/jlcl/2003_Doppelheft/321-337_Mehler.pdf},
  year      = {2003}
}

Alexander Mehler. 2002. Components of a Model of Context-Sensitive Hypertexts. Journal of Universal Computer Science (J.UCS), 8(10):924–943.

BibTeX

@article{Mehler:2002:l,
  author    = {Mehler, Alexander},
  title     = {Components of a Model of Context-Sensitive Hypertexts},
  journal   = {Journal of Universal Computer Science (J.UCS)},
  volume    = {8},
  number    = {10},
  pages     = {924--943},
  abstract  = {On the background of rising Intranet applications the automatic
               generation of adaptable, context-sensitive hypertexts becomes
               more and more important [El-Beltagy et al., 2001]. This observation
               contradicts the literature on hypertext authoring, where Information
               Retrieval techniques prevail, which disregard any linguistic and
               context-theoretical underpinning. As a consequence, resulting
               hypertexts do not manifest those schematic structures, which are
               constitutive for the emergence of text types and the context-mediated
               understanding of their instances, i.e. natural language texts.
               This paper utilizes Systemic Functional Linguistics (SFL) and
               its context model as a theoretical basis of hypertext authoring.
               So called Systemic Functional Hypertexts (SFHT) are proposed,
               which refer to a stratified context layer as the proper source
               of text linkage. The purpose of this paper is twofold: First,
               hypertexts are reconstructed from a linguistic point of view as
               a kind of supersign, whose constituents are natural language texts
               and whose structuring is due to intra- and intertextual coherence
               relations and their context-sensitive interpretation. Second,
               the paper prepares a formal notion of SFHTs as a first step towards
               operationalization of fundamental text linguistic concepts. On
               this background, SFHTs serve to overcome the theoretical poverty
               of many approaches to link generation.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_components_2002.pdf},
  website   = {http://www.jucs.org/jucs_8_10/components_of_a_model},
  year      = {2002}
}

Alexander Mehler. 2002. Hierarchical Analysis of Text Similarity Data. Künstliche Intelligenz (KI), 2:12–16.

BibTeX

@article{Mehler:2002:a,
  author    = {Mehler, Alexander},
  title     = {Hierarchical Analysis of Text Similarity Data},
  journal   = {Künstliche Intelligenz (KI)},
  volume    = {2},
  pages     = {12--16},
  abstract  = {Semantic spaces are used as a representational format for modeling
               similarities of signs. As a multidimensional data structure they
               are bound to the question of how to explore similarity relations
               of signs mapped onto them. This paper introduces an abstract data
               structure called dependency scheme as a formal format which encapsulates
               two types of order relations, whose variable instantiation allows
               to derive different classes of trees for the hierarchical analysis
               of text similarity data derived from semantic spaces.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_2002_a.pdf},
  year      = {2002}
}

Alexander Mehler. 2001. Aspects of Text Mining. From Computational Semiotics to Systemic Functional Hypertexts. Australasian Journal of Information Systems (AJIS), 8(2):129–141.

BibTeX

@article{Mehler:2001:b,
  author    = {Mehler, Alexander},
  title     = {Aspects of Text Mining. From Computational Semiotics to Systemic
               Functional Hypertexts},
  journal   = {Australasian Journal of Information Systems (AJIS)},
  volume    = {8},
  number    = {2},
  pages     = {129--141},
  abstract  = {The significance of natural language texts as the prime information
               structure for the management and dissemination of knowledge in
               organisations is still increasing. Making relevant documents available
               depending on varying tasks in different contexts is of primary
               importance for any efficient task completion. Implementing this
               demand requires the content based processing of texts, which enables
               to reconstruct or, if necessary, to explore the relationship of
               task, context and document. Text mining is a technology that is
               suitable for solving problems of this kind. In the following,
               semiotic aspects of text mining are investigated. Based on the
               primary object of text mining - natural language lexis - the specific
               complexity of this class of signs is outlined and requirements
               for the implementation of text mining procedures are derived.
               This is done with reference to text linkage introduced as a special
               task in text mining. Text linkage refers to the exploration of
               implicit, content based relations of texts (and their annotation
               as typed links in corpora possibly organised as hypertexts). In
               this context, the term systemic functional hypertext is introduced,
               which distinguishes genre and register layers for the management
               of links in a poly-level hypertext system.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Mehler_AJIS-2001.pdf},
  website   = {http://journal.acs.org.au/index.php/ajis/article/view/249/220},
  year      = {2001}
}

Alexander Mehler. 1996. A Multiresolutional Approach to Fuzzy Text Meaning. Journal of Quantitative Linguistics, 3(2):113–127.

BibTeX

@article{Mehler:1996:b,
  author    = {Mehler, Alexander},
  title     = {A Multiresolutional Approach to Fuzzy Text Meaning},
  journal   = {Journal of Quantitative Linguistics},
  volume    = {3},
  number    = {2},
  pages     = {113--127},
  year      = {1996}
}

Bachelor's Theses

Patrick Schrottenbacher. 2024. BA Thesis: Identifying toxic behaviour in online games. Goethe University.

BibTeX

@bathesis{schrottenbacher:2024,
  author    = {Patrick Schrottenbacher},
  title     = {Identifying toxic behaviour in online games},
  institution = {Goethe University},
  pages     = {35},
  year      = {2024},
  url       = {https://publikationen.ub.uni-frankfurt.de/files/81676/Toxic_video_game_classification.pdf},
  repository = {https://github.com/TheBv/toxic-video-games-gnn}
}

Kevin Bönisch. 2023. BA Thesis: Dialog generation using language models. Goethe University.

BibTeX

@bathesis{boenisch:2023,
  author      = {B{\"o}nisch, Kevin},
  title       = {Dialog generation using language models},
  year        = {2023},
  institution = {Goethe University},
  pages       = {28},
  repository  = {https://github.com/texttechnologylab/ROBERT},
  url         = {https://publikationen.ub.uni-frankfurt.de/opus4/frontdoor/index/index/docId/79165}
}

Daniel Baumartz. June, 2020. BA Thesis: Automatic Topic Modeling in the Context of Digital Libraries: Mehrsprachige Korpus-basierte Erweiterung von text2ddc - eine experimentelle Studie.

BibTeX

@bathesis{Baumartz:2020,
  author    = {Baumartz, Daniel},
  title     = {{Automatic Topic Modeling in the Context of Digital Libraries:
               Mehrsprachige Korpus-basierte Erweiterung von text2ddc - eine
               experimentelle Studie}},
  year      = {2020},
  month     = {jun},
  school    = {Johann Wolfgang Goethe-Universität, Institute of Computer
               Science and Mathematics, Text Technology Lab},
  address   = {Frankfurt, Germany},
  url       = {https://publikationen.ub.uni-frankfurt.de/frontdoor/index/index/docId/56381},
  pdf       = {https://publikationen.ub.uni-frankfurt.de/files/56381/baumartz_bachelorarbeit_2020_pub.pdf}
}

Books

BibTeX

@book{Zlatkin-Troitschanskaia:et:al:2024,
  title     = {Students’, Graduates’ and Young Professionals’ Critical Use of
               Online Information: Digital Performance Assessment and Training
               within and across Domains},
  editor    = {Zlatkin-Troitschanskaia, Olga and Nagel, Marie-Theres and Klose, Verena
               and Mehler, Alexander},
  isbn      = {9783031695100},
  url       = {https://doi.org/10.1007/978-3-031-69510-0},
  doi       = {10.1007/978-3-031-69510-0},
  publisher = {Springer},
  address   = {Cham},
  year      = {2024},
  abstract  = {This book addresses the topic of online information for everyday
               personal and professional use by students, graduates, and young
               professionals. It focuses on the development of the job-related
               use of online information by young professionals in their practical
               phases of education (traineeship/practical year) in the domains
               of law, teaching, and medicine. The research conducted in this
               context investigates the general and domain-specific use of online
               resources in educational contexts and examines the effectiveness
               of an innovative digital training approach in enhancing skills
               required for the competent use of online information. For this
               purpose, the presented research uses a yet unprecedented approach
               of data triangulation, in which self-rated data, digitally and
               in vivo assessed response process data and expert ratings are
               integrated into a theoretically founded assessment framework and
               are examined from various interdisciplinary perspectives with
               different analysis methods. Overall, this work addresses key research
               questions related to the use of online information in practical
               tasks as well as to the impact of digital training. It provides
               in-depth multidisciplinary analyses of multimodal processes and
               performance data, allowing implications equally relevant for practitioners,
               policymakers, and researchers in the field of education.}
}

BibTeX

@book{Biemann:Mehler:2015,
  editor    = {Biemann, Chris and Mehler, Alexander},
  title     = {Text Mining: From Ontology Learning to Automated Text Processing
               Applications. {Festschrift} in Honor of {Gerhard Heyer}},
  publisher = {Springer},
  series    = {Theory and Applications of Natural Language Processing},
  address   = {Heidelberg},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/TextMiningsmall.jpg},
  year      = {2015}
}

BibTeX

@book{Mehler:Luecking:Banisch:Blanchard:Frank-Job:2015,
  editor    = {Mehler, Alexander and Lücking, Andy and Banisch, Sven and Blanchard, Philippe
               and Frank-Job, Barbara},
  title     = {Towards a Theoretical Framework for Analyzing Complex Linguistic Networks},
  publisher = {Springer},
  series    = {Understanding Complex Systems},
  address   = {Berlin and New York},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/UCS_17-2-tmp.png},
  isbn      = {978-3-662-47237-8},
  year      = {2015}
}

BibTeX

@book{Schneider:Storrer:Mehler:2013,
  editor    = {Schneider, Roman and Storrer, Angelika and Mehler, Alexander},
  title     = {Webkorpora in Computerlinguistik und Sprachforschung},
  publisher = {JLCL},
  volume    = {28},
  number    = {2},
  series    = {Journal for Language Technology and Computational
               Linguistics (JLCL)},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/Webkorpora-300-20.png},
  issn      = {2190-6858},
  pagetotal = {107},
  pdf       = {http://www.jlcl.org/2013_Heft2/H2013-2.pdf},
  year      = {2013}
}

BibTeX

@book{FrankJob:Mehler:Sutter:2013,
  editor    = {Frank-Job, Barbara and Mehler, Alexander and Sutter, Tilmann},
  title     = {Die Dynamik sozialer und sprachlicher Netzwerke: Konzepte, Methoden
               und empirische Untersuchungen an Beispielen des WWW},
  year      = {2013},
  publisher = {Springer VS},
  address   = {Wiesbaden},
  pagetotal = {240},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/DieDynamikSozialerUndSprachlicherNetzwerke.jpg},
  abstract  = {In diesem Band pr{\"a}sentieren Medien- und Informationswissenschaftler,
               Netzwerkforscher aus Informatik, Texttechnologie und Physik, Soziologen
               und Linguisten interdisziplin{\"a}r Aspekte der Erforschung komplexer
               Mehrebenen-Netzwerke. Im Zentrum ihres Interesses stehen Untersuchungen
               zum Zusammenhang zwischen sozialen und sprachlichen Netzwerken
               und ihrer Dynamiken, aufgezeigt an empirischen Beispielen aus
               dem Bereich des Web 2.0, aber auch an historischen Dokumentenkorpora
               sowie an Rezeptions-Netzwerken aus Kunst- und Literaturwissenschaft.}
}

Andy Lücking. 2013. Ikonische Gesten. Grundzüge einer linguistischen Theorie. De Gruyter. Zugl. Diss. Univ. Bielefeld (2011).

BibTeX

@book{Luecking:2013,
  author    = {Lücking, Andy},
  title     = {Ikonische Gesten. Grundzüge einer linguistischen Theorie},
  year      = {2013},
  publisher = {De Gruyter},
  address   = {Berlin and Boston},
  note      = {Zugl. Diss. Univ. Bielefeld (2011)},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/ikonischeGesten.jpg},
  abstract  = {Nicht-verbale Zeichen, insbesondere sprachbegleitende Gesten,
               spielen eine herausragende Rolle in der menschlichen Kommunikation.
               Um eine Analyse von Gestik innerhalb derjenigen Disziplinen, die
               sich mit der Erforschung und Modellierung von Dialogen besch{\"a}ftigen,
               zu ermöglichen, bedarf es einer entsprechenden linguistischen
               Rahmentheorie. „Ikonische Gesten“ bietet einen ersten zeichen-
               und wahrnehmungstheoretisch motivierten Rahmen an, in dem eine
               grammatische Analyse der Integration von Sprache und Gestik möglich
               ist. Ausgehend von einem Abriss semiotischer Zug{\"a}nge zu ikonischen
               Zeichen wird der vorherrschende {\"A}hnlichkeitsansatz unter Rückgriff
               auf Wahrnehmungstheorien zugunsten eines Exemplifikationsansatzes
               verworfen. Exemplifikation wird im Rahmen einer unifikationsbasierten
               Grammatik umgesetzt. Dort werden u.a. multimodale Wohlgeformtheit,
               Synchronie und multimodale Subkategorisierung als neue Gegenst{\"a}nde
               linguistischer Forschung eingeführt und im Rahmen einer integrativen
               Analyse von Sprache und Gestik modelliert.}
}

Alexander Mehler and Laurent Romary. 2012. Handbook of Technical Communication. De Gruyter Mouton.

BibTeX

@book{Mehler:Romary:2012,
  editor    = {Mehler, Alexander and Romary, Laurent},
  title     = {Handbook of Technical Communication},
  publisher = {De Gruyter Mouton},
  address   = {Berlin},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/HandbookTechnicalCommunication.jpg},
  pagetotal = {839},
  year      = {2012}
}

BibTeX

@book{Hoenen:Jügel:2012,
  editor    = {Hoenen, Armin and Jügel, Thomas},
  title     = {Altüberlieferte Sprachen als Gegenstand der Texttechnologie --
               Ancient Languages as the Object of Text Technology},
  publisher = {JLCL},
  volume    = {27},
  number    = {2},
  abstract  = {‘Avestan’ is the name of the ritual language of Zoroastrianism,
               which was the state religion of the Iranian empire in Achaemenid,
               Arsacid and Sasanid times, covering a time span of more than 1200
               years. [1] It is named after the ‘Avesta’, i.e., the collection
               of holy scriptures that form the basis of the religion which was
               allegedly founded by Zarathushtra, also known as Zoroaster, by
               about the beginning of the first millennium B.C. Together with
               Vedic Sanskrit, Avestan represents one of the most archaic witnesses
               of the Indo-Iranian branch of the Indo-European languages, which
               makes it especially interesting for historical-comparative linguistics.
               This is why the texts of the Avesta were among the first objects
               of electronic corpus building that were undertaken in the framework
               of Indo-European studies, leading to the establishment of the
               TITUS database (‘Thesaurus indogermanischer Text- und Sprachmaterialien’).
               [2] Today, the complete Avestan corpus is available, together
               with elaborate search functions [3] and an extended version of
               the subcorpus of the so-called ‘Yasna’, which covers a great deal
               of the attestation of variant readings. [4] Right from the beginning
               of their computational work concerning the Avesta, the compilers
               [5] had to cope with the fact that the texts contained in it have
               been transmitted in a special script written from right to left,
               which was also used for printing them in the scholarly editions
               used until today. [6] It goes without saying that there was no
               way in the middle of the 1980s to encode the Avestan scriptures
               exactly as they are found in the manuscripts. Instead, we had
               to rely upon transcriptional devices that were dictated by the
               restrictions of character encoding as provided by the computer
               systems used. As the problems we had to face in this respect and
               the solutions we could apply are typical for the development of
               computational work on ancient languages, it seems worthwhile to
               sketch them out here.},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/AltueberlieferteSprachen-300-20.png},
  issn      = {2190-6858},
  pdf       = {http://www.jlcl.org/2012_Heft2/H2012-2.pdf},
  year      = {2012}
}

Tim vor der Brück. 2012. Wissensakquisition mithilfe maschineller Lernverfahren auf tiefen semantischen Repräsentationen. Springer.

BibTeX

@book{vor:der:Brueck:2012:a,
  author    = {vor der Br{\"u}ck, Tim},
  title     = {Wissensakquisition mithilfe maschineller Lernverfahren auf tiefen
               semantischen Repr{\"a}sentationen},
  publisher = {Springer},
  address   = {Heidelberg, Germany},
  abstract  = {Eine gro{\ss}e Wissensbasis ist eine Voraussetzung f{\"u}r eine Vielzahl
               von Anwendungen im Bereich der automatischen Sprachverarbeitung,
               wie Frage-Antwort- oder Information-Retrieval-Systeme. Ein Mensch
               hat sich das erforderliche Wissen, um Informationen zu suchen
               oder Fragen zu beantworten, im Laufe seines Lebens angeeignet.
               Einem Computer muss dieses Wissen explizit mitgeteilt werden.
               Tim vor der Br{\"u}ck beschreibt einen Ansatz, wie ein Computer dieses
               Wissen {\"a}hnlich wie ein Mensch durch die Lekt{\"u}re von Texten
               erwerben kann. Dabei kommen Methoden der Logik und des maschinellen
               Lernens zum Einsatz.},
  school    = {FernUniversit{\"a}t in Hagen},
  year      = {2012}
}

Matthias Dehmer, Frank Emmert-Streib and Alexander Mehler, eds. 2011. Towards an Information Theory of Complex Networks: Statistical Methods and Applications. Birkhäuser.

BibTeX

@book{Dehmer:EmmertStreib:Mehler:2009:a,
  editor    = {Dehmer, Matthias and Emmert-Streib, Frank and Mehler, Alexander},
  title     = {Towards an Information Theory of Complex Networks:
               Statistical Methods and Applications},
  year      = {2011},
  publisher = {Birkh{\"a}user},
  address   = {Boston/Basel},
  pagetotal = {395},
  website   = {http://link.springer.com/book/10.1007/978-0-8176-4904-3/page/1},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/InformationTheoryComplexNetworks.jpg}
}

BibTeX

@book{Mehler:Kuehnberger:Lobin:Luengen:Storrer:Witt:2011,
  editor    = {Mehler, Alexander and K{\"u}hnberger, Kai-Uwe and Lobin, Henning
               and L{\"u}ngen, Harald and Storrer, Angelika and Witt, Andreas},
  title     = {Modeling, Learning and Processing of Text Technological Data Structures},
  publisher = {Springer},
  series    = {Studies in Computational Intelligence},
  address   = {Berlin/New York},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/modelingLearningAndProcessing-medium.jpg},
  pagetotal = {400},
  website   = {/books/texttechnologybook/},
  year      = {2011}
}

Ulli Waltinger. 2011. On Social Semantics in Information Retrieval. Südwestdeutscher Verlag für Hochschulschriften. Zugl. Diss. Univ. Bielefeld (2010).

BibTeX

@book{Waltinger:2011,
  author    = {Waltinger, Ulli},
  title     = {On Social Semantics in Information Retrieval},
  publisher = {S{\"u}dwestdeutscher Verlag f{\"u}r Hochschulschriften},
  address   = {Saarbr{\"u}cken},
  note      = {Zugl. Diss. Univ. Bielefeld (2010)},
  abstract  = {In this thesis we analyze the performance of social semantics
               in textual information retrieval. By means of collaboratively
               constructed knowledge derived from web-based social networks,
               inducing both common-sense and domain-specific knowledge as constructed
               by a multitude of users, we will establish an improvement in performance
               of selected tasks within different areas of information retrieval.
               This work connects the concepts and the methods of social networks
               and the semantic web to support the analysis of a social semantic
               web that combines human intelligence with machine learning and
               natural language processing. In this context, social networks,
               as instances of the social web, are capable in delivering social
               network data and document collections on a tremendous scale, inducing
               thematic dynamics that cannot be achieved by traditional expert
               resources. The question of an automatic conversion, annotation
               and processing, however, is central to the debate of the benefits
               of the social semantic web. Which kind of technologies and methods
               are available, adequate and contribute to the processing of this
               rapidly rising flood of information and at the same time being
               capable of using the wealth of information in this large, but
               more importantly decentralized internet. The present work researches
               the performance of social semantic-induced categorization by means
               of different document models. We will shed light on the question,
               to which level social networks and social ontologies contribute
               to selected areas within the information retrieval area, such
               as automatically determining term and text associations, identifying
               topics, text and web genre categorization, and also the domain
               of sentiment analysis. We will show in extensive evaluations,
               comparing the classical apparatus of text categorization -- Vector
               Space Model, Latent Semantic Analysis and Support Vector Machine
               -- that significant improvements can be obtained by considering
               the collaborative knowledge derived from the social web.},
  pdf       = {https://pub.uni-bielefeld.de/download/2302025/2302028},
  website   = {http://www.ulliwaltinger.de/on-social-semantics-in-information-retrieval/},
  year      = {2011}
}

Alexander Mehler, Serge Sharoff and Marina Santini. 2010. Genres on the Web: Computational Models and Empirical Studies. Ed. by Alexander Mehler, Serge Sharoff and Marina Santini. Springer.

BibTeX

@book{Mehler:Sharoff:Santini:2010:a,
  editor    = {Mehler, Alexander and Sharoff, Serge and Santini, Marina},
  title     = {Genres on the Web: Computational Models and Empirical Studies},
  publisher = {Springer},
  address   = {Dordrecht},
  abstract  = {The volume 'Genres on the Web' has been designed for a wide audience,
               from the expert to the novice. It is a required book for scholars,
               researchers and students who want to become acquainted with the
               latest theoretical, empirical and computational advances in the
               expanding field of web genre research. The study of web genre
               is an overarching and interdisciplinary novel area of research
               that spans from corpus linguistics, computational linguistics,
               NLP, and text-technology, to web mining, webometrics, social network
               analysis and information studies. This book gives readers a thorough
               grounding in the latest research on web genres and emerging document
               types. The book covers a wide range of web-genre focussed subjects,
               such as: the identification of the sources of web genres; automatic
               web genre identification; the presentation of structure-oriented
               models; empirical case studies. One of the driving forces behind
               genre research is the idea of a genre-sensitive information system,
               which incorporates genre cues complementing the current keyword-based
               search and retrieval applications.},
  booktitle = {Genres on the Web: Computational Models and Empirical Studies},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/GenresOnTheWeb.jpg},
  pagetotal = {376},
  review    = {http://www.springerlink.com/content/ym07440380524721/},
  website   = {http://www.springer.com/computer/ai/book/978-90-481-9177-2},
  year      = {2010}
}

BibTeX

@book{Sutter:Mehler:2010,
  editor    = {Sutter, Tilmann and Mehler, Alexander},
  title     = {Medienwandel als Wandel von Interaktionsformen -- von fr{\"u}hen Medienkulturen
               zum Web 2.0},
  publisher = {VS Verlag f{\"u}r Sozialwissenschaften},
  address   = {Wiesbaden},
  abstract  = {Die Beitr{\"a}ge des Bandes untersuchen den Medienwandel von fr{\"u}hen
               europ{\"a}ischen Medienkulturen bis zu aktuellen Formen der Internetkommunikation
               unter soziologischer, kulturwissenschaftlicher und linguistischer
               Perspektive. Zwar haben sich die Massenmedien von den Beschr{\"a}nkungen
               sozialer Interaktionen gel{\"o}st, sie weisen dem Publikum aber eine
               distanzierte, blo{\ss} rezipierende Rolle zu. Dagegen er{\"o}ffnen
               neue Formen 'interaktiver' Medien gesteigerte M{\"o}glichkeiten der
               R{\"u}ckmeldung und der Mitgestaltung f{\"u}r die Nutzer. Der vorliegende
               Band fragt nach der Qualit{\"a}t dieses Medienwandels: Werden
               Medien tats{\"a}chlich interaktiv? Was bedeutet die Interaktivit{\"a}t
               neuer Medien? Werden die durch neue Medien er{\"o}ffneten Beteiligungsm{\"o}glichkeiten
               realisiert?},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/Medienwandel.jpg},
  pagetotal = {289},
  website   = {http://www.springer.com/de/book/9783531156422},
  year      = {2010}
}

BibTeX

@book{Santini:Rehm:Sharoff:Mehler:2009,
  editor    = {Santini, Marina and Rehm, Georg and Sharoff, Serge and Mehler, Alexander},
  title     = {Automatic Genre Identification: Issues and Prospects},
  publisher = {GSCL},
  volume    = {24(1)},
  series    = {Journal for Language Technology and Computational
               Linguistics (JLCL)},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/AutomaticGenreIdentification.png},
  pagetotal = {148},
  pdf       = {http://www.jlcl.org/2009_Heft1/JLCL24(1).pdf},
  year      = {2009}
}

BibTeX

@book{Luengen:Mehler:Storrer:2008:a,
  editor    = {L{\"u}ngen, Harald and Mehler, Alexander and Storrer, Angelika},
  title     = {Lexical-Semantic Resources in Automated Discourse Analysis},
  publisher = {GSCL},
  volume    = {23(2)},
  series    = {Journal for Language Technology and Computational
               Linguistics (JLCL)},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/LexicalSemanticResources-300-20.png},
  pagetotal = {111},
  pdf       = {http://www.jlcl.org/2008_Heft2/JLCL23(2).pdf},
  website   = {https://www.researchgate.net/publication/228956889_Lexical-Semantic_Resources_in_Automated_Discourse_Analysis},
  year      = {2008}
}

BibTeX

@book{Mehler:Koehler:2007:a,
  editor    = {Mehler, Alexander and K{\"o}hler, Reinhard},
  title     = {Aspects of Automatic Text Analysis: Festschrift in Honor of Burghard Rieger},
  publisher = {Springer},
  series    = {Studies in Fuzziness and Soft Computing},
  address   = {Berlin/New York},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/AspectsOfAutomaticTextAnalysis.jpg},
  pagetotal = {464},
  review    = {http://www.degruyter.com/view/j/zrs.2011.3.issue-2/zrs.2011.050/zrs.2011.050.xml},
  review2   = {http://irsg.bcs.org/informer/Informer27.pdf},
  website   = {http://www.springer.com/de/book/9783540375203},
  year      = {2007}
}

Alexander Mehler. 2005. Korpuslinguistik. Ed. by Alexander Mehler. Journal for Language Technology and Computational Linguistics (JLCL), 20(2).

BibTeX

@book{Mehler:2005:e,
  editor    = {Mehler, Alexander},
  title     = {Korpuslinguistik},
  publisher = {GSCL},
  volume    = {20(2)},
  series    = {Journal for Language Technology and Computational
               Linguistics (JLCL)},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/Korpuslinguistik.png},
  pagetotal = {97},
  website   = {http://www.jlcl.org/2005_Heft2/LDV_Forum_Band_20_Heft_2.pdf},
  year      = {2005}
}

Alexander Mehler and Christian Wolff. 2005. Text Mining. Ed. by Alexander Mehler and Christian Wolff. Journal for Language Technology and Computational Linguistics (JLCL), 20(1). GSCL.

BibTeX

@book{Mehler:Wolff:2005:a,
  editor    = {Mehler, Alexander and Wolff, Christian},
  title     = {Text Mining},
  publisher = {GSCL},
  volume    = {20(1)},
  series    = {Journal for Language Technology and Computational
               Linguistics (JLCL)},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/TextMining.png},
  pagetotal = {143},
  website   = {http://www.jlcl.org/2005_Heft1/LDV-Forum1.2005.pdf},
  year      = {2005}
}

BibTeX

@book{Mehler:Lobin:2004:a,
  editor    = {Mehler, Alexander and Lobin, Henning},
  title     = {Automatische Textanalyse. Systeme und Methoden zur Annotation
               und Analyse nat{\"u}rlichsprachlicher Texte},
  publisher = {Verlag f{\"u}r Sozialwissenschaften},
  address   = {Wiesbaden},
  pagetotal = {290},
  website   = {http://www.v-r.de/de/Mehler-Lobin-Automatische-Textanalyse/t/352526527/},
  year      = {2004}
}

BibTeX

@book{Mehler:2001:a,
  author    = {Mehler, Alexander},
  title     = {Textbedeutung. Zur prozeduralen Analyse und Repr{\"a}sentation
               struktureller {\"A}hnlichkeiten von Texten / Text Meaning -- Procedural
               Analysis and Representation of Structural Similarities of Texts},
  publisher = {Peter Lang},
  volume    = {5},
  series    = {Computer Studies in Language and Speech},
  address   = {Frankfurt a. M.},
  note      = {Zugl. Diss. Univ. Trier},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/38648_cover_front.jpg},
  pagetotal = {401},
  website   = {https://www.peterlang.com/view/product/39259?tab=toc&format=PBK},
  year      = {2001}
}

Collections

Armin Hoenen, Alexander Mehler and Jost Gippert. 2016. Corpora and Resources for (Historical) Low Resource Languages. 31(2). JLCL.

BibTeX

@collection{GSCL:JLCL:2016:2,
  editor    = {Hoenen, Armin and Mehler, Alexander and Gippert, Jost},
  title     = {Corpora and Resources for (Historical) Low Resource Languages},
  publisher = {JLCL},
  volume    = {31},
  number    = {2},
  year      = {2016},
  issn      = {2190-6858},
  bibsource = {GSCL, http://www.gscl.info/},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2017/10/Titelblatt-Heft2-2016.png},
  pdf       = {http://www.jlcl.org/2016_Heft2/Heft2-2016.pdf}
}

In Books

BibTeX

@inbook{Mehler:et:al:2024:a,
  author    = {Mehler, Alexander and Bagci, Mevl{\"u}t and Schrottenbacher, Patrick
               and Henlein, Alexander and Konca, Maxim and Abrami, Giuseppe and B{\"o}nisch, Kevin
               and Stoeckel, Manuel and Spiekermann, Christian and Engel, Juliane},
  editor    = {Zlatkin-Troitschanskaia, Olga and Nagel, Marie-Theres and Klose, Verena
               and Mehler, Alexander},
  title     = {Towards New Data Spaces for the Study of Multiple Documents with
               {Va.Si.Li}-Lab: A Conceptual Analysis},
  booktitle = {Students', Graduates' and Young Professionals' Critical Use of
               Online Information: Digital Performance Assessment and Training
               within and across Domains},
  year      = {2024},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  pages     = {259--303},
  abstract  = {The constitution of multiple documents has so far been studied
               essentially as a process in which a single learner consults a
               number (of segments) of different documents in the context of
               the task at hand in order to construct a mental model for the
               purpose of completing the task. As a result of this research focus,
               the constitution of multiple documents appears predominantly as
               a monomodal, non-interactive process in which mainly textual units
               are studied, supplemented by images, text-image relations and
               comparable artifacts. This approach is reflected in the contextual
               fixity of the research design, in which the learners under study
               search for information using suitably equipped computers. If,
               on the other hand, we consider the openness of multi-agent learning
               situations, this scenario lacks the aspects of interactivity,
               contextual openness and, above all, the multimodality of information
               objects, information processing and information exchange. This
               is where the chapter comes in. It describes Va.Si.Li-Lab as an
               instrument for multimodal measurement for studying and modeling
               multiple documents in the context of interactive learning in a
               multi-agent environment. To this end, the chapter places Va.Si.Li-Lab
               in the spectrum of evolutionary approaches that vary the combination
               of human and machine innovation and selection. It also combines
               the requirements of multimodal representational learning with
               various aspects of contextual plasticity to prepare Va.Si.Li-Lab
               as a system that can be used for experimental research. The chapter
               is conceptual in nature, designing a system of requirements using
               the example of Va.Si.Li-Lab to outline an experimental environment
               in which the study of Critical Online Reasoning (COR) as a group
               process becomes possible. Although the chapter illustrates some
               of these requirements with realistic data from the field of simulation-based
               learning, the focus is still conceptual rather than experimental,
               hypothesis-driven. That is, the chapter is concerned with the
               design of a technology for future research into COR processes.},
  isbn      = {978-3-031-69510-0},
  doi       = {10.1007/978-3-031-69510-0_12},
  url       = {https://doi.org/10.1007/978-3-031-69510-0_12},
  keywords  = {core, core_c08}
}

BibTeX

@inbook{Konca:et:al:2024:a,
  author    = {Konca, Maxim and Mehler, Alexander
               and L{\"u}cking, Andy and Baumartz, Daniel},
  editor    = {Zlatkin-Troitschanskaia, Olga and Nagel, Marie-Theres
               and Klose, Verena and Mehler, Alexander},
  title     = {Visualizing Domain-specific and Generic Critical Online
               Reasoning Related Structures of Online Texts: A Hybrid Approach},
  booktitle = {Students', Graduates' and Young Professionals' Critical Use
               of Online Information: Digital Performance Assessment and
               Training within and across Domains},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  year      = {2024},
  pages     = {195--239},
  isbn      = {978-3-031-69510-0},
  doi       = {10.1007/978-3-031-69510-0_10},
  url       = {https://doi.org/10.1007/978-3-031-69510-0_10},
  abstract  = {Besides ``traditional'' educational media, young professionals
               in higher education use the Internet to obtain information. To
               utilize their online research in professional contexts, they critically
               evaluate the information they access and its sources. One dimension
               of this evaluation is an assessment of the linguistic state of
               the online sources, either implicitly or explicitly. This computational
               educational linguistic study applies methods from computational
               linguistics to online sources visited by young professionals from
               three fields (law students, teacher trainees, and medicine student)
               and develops partly novel visualizations that allow to quickly
               discover similarities as well as differences between multi-heterogeneous
               Internet sources, that is, sources that exhibit various topics,
               genres, and textual structure, among others. The visualizations
               also allow a comparison of search behaviour between different
               professional fields. In this way, we found that (1) genre classification
               has a significant impact on reliability scores, (2) young professionals'
               search approaches vary by their professional field, and, (3) the
               best predictor of reliability is indeed the linguistic profile
               of an online source.}
}

BibTeX

@inbook{Abrami:et:al:2020,
  author    = {Abrami, Giuseppe and Mehler, Alexander
               and Spiekermann, Christian and Kett, Attila
               and L{\"o}{\"o}ck, Simon and Schwarz, Lukas},
  editor    = {Daniela, Linda},
  title     = {Educational Technologies in the area of ubiquitous
               historical computing in virtual reality},
  booktitle = {New Perspectives on Virtual and Augmented Reality:
               Finding New Ways to Teach in a Transformed Learning Environment},
  publisher = {Taylor \& Francis},
  year      = {2020},
  isbn      = {978-0-367-43211-9},
  url       = {https://www.routledge.com/New-Perspectives-on-Virtual-and-Augmented-Reality-Finding-New-Ways-to-Teach/Daniela/p/book/9780367432119},
  abstract  = {At ever shorter intervals, new technologies are being developed
               that are opening up more and more areas of application. This regards,
               for example, Virtual Reality (VR) and Augmented Reality (AR) devices.
               In addition to the private sector, the public and education sectors,
               which already make intensive use of these devices, benefit from
               these technologies. However, especially in the field of historical
               education, there are not many frameworks for generating immersive
               virtual environments that can be used flexibly enough. This chapter
               addresses this gap by means of VAnnotatoR. VAnnotatoR is a versatile
               framework for the creation and use of virtual environments that
               serve to model historical processes in historical education. The
               paper describes the building blocks of VAnnotatoR and describes
               applications in historical education.}
}

BibTeX

@inbook{Zlatkin-Troitschanskaia:et:al:2019,
  author    = {Zlatkin-Troitschanskaia, Olga and Bisang, Walter and Mehler, Alexander
               and Banerjee, Mita and Roeper, Jochen},
  editor    = {Zlatkin-Troitschanskaia, Olga},
  title     = {Positive Learning in the Internet Age: Developments and Perspectives
               in the {PLATO} Program},
  booktitle = {Frontiers and Advances in Positive Learning in the Age of InformaTiOn (PLATO)},
  year      = {2019},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {1--5},
  abstract  = {The Internet has become the main informational entity, i.e., a
               public source of information. The Internet offers many new benefits
               and opportunities for human learning, teaching, and research.
               However, by providing a vast amount of information from innumerable
               sources, it also enables the manipulation of information; there
               are countless examples of disseminated misinformation and false
               data in mass and social media. Much of the information presented
               online is conflicting, preselected, or algorithmically obscure,
               often colliding with fundamental humanistic values and posing
               moral or ethical problems.},
  isbn      = {978-3-030-26578-6},
  doi       = {10.1007/978-3-030-26578-6_1},
  url       = {https://doi.org/10.1007/978-3-030-26578-6_1}
}

BibTeX

@inbook{Mehler:Ramesh:2019,
  author    = {Mehler, Alexander and Ramesh, Visvanathan},
  editor    = {Zlatkin-Troitschanskaia, Olga},
  title     = {{TextInContext}: On the Way to a Framework for Measuring
               the Context-Sensitive Complexity of Educationally Relevant
               Texts---A Combined Cognitive and Computational Linguistic Approach},
  booktitle = {Frontiers and Advances in Positive Learning in the Age
               of InformaTiOn (PLATO)},
  publisher = {Springer International Publishing},
  address   = {Cham},
  year      = {2019},
  pages     = {167--195},
  isbn      = {978-3-030-26578-6},
  doi       = {10.1007/978-3-030-26578-6_14},
  url       = {https://doi.org/10.1007/978-3-030-26578-6_14},
  abstract  = {We develop a framework for modeling the context sensitivity of
               text interpretation. As a point of reference, we focus on the
               complexity of educational texts. To open up a broader basis for
               representing phenomena of context sensitivity, we integrate a
               learning theory (i.e., the Cognitive Load Theory) with a theory
               of discourse comprehension (i.e., the Construction Integration
               Model) and a theory of cognitive semantics (i.e., the theory of
               Conceptual Spaces). The aim is to construct measures that view
               text complexity as a relational attribute by analogy to the relational
               concept of meaning in situation semantics. To this end, we reconstruct
               the situation semantic notion of relational meaning from the perspective
               of a computationally informed cognitive semantics. The aim is
               to prepare the development of measurements for predicting learning
               outcomes in the form of positive or negative learning. This prediction
               ideally depends on the underlying learning material, the learner's
               situational context, and knowledge retrieved from his or her long-term
               memory, which he or she uses to arrive at coherent mental representations
               of the underlying texts. Finally, our model refers to machine
               learning as a tool for modeling such memory content. In this way,
               the chapter integrates approaches from different disciplines (linguistic
               semantics, computational linguistics, cognitive science, and data
               science).}
}

BibTeX

@inbook{Hoenen:2018,
  author    = {Hoenen, Armin},
  editor    = {Bubenhofer, Noah and Kupietz, Marc},
  title     = {Recurrence Analysis Function, a Dynamic Heatmap for the Visualization
               of Verse Text and Beyond},
  booktitle = {Visualisierung sprachlicher Daten: Visual Linguistics -- Praxis -- Tools},
  publisher = {Heidelberg University Press},
  address   = {Heidelberg},
  abstract  = {The Recurrence Analysis Function (ReAF) is a cross-linguistic
               visualization tool for (historical) verse text, especially handwritten
               epics. It can also provide a general visualization of various
               aspects of prose text. It aims to enable intuitive understanding
               through explorative data analysis of historical, especially bardic-oral
               texts. The assumption behind this is that bardic/born-oral and
               non-bardic/born-written texts differ drastically in the way they
               employ repetition. The ReAF in its first implementation, as presented
               here, is a language-independent tool that permits the visual exploration
               of such structures. Firstly, general aspects and formal characteristics
               of oral verse text are characterized, before the main technical
               details and some additional applications of the ReAF are explained
               and illustrated.},
  year      = {2018},
  url       = {https://heiup.uni-heidelberg.de/reader/download/345/345-69-80909-2-10-20180411.pdf}
}

BibTeX

@inbook{Mehler:Stegbauer:Frank-Job:2018,
  author    = {Mehler, Alexander and Stegbauer, Christian and Frank-Job, Barbara},
  editor    = {Stegbauer, Christian and Holzer, Boris},
  title     = {{Ferdinand de Saussure}. 1916. {Cours} de linguistique g{\'e}n{\'e}rale.
               {Payot}, {Lausanne/Paris}},
  publisher = {Springer VS},
  address   = {Wiesbaden},
  booktitle = {Schl{\"u}sselwerke der Netzwerkforschung},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2017/11/Saussure2.pdf},
  year      = {2018}
}

BibTeX

@inbook{Mehler:et:al:2018,
  author    = {Mehler, Alexander and Zlatkin-Troitschanskaia, Olga
               and Hemati, Wahed and Molerov, Dimitri
               and L{\"u}cking, Andy and Schmidt, Susanne},
  editor    = {Zlatkin-Troitschanskaia, Olga and Wittum, Gabriel and Dengel, Andreas},
  title     = {Integrating Computational Linguistic Analysis of Multilingual
               Learning Data and Educational Measurement Approaches to Explore
               Learning in Higher Education},
  booktitle = {Positive Learning in the Age of Information: A Blessing or a Curse?},
  year      = {2018},
  publisher = {Springer Fachmedien Wiesbaden},
  address   = {Wiesbaden},
  pages     = {145--193},
  abstract  = {This chapter develops a computational linguistic model for analyzing
               and comparing multilingual data as well as its application to
               a large body of standardized assessment data from higher education.
               The approach employs both an automatic and a manual annotation
               of the data on several linguistic layers (including parts of speech,
               text structure and content). Quantitative features of the textual
               data are explored that are related to both the students' (domain-specific
               knowledge) test results and their level of academic experience.
               The respective analysis involves statistics of distance correlation,
               text categorization with respect to text types (questions and
               response options) as well as languages (English and German), and
               network analysis to assess dependencies between features. The
               correlation between correct test results of students and linguistic
               features of the verbal presentations of tests indicate to what
               extent language influences higher education test performance.
               It has also been found that this influence relates to specialized
               language. Thus, this integrative modeling approach contributes
               a test basis for a large-scale analysis of learning data and points
               to a number of subsequent, more detailed research questions.},
  isbn      = {978-3-658-19567-0},
  doi       = {10.1007/978-3-658-19567-0_10},
  url       = {https://doi.org/10.1007/978-3-658-19567-0_10}
}

BibTeX

@inbook{Sutter2010,
  author    = {Sutter, Tilmann and Mehler, Alexander},
  title     = {Einleitung: Der aktuelle Medienwandel im Blick einer interdisziplin{\"a}ren
               Medienwissenschaft},
  booktitle = {Medienwandel als Wandel von Interaktionsformen},
  editor    = {Sutter, Tilmann and Mehler, Alexander},
  publisher = {VS Verlag f{\"u}r Sozialwissenschaften},
  address   = {Wiesbaden},
  year      = {2010},
  pages     = {7--16},
  isbn      = {978-3-531-92292-8},
  doi       = {10.1007/978-3-531-92292-8_1},
  url       = {https://doi.org/10.1007/978-3-531-92292-8_1},
  abstract  = {Die Herausforderung, die der Wandel von Kommunikationsmedien f{\"u}r
               die Medienwissenschaft darstellt, resultiert nicht nur aus der
               ungeheuren Beschleunigung des Medienwandels. Die Herausforderung
               stellt sich auch mit der Frage, welches die neuen Formen und Strukturen
               sind, die aus dem Wandel der Medien hervorgehen. R{\"u}ckt man
               diese Frage in den Fokus der {\"U}berlegungen, kommen erstens
               Entwicklungen im Wechsel von Massenmedien zu neuen, „interaktiven``
               Medien in den Blick. Dies betrifft den Wandel von den alten Medien
               in Form von Einwegkommunikation zu den neuen Medien in Form von
               Netzkommunikation. Dieser Wandel wurde in zahlreichen Analysen
               als eine Revolution beschrieben: Im Unterschied zur einseitigen,
               r{\"u}ckkopplungsarmen Kommunikationsform der Massenmedien sollen
               neue, computergest{\"u}tzte Formen der Medienkommunikation „interaktiv``
               sein, d.h. gesteigerte R{\"u}ckkopplungs- und Eingriffsm{\"o}glichkeiten
               f{\"u}r die Adressaten und Nutzer bieten. Sozialwissenschaftlich
               bedeutsam ist dabei die Einsch{\"a}tzung der Qualit{\"a}t und
               des Umfangs dieser neuen M{\"o}glichkeiten und Leistungen. Denn
               bislang bedeutete Medienwandel im Kern eine zunehmende Ausdifferenzierung
               alter und neuer Medien mit je spezifischen Leistungen, d.h. neue
               Medien ersetzen die {\"a}lteren nicht, sondern sie erg{\"a}nzen
               und erweitern sie. Allerdings wird im Zuge des aktuellen Medienwandels
               immer deutlicher, dass die neuen Medien durchaus imstande sind,
               die Leistungen massenmedialer Verbreitung von Kommunikation zu
               {\"u}bernehmen. Stehen wir also, wie das schon seit l{\"a}ngerem
               k{\"u}hn vorhergesagt wird, vor der Etablierung eines Universalmediums,
               das in der Lage ist, die Formen und Funktionen anderer Medien
               zu {\"u}bernehmen?}
}

In Collections

Andy Lücking. 2025. Referential Transparency Theory. Wörterbücher zur Sprach- und Kommunikationswissenschaft (WSK) Online.

BibTeX

@incollection{Luecking:2025-wsk-rtt,
  booktitle = {Wörterbücher zur Sprach- und Kommunikationswissenschaft (WSK) Online},
  url       = {https://www.degruyterbrill.com/database/WSK/entry/wsk__38780752/html},
  editor    = {Schierholz, Stefan J. and Giacomini, Laura},
  doi       = {10.1515/wsk},
  title     = {Referential Transparency Theory},
  author    = {Lücking, Andy},
  year      = {2025},
  publisher = {De Gruyter},
  address   = {Berlin and Boston},
  keywords  = {own,bookchapter,gemdis,neglab}
}

Andy Lücking. 2025. Deixis. Wörterbücher zur Sprach- und Kommunikationswissenschaft (WSK) Online.

BibTeX

@incollection{Luecking:2025-wsk-deixis,
  author    = {Lücking, Andy},
  title     = {Deixis},
  booktitle = {Wörterbücher zur Sprach- und Kommunikationswissenschaft (WSK) Online},
  editor    = {Schierholz, Stefan J. and Giacomini, Laura},
  publisher = {De Gruyter},
  address   = {Berlin, Boston},
  year      = {2025},
  doi       = {10.1515/wsk},
  url       = {https://www.degruyterbrill.com/database/WSK/entry/wsk_idf6497d06-df52-478d-a30f-a3a99151d81e/html},
  keywords  = {gemdis}
}

Andy Lücking, Jonathan Ginzburg and Robin Cooper. 2021. Grammar in dialogue. Head Driven Phrase Structure Grammar: The handbook, 1155–1199.

BibTeX

@incollection{Luecking:Ginzburg:Cooper:2021,
  author    = {L{\"u}cking, Andy and Ginzburg, Jonathan and Cooper, Robin},
  title     = {Grammar in dialogue},
  chapter   = {26},
  pages     = {1155--1199},
  url       = {https://langsci-press.org/catalog/book/259},
  editor    = {M{\"u}ller, Stefan and Abeill{\'e}, Anne and Borsley, Robert D.
               and Koenig, Jean-Pierre},
  booktitle = {{Head Driven Phrase Structure Grammar: The handbook}},
  year      = {2021},
  series    = {Empirically Oriented Theoretical Morphology and
                  Syntax},
  number    = {9},
  address   = {Berlin},
  publisher = {Language Science Press},
  doi       = {10.5281/zenodo.5543318}
}

Andy Lücking. 2021. Gesture. Head Driven Phrase Structure Grammar: The handbook, 1201–1250.

BibTeX

@incollection{Luecking:2021,
  author    = {L{\"u}cking, Andy},
  title     = {Gesture},
  pages     = {1201--1250},
  chapter   = {27},
  url       = {https://langsci-press.org/catalog/book/259},
  editor    = {M{\"u}ller, Stefan and Abeill{\'e}, Anne and Borsley, Robert D.
               and Koenig, Jean-Pierre},
  booktitle = {{Head Driven Phrase Structure Grammar: The handbook}},
  year      = {2021},
  series    = {Empirically Oriented Theoretical Morphology and
                  Syntax},
  number    = {9},
  address   = {Berlin},
  publisher = {Language Science Press},
  doi       = {10.5281/zenodo.5543318}
}

BibTeX

@incollection{Henlein:et:al:2021,
  author    = {Alexander Henlein and Giuseppe Abrami and Attila Kett and Christian Spiekermann
               and Alexander Mehler},
  title     = {Digital Learning, Teaching and Collaboration in an Era of ubiquitous Quarantine},
  editor    = {Linda Daniela and Anna Visvizi},
  booktitle = {Remote Learning in Times of Pandemic - Issues, Implications and Best Practice},
  publisher = {Routledge},
  address   = {Thames, Oxfordshire, England, UK},
  year      = {2021},
  chapter   = {3}
}

Andy Lücking. 2019. Dialogue semantics: From cognitive structures to positive and negative learning. Frontiers and Advances in Positive Learning in the Age of InformaTiOn (PLATO), 197–205.

BibTeX

@incollection{Luecking:2019:a,
  author    = {L{\"u}cking, Andy},
  title     = {Dialogue semantics: {From} cognitive structures to positive and
               negative learning},
  year      = {2019},
  pages     = {197--205},
  publisher = {Springer Nature Switzerland AG},
  address   = {Cham, Switzerland},
  editor    = {Zlatkin-Troitschanskaia, Olga},
  booktitle = {Frontiers and Advances in Positive Learning in the Age of InformaTiOn (PLATO)},
  doi       = {10.1007/978-3-030-26578-6_15},
  url       = {https://link.springer.com/chapter/10.1007/978-3-030-26578-6_15}
}

Andy Lücking. 2018. Witness-loaded and Witness-free Demonstratives. Atypical Demonstratives.

BibTeX

@incollection{Luecking:2018:a,
  author    = {Andy L{\"u}cking},
  title     = {Witness-loaded and Witness-free Demonstratives},
  booktitle = {Atypical Demonstratives},
  publisher = {De Gruyter},
  year      = {2018},
  editor    = {Marco Coniglio and Andrew Murphy and Eva Schlachter and Tonjes Veenstra},
  isbn      = {978-3-11-056029-9},
  url       = {https://www.degruyter.com/view/product/495228},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2020/05/Luecking-witness-loading-rg.pdf}
}

BibTeX

@incollection{Mehler:Hemati:Gleim:Baumartz:2018,
  author    = {Alexander Mehler and Wahed Hemati and Rüdiger Gleim and Daniel Baumartz},
  title     = {{VienNA: }{Auf dem Weg zu einer Infrastruktur für die verteilte
               interaktive evolutionäre Verarbeitung natürlicher Sprache}},
  booktitle = {Forschungsinfrastrukturen und digitale Informationssysteme in
               der germanistischen Sprachwissenschaft},
  editor    = {Henning Lobin and Roman Schneider and Andreas Witt},
  volume    = {6},
  publisher = {De Gruyter},
  address   = {Berlin},
  year      = {2018}
}

Alexander Mehler, Wahed Hemati, Tolga Uslu and Andy Lücking. 2018. A Multidimensional Model of Syntactic Dependency Trees for Authorship Attribution. Quantitative analysis of dependency structures.

BibTeX

@incollection{Mehler:Hemati:Uslu:Luecking:2018,
  author    = {Alexander Mehler and Wahed Hemati and Tolga Uslu and Andy Lücking},
  title     = {A Multidimensional Model of Syntactic Dependency Trees for Authorship
               Attribution},
  booktitle = {Quantitative analysis of dependency structures},
  editor    = {Jingyang Jiang and Haitao Liu},
  publisher = {De Gruyter},
  address   = {Berlin/New York},
  year      = {2018},
  keywords  = {Dependency structure, Authorship attribution, Text
               categorization, Syntactic Alignment},
  abstract  = {Abstract: In this chapter we introduce a multidimensional model
               of syntactic dependency trees. Our ultimate goal is to generate
               fingerprints of such trees to predict the author of the underlying
               sentences. The chapter makes a first attempt to create such fingerprints
               for sentence categorization via the detour of text categorization.
               We show that at text level, aggregated dependency structures actually
               provide information about authorship. At the same time, we show
               that this does not hold for topic detection. We evaluate our model
               using a quarter of a million sentences collected in two corpora:
               the first is sampled from literary texts, the second from Wikipedia
               articles. As a second finding of our approach, we show that quantitative
               models of dependency structure do not yet allow for detecting
               syntactic alignment in written communication. We conclude that
               this is mainly due to effects of lexical alignment on syntactic
               alignment.}
}

BibTeX

@incollection{Hoenen:2017,
  author    = {Hoenen, Armin},
  title     = {{Beyond the tree – a theoretical model of contamination and a
               software to generate multilingual stemmata}},
  booktitle = {{Book of Abstracts of the annual conference of the AIUCD 2017, Sapienza, Rome}},
  publisher = {AIUCD},
  year      = {2017},
  url       = {http://aiucd2017.aiucd.it/wp-content/uploads/2017/01/book-of-abstract-AIUCD-2017.pdf}
}

BibTeX

@incollection{Mehler:Gleim:2015:a,
  author    = {Mehler, Alexander and Gleim, Rüdiger},
  title     = {Linguistic Networks -- An Online Platform for Deriving Collocation
               Networks from Natural Language Texts},
  booktitle = {Towards a Theoretical Framework for Analyzing Complex Linguistic Networks},
  editor    = {Mehler, Alexander and Lücking, Andy and Banisch, Sven and Blanchard, Philippe
               and Frank-Job, Barbara},
  series    = {Understanding Complex Systems},
  publisher = {Springer},
  year      = {2015}
}

BibTeX

@incollection{Mehler:Brueck:Gleim:Geelhaar:2015,
  author    = {Mehler, Alexander and vor der Brück, Tim and Gleim, Rüdiger and Geelhaar, Tim},
  title     = {Towards a Network Model of the Coreness of Texts: An Experiment
               in Classifying Latin Texts using the TTLab Latin Tagger},
  booktitle = {Text Mining: From Ontology Learning to Automated text Processing Applications},
  publisher = {Springer},
  editor    = {Chris Biemann and Alexander Mehler},
  series    = {Theory and Applications of Natural Language Processing},
  pages     = {87--112},
  address   = {Berlin/New York},
  abstract  = {The analysis of longitudinal corpora of historical texts requires
               the integrated development of tools for automatically preprocessing
               these texts and for building representation models of their genre-
               and register-related dynamics. In this chapter we present such
               a joint endeavor that ranges from resource formation via preprocessing
               to network-based text representation and classification. We start
               with presenting the so-called TTLab Latin Tagger (TLT) that preprocesses
               texts of classical and medieval Latin. Its lexical resource in
               the form of the Frankfurt Latin Lexicon (FLL) is also briefly
               introduced. As a first test case for showing the expressiveness
               of these resources, we perform a tripartite classification task
               of authorship attribution, genre detection and a combination thereof.
               To this end, we introduce a novel text representation model that
               explores the core structure (the so-called coreness) of lexical
               network representations of texts. Our experiment shows the expressiveness
               of this representation format and mediately of our Latin preprocessor.},
  website   = {http://link.springer.com/chapter/10.1007/978-3-319-12655-5_5},
  year      = {2015}
}

BibTeX

@incollection{Abrami:Mehler:Pravida:2015:b,
  author    = {Abrami, Giuseppe and Mehler, Alexander and Pravida, Dietmar},
  title     = {Fusing Text and Image Data with the Help of the OWLnotator},
  booktitle = {Human Interface and the Management of Information. Information
               and Knowledge Design},
  publisher = {Springer International Publishing},
  editor    = {Yamamoto, Sakae},
  volume    = {9172},
  series    = {Lecture Notes in Computer Science},
  pages     = {261--272},
  doi       = {10.1007/978-3-319-20612-7_25},
  isbn      = {978-3-319-20611-0},
  language  = {English},
  website   = {http://dx.doi.org/10.1007/978-3-319-20612-7_25},
  year      = {2015}
}

Armin Hoenen. 2014. Stemmatology, an interdisciplinary endeavour. Book of Abstracts zum DHd Workshop Informatik und die Digital Humanities.

BibTeX

@incollection{Hoenen:2014plz,
  author    = {Hoenen, Armin},
  title     = {{Stemmatology, an interdisciplinary endeavour}},
  booktitle = {{Book of Abstracts zum DHd Workshop Informatik und die Digital Humanities}},
  publisher = {DHd},
  year      = {2014},
  url       = {http://dhd-wp.hab.de/files/book_of_abstracts.pdf}
}

Alexander Mehler, Tim vor der Brück and Andy Lücking. 2014. Comparing Hand Gesture Vocabularies for HCI. Proceedings of HCI International 2014, 22 - 27 June 2014, Heraklion, Greece.

BibTeX

@incollection{Mehler:vor:der:Brueck:Luecking:2014,
  author    = {Mehler, Alexander and vor der Brück, Tim and Lücking, Andy},
  title     = {Comparing Hand Gesture Vocabularies for HCI},
  booktitle = {Proceedings of HCI International 2014, 22 - 27 June 2014, Heraklion, Greece},
  publisher = {Springer},
  address   = {Berlin/New York},
  year      = {2014},
  keywords  = {wikinect},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Comparing-Gesture-Vocabularies-1_1.pdf},
  website   = {{http://link.springer.com/chapter/10.1007/978-3-319-07230-2_8#page-1}},
  abstract  = {HCI systems are often equipped with gestural interfaces drawing
               on a predefined set of admitted gestures. We provide an assessment
               of the fitness of such gesture vocabularies in terms of their
               learnability and naturalness. This is done by example of rivaling
               gesture vocabularies of the museum information system WikiNect.
               In this way, we do not only provide a procedure for evaluating
               gesture vocabularies, but additionally contribute to design criteria
               to be followed by the gestures.}
}

Ineta Sejane and Steffen Eger. 2013. Semantic typologies by means of network analysis of bilingual dictionaries. Approaches to Measuring Linguistic Differences, 447–474.

BibTeX

@incollection{Sejane:Eger:2013,
  author    = {Sejane, Ineta and Eger, Steffen},
  title     = {Semantic typologies by means of network analysis of bilingual dictionaries},
  booktitle = {Approaches to Measuring Linguistic Differences},
  publisher = {De Gruyter},
  editor    = {Borin, Lars and Saxena, Anju},
  pages     = {447--474},
  bibtexkey = {eger-sejane_network-typologies2013},
  doi       = {10.1515/9783110305258.447},
  inlg      = {English [eng]},
  src       = {degruyter},
  srctrickle = {degruyter#/books/9783110305258/9783110305258.447/9783110305258.447.xml},
  url       = {http://www.degruyter.com/view/books/9783110305258/9783110305258.447/9783110305258.447.xml},
  year      = {2013}
}

BibTeX

@incollection{Mehler:Stegbauer:Gleim:2013,
  author    = {Mehler, Alexander and Stegbauer, Christian and Gleim, Rüdiger},
  title     = {Zur Struktur und Dynamik der kollaborativen Plagiatsdokumentation
               am Beispiel des GuttenPlag Wiki: eine Vorstudie},
  booktitle = {Die Dynamik sozialer und sprachlicher Netzwerke. Konzepte, Methoden
               und empirische Untersuchungen am Beispiel des WWW},
  publisher = {VS Verlag},
  editor    = {Frank-Job, Barbara and Mehler, Alexander and Sutter, Tilmann},
  address   = {Wiesbaden},
  year      = {2013}
}

Tim vor der Brück. 2012. Hyponym Extraction Employing a Weighted Graph Kernel. Statistical and Machine Learning Approaches for Network Analysis.

BibTeX

@incollection{vor:der:Brueck:2012:b,
  author    = {vor der Brück, Tim},
  title     = {Hyponym Extraction Employing a Weighted Graph Kernel},
  booktitle = {Statistical and Machine Learning Approaches for Network Analysis},
  editor    = {Matthias Dehmer and Subhash C. Basak},
  address   = {Hoboken, New Jersey},
  publisher = {Wiley},
  year      = {2012}
}

Alexander Mehler, Laurent Romary and Dafydd Gibbon. 2012. Introduction: Framing Technical Communication. Handbook of Technical Communication, 8:1–26.

BibTeX

@incollection{Mehler:Romary:Gibbon:2012,
  author    = {Mehler, Alexander and Romary, Laurent and Gibbon, Dafydd},
  title     = {Introduction: Framing Technical Communication},
  booktitle = {Handbook of Technical Communication},
  publisher = {De Gruyter Mouton},
  editor    = {Alexander Mehler and Laurent Romary and Dafydd Gibbon},
  volume    = {8},
  series    = {Handbooks of Applied Linguistics},
  pages     = {1--26},
  address   = {Berlin and Boston},
  year      = {2012}
}

BibTeX

@incollection{Mehler:Schwandt:Gleim:Ernst:2012,
  author    = {Mehler, Alexander and Schwandt, Silke and Gleim, Rüdiger and Ernst, Alexandra},
  title     = {Inducing Linguistic Networks from Historical Corpora: Towards
               a New Method in Historical Semantics},
  booktitle = {Proceedings of the Conference on New Methods in Historical Corpora},
  editor    = {Paul Bennett and Martin Durrell and Silke Scheible and Richard J. Whitt},
  series    = {Corpus linguistics and Interdisciplinary perspectives
               on language (CLIP)},
  volume    = {3},
  pages     = {257--274},
  address   = {Tübingen},
  publisher = {Narr},
  year      = {2012}
}

BibTeX

@incollection{Luecking:Ptock:Bergmann:2012,
  author    = {Lücking, Andy and Ptock, Sebastian and Bergmann, Kirsten},
  title     = {Assessing Agreement on Segmentations by Means of Staccato, the
               Segmentation Agreement Calculator according to Thomann},
  booktitle = {Gesture and Sign Language in Human-Computer Interaction and Embodied
               Communication},
  publisher = {Springer},
  editor    = {Eleni Efthimiou and Georgios Kouroupetroglou and Stavroula-Evita Fotinea},
  volume    = {7206},
  series    = {Lecture Notes in Artificial Intelligence},
  pages     = {129--138},
  address   = {Berlin and Heidelberg},
  abstract  = {Staccato, the Segmentation Agreement Calculator According to Thomann,
               is a software tool for assessing the degree of agreement of
               multiple segmentations of some time-related data (e.g., gesture
               phases or sign language constituents). The software implements
               an assessment procedure developed by Bruno Thomann and will be
               made publicly available. The article discusses the rationale of
               the agreement assessment procedure and points at future extensions
               of Staccato.},
  booksubtitle = {9th International Gesture Workshop, GW 2011, Athens,
                   Greece, May 2011, Revised Selected Papers},
  website   = {http://link.springer.com/chapter/10.1007/978-3-642-34182-3_12},
  year      = {2012}
}

BibTeX

@incollection{Luecking:Pfeiffer:2012,
  author    = {Lücking, Andy and Pfeiffer, Thies},
  title     = {Framing Multimodal Technical Communication. With Focal Points
               in Speech-Gesture-Integration and Gaze Recognition},
  booktitle = {Handbook of Technical Communication},
  publisher = {De Gruyter Mouton},
  editor    = {Alexander Mehler and Laurent Romary and Dafydd Gibbon},
  volume    = {8},
  series    = {Handbooks of Applied Linguistics},
  chapter   = {18},
  pages     = {591--644},
  address   = {Berlin and Boston},
  website   = {http://www.degruyter.com/view/books/9783110224948/9783110224948.591/9783110224948.591.xml},
  year      = {2012}
}

Petra Kubina, Olga Abramov and Andy Lücking. 2012. Barrier-free Communication. Handbook of Technical Communication, 8:645–706.

BibTeX

@incollection{Kubina:Abramov:Luecking:2012,
  author    = {Kubina, Petra and Abramov, Olga and Lücking, Andy},
  title     = {Barrier-free Communication},
  booktitle = {Handbook of Technical Communication},
  publisher = {De Gruyter Mouton},
  editor    = {Alexander Mehler and Laurent Romary},
  volume    = {8},
  series    = {Handbooks of Applied Linguistics},
  chapter   = {19},
  pages     = {645--706},
  address   = {Berlin and Boston},
  editora   = {Dafydd Gibbon},
  editoratype = {collaborator},
  website   = {http://www.degruyter.com/view/books/9783110224948/9783110224948.645/9783110224948.645.xml},
  year      = {2012}
}

BibTeX

@incollection{Mehler:2011:c,
  author    = {Mehler, Alexander},
  title     = {Social Ontologies as Generalized Nearly Acyclic Directed Graphs:
               A Quantitative Graph Model of Social Ontologies by Example of
               Wikipedia},
  booktitle = {Towards an Information Theory of Complex Networks: Statistical
               Methods and Applications},
  publisher = {Birkh{\"a}user},
  editor    = {Dehmer, Matthias and Emmert-Streib, Frank and Mehler, Alexander},
  pages     = {259--319},
  address   = {Boston/Basel},
  year      = {2011}
}

Peter Geibel, Alexander Mehler and Kai-Uwe Kühnberger. 2011. Learning Methods for Graph Models of Document Structure. Modeling, Learning and Processing of Text Technological Data Structures.

BibTeX

@incollection{Geibel:Mehler:Kuehnberger:2011:a,
  author    = {Geibel, Peter and Mehler, Alexander and Kühnberger, Kai-Uwe},
  title     = {Learning Methods for Graph Models of Document Structure},
  booktitle = {Modeling, Learning and Processing of Text Technological Data Structures},
  editor    = {Mehler, Alexander and Kühnberger, Kai-Uwe and Lobin, Henning and Lüngen, Harald
               and Storrer, Angelika and Witt, Andreas},
  series    = {Studies in Computational Intelligence},
  address   = {Berlin/New York},
  publisher = {Springer},
  year      = {2011},
  website   = {http://www.springerlink.com/content/p095331472h76v56/}
}

BibTeX

@incollection{Mehler:Waltinger:2011:a,
  author    = {Mehler, Alexander and Waltinger, Ulli},
  title     = {Integrating Content and Structure Learning: A Model of Hypertext
               Zoning and Sounding},
  booktitle = {Modeling, Learning and Processing of Text Technological Data Structures},
  editor    = {Mehler, Alexander and Kühnberger, Kai-Uwe and Lobin, Henning and Lüngen, Harald
               and Storrer, Angelika and Witt, Andreas},
  series    = {Studies in Computational Intelligence},
  address   = {Berlin/New York},
  publisher = {Springer},
  year      = {2011},
  website   = {http://rd.springer.com/chapter/10.1007/978-3-642-22613-7_15}
}

BibTeX

@incollection{Waltinger:Mehler:Loesch:Horstmann:2011,
  author    = {Waltinger, Ulli and Mehler, Alexander and Lösch, Mathias and Horstmann, Wolfram},
  title     = {Hierarchical Classification of OAI Metadata Using the DDC Taxonomy},
  booktitle = {Advanced Language Technologies for Digital Libraries (ALT4DL)},
  publisher = {Springer},
  editor    = {Raffaella Bernardi and Sally Chambers and Bjoern Gottfried and Frederique Segond
               and Ilya Zaihrayeu},
  series    = {LNCS},
  pages     = {29--40},
  address   = {Berlin},
  abstract  = {In the area of digital library services, the access to subject-specific
               metadata of scholarly publications is of utmost interest. One
               of the most prevalent approaches for metadata exchange is the
               XML-based Open Archive Initiative (OAI) Protocol for Metadata
               Harvesting (OAI-PMH). However, due to its loose requirements regarding
               metadata content there is no strict standard for consistent subject
               indexing specified, which is furthermore needed in the digital
               library domain. This contribution addresses the problem of automatic
               enhancement of OAI metadata by means of the most widely used universal
               classification schemes in libraries—the Dewey Decimal Classification
               (DDC). To be more specific, we automatically classify scientific
               documents according to the DDC taxonomy within three levels using
               a machine learning-based classifier that relies solely on OAI
               metadata records as the document representation. The results show
               an asymmetric distribution of documents across the hierarchical
               structure of the DDC taxonomy and issues of data sparseness. However,
               the performance of the classifier shows promising results on all
               three levels of the DDC.},
  website   = {http://www.springerlink.com/content/x20257512g818377/},
  year      = {2011}
}

Alexander Mehler. 2010. Minimum Spanning Markovian Trees: Introducing Context-Sensitivity into the Generation of Spanning Trees. Structural Analysis of Complex Networks, 381–401.

BibTeX

@incollection{Mehler:2010:a,
  author    = {Mehler, Alexander},
  title     = {Minimum Spanning Markovian Trees: Introducing Context-Sensitivity
               into the Generation of Spanning Trees},
  booktitle = {Structural Analysis of Complex Networks},
  publisher = {Birkh{\"a}user Publishing},
  editor    = {Dehmer, Matthias},
  pages     = {381--401},
  address   = {Basel},
  abstract  = {This chapter introduces a novel class of graphs: Minimum Spanning
               Markovian Trees (MSMTs). The idea behind MSMTs is to provide spanning
               trees that minimize the costs of edge traversals in a Markovian
               manner, that is, in terms of the path starting with the root of
               the tree and ending at the vertex under consideration. In a second
               part, the chapter generalizes this class of spanning trees in
               order to allow for damped Markovian effects in the course of spanning.
               These two effects, (1) the sensitivity to the contexts generated
               by consecutive edges and (2) the decreasing impact of more antecedent
               (or 'weakly remembered') vertices, are well known in cognitive
               modeling [6, 10, 21, 23]. In this sense, the chapter can also
               be read as an effort to introduce a graph model to support the
               simulation of cognitive systems. Note that MSMTs are not to be
               confused with branching Markov chains or Markov trees [20] as
               we focus on generating spanning trees from given weighted undirected
               networks.},
  website   = {https://www.researchgate.net/publication/226700676_Minimum_Spanning_Markovian_Trees_Introducing_Context-Sensitivity_into_the_Generation_of_Spanning_Trees},
  year      = {2010}
}

BibTeX

@incollection{Santini:Mehler:Sharoff:2009,
  author    = {Santini, Marina and Mehler, Alexander and Sharoff, Serge},
  title     = {Riding the Rough Waves of Genre on the Web: Concepts and Research Questions},
  booktitle = {Genres on the Web: Computational Models and Empirical Studies},
  publisher = {Springer},
  editor    = {Mehler, Alexander and Sharoff, Serge and Santini, Marina},
  pages     = {3--32},
  address   = {Berlin/New York},
  abstract  = {This chapter outlines the state of the art of empirical and computational
               webgenre research. First, it highlights why the concept of genre
               is profitable for a range of disciplines. At the same time, it
               lists a number of recent interpretations that can inform and influence
               present and future genre research. Last but not least, it breaks
               down a series of open issues that relate to the modelling of the
               concept of webgenre in empirical and computational studies.},
  year      = {2009}
}

Alexander Mehler. 2009. Structure Formation in the Web. A Graph-Theoretical Model of Hypertext Types. Linguistic Modeling of Information and Markup Languages. Contributions to Language Technology.

BibTeX

@incollection{Mehler:2009:b,
  author    = {Mehler, Alexander},
  title     = {Structure Formation in the Web. A Graph-Theoretical Model of Hypertext Types},
  booktitle = {Linguistic Modeling of Information and Markup Languages. Contributions
               to Language Technology},
  editor    = {Witt, Andreas and Metzing, Dieter},
  series    = {Text, Speech and Language Technology},
  address   = {Dordrecht},
  publisher = {Springer},
  year      = {2009},
  website   = {http://www.springerlink.com/content/t27782w8j2125112/},
  abstract  = {In this chapter we develop a representation model of web document
               networks. Based on the notion of uncertain web document structures,
               the model is defined as a template which grasps nested manifestation
               levels of hypertext types. Further, we specify the model on the
               conceptual, formal and physical level and exemplify it by reconstructing
               competing web document models.}
}

Alexander Mehler. 2009. Generalized Shortest Paths Trees: A Novel Graph Class Applied to Semiotic Networks. Analysis of Complex Networks: From Biology to Linguistics, 175–220.

BibTeX

@incollection{Mehler:2009:c,
  author    = {Mehler, Alexander},
  title     = {Generalized Shortest Paths Trees: A Novel Graph Class Applied
               to Semiotic Networks},
  booktitle = {Analysis of Complex Networks: From Biology to Linguistics},
  publisher = {Wiley-VCH},
  editor    = {Dehmer, Matthias and Emmert-Streib, Frank},
  pages     = {175--220},
  address   = {Weinheim},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_2009_b.pdf},
  website   = {https://www.researchgate.net/publication/255666602_1_Generalised_Shortest_Paths_Trees_A_Novel_Graph_Class_Applied_to_Semiotic_Networks},
  year      = {2009}
}

Tim vor der Brück and Sven Hartrumpf. 2009. A Readability Checker Based on Deep Semantic Indicators. Human Language Technology. Challenges of the Information Society, 5603:232–244.

BibTeX

@incollection{vor:der:Brueck:Hartrumpf:2009,
  author    = {vor der Brück, Tim and Hartrumpf, Sven},
  title     = {A Readability Checker Based on Deep Semantic Indicators},
  editor    = {Vetulani, Zygmunt and Uszkoreit, Hans},
  booktitle = {Human Language Technology. Challenges of the Information Society},
  series    = {Lecture Notes in Computer Science (LNCS)},
  volume    = {5603},
  pages     = {232--244},
  publisher = {Springer},
  address   = {Berlin, Germany},
  year      = {2009},
  abstract  = {One major reason that readability checkers are still far away
               from judging the understandability of texts consists in the fact
               that no semantic information is used. Syntactic, lexical, or morphological
               information can only give limited access for estimating the cognitive
               difficulties for a human being to comprehend a text. In this paper
               however, we present a readability checker which uses semantic
               information in addition. This information is represented as semantic
               networks and is derived by a deep syntactico-semantic analysis.
               We investigate in which situations a semantic readability indicator
               can lead to superior results in comparison with ordinary surface
               indicators like sentence length. Finally, we compute the weights
               of our semantic indicators in the readability function based on
               the user ratings collected in an online evaluation.},
  website   = {http://rd.springer.com/chapter/10.1007/978-3-642-04235-5_20}
}

Alexander Mehler. 2009. Artifizielle Interaktivität. Eine semiotische Betrachtung. Medienwandel als Wandel von Interaktionsformen – von frühen Medienkulturen zum Web 2.0.

BibTeX

@incollection{Mehler:2009:d,
  author    = {Mehler, Alexander},
  title     = {Artifizielle Interaktivit{\"a}t. Eine semiotische Betrachtung},
  booktitle = {Medienwandel als Wandel von Interaktionsformen -- von frühen Medienkulturen
               zum Web 2.0},
  publisher = {VS},
  editor    = {Sutter, Tilmann and Mehler, Alexander},
  address   = {Wiesbaden},
  year      = {2009}
}

Alexander Mehler, Barbara Job, Philippe Blanchard and Hans-Jürgen Eikmeyer. 2008. Sprachliche Netzwerke. Netzwerkanalyse und Netzwerktheorie, 413–427.

BibTeX

@incollection{Mehler:Job:Blanchard:Eikmeyer:2008,
  author    = {Mehler, Alexander and Job, Barbara and Blanchard, Philippe and Eikmeyer, Hans-Jürgen},
  title     = {Sprachliche Netzwerke},
  booktitle = {Netzwerkanalyse und Netzwerktheorie},
  publisher = {VS},
  editor    = {Stegbauer, Christian},
  pages     = {413--427},
  address   = {Wiesbaden},
  abstract  = {In diesem Kapitel beschreiben wir so genannte sprachliche Netzwerke.
               Dabei handelt es sich um Netzwerke sprachlicher Einheiten, die
               in Zusammenhang mit ihrer Einbettung in das Netzwerk jener Sprachgemeinschaft
               analysiert werden, welche diese Einheiten und deren Vernetzung
               hervorgebracht hat. Wir erörtern ein Dreistufenmodell zur Analyse
               solcher Netzwerke und exemplifizieren dieses Modell anhand mehrerer
               Spezialwikis. Ein Hauptaugenmerk des Kapitels liegt dabei auf
               einem Mehrebenennetzwerkmodell, und zwar in Abkehr von den unipartiten
               Graphmodellen der Theorie komplexer Netzwerke.},
  year      = {2008}
}

Alexander Mehler. 2008. Large Text Networks as an Object of Corpus Linguistic Studies. Corpus Linguistics. An International Handbook of the Science of Language and Society, 328–382.

BibTeX

@incollection{Mehler:2008:b,
  author    = {Mehler, Alexander},
  title     = {Large Text Networks as an Object of Corpus Linguistic Studies},
  booktitle = {Corpus Linguistics. An International Handbook of the Science of
               Language and Society},
  publisher = {De Gruyter},
  editor    = {Lüdeling, Anke and Kytö, Merja},
  pages     = {328--382},
  address   = {Berlin/New York},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_2007_a.pdf},
  year      = {2008}
}

BibTeX

@incollection{Mehler:Sutter:2008,
  author    = {Mehler, Alexander and Sutter, Tilmann},
  title     = {Interaktive Textproduktion in Wiki-basierten Kommunikationssystemen},
  booktitle = {Kommunikation, Partizipation und Wirkungen im Social Web -- Weblogs,
               Wikis, Podcasts und Communities aus interdisziplin{\"a}rer Sicht},
  publisher = {Herbert von Halem},
  editor    = {Zerfa{\ss}, Ansgar and Welker, Martin and Schmidt, Jan},
  pages     = {267--300},
  address   = {Köln},
  abstract  = {This article addresses challenges in maintaining and annotating
               image resources in the field of iconographic research. We focus
               on the task of bringing together generic and extensible techniques
               for resource and annotation management with the highly specific
               demands in this area of research. Special emphasis is put on the
               interrelation of images, image segments and textual contents.
               In addition, we describe the architecture, data model and user
               interface of the open annotation system used in the image database
               application that is a part of the eHumanities Desktop.},
  year      = {2008}
}

Alexander Mehler and Reinhard Köhler. 2007. Machine Learning in a Semiotic Perspective. Aspects of Automatic Text Analysis, 1–29.

BibTeX

@incollection{Mehler:Koehler:2007:b,
  author    = {Mehler, Alexander and Köhler, Reinhard},
  title     = {Machine Learning in a Semiotic Perspective},
  booktitle = {Aspects of Automatic Text Analysis},
  publisher = {Springer},
  editor    = {Mehler, Alexander and Köhler, Reinhard},
  series    = {Studies in Fuzziness and Soft Computing},
  pages     = {1--29},
  address   = {Berlin/New York},
  abstract  = {Gegenstand des folgenden Aufsatzes ist der konnotative Aspekt
               der Bedeutungen von Texten. Den Ausgangspunkt der {\"U}berlegungen
               zur Konnotation des Textes bildet die Auffassung, wonach Wort-
               und Textbedeutungskonstitution Ergebnis eines zirkul{\"a}ren Prozesses
               sind, der für die Emergenz einer Hierarchie ineinander geschachtelter
               Spracheinheiten verantwortlich zeichnet. Der Proze{\ss} der Zeichenartikulation
               erfolgt entlang dieser Ebenen und erzeugt durch Verbindung von
               (konnotativer) Inhalts- und Ausdrucksseite auf Textebene das Textzeichen.
               Im Gegensatz zu einer strikten Interpretation des Fregeschen Kompositionalit{\"a}tsprinzips,
               derzufolge die Bedeutungen sprachlicher Einheiten als fixierte,
               kontextfreie Grö{\ss}en vorauszusetzen sind, behandelt der vorliegende
               Ansatz bereits die lexikalische Bedeutung als Grö{\ss}e, die in
               Abh{\"a}ngigkeit von ihrem Kontext variieren kann. Aus semiotischer
               Perspektive ist es vor allem der Gestaltcharakter, welcher die
               konnotative Textbedeutung einer Anwendung des FregePrinzips entzieht.
               Anders ausgedrückt: Die konnotative Bedeutung eines Textes ist
               keineswegs in eine Struktur 'atomarer' Repr{\"a}sentationen zerlegbar.
               Die hierarchische Organisation von Texten erweist sich insofern
               als komplex, als ihre Bedeutungen aus einem zirkul{\"a}ren Proze{\ss}
               resultieren, der best{\"a}tigend und/oder ver{\"a}ndernd auf die
               Bedeutungen der Textkonstituenten einwirkt. Diese Zirkularit{\"a}t
               bedingt, da{\ss} Texte nicht nur als Orte der Manifestation von
               Wortbedeutungsstrukturen anzusehen sind, sondern zugleich als
               Ausgangspunkte für die Modifikation und Emergenz solcher Strukturen
               dienen. Im folgenden wird unter Rekurs auf den Kopenhagener Strukturalismus
               ein Modell der konnotativen Bedeutung von Texten entwickelt, das
               sich unter anderem an dem glossematischen Begriff der Konstante
               orientiert. Die Formalisierung des Modells erfolgt mit Hilfe des
               Konzeptes der unscharfen Menge. Zu diesem Zweck werden die unscharfen
               Verwendungsregularit{\"a}ten von Wörtern auf der Basis eines zweistufigen
               Verfahrens analysiert, welches die syntagmatischen und paradigmatischen
               Regularit{\"a}ten des Wortgebrauches berücksichtigt. Die Rolle
               der Satzebene innerhalb des Prozesses der konnotativen Textbedeutungskonstitution
               wird angedeutet. Abschlie{\ss}end erfolgt eine Exemplifizierung
               des Algorithmus anhand der automatischen Analyse eines Textcorpus.},
  website   = {http://rd.springer.com/chapter/10.1007/978-3-540-37522-7_1},
  year      = {2007}
}

Tim vor der Brück and Sven Hartrumpf. October, 2007. A Semantically Oriented Readability Checker for German. Proceedings of the 3rd Language & Technology Conference, 270–274.

BibTeX

@incollection{vor:der:Brueck:Hartrumpf:2007,
  author    = {vor der Brück, Tim and Hartrumpf, Sven},
  title     = {A Semantically Oriented Readability Checker for German},
  booktitle = {Proceedings of the 3rd Language \& Technology Conference},
  publisher = {Wydawnictwo Poznańskie},
  editor    = {Zygmunt Vetulani},
  pages     = {270--274},
  address   = {Poznań, Poland},
  abstract  = {One major reason that readability checkers are still far away
               from judging the understandability of texts consists in the fact
               that no semantic information is used. Syntactic, lexical, or morphological
               information can only give limited access for estimating the cognitive
               difficulties for a human being to comprehend a text. In this paper
               however, we present a readability checker which uses semantic
               information in addition. This information is represented as semantic
               networks and is derived by a deep syntactico-semantic analysis.
               We investigate in which situations a semantic readability indicator
               can lead to superior results in comparison with ordinary surface
               indicators like sentence length. Finally, we compute the correlations
               and absolute errors for our semantic indicators related to user
               ratings collected in an online evaluation.},
  isbn      = {978-83-7177-407-2},
  month     = oct,
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/brueck_hartrumpf07_online.pdf},
  url       = {http://pi7.fernuni-hagen.de/papers/brueck_hartrumpf07_online.pdf},
  year      = {2007}
}

Tim vor der Brück and Johannes Leveling. 2007. Parameter Learning for a Readability Checking Tool. Proceedings of the LWA 2007 (Lernen-Wissen-Adaption), Workshop KDML.

BibTeX

@incollection{vor:der:Brueck:Leveling:2007,
  author    = {vor der Brück, Tim and Leveling, Johannes},
  title     = {Parameter Learning for a Readability Checking Tool},
  editor    = {Hinneburg, Alexander},
  booktitle = {Proceedings of the LWA 2007 (Lernen-Wissen-Adaption), Workshop KDML},
  publisher = {Gesellschaft für Informatik},
  address   = {Halle/Saale, Germany},
  year      = {2007},
  abstract  = {This paper describes the application of machine learning methods
               to determine parameters for DeLite, a readability checking tool.
               DeLite pinpoints text segments that are difficult to understand
               and computes for a given text a global readability score, which
               is a weighted sum of normalized indicator values. Indicator values
               are numeric properties derived from linguistic units in the text,
               such as the distance between a verb and its complements or the
               number of possible antecedents for a pronoun. Indicators are normalized
               by means of a derivation of the Fermi function with two parameters.
               DeLite requires individual parameters for this normalization function
               and a weight for each indicator to compute the global readability
               score. Several experiments to determine these parameters were
               conducted, using different machine learning approaches. The training
               data consists of more than 300 user ratings of texts from the
               municipality domain. The weights for the indicators are learned
               using two approaches: i) robust regression with linear optimization
               and ii) an approximative iterative linear regression algorithm.
               For evaluation, the computed readability scores are compared to
               user ratings. The evaluation showed that iterative linear regression
               yields a smaller square error than robust regression although
               this method is only approximative. Both methods yield results
               outperforming a first manual setting, and for both methods, basically
               the same set of non-zero weights remain.},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.69.6079}
}

Alexander Mehler. 2007. Aspectos Metodológicos da Semiótica Computacional. Computação, Cognição e Semiose, 145–157.

BibTeX

@incollection{Mehler:2004:2007,
  author    = {Mehler, Alexander},
  title     = {Aspectos Metodológicos da Semiótica Computacional},
  booktitle = {Computação, Cognição e Semiose},
  publisher = {EDUFBA},
  editor    = {Queiroz, João and Gudwin, Ricardo and Loula, Angelo},
  pages     = {145--157},
  address   = {Federal University of Bahia},
  year      = {2007}
}

Alexander Mehler. 2007. Compositionality in Quantitative Semantics. A Theoretical Perspective on Text Mining. Aspects of Automatic Text Analysis, 139–167.

BibTeX

@incollection{Mehler:2007:b,
  author    = {Mehler, Alexander},
  title     = {Compositionality in Quantitative Semantics. A Theoretical Perspective
               on Text Mining},
  booktitle = {Aspects of Automatic Text Analysis},
  publisher = {Springer},
  editor    = {Mehler, Alexander and Köhler, Reinhard},
  series    = {Studies in Fuzziness and Soft Computing},
  pages     = {139--167},
  address   = {Berlin/New York},
  abstract  = {This chapter introduces a variant of the principle of compositionality
               in quantitative text semantics as an alternative to the bag-of-features
               approach. The variant includes effects of context-sensitive interpretation
               as well as processes of meaning constitution and change in the
               sense of usage-based semantics. Its starting point is a combination
               of semantic space modeling and text structure analysis. The principle
               is implemented by means of a hierarchical constraint satisfaction
               process which utilizes the notion of hierarchical text structure
               superimposed by graph-inducing coherence relations. The major
               contribution of the chapter is a conceptualization and formalization
               of the principle of compositionality in terms of semantic spaces
               which tackles some well known deficits of existing approaches.
               In particular this relates to the missing linguistic interpretability
               of statistical meaning representations.},
  website   = {http://www.springerlink.com/content/x214w527g42x0116/},
  year      = {2007}
}

Alexander Mehler. 2006. A Network Perspective on Intertextuality. Exact Methods in the Study of Language and Text, 437–446.

BibTeX

@incollection{Mehler:2006:d,
  author    = {Mehler, Alexander},
  title     = {A Network Perspective on Intertextuality},
  booktitle = {Exact Methods in the Study of Language and Text},
  publisher = {De Gruyter},
  editor    = {Grzybek, Peter and Köhler, Reinhard},
  series    = {Quantitative Linguistics},
  pages     = {437--446},
  address   = {Berlin/New York},
  year      = {2006}
}

Alexander Mehler and Rüdiger Gleim. 2006. The Net for the Graphs – Towards Webgenre Representation for Corpus Linguistic Studies. WaCky! Working Papers on the Web as Corpus, 191–224.

BibTeX

@incollection{Mehler:Gleim:2006:b,
  author    = {Mehler, Alexander and Gleim, Rüdiger},
  title     = {The Net for the Graphs -- Towards Webgenre Representation for Corpus
               Linguistic Studies},
  booktitle = {WaCky! Working Papers on the Web as Corpus},
  publisher = {Gedit},
  editor    = {Baroni, Marco and Bernardini, Silvia},
  pages     = {191--224},
  address   = {Bologna},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.510.4125},
  year      = {2006}
}

Alfred Kranstedt, Andy Lücking, Thies Pfeiffer, Hannes Rieser and Ipke Wachsmuth. 2006. Deictic Object Reference in Task-oriented Dialogue. Situated Communication, 155–207.

BibTeX

@incollection{Kranstedt:et:al:2006:b,
  author    = {Kranstedt, Alfred and Lücking, Andy and Pfeiffer, Thies and Rieser, Hannes
               and Wachsmuth, Ipke},
  title     = {Deictic Object Reference in Task-oriented Dialogue},
  editor    = {Rickheit, Gert and Wachsmuth, Ipke},
  booktitle = {Situated Communication},
  pages     = {155--207},
  publisher = {De Gruyter Mouton},
  address   = {Berlin},
  year      = {2006},
  keywords  = {own},
  abstract  = {This chapter presents an original approach towards a detailed
               understanding of the usage of pointing gestures accompanying referring
               expressions. This effort is undertaken in the context of human-machine
               interaction integrating empirical studies, theory of grammar and
               logics, and simulation techniques. In particular, we take steps
               to classify the role of pointing in deictic expressions and to
               model the focussed area of pointing gestures, the so-called pointing
               cone. This pointing cone serves as a central concept in a formal
               account of multi-modal integration at the linguistic speech-gesture
               interface as well as in a computational model of processing multi-modal
               deictic expressions.},
  website   = {http://pub.uni-bielefeld.de/publication/1894485}
}

BibTeX

@incollection{Kranstedt:et:al:2006:a,
  author    = {Kranstedt, Alfred and Lücking, Andy and Pfeiffer, Thies and Rieser, Hannes
               and Wachsmuth, Ipke},
  title     = {Deixis: How to Determine Demonstrated Objects Using a Pointing Cone},
  booktitle = {Gesture in Human-Computer Interaction and Simulation},
  publisher = {Springer},
  editor    = {Sylvie Gibet and Nicolas Courty and Jean-Francois Kamp},
  pages     = {300--311},
  address   = {Berlin},
  abstract  = {We present a collaborative approach towards a detailed understanding
               of the usage of pointing gestures accompanying referring expressions.
               This effort is undertaken in the context of human-machine interaction
               integrating empirical studies, theory of grammar and logics, and
               simulation techniques. In particular, we attempt to measure the
               precision of the focussed area of a pointing gesture, the so-called
               pointing cone. The pointing cone serves as a central concept in
               a formal account of multi-modal integration at the linguistic
               speech-gesture interface as well as in a computational model of
               processing multi-modal deictic expressions.},
  annote    = {6th International Gesture Workshop, Berder Island,
                   France, 2005, Revised Selected Papers},
  keywords  = {own},
  website   = {http://www.springerlink.com/content/712036hp5v2q8408/},
  year      = {2006}
}

Alexander Mehler. 2006. Stratified Constraint Satisfaction Networks in Synergetic Multi-Agent Simulations of Language Evolution. Artificial Cognition Systems, 140–174.

BibTeX

@incollection{Mehler:2006:e,
  author    = {Mehler, Alexander},
  title     = {Stratified Constraint Satisfaction Networks in Synergetic Multi-Agent
               Simulations of Language Evolution},
  booktitle = {Artificial Cognition Systems},
  publisher = {Idea Group Inc.},
  editor    = {Loula, Angelo and Gudwin, Ricardo and Queiroz, João},
  pages     = {140--174},
  address   = {Hershey},
  internal-note = {Ehedem = Mehler:2005:e},
  year      = {2006}
}

BibTeX

@incollection{Mehler:2005:b,
  author    = {Mehler, Alexander},
  title     = {Eigenschaften der textuellen Einheiten und Systeme / Properties
               of Textual Units and Systems},
  booktitle = {Quantitative Linguistik. Ein internationales Handbuch / Quantitative
               Linguistics. An International Handbook},
  publisher = {De Gruyter},
  editor    = {Köhler, Reinhard and Altmann, Gabriel and Piotrowski, Raijmund G.},
  pages     = {325--348},
  address   = {Berlin/New York},
  year      = {2005}
}

Alexander Mehler. 2004. Textmining. Texttechnologie. Perspektiven und Anwendungen, 329–352.

BibTeX

@incollection{Mehler:2004:h,
  author    = {Mehler, Alexander},
  title     = {Textmining},
  booktitle = {Texttechnologie. Perspektiven und Anwendungen},
  publisher = {Stauffenburg},
  editor    = {Lobin, Henning and Lemnitzer, Lothar},
  pages     = {329--352},
  address   = {Tübingen},
  year      = {2004}
}

Alexander Mehler and Henning Lobin. 2004. Aspekte der texttechnologischen Modellierung. Automatische Textanalyse: Systeme und Methoden zur Annotation und Analyse natürlichsprachlicher Texte, 1–21.

BibTeX

@incollection{Mehler:Lobin:2004:b,
  author    = {Mehler, Alexander and Lobin, Henning},
  title     = {Aspekte der texttechnologischen Modellierung},
  booktitle = {Automatische Textanalyse: Systeme und Methoden zur Annotation
               und Analyse natürlichsprachlicher Texte},
  publisher = {Verlag für Sozialwissenschaften},
  editor    = {Mehler, Alexander and Lobin, Henning},
  pages     = {1--21},
  address   = {Wiesbaden},
  year      = {2004}
}

BibTeX

@incollection{Mehler:2003:d,
  author    = {Mehler, Alexander},
  title     = {Textmodellierung: Mehrstufige Modellierung generischer Bausteine
               der Text{\"a}hnlichkeitsmessung},
  booktitle = {Automatische Textanalyse: Systeme und Methoden zur Annotation
               und Analyse natürlichsprachlicher Texte},
  publisher = {Verlag für Sozialwissenschaften},
  editor    = {Mehler, Alexander and Lobin, Henning},
  pages     = {101--120},
  address   = {Wiesbaden},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/AutomatischeTextanalyse2.jpg},
  year      = {2004}
}

BibTeX

@incollection{Mehler:2003,
  author    = {Mehler, Alexander},
  title     = {Konnotative Textbedeutungen: zur Modellierung struktureller Aspekte
               der Bedeutungen von Texten},
  booktitle = {Korpuslinguistische Untersuchungen zur quantitativen und systemtheoretischen
               Linguistik},
  publisher = {Gardez! Verlag},
  editor    = {Köhler, Reinhard},
  pages     = {320--347},
  address   = {Sankt Augustin},
  pdf       = {http://ubt.opus.hbz-nrw.de/volltexte/2004/279/pdf/10_mehler.pdf},
  year      = {2003}
}

Alexander Mehler. 2002. Textbedeutungsrekonstruktion. Grundzüge einer Architektur zur Modellierung der Bedeutungen von Texten. Prozesse der Bedeutungskonstruktion, 445–486.

BibTeX

@incollection{Mehler:2002:b,
  author    = {Mehler, Alexander},
  title     = {Textbedeutungsrekonstruktion. Grundzüge einer Architektur zur
               Modellierung der Bedeutungen von Texten},
  booktitle = {Prozesse der Bedeutungskonstruktion},
  publisher = {Peter Lang},
  editor    = {Pohl, Inge},
  pages     = {445--486},
  address   = {Frankfurt a. M.},
  year      = {2002}
}

Alexander Mehler and Rodney Clarke. 2001. Systemic Functional Hypertexts (SFHT): Modeling Contexts in Hypertexts. Organizational Semiotics. Evolving a Science of Information Systems, 153–170.

BibTeX

@incollection{Mehler:Clarke:2001,
  author    = {Mehler, Alexander and Clarke, Rodney},
  title     = {Systemic Functional Hypertexts (SFHT): Modeling Contexts in Hypertexts},
  booktitle = {Organizational Semiotics. Evolving a Science of Information Systems},
  publisher = {Kluwer},
  editor    = {Liu, Kecheng and Clarke, Rodney J. and Andersen, Peter B. and Stamper, Ronald K.},
  pages     = {153--170},
  address   = {Boston},
  abstract  = {IFIP TC8 / WG8.1 Working Conference on Organizational Semiotics.
               July 23-25, 2001, Montreal, Canada},
  website   = {http://link.springer.com/chapter/10.1007/978-0-387-35611-2_10},
  year      = {2001}
}

In Proceedings

BibTeX

@inproceedings{Schrottenbacher:et:al:2026:a,
  author    = {Schrottenbacher, Patrick and Mehler, Alexander and Bernhardt, Vivienne
               and Rohe, Leon and Abrami, Giuseppe},
  title     = {ReEmote: Towards Emotion Representation in {VR} Through {Va.Si.Li}-Lab},
  booktitle = {Proceedings of XR Salento 2026},
  year      = {2026},
  publisher = {Springer International Publishing},
  keywords  = {VR, XR, affective computing, virtual humans, emotion detection, FACES},
  abstract  = {Human social interactions are inherently multimodal, shaped not
               only by what speakers convey but also by cues such as facial expressions,
               posture, and gestures. Together, these channels shape both participants'
               perceptions and behaviors, further reinforcing conversational
               feedback loops. This multimodal system extends to VR, where avatars
               serve as proxies for human interaction, making both visual and
               auditory fidelity essential for engaging. To properly utilize
               the emotional expression space that virtual environments allow,
               we introduce ReEmote. ReEmote extends the capabilities of Va.Si.Li-Lab,
               a collaborative, multi-user VR platform built on Ubiq. While Va.Si.Li-Lab
               supports user emotional expression through facial and hand tracking,
               ReEmote extends this by introducing schema-based emotion mappings
               that affect both avatars and their environments. This fosters
               immersive, emotionally aware environments that are beneficial
               for human and chatbot agent interactions, where human users and
               virtual agents share an emotional expression space. By enabling
               richer emotional dynamics, ReEmote opens up new ways of designing
               affective and engaging virtual experiences. In this paper, we describe
               the design choices behind ReEmote and present an evaluation of
               the graphical validity of the emotion representation introduced
               by ReEmote. Our results indicate that emotions can be validly
               represented through avatar facial expressions that users can quickly
               identify as Ekman's basic emotions. This opens up several possibilities
               for extending emotion-related text-to-speech (TTS) applications
               in Extended Reality (XR) with ReEmote. The paper also outlines
               use cases for XR-based TTS applications.},
  note      = {accepted}
}

BibTeX

@inproceedings{Richer:et:al:2026,
  title     = {{TTLab} at {SemEval-2026} Task 10: Transformer-based Approaches for
               Psycholinguistic Conspiracy Detection in Social Media Discourse},
  author    = {Richer, Samuel and Marreddy, Mounika and Mehler, Alexander},
  booktitle = {Proceedings of the International Workshop on Semantic Evaluation (SemEval-2026)},
  year      = {2026},
  publisher = {Association for Computational Linguistics},
  note      = {accepted}
}

BibTeX

@inproceedings{Tratzsch:et:al2026,
  title     = {{SemEval-2026} Task 11: Reducing Content Effects Using Layered Activation Steering},
  author    = {Tratzsch, Noah and Al-Raian, Asmaa and Marreddy, Mounika and Mehler, Alexander},
  booktitle = {Proceedings of the International Workshop on Semantic Evaluation (SemEval-2026)},
  year      = {2026},
  publisher = {Association for Computational Linguistics},
  note      = {accepted}
}

BibTeX

@inproceedings{Missaoui:et:al:2026,
  title     = {{SemEval-2026} Task 3: Dimensional Aspect-Based Sentiment Analysis},
  author    = {Missaoui, Yahya and Kebede, Solomon and Marreddy, Mounika and Mehler, Alexander},
  booktitle = {Proceedings of the International Workshop on Semantic Evaluation (SemEval-2026)},
  year      = {2026},
  publisher = {Association for Computational Linguistics},
  note      = {accepted}
}

BibTeX

@inproceedings{weiss:et:al:2026,
  title     = {From Images to Topics: Evaluating Vision-Language Models for Topic
               Classification of Election Advertising},
  author    = {Weiss, Julia and Burger, Axel and Roßmann, Joss and Meurer, Jan Eric
               and Abusaleh, Ali},
  booktitle = {Proceedings of the 18th ACM Web Science Conference 2026},
  eventdate = {2026-05},
  location  = {Braunschweig, Germany},
  year      = {2026},
  keywords  = {Multimodal Large Language Models, Political communication, Privacy-aware AI, new-data-spaces, circlet},
  note      = {accepted}
}

BibTeX

@inproceedings{Hammerla:Mehler:2026:a,
  author    = {Hammerla, Leon and Mehler, Alexander},
  title     = {{Gutenberg+}: A More Temporally Faithful Corpus for Diachronic {NLP}},
  booktitle = {Proceedings Workshop on Structured Linguistic Data and Evaluation
               (SLiDE 2026), co-located with the Language Resources and Evaluation
               Conference (LREC 2026)},
  year      = {2026},
  address   = {Palma de Mallorca (Spain)},
  note      = {accepted},
  keywords  = {neglab}
}

BibTeX

@inproceedings{Abusaleh:et:al:2026:sarf,
  title     = {{TTLab} at {AraSentEval}: {SARF} (صرف) Sentiment Analysis via Root-based
               Fusion for Multi-Dialectal {Arabic}},
  author    = {Abusaleh, Ali and Verma, Bhuvanesh and Mehler, Alexander},
  booktitle = {Proceedings of the 7th Workshop on Open-Source Arabic Corpora
               and Processing Tools (OSACT7), co-located with the Language Resources
               and Evaluation Conference (LREC 2026)},
  eventdate = {2026-05},
  location  = {Palma, Mallorca, Spain},
  year      = {2026},
  keywords  = {NLP, Sentiment Analysis, Arabic analysis, new-data-spaces, circlet, satek},
  abstract  = {Arabic sentiment analysis is challenged by morphological complexity
               and lexical variation across Arabic dialects, compounded by subjectivity
               in how speakers and writers express sentiment. In this paper,
               we present our submission for the AraSentEval 2026 Shared Task
               on Arabic Dialect Sentiment Analysis. We propose SARF (صرف) a
               multi-view architectural framework that integrates surface-level
               context with stemmed and rooted morphological perspectives using
               a shared MARBERTv2 encoder. Our system employs a hybrid BERT-CNN-BiLSTM-Attention
               architecture to capture both local sentiment n-grams and global
               sequential dependencies. Experimental results show that while
               individual morphological normalization strategies (stemming or
               rooting) may degrade performance, their joint integration via
               cross-morphological attention provides robust features across
               diverse dialects. Our final system achieved a competitive macro-F1-score
               of 0.9263, ranking 2nd out of 15 participating teams.},
  note      = {accepted}
}

BibTeX

@inproceedings{Abusaleh:et:al:2026,
  author    = {Abusaleh, Ali and Hammerla, Leon and Mehler, Alexander},
  title     = {Learning to Detect Cross-Modal Negation: An Analysis of Latent
               Representations and an Attention-Based Solution},
  booktitle = {2026 8th International Conference on Natural Language Processing (ICNLP)},
  eventdate = {2026-03-20/2026-03-22},
  location  = {Xi'an, China},
  year      = {2026},
  keywords  = {Vision language model, Natural language processing, Cross-modal retrieval, negation detection, video analysis, Multimodal analysis, Political Communication, neglab, new-data-spaces, circlet},
  abstract  = {Detecting high-level semantic concepts like negation across modalities
               remains a challenge for current multimodal systems. We analyze
               this as a fundamental representation learning problem, providing
               the first evidence that negation does not form a linearly or non-linearly
               separable class in the latent spaces of standard vision-language
               models (VLMs). We demonstrate that pretrained embeddings primarily
               encode modality-specific features, lacking a generalizable negation
               signal. To overcome this, we propose a novel cross-modal attention
               architecture that explicitly models inter-modal dependencies,
               achieving performance gains of up to +7.03\% F1 over unimodal baselines.
               Our analysis reveals a key asymmetry: while textual negation often
               appears independently, visual negation is semantically dependent
               on linguistic context, a finding validated through our statistical
               analysis of 3,222 political video-text pairs automatically annotated
               via Qwen2.5-VL. By combining this analysis with self-supervised
               video representations (JEPA2), we advance the modeling of temporal
               negation. This work provides new methods and insights for learning
               robust, semantically-aligned representations in multimodal systems.},
  note      = {accepted}
}

BibTeX

@inproceedings{Schaaf:et:al:2026,
  author    = {Schaaf, Manuel and Bönisch, Kevin and Mehler, Alexander},
  title     = {{GhostWriter}: Hidden {AI}-Generated Texts over Multiple Languages,
               Domains and Generators},
  booktitle = {Proceedings of the Fifteenth Language Resources and Evaluation
               Conference (LREC 2026)},
  editor    = {Piperidis, Stelios and Bel, Núria and van den Heuvel, Henk and Ide, Nancy
               and Krek, Simon and Toral, Antonio},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Palma, Mallorca, Spain},
  month     = {May},
  year      = {2026},
  pages     = {10497--10516},
  doi       = {10.63317/57fd7juh5zek},
  keywords  = {Corpus, Natural Language Generation, Validation of LRs, AI-generated Text Detection, core, core_b05},
  abstract  = {The advent of Transformer-based Large Language Models (LLMs) has
               led to an unprecedented surge of AI-generated text (AIGT) across
               online platforms and academic domains. While these models exhibit
               near-human fluency and stylistic coherence, their widespread adoption
               has raised concerns about authorship integrity, research quality,
               and the recursive contamination of training corpora with synthetic
               data. These developments underscore the need for reliable AIGT
               detection methods and benchmark datasets, particularly for malicious
               or deceptive ghostwriting scenarios where AIGT is intentionally
               crafted to evade detection. To address this, we present GhostWriter,
               a large-scale, bilingual (German and English), multi-generator,
               and multi-domain dataset for AIGT detection. The dataset comprises
               human- and AI-authored texts produced under domain-specific ghostwriting
               conditions, including examples intentionally embedded within otherwise
               human-written texts to obscure their AI origin. With GhostWriter,
               we (i) aim to expand the resources available for German AIGT datasets,
               (ii) emphasize mixed or fused synthesizations—since most existing
               corpora are limited to the document level—and (iii) introduce
               specifically crafted malicious ghostwriting scenarios across multiple
               domains and generators.}
}

BibTeX

@inproceedings{Dahmann:et:al:2026,
  author    = {Dahmann, Thiemo and Schneider, Julian and Stephan, Philipp and Abrami, Giuseppe
               and Mehler, Alexander},
  title     = {Towards the Generation and Application of Dynamic Web-Based Visualization
               of {UIMA}-based Annotations for Big-Data Corpora with the Help of
               {Unified Dynamic Annotation Visualizer}},
  booktitle = {Proceedings of the Fifteenth Language Resources and Evaluation
               Conference (LREC 2026)},
  editor    = {Piperidis, Stelios and Bel, Núria and van den Heuvel, Henk and Ide, Nancy
               and Krek, Simon and Toral, Antonio},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Palma, Mallorca, Spain},
  year      = {2026},
  pages     = {6695--6705},
  doi       = {10.63317/5ce2aaity4yz},
  pdf       = {http://www.lrec-conf.org/proceedings/lrec2026/pdf/2026.lrec2026-1.533.pdf},
  video     = {https://www.youtube.com/watch?v=LFBiGlmEDog},
  keywords  = {NLP, UIMA, Annotations, dynamic visualization, uce},
  abstract  = {The automatic and manual annotation of unstructured corpora is
               a routine task in many scientific fields and is supported by a
               variety of existing software solutions. Despite this variety,
               few solutions currently support annotation visualization, especially
               for dynamic generation and interaction. To bridge this gap and
               visualize annotated corpora based on user-, project-, or corpus-specific
               aspects, we developed Unified Dynamic Annotation Visualizer (UDAV).
               UDAV is a web-based solution that implements features not supported
               by comparable tools, enabling a customizable and extensible toolbox
               for interacting with annotations and allowing integration into
               existing big-data frameworks. We exemplify UDAV through a range
               of visualizations and also provide an evaluation of corpus import
               and processing performance.}
}

BibTeX

@inproceedings{Verma:Mehler:2026,
  author    = {Verma, Bhuvanesh and Mehler, Alexander},
  title     = {Predicting Topic (Co-)Occurrence Using Topic Networks Built from
               the {Project Gutenberg} Corpus},
  booktitle = {Proceedings of the Fifteenth Language Resources and Evaluation
               Conference (LREC 2026)},
  editor    = {Piperidis, Stelios and Bel, Núria and van den Heuvel, Henk and Ide, Nancy
               and Krek, Simon and Toral, Antonio},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Palma, Mallorca, Spain},
  year      = {2026},
  pages     = {860--869},
  doi       = {10.63317/58x3h7gjbpb4},
  pdf       = {http://www.lrec-conf.org/proceedings/lrec2026/pdf/2026.lrec2026-1.65.pdf},
  keywords  = {Topic Evolution, Topic Network, Time-aware Networks, Temporal Autocorrelation, Project Gutenberg, satek},
  abstract  = {Although temporal topic modeling has been widely applied to scientific
               and legal texts, literary corpora have largely been overlooked
               in this regard. To address this issue, we analyze topic evolution
               in a subset of the Project Gutenberg (PG) corpus. We model this
               subset as a sequence of topic networks that capture the emergence,
               persistence, and interaction of thematic structures over decades.
               Using supervised topic representations, we predict nodes (topics)
               and edges (topic pairings) to forecast future topics and their
               co-occurrence. Our experiments demonstrate moderate to strong
               temporal persistence in topic connectivity patterns across three
               topic systems, with ROC-AUC and AP values consistently above 0.85.
               We find that the temporal span of topic networks significantly
               impacts predictive performance: longer spans improve the stability
               and recall of topic presence, while shorter spans better capture
               evolving topic relationships. Overall, our findings demonstrate
               the predictability of topics in literary texts over time.}
}

BibTeX

@inproceedings{Bisang:Mehler:2026,
  author    = {Bisang, Walter and Mehler, Alexander},
  title     = {Linguistic Features as Predictors of Students' Performance in
               Domain-Specific Critical Online Reasoning Tasks},
  booktitle = {International Test Commission Conference (ITC) 2026},
  eventdate = {2026-06-30/2026-07-03},
  location  = {Auckland, New Zealand},
  year      = {2026},
  keywords  = {core,core_b05},
  note      = {accepted}
}

BibTeX

@inproceedings{Verma:et:al:2026,
  author    = {Verma, Bhuvanesh and Marreddy, Mounika and Mehler, Alexander},
  title     = {Predicting Convincingness in Political Speech: How Emotional Tone
               Shapes Persuasive Strength},
  booktitle = {Proceedings of the 15th Workshop on Computational Approaches to
               Subjectivity, Sentiment, \& Social Media Analysis},
  year      = {2026},
  keywords  = {Argument Detection, Argument Quality Assessment, Topic Modelling, Persuasiveness, Convincingness, Emotion Analysis, Argument Mining, satek},
  abstract  = {Emotional tone plays a central role in persuasion, yet its impact
               on computational assessments of political argument quality in
               real world election campaign speeches remains understudied. In
               this work, we investigate whether positive emotional framing correlates
               with higher perceived convincingness in political arguments. We
               fine-tune language models on argument quality datasets and test
               their ability to transfer convincingness predictions to real-world
               campaign speeches. Using a corpus of U.S. presidential campaign
               speeches, we analyze emotional polarity in relation to predicted
               persuasive strength to test whether positively framed arguments
               are judged more convincing than neutral or negative ones. Our
               empirical analysis shows that political parties rely heavily on
               argumentation during their election campaigns. Also, we found
               the evidence that politicians strategically employ emotional cues
               within their arguments during these campaign speeches, with positive
               emotions being more strongly associated with persuasive strength,
               for example in topics such as USMCA’s Effect on American Jobs
               and Agriculture, Border Control Policies, Progressive Tax Reforms.
               At the same time, we find that negative emotions have a weaker
               yet still non-negligible influence on voter persuasion in topics
               such as City Crime and Civil Unrest and White Supremacist Violence
               (Charlottesville Incident).},
  note      = {accepted}
}

BibTeX

@inproceedings{Cong:et:al:2026a,
  author    = {Cong, Longwei and Hammerla, Leon and Hahn, Sonja and Gombert, Sebastian
               and Drachsler, Hendrik and Kr{\"o}hne, Ulf},
  title     = {Automatic Short Answer Grading with {LLMs}: From Memorization to Reasoning},
  booktitle = {Proceedings of the 16th International Learning Analytics \& Knowledge
               Conference (LAK26)},
  series    = {LAK26},
  location  = {Bergen, Norway},
  year      = {2026},
  pubstate  = {forthcoming},
  note      = {accepted},
  abstract  = {Short-answer questions provide valuable insights into students’
               understanding and cognitive processes for learning analytics.
               However, they are difficult to grade automatically as they require
               a high level of language comprehension. Automatic Short Answer
               Grading (ASAG) is therefore essential in large-scale educational
               settings. Recent work has applied encode-only pre-trained language
               models (PLMs), such as BERT, and generative large language models
               (LLMs) to ASAG. Although fine-tuned BERT-based models currently
               produce state-of-the-art results, they depend on substantial annotated
               datasets, which are frequently expensive and insufficient. This
               paper examines the performance of fine-tuning of several PLMs
               and LLMs for different dataset sizes and compares the results
               to those of prompt-based approaches. General-purpose and domain-specific
               models were fine-tuned on datasets ranging from 800 to 26,674
               student responses. Different prompt engineering strategies were
               tested including rubric-based prompts. Our results demonstrate
               that fine-tuned LLMs and rubric-based prompting can match or exceed
               the performance of BERT-based models. Rubric-based prompts with
               open-source model deliver comparable results without the need
               for annotation data or hardware-intensive training, while also
               mitigating data protection concerns. This work provides empirical
               evidence of the role of LLMs in ASAG and paves the way for future
               research into resource-efficient, interpretable and reasoning-driven
               grading.}
}

BibTeX

@inproceedings{Luecking:Hammerla:Mehler:2026,
  title     = {Not every quantifier can be negated},
  author    = {Lücking, Andy and Hammerla, Leon and Mehler, Alexander},
  year      = {2026},
  booktitle = {Proceedings of \textit{Sinn und Bedeutung}, Special Session ``Philosophical
               and Linguistic Approaches to Negation (PhilLingNeg)''},
  series    = {SuB'30},
  location  = {Frankfurt am Main},
  keywords  = {neglab},
  pubstate  = {forthcoming},
  note      = {accepted}
}

BibTeX

@inproceedings{rahim2025generative,
  author    = {Rahim, Mehdi and Abusaleh, Ali},
  title     = {Generative {AI} on {CGM}: Towards a Foundation Model for Glucose Prediction,
               Root Cause Analysis and Anomaly Detection},
  booktitle = {Diabetes Technology \& Therapeutics},
  volume    = {27},
  pages     = {E144},
  year      = {2025},
  publisher = {Mary Ann Liebert, Inc.},
  address   = {New Rochelle, NY, USA}
}

BibTeX

@inproceedings{Hammerla:et:al:2025b,
  author    = {Hammerla, Leon and Lücking, Andy and Reinert, Carolin and Mehler, Alexander},
  title     = {{D}-Neg: Syntax-Aware Graph Reasoning for Negation Detection},
  booktitle = {Proceedings of the 14th International Joint Conference on Natural
               Language Processing and the 4th Conference of the Asia-Pacific
               Chapter of the Association for Computational Linguistics},
  editor    = {Inui, Kentaro and Sakti, Sakriani and Wang, Haofen and Wong, Derek F.
               and Bhattacharyya, Pushpak and Banerjee, Biplab and Ekbal, Asif and Chakraborty, Tanmoy
               and Singh, Dhirendra Pratap},
  publisher = {The Asian Federation of Natural Language Processing and The Association for Computational Linguistics},
  address   = {Mumbai, India},
  month     = {dec},
  year      = {2025},
  pages     = {1432--1454},
  isbn      = {979-8-89176-303-6},
  url       = {https://aclanthology.org/2025.findings-ijcnlp.89/},
  keywords  = {neglab},
  abstract  = {Despite the communicative importance of negation, its detection
               remains challenging. Previous approaches perform poorly in out-of-domain
               scenarios, and progress outside of English has been slow due to
               a lack of resources and robust models. To address this gap, we
               present D-Neg: a syntax-aware graph reasoning model based on a
               transformer that incorporates syntactic embeddings by attention-gating.
               D-Neg uses graph attention to represent syntactic structures,
               emulating the effectiveness of rule-based dependency approaches
               for negation detection. We train D-Neg using 7 English resources
               and their translations into 10 languages, all aligned at the annotation
               level. We conduct an evaluation of all these datasets in in-domain
               and out-of-domain settings. Our work represents a significant
               advance in negation detection, enabling more effective cross-lingual
               research.}
}

BibTeX

@inproceedings{Hammerla:et:al:2025a,
  author    = {Hammerla, Leon and Mehler, Alexander and Abrami, Giuseppe},
  title     = {Standardizing Heterogeneous Corpora with {DUUR}: A Dual Data-
               and Process-Oriented Approach to Enhancing {NLP} Pipeline Integration},
  editor    = {Inui, Kentaro and Sakti, Sakriani and Wang, Haofen and Wong, Derek F.
               and Bhattacharyya, Pushpak and Banerjee, Biplab and Ekbal, Asif and Chakraborty, Tanmoy
               and Singh, Dhirendra Pratap},
  booktitle = {Proceedings of the 14th International Joint Conference on Natural
               Language Processing and the 4th Conference of the Asia-Pacific
               Chapter of the Association for Computational Linguistics},
  month     = {dec},
  year      = {2025},
  address   = {Mumbai, India},
  publisher = {The Asian Federation of Natural Language Processing and The Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-ijcnlp.87/},
  pages     = {1410--1425},
  isbn      = {979-8-89176-303-6},
  abstract  = {Despite their success, LLMs are too computationally expensive
               to replace task- or domain-specific NLP systems. However, the
               variety of corpus formats makes reusing these systems difficult.
               This underscores the importance of maintaining an interoperable
               NLP landscape. We address this challenge by pursuing two objectives:
               standardizing corpus formats and enabling massively parallel corpus
               processing. We present a unified conversion framework embedded
               in a massively parallel, microservice-based, programming language-independent
               NLP architecture designed for modularity and extensibility. It
               allows for the integration of external NLP conversion tools and
               supports the addition of new components that meet basic compatibility
               requirements. To evaluate our dual data- and process-oriented
               approach to standardization, we (1) benchmark its efficiency in
               terms of processing speed and memory usage, (2) demonstrate the
               benefits of standardized corpus formats for NLP downstream tasks,
               and (3) illustrate the advantages of incorporating custom formats
               into a corpus format ecosystem.},
  keywords  = {neglab,duui}
}

BibTeX

@inproceedings{Hahn:et:al:2025,
  author    = {Hahn, Sonja and Hammerla, Leon and Hankeln, Corinna and Groß, Sebastian
               and Röpers, Christina and Kröhne, Ulf},
  title     = {Constructed Responses beyond {NLP} – Auswertungsansätze für graphische Antworten},
  booktitle = {Proceedings of 12. Jahrestagung der Gesellschaft für empirische
               Bildungsforschung (GEBF 2025)},
  location  = {Mannheim, Deutschland},
  year      = {2025}
}

BibTeX

@inproceedings{Larsson:et:al:2025-spa-qna,
  author    = {Larsson, Staffan and Ginzburg, Jonathan and Cooper, Robin and Lücking, Andy},
  title     = {Finding Answers to Questions: {Bridging} between Type-based and
               Computational Neuroscience Approaches},
  booktitle = {Proceedings of the 16th International Conference on Computational Semantics},
  series    = {IWCS},
  editor    = {Evang, Kilian and Kallmeyer, Laura and Pogodalla, Sylvain},
  location  = {Düsseldorf, Germany},
  publisher = {Association for Computational Linguistics},
  year      = {2025},
  pages     = {128--136},
  url       = {https://preview.aclanthology.org/iwcs-25-ingestion/2025.iwcs-1.12/}
}

BibTeX

@inproceedings{Christof:et:al:2025,
  author    = {Christof, Roman and Zeidi, Farnaz and Messelhäußer, Manuela and Mentzer, Dirk
               and Koenig, Renate and Childs, Liam and Mehler, Alexander},
  title     = {{M}ed{L}ink{DE} {--} {M}ed{DRA} Entity Linking for {G}erman with
               Guided Chain of Thought Reasoning},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural
               Language Processing},
  editor    = {Christodoulopoulos, Christos and Chakraborty, Tanmoy and Rose, Carolyn
               and Peng, Violet},
  publisher = {Association for Computational Linguistics},
  address   = {Suzhou, China},
  month     = {nov},
  year      = {2025},
  pages     = {31569--31581},
  isbn      = {979-8-89176-332-6},
  doi       = {10.18653/v1/2025.emnlp-main.1609},
  url       = {https://aclanthology.org/2025.emnlp-main.1609/},
  pdf       = {https://aclanthology.org/2025.emnlp-main.1609.pdf},
  abstract  = {In pharmacovigilance, effective automation of medical data structuring,
               especially linking entities to standardized terminologies such
               as MedDRA, is critical. This challenge is rarely addressed for
               German data. With MedLinkDE we address German MedDRA entity linking
               for adverse drug reactions in a two-step approach: (1) retrieval
               of medical terms with fine-tuned embedding models, followed (2)
               by guided chain-of-thought re-ranking using LLMs. To this end,
               we introduce RENOde, a German real-world MedDRA dataset consisting
               of reportings from patients and healthcare professionals. To overcome
               the challenges posed by the linguistic diversity of these reports,
               we generate synthetic data mapping the two reporting styles of
               patients and healthcare professionals. Our embedding models, fine-tuned
               on these synthetic, quasi-personalized datasets, show competitive
               performance with real datasets in terms of accuracy at high top-
               recall, providing a robust basis for re-ranking. Our subsequent
               guided Chain of Thought (CoT) re-ranking, informed by MedDRA coding
               guidelines, improves entity linking accuracy by approximately
               15{\%} (Acc@1) compared to embedding-only strategies. In this
               way, our approach demonstrates the feasibility of entity linking
               in medical reports under the constraints of data scarcity by relying
               on synthetic data reflecting different informant roles of reporting
               persons.}
}

BibTeX

@inproceedings{Luecking:Voll:Rott:Henlein:Mehler:2025-fraga,
  author    = {Lücking, Andy and Voll, Felix and Rott, Daniel and Henlein, Alexander
               and Mehler, Alexander},
  title     = {Head and Hand Movements During Turn Transitions: Data-Based Multimodal
               Analysis Using the {Frankfurt VR Gesture--Speech Alignment Corpus}
               ({FraGA})},
  booktitle = {Proceedings of the 29th Workshop on The Semantics and Pragmatics
               of Dialogue -- Full Papers},
  series    = {SemDial'25 -- Bialogue},
  publisher = {SEMDIAL},
  year      = {2025},
  pages     = {146--156},
  url       = {http://semdial.org/anthology/Z25-Luecking_semdial_3316.pdf},
  keywords  = {gemdis}
}

BibTeX

@inproceedings{Abrami:et:al:2025:c,
  author    = {Abrami, Giuseppe and Bundan, Daniel and Manolis, Chrisowaladis
               and Mehler, Alexander},
  title     = {{VR-ParlExplorer}: A Hypertext System for the Collaborative Interaction
               in Parliamentary Debate Spaces},
  booktitle = {Proceedings of the 36th ACM Conference on Hypertext and Social Media},
  series    = {HT '25},
  location  = {Chicago, USA},
  year      = {2025},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  pages     = {177--183},
  numpages  = {7},
  isbn      = {9798400715341},
  doi       = {10.1145/3720553.3746672},
  url       = {https://doi.org/10.1145/3720553.3746672},
  pdf       = {https://dl.acm.org/doi/pdf/10.1145/3720553.3746672},
  abstract  = {The enhanced visualization and interaction with information in
               collaborative VR environments enabled by chatbots is currently
               rather limited. To fill this gap and create a concrete application
               that combines spatial and virtual concepts of hypertext systems
               based on the use of LLMs, we present VR-ParlExplorer as a system
               for virtualizing plenary debates that allows users to interact
               with virtual members of parliament through chatbots. VR-ParlExplorer
               is implemented as a Plugin for Va.Si.Li-Lab to enable immersion
               in the dynamics of communication in parliamentary debates. The
               paper describes the functionality of VR-ParlExplorer and discusses
               specifics of the use case it addresses.}
}

BibTeX

@inproceedings{Bundan:Abrami:Mehler:2025,
  author    = {Bundan, Daniel and Abrami, Giuseppe and Mehler, Alexander},
  title     = {Multimodal Docker Unified {UIMA} Interface: New Horizons for Distributed
               Microservice-Oriented Processing of Corpora using {UIMA}},
  editor    = {Wartena, Christian and Heid, Ulrich},
  booktitle = {Proceedings of the 21st Conference on Natural Language Processing
               (KONVENS 2025): Long and Short Papers},
  series    = {KONVENS '25},
  location  = {Hildesheim, Germany},
  publisher = {HsH Applied Academics},
  address   = {Hannover, Germany},
  year      = {2025},
  pages     = {257--268},
  url       = {https://aclanthology.org/2025.konvens-1.22/},
  pdf       = {https://aclanthology.org/2025.konvens-1.22.pdf},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2025/09/Poster_Multimodal_DUUI_KONVENS_2025.pdf},
  keywords  = {duui,neglab,new-data-spaces,circlet}
}

BibTeX

@inproceedings{marreddy2025iclr,
  author    = {Oota, Subba Reddy and Pahwa, Khushbu and Marreddy, Mounika and Singh, Maneesh
               and Gupta, Manish and Raju, Bapi S.},
  title     = {Multi-modal brain encoding models for multi-modal stimuli},
  booktitle = {Proceedings of the International Conference on Learning Representations (ICLR)},
  year      = {2025}
}

Mounika Marreddy, Subba Reddy Oota and Manish Gupta. 2025. Large language models are human-like annotators. European Conference on Information Retrieval, 291–299.

BibTeX

@inproceedings{marreddy:et:al:2025-ecir,
  author    = {Marreddy, Mounika and Oota, Subba Reddy and Gupta, Manish},
  title     = {Large language models are human-like annotators},
  booktitle = {European Conference on Information Retrieval},
  pages     = {291--299},
  year      = {2025},
  publisher = {Springer}
}

BibTeX

@inproceedings{Momen:Schaaf:Mehler:2025,
  author    = {Momen, Omar and Schaaf, Manuel and Mehler, Alexander},
  title     = {Filling the Temporal Void: Recovering Missing Publication Years
               in the Project Gutenberg Corpus Using {LLM}s},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2025},
  editor    = {Che, Wanxiang and Nabende, Joyce and Shutova, Ekaterina and Pilehvar, Mohammad Taher},
  publisher = {Association for Computational Linguistics},
  address   = {Vienna, Austria},
  month     = {jul},
  year      = {2025},
  pages     = {17318--17334},
  isbn      = {979-8-89176-256-5},
  url       = {https://aclanthology.org/2025.findings-acl.890/},
  pdf       = {https://aclanthology.org/2025.findings-acl.890.pdf},
  abstract  = {Analysing texts spanning long periods of time is critical for
               researchers in historical linguistics and related disciplines.
               However, publicly available corpora suitable for such analyses
               are scarce. The Project Gutenberg (PG) corpus presents a significant
               yet underutilized opportunity in this context, due to the absence
               of accurate temporal metadata. We take advantage of language models
               and information retrieval to explore four sources of information
               {--} Open Web, Wikipedia, Open Library API, and PG books texts
               {--} to add missing temporal metadata to the PG corpus. Through
               20 experiments employing state-of-the-art Large Language Models
               (LLMs) and Retrieval-Augmented Generation (RAG) methods, we estimate
               the production years of all PG books. We curate an enriched metadata
               repository for the PG corpus and propose a refined version for
               it, which includes 53,774 books with a total of 3.8 billion tokens
               in 11 languages, produced between 1600 and 2000. This work provides
               a new resource for computational linguistics and humanities studies
               focusing on diachronic analyses. The final dataset and all experiments
               data are publicly available (https://github.com/OmarMomen14/pg-dates).}
}

Andy Lücking and Jonathan Ginzburg. 2025. Postmodern Quantification with Stuff. Proceedings of Sinn und Bedeutung, 29:917–934.

BibTeX

@inproceedings{Luecking:Ginzburg:2025-mass-nouns,
  author    = {Lücking, Andy and Ginzburg, Jonathan},
  title     = {Postmodern Quantification with Stuff},
  booktitle = {Proceedings of \textit{Sinn und Bedeutung}},
  volume    = {29},
  series    = {SuB'29},
  editor    = {Longo, Federica and Panizza, Daniele},
  location  = {Noto (Syracuse), Italy},
  year      = {2025},
  pages     = {917--934},
  doi       = {10.18148/sub/2024.v29.1254},
  url       = {https://doi.org/10.18148/sub/2024.v29.1254},
  pdf       = {https://ojs.ub.uni-konstanz.de/sub/index.php/sub/article/view/1254/1207}
}

BibTeX

@inproceedings{Boenisch:et:al:2025,
  author    = {B{\"o}nisch, Kevin and Abrami, Giuseppe and Mehler, Alexander},
  title     = {Towards Unified, Dynamic and Annotation-based Visualisations and
               Exploration of Annotated Big Data Corpora with the Help of Unified
               Corpus Explorer},
  booktitle = {Proceedings of the 2025 Conference of the Nations of the Americas
               Chapter of the Association for Computational Linguistics: Human
               Language Technologies (System Demonstrations)},
  editor    = {Dziri, Nouha and Ren, Sean (Xiang) and Diao, Shizhe},
  publisher = {Association for Computational Linguistics},
  address   = {Albuquerque, New Mexico},
  year      = {2025},
  pages     = {522--534},
  isbn      = {979-8-89176-191-9},
  url       = {https://aclanthology.org/2025.naacl-demo.42/},
  pdf       = {https://aclanthology.org/2025.naacl-demo.42.pdf},
  keywords  = {uce,new-data-spaces,circlet,core,core_c08},
  note      = {Best Demo Award},
  abstract  = {The annotation and exploration of large text corpora, both automatic
               and manual, presents significant challenges across multiple disciplines,
               including linguistics, digital humanities, biology, and legal
               science. These challenges are exacerbated by the heterogeneity
               of processing methods, which complicates corpus visualization,
               interaction, and integration. To address these issues, we introduce
               the Unified Corpus Explorer (UCE), a standardized, dockerized,
               open-source and dynamic Natural Language Processing (NLP) application
               designed for flexible and scalable corpus navigation. Herein,
               UCE utilizes the UIMA format for NLP annotations as a standardized
               input, constructing interfaces and features around those annotations
               while dynamically adapting to the corpora and their extracted
               annotations. We evaluate UCE based on a user study and demonstrate
               its versatility as a corpus explorer based on generative AI.}
}

BibTeX

@inproceedings{Abrami:et:al:2025:b,
  author    = {Abrami, Giuseppe and Baumartz, Daniel and Mehler, Alexander},
  title     = {{DUUI}: A Toolbox for the Construction of a new Kind of Natural
               Language Processing},
  year      = {2025},
  booktitle = {Proceedings of the DHd 2025: Under Construction. Geisteswissenschaften
               und Data Humanities},
  numpages  = {3},
  location  = {Bielefeld, Germany},
  series    = {DHd 2025},
  publisher = {Zenodo},
  keywords  = {duui,core,core_c08},
  pages     = {446--448},
  doi       = {10.5281/zenodo.14943128},
  url       = {https://doi.org/10.5281/zenodo.14943128},
  poster    = {https://zenodo.org/records/14944575}
}

BibTeX

@inproceedings{Kroehne:et:al:2024,
  author    = {Kröhne, Ulf and Hammerla, Leon and Hankeln, Corinna and Müller, Marc
               and Hahn, Sonja},
  title     = {How much training data are required? Automatic scoring using prompting
               compared to text classification tasks as fine-tuning large-language
               models},
  booktitle = {Proceedings of 53. Kongress der Deutschen Gesellschaft für Psychologie
               / 15. ÖGP Conference},
  location  = {Wien, Österreich},
  year      = {2024}
}

BibTeX

@inproceedings{Raithel:et:al:2024,
  author    = {Raithel, Lisa and Thomas, Philippe and Verma, Bhuvanesh and Roller, Roland
               and Yeh, Hui-Syuan and Yada, Shuntaro and Grouin, Cyril and Wakamiya, Shoko
               and Aramaki, Eiji and M{\"o}ller, Sebastian and Zweigenbaum, Pierre},
  editor    = {Xu, Dongfang and Gonzalez-Hernandez, Graciela},
  title     = {Overview of {\#}{SMM}4{H} 2024 {--} Task 2: Cross-Lingual Few-Shot
               Relation Extraction for Pharmacovigilance in {F}rench, {G}erman,
               and {J}apanese},
  booktitle = {Proceedings of The 9th Social Media Mining for Health Research
               and Applications (SMM4H 2024) Workshop and Shared Tasks},
  publisher = {Association for Computational Linguistics},
  address   = {Bangkok, Thailand},
  year      = {2024},
  month     = {aug},
  pages     = {170--182},
  url       = {https://aclanthology.org/2024.smm4h-1.39/},
  abstract  = {This paper provides an overview of Task 2 from the Social Media
               Mining for Health 2024 shared task ({\#}SMM4H 2024), which focused
               on Named Entity Recognition (NER, Subtask 2a) and the joint task
               of NER and Relation Extraction (RE, Subtask 2b) for detecting
               adverse drug reactions (ADRs) in German, Japanese, and French
               texts written by patients. Participants were challenged with a
               few-shot learning scenario, necessitating models that can effectively
               generalize from limited annotated examples. Despite the diverse
               strategies employed by the participants, the overall performance
               across submissions from three teams highlighted significant challenges.
               The results underscored the complexity of extracting entities
               and relations in multi-lingual contexts, especially from the noisy
               and informal nature of user-generated content. Further research
               is required to develop robust systems capable of accurately identifying
               and associating ADR-related information in low-resource and multilingual
               settings.}
}

BibTeX

@inproceedings{Henlein:Luecking:Mehler:2024,
  title     = {Virtually Restricting Modalities in Interactions: {Va.Si.Li-Lab}
               for Experimental Multimodal Research},
  author    = {Henlein, Alexander and L{\"u}cking, Andy and Mehler, Alexander},
  booktitle = {Proceedings of the 2nd International Symposium on Multimodal Communication
               (MMSYM 2024), Frankfurt, 25--27 September 2024},
  pages     = {96--97},
  year      = {2024},
  pdf       = {http://mmsym.org/wp-content/uploads/2024/09/BookOfAbstractsMMSYM2024-3.pdf},
  keywords  = {gemdis}
}

BibTeX

@inproceedings{Luecking:Mehler:Henlein:2024,
  title     = {The Gesture--Prosody Link in Multimodal Grammar},
  author    = {L{\"u}cking, Andy and Mehler, Alexander and Henlein, Alexander},
  booktitle = {Proceedings of the 2nd International Symposium on Multimodal Communication
               (MMSYM 2024), Frankfurt, 25--27 September 2024},
  pages     = {128--129},
  year      = {2024},
  pdf       = {http://mmsym.org/wp-content/uploads/2024/09/BookOfAbstractsMMSYM2024-3.pdf},
  keywords  = {gemdis}
}

Jonathan Ginzburg, Chris Eliasmith and Andy Lücking. 2024. Swann's name: Towards a Dialogical Brain Semantics. Proceedings of the 28th Workshop on The Semantics and Pragmatics of Dialogue.

BibTeX

@inproceedings{Ginzburg:Eliasmith:Luecking:2024-swann,
  author    = {Ginzburg, Jonathan and Eliasmith, Chris and Lücking, Andy},
  title     = {Swann's name: {Towards} a Dialogical Brain Semantics},
  booktitle = {Proceedings of the 28th Workshop on The Semantics and Pragmatics of Dialogue},
  series    = {SemDial'24 -- TrentoLogue},
  location  = {Università di Trento, Palazzo Piomarta, Rovereto},
  year      = {2024},
  pdf       = {http://semdial.org/anthology/Z24-Ginzburg_semdial_0007.pdf},
  url       = {https://www.semdial.org/anthology/papers/Z/Z24/Z24-3007/}
}

BibTeX

@inproceedings{Luecking:Mehler:Henlein:2024-classifier,
  author    = {Lücking, Andy and Mehler, Alexander and Henlein, Alexander},
  title     = {The Linguistic Interpretation of Non-emblematic Gestures Must
               be agreed in Dialogue: Combining Perceptual Classifiers and Grounding/Clarification
               Mechanisms},
  booktitle = {Proceedings of the 28th Workshop on The Semantics and Pragmatics of Dialogue},
  series    = {SemDial'24 -- TrentoLogue},
  location  = {Università di Trento, Palazzo Piomarta, Rovereto},
  year      = {2024},
  keywords  = {gemdis},
  pdf       = {http://semdial.org/anthology/Z24-Lucking_semdial_0031.pdf},
  url       = {https://www.semdial.org/anthology/papers/Z/Z24/Z24-4031/}
}

BibTeX

@inproceedings{Boenisch:Mehler:2024,
  title     = {Finding Needles in Emb(a)dding Haystacks: Legal Document Retrieval
               via Bagging and SVR Ensembles},
  author    = {B{\"o}nisch, Kevin and Mehler, Alexander},
  year      = {2024},
  booktitle = {Proceedings of the 2nd Legal Information Retrieval meets Artificial
               Intelligence Workshop LIRAI 2024},
  location  = {Poznan, Poland},
  publisher = {CEUR-WS.org},
  address   = {Aachen, Germany},
  series    = {CEUR Workshop Proceedings},
  abstract  = {We introduce a retrieval approach leveraging Support Vector Regression
               (SVR) ensembles, bootstrap aggregation (bagging), and embedding
               spaces on the German Dataset for Legal Information Retrieval (GerDaLIR).
               By conceptualizing the retrieval task in terms of multiple binary
               needle-in-a-haystack subtasks, we show improved recall over the
               baselines (0.849 > 0.803 | 0.829) using our voting ensemble, suggesting
               promising initial results, without training or fine-tuning any
               deep learning models. Our approach holds potential for further
               enhancement, particularly through refining the encoding models
               and optimizing hyperparameters.},
  archiveprefix = {arXiv},
  eprint    = {2501.05018},
  url       = {https://arxiv.org/pdf/2501.05018},
  keywords  = {legal information retrieval, support vector regression, word embeddings, bagging ensemble}
}

BibTeX

@inproceedings{Boenisch:et:al:2024,
  author    = {B{\"o}nisch, Kevin and Stoeckel, Manuel and Mehler, Alexander},
  title     = {{HyperCausal}: Visualizing Causal Inference in {3D} Hypertext},
  year      = {2024},
  isbn      = {9798400705953},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3648188.3677049},
  doi       = {10.1145/3648188.3677049},
  abstract  = {We present HyperCausal, a 3D hypertext visualization framework
               for exploring causal inference in generative Large Language Models
               (LLMs). HyperCausal maps the generative processes of LLMs into
               spatial hypertexts, where tokens are represented as nodes connected
               by probability-weighted edges. The edges are weighted by the prediction
               scores of next tokens, depending on the underlying language model.
               HyperCausal facilitates navigation through the causal space of
               the underlying LLM, allowing users to explore predicted word sequences
               and their branching. Through comparative analysis of LLM parameters
               such as token probabilities and search algorithms, HyperCausal
               provides insight into model behavior and performance. Implemented
               using the Hugging Face transformers library and Three.js, HyperCausal
               ensures cross-platform accessibility to advance research in natural
               language processing using concepts from hypertext research. We
               demonstrate several use cases of HyperCausal and highlight the
               potential for detecting hallucinations generated by LLMs using
               this framework. The connection with hypertext research arises
               from the fact that HyperCausal relies on user interaction to unfold
               graphs with hierarchically appearing branching alternatives in
               3D space. This approach refers to spatial hypertexts and early
               concepts of hierarchical hypertext structures. A third connection
               concerns hypertext fiction, since the branching alternatives mediated
               by HyperCausal manifest non-linearly organized reading threads
               along artificially generated texts that the user decides to follow
               optionally depending on the reading context.},
  booktitle = {Proceedings of the 35th ACM Conference on Hypertext and Social Media},
  pages     = {330--336},
  numpages  = {7},
  keywords  = {3D hypertext, large language models, visualization},
  location  = {Poznan, Poland},
  series    = {HT '24},
  video     = {https://www.youtube.com/watch?v=ANHFTupnKhI}
}

BibTeX

@inproceedings{Baumartz:et:al:2024,
  author    = {Baumartz, Daniel and Konca, Maxim and Mehler, Alexander and Schrottenbacher, Patrick
               and Braunheim, Dominik},
  title     = {Measuring Group Creativity of Dialogic Interaction Systems by
               Means of Remote Entailment Analysis},
  year      = {2024},
  isbn      = {9798400705953},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3648188.3675140},
  doi       = {10.1145/3648188.3675140},
  abstract  = {We present a procedure for assessing group creativity that allows
               us to compare the contributions of human interlocutors and chatbots
               based on generative AI such as ChatGPT. We focus on everyday creativity
               in terms of dialogic communication and test four hypotheses about
               the difference between human and artificial communication. Our
               procedure is based on a test that requires interlocutors to cooperatively
               interpret a sequence of sentences for which we control for coherence
               gaps with reference to the notion of entailment. Using NLP methods,
               we automatically evaluate the spoken or written contributions
               of interlocutors (human or otherwise). The paper develops a routine
               for automatic transcription based on Whisper, for sampling texts
               based on their entailment relations, for analyzing dialogic contributions
               along their semantic embeddings, and for classifying interlocutors
               and interaction systems based on them. In this way, we highlight
               differences between human and artificial conversations under conditions
               that approximate free dialogic communication. We show that despite
               their obvious classificatory differences, it is difficult to see
               clear differences even in the domain of dialogic communication
               given the current instruments of NLP.},
  booktitle = {Proceedings of the 35th ACM Conference on Hypertext and Social Media},
  pages     = {153--166},
  numpages  = {14},
  keywords  = {Creative AI, Creativity, Generative AI, Hermeneutics, NLP, core, core_b05, core_c08},
  location  = {Poznan, Poland},
  series    = {HT '24}
}

BibTeX

@inproceedings{Abrami:et:al:2024:b,
  author    = {Abrami, Giuseppe and Wontke, Dominik Alexander and Singh, Gurpreet
               and Mehler, Alexander},
  title     = {{Va.Si.Li-ES}: {VR}-based Dynamic Event Processing, Environment Change
               and User Feedback in {Va.Si.Li-Lab}},
  year      = {2024},
  isbn      = {9798400705953},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3648188.3675154},
  doi       = {10.1145/3648188.3675154},
  abstract  = {Flexibility, adaptability, modularity, and extensibility in the
               context of a collaborative system are critical features for multi-user
               hypertext systems. In addition to facilitating acceptance and
               increasing reusability, these features simplify development cycles
               and enable a larger range of application areas. However, especially
               in virtual 3D hypertext systems, many of the features are only
               partially available or not available at all. To fill this gap,
               we present an approach to virtual hypertext systems for the realization
               of dynamic event systems. Such an event system can be created
               and serialized simultaneously at run time regarding the modification
               of situational, environmental parameters. This includes informing
               users and allowing them to participate in the environmental dynamics
               of the system. We present Va.Si.Li-ES as a module of Va.Si.Li-Lab,
               describe several environmental scenarios that can be adapted,
               and provide use cases in the context of 3D hypertext systems.},
  booktitle = {Proceedings of the 35th ACM Conference on Hypertext and Social Media},
  pages     = {357--368},
  numpages  = {12},
  keywords  = {Collaborative Simulation, Environmental Event System, Hypertext, Ubiq, Va.Si.Li-Lab, Virtual Reality},
  location  = {Poznan, Poland},
  series    = {HT '24}
}

BibTeX

@inproceedings{Henlein:et:al:2024-vicom,
  title     = {An Outlook for {AI} Innovation in Multimodal Communication Research},
  author    = {Henlein, Alexander and Bauer, Anastasia and Bhattacharjee, Reetu
               and Ćwiek, Aleksandra and Gregori, Alina and Kügler, Frank and Lemanski, Jens
               and Lücking, Andy and Mehler, Alexander and Prieto, Pilar and Sánchez-Ramón, Paula G.
               and Schepens, Job and Schulte-Rüther, Martin and Schweinberger, Stefan R.
               and von Eiff, Celina I.},
  editor    = {Duffy, Vincent G.},
  year      = {2024},
  booktitle = {Digital Human Modeling and Applications in Health, Safety, Ergonomics
               and Risk Management},
  series    = {HCII 2024. Lecture Notes in Computer Science},
  publisher = {Springer},
  address   = {Cham},
  pages     = {182--234},
  isbn      = {978-3-031-61066-0},
  keywords  = {gemdis}
}

BibTeX

@inproceedings{Abrami:Mehler:2024,
  author    = {Abrami, Giuseppe and Mehler, Alexander},
  title     = {Efficient, uniform and scalable parallel {NLP} pre-processing with
               {DUUI}: Perspectives and Best Practice for the Digital Humanities},
  year      = {2024},
  month     = {aug},
  editor    = {Karajgikar, Jajwalya and Janco, Andrew and Otis, Jessica},
  booktitle = {Digital Humanities Conference 2024 -- Book of Abstracts (DH 2024)},
  location  = {Washington, DC, USA},
  series    = {DH},
  keywords  = {duui, core, core_c08},
  publisher = {Zenodo},
  doi       = {10.5281/zenodo.13761079},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2024/12/DH2024_Poster.pdf},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2024/12/DH2024_Abstract.pdf},
  url       = {https://doi.org/10.5281/zenodo.13761079},
  pages     = {15--18},
  numpages  = {4}
}

BibTeX

@inproceedings{Luecking:et:al:2024,
  author    = {L{\"u}cking, Andy and Abrami, Giuseppe and Hammerla, Leon and Rahn, Marc
               and Baumartz, Daniel and Eger, Steffen and Mehler, Alexander},
  title     = {Dependencies over Times and Tools ({D}o{TT})},
  editor    = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro
               and Sakti, Sakriani and Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational
               Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  address   = {Torino, Italy},
  publisher = {ELRA and ICCL},
  month     = {may},
  year      = {2024},
  pages     = {4641--4653},
  url       = {https://aclanthology.org/2024.lrec-main.415},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2024/05/LREC_2024_Poster_DoTT.pdf},
  abstract  = {Purpose: Based on the examples of English and German, we investigate
               to what extent parsers trained on modern variants of these languages
               can be transferred to older language levels without loss. Methods:
               We developed a treebank called DoTT (https://github.com/texttechnologylab/DoTT)
               which covers, roughly, the time period from 1800 until today,
               in conjunction with the further development of the annotation
               tool DependencyAnnotator. DoTT consists of a collection of diachronic
               corpora enriched with dependency annotations using 3 parsers,
               6 pre-trained language models, 5 newly trained models for German,
               and two tag sets (TIGER and Universal Dependencies). To assess
               how the different parsers perform on texts from different time
               periods, we created a gold standard sample as a benchmark. Results:
               We found that the parsers/models perform quite well on modern
               texts (document-level LAS ranging from 82.89 to 88.54) and slightly
               worse on older texts, as expected (average document-level LAS
               84.60 vs. 86.14), but not significantly. For German texts, the
               (German) TIGER scheme achieved slightly better results than UD.
               Conclusion: Overall, this result speaks for the transferability
               of parsers to past language levels, at least dating back until
               around 1800. This very transferability, it is however argued,
               means that studies of language change in the field of dependency
               syntax can draw on dependency distance but miss out on some grammatical
               phenomena.}
}

BibTeX

@inproceedings{Konca:et:al:2024,
  author    = {Konca, Maxim and L{\"u}cking, Andy and Mehler, Alexander},
  title     = {{G}erman {SRL}: Corpus Construction and Model Training},
  editor    = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro
               and Sakti, Sakriani and Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational
               Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  address   = {Torino, Italy},
  publisher = {ELRA and ICCL},
  month     = {may},
  year      = {2024},
  pages     = {7717--7727},
  url       = {https://aclanthology.org/2024.lrec-main.682},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2024/05/LREC_2024_Poster_GERMAN_SRL.pdf},
  abstract  = {A useful semantic role-annotated resource for training semantic
               role models for the German language is missing. We point out some
               problems of previous resources and provide a new one due to a
               combined translation and alignment process: The gold standard
               CoNLL-2012 semantic role annotations are translated into German.
               Semantic role labels are transferred due to alignment models.
               The resulting dataset is used to train a German semantic role
               model. With F1-scores around 0.7, the major roles achieve competitive
               evaluation scores, but avoid limitations of previous approaches.
               The described procedure can be applied to other languages as well.}
}

BibTeX

@inproceedings{Abrami:et:al:2024:a,
  abstract  = {In 2022, the largest German-speaking corpus of parliamentary protocols
               from three different centuries, on a national and federal level
               from the countries of Germany, Austria, Switzerland and Liechtenstein,
               was collected and published - GerParCor. Through GerParCor, it
               became possible to provide for the first time various parliamentary
               protocols which were not available digitally and, moreover, could
               not be retrieved and processed in a uniform manner. Furthermore,
               GerParCor was additionally preprocessed using NLP methods and
               made available in XMI format. In this paper, GerParCor is significantly
               updated by including all new parliamentary protocols in the corpus,
               as well as adding and preprocessing further parliamentary protocols
               previously not covered, so that a period up to 1797 is now covered.
               Besides the integration of a new, state-of-the-art and appropriate
               NLP preprocessing for the handling of large text corpora, this
               update also provides an overview of the further reuse of GerParCor
               by presenting various provisioning capabilities such as API's,
               among others.},
  address   = {Torino, Italy},
  author    = {Abrami, Giuseppe and Bagci, Mevl{\"u}t and Mehler, Alexander},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational
               Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  editor    = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro
               and Sakti, Sakriani and Xue, Nianwen},
  month     = {may},
  pages     = {7707--7716},
  publisher = {ELRA and ICCL},
  title     = {{G}erman Parliamentary Corpus ({G}er{P}ar{C}or) Reloaded},
  url       = {https://aclanthology.org/2024.lrec-main.681},
  pdf       = {https://aclanthology.org/2024.lrec-main.681.pdf},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2024/05/GerParCor_Reloaded_Poster.pdf},
  video     = {https://www.youtube.com/watch?v=5X-w_oXOAYo},
  keywords  = {gerparcor,corpus},
  year      = {2024}
}

BibTeX

@inproceedings{Marreddy:et:al:2023acl,
  author    = {Oota, Subba Reddy and Marreddy, Mounika and Gupta, Manish and Bapi, Raju},
  title     = {How does the brain process syntactic structure while listening?},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2023},
  year      = {2023},
  pages     = {6624--6647}
}

BibTeX

@inproceedings{Marreddy:et:al:2023icassp,
  title     = {Neural architecture of speech},
  author    = {Oota, Subba Reddy and Pahwa, Khushbu and Marreddy, Mounika and Gupta, Manish
               and Raju, Bapi S.},
  booktitle = {ICASSP 2023 -- 2023 IEEE International Conference on Acoustics, Speech
               and Signal Processing (ICASSP)},
  pages     = {1--5},
  year      = {2023},
  organization = {IEEE}
}

BibTeX

@inproceedings{Gregori:et:al:2023-vicom,
  title     = {A Roadmap for Technological Innovation in Multimodal Communication Research},
  author    = {Gregori, Alina and Amici, Federica and Brilmayer, Ingmar and {\'{C}}wiek, Aleksandra
               and Fritzsche, Lennart and Fuchs, Susanne and Henlein, Alexander and Herbort, Oliver
               and K{\"u}gler, Frank and Lemanski, Jens and Liebal, Katja and L{\"u}cking, Andy
               and Mehler, Alexander and Nguyen, Kim Tien and Pouw, Wim and Prieto, Pilar
               and Rohrer, Patrick Louis and S{\'a}nchez-Ram{\'o}n, Paula G. and Schulte-R{\"u}ther, Martin
               and Schumacher, Petra B. and Schweinberger, Stefan R. and Struckmeier, Volker
               and Trettenbrein, Patrick C. and von Eiff, Celina I.},
  editor    = {Duffy, Vincent G.},
  booktitle = {Digital Human Modeling and Applications in Health, Safety, Ergonomics
               and Risk Management},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  year      = {2023},
  pages     = {402--438},
  isbn      = {978-3-031-35748-0},
  keywords  = {gemdis},
  pdf       = {https://pure.mpg.de/rest/items/item_3511464_5/component/file_3520176/content},
  abstract  = {Multimodal communication research focuses on how different means
               of signalling coordinate to communicate effectively. This line
               of research is traditionally influenced by fields such as cognitive
               and neuroscience, human-computer interaction, and linguistics.
               With new technologies becoming available in fields such as natural
               language processing and computer vision, the field can increasingly
               avail itself of new ways of analyzing and understanding multimodal
               communication. As a result, there is a general hope that multimodal
               research may be at the ``precipice of greatness'' due to technological
               advances in computer science and resulting extended empirical
               coverage. However, for this to come about there must be sufficient
               guidance on key (theoretical) needs of innovation in the field
               of multimodal communication. Absent such guidance, the research
               focus of computer scientists might increasingly diverge from crucial
               issues in multimodal communication. With this paper, we want to
               further promote interaction between these fields, which may enormously
               benefit both communities. The multimodal research community (represented
               here by a consortium of researchers from the Visual Communication
               [ViCom] Priority Programme) can engage in the innovation by clearly
               stating which technological tools are needed to make progress
               in the field of multimodal communication. In this article, we
               try to facilitate the establishment of a much needed common ground
               on feasible expectations (e.g., in terms of terminology and measures
               to be able to train machine learning algorithms) and to critically
               reflect possibly idle hopes for technical advances, informed by
               recent successes and challenges in computer science, social signal
               processing, and related domains.}
}

BibTeX

@inproceedings{Boenisch:et:al:2023,
  title     = {{Bundestags-Mine}: Natural Language Processing for Extracting
               Key Information from Government Documents},
  isbn      = {9781643684734},
  issn      = {1879-8314},
  url       = {https://doi.org/10.3233/FAIA230996},
  doi       = {10.3233/faia230996},
  booktitle = {Legal Knowledge and Information Systems},
  publisher = {IOS Press},
  author    = {B{\"o}nisch, Kevin and Abrami, Giuseppe and Wehnert, Sabine and Mehler, Alexander},
  year      = {2023}
}

BibTeX

@inproceedings{Leonhardt:et:al:2023,
  author    = {Leonhardt, Alexander and Abrami, Giuseppe and Baumartz, Daniel
               and Mehler, Alexander},
  editor    = {Bouamor, Houda and Pino, Juan and Bali, Kalika},
  title     = {Unlocking the Heterogeneous Landscape of Big Data {NLP} with {DUUI}},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2023},
  publisher = {Association for Computational Linguistics},
  address   = {Singapore},
  year      = {2023},
  pages     = {385--399},
  keywords  = {duui, core, core_c08},
  url       = {https://aclanthology.org/2023.findings-emnlp.29},
  pdf       = {https://aclanthology.org/2023.findings-emnlp.29.pdf},
  abstract  = {Automatic analysis of large corpora is a complex task, especially
               in terms of time efficiency. This complexity is increased by the
               fact that flexible, extensible text analysis requires the continuous
               integration of ever new tools. Since there are no adequate frameworks
               for these purposes in the field of NLP, and especially in the
               context of UIMA, that are not outdated or unusable for security
               reasons, we present a new approach to address the latter task:
               Docker Unified UIMA Interface (DUUI), a scalable, flexible, lightweight,
               and feature-rich framework for automatic distributed analysis
               of text corpora that leverages Big Data experience and virtualization
               with Docker. We evaluate DUUI{'}s communication approach against
               a state-of-the-art approach and demonstrate its outstanding behavior
               in terms of time efficiency, enabling the analysis of big text
               data.}
}

Jonathan Ginzburg and Andy Lücking. 2023. Referential Transparency and Inquisitivity. Proceedings of the 4th Workshop on Inquisitiveness Below and Beyond the Sentence Boundary, 11–20.

BibTeX

@inproceedings{Ginzburg:Luecking:2023-wh,
  author    = {Ginzburg, Jonathan and Lücking, Andy},
  author+an = {2=highlight},
  keywords  = {own,conference},
  title     = {Referential Transparency and Inquisitivity},
  booktitle = {Proceedings of the 4th Workshop on Inquisitiveness Below and Beyond
               the Sentence Boundary},
  series    = {InqBnB4'23},
  pages     = {11--20},
  location  = {Nancy, France, hosted with IWCS 2023},
  year      = {2023},
  url       = {https://aclanthology.org/2023.inqbnb-1.2/},
  pdf       = {https://aclanthology.org/2023.inqbnb-1.2.pdf}
}

Andy Lücking. 2023. Towards Referential Transparent Annotations of Quantified Noun Phrases. Proceedings of the 2023 Joint ACL–ISO Workshop on Interoperable Semantic Annotation, 47–55.

BibTeX

@inproceedings{Luecking:2023-rtt-annotation,
  author    = {Lücking, Andy},
  keywords  = {gemdis},
  title     = {Towards Referential Transparent Annotations of Quantified Noun Phrases},
  booktitle = {Proceedings of the 2023 Joint ACL--ISO Workshop on Interoperable
               Semantic Annotation},
  series    = {ISA-19},
  pages     = {47--55},
  location  = {Nancy, France, hosted with IWCS 2023},
  year      = {2023},
  url       = {https://aclanthology.org/2023.isa-1.7/},
  pdf       = {https://aclanthology.org/2023.isa-1.7.pdf}
}

BibTeX

@inproceedings{Larsson:Cooper:Ginzburg:Luecking:2023-ttr-spa,
  title     = {{TTR} at the {SPA}: {Relating} type-theoretical semantics to neural
               semantic pointers},
  author    = {Larsson, Staffan and Cooper, Robin and Ginzburg, Jonathan and Lücking, Andy},
  author+an = {4=highlight},
  booktitle = {Proceedings of Natural Logic Meets Machine Learning IV},
  series    = {NALOMA'23},
  location  = {Nancy, France, hosted with IWCS 2023},
  year      = {2023},
  keywords  = {own,conference},
  pdf       = {https://aclanthology.org/2023.naloma-1.5.pdf},
  url       = {https://aclanthology.org/2023.naloma-1.5/}
}

BibTeX

@inproceedings{Henlein:et:al:2023c,
  title     = {Towards grounding multimodal semantics in interaction data with {Va.Si.Li-Lab}},
  author    = {Henlein, Alexander and Lücking, Andy and Bagci, Mevlüt and Mehler, Alexander},
  booktitle = {Proceedings of the 8th Conference on Gesture and Speech in Interaction (GESPIN)},
  location  = {Nijmegen, Netherlands},
  year      = {2023},
  keywords  = {vasililab, gemdis},
  pdf       = {https://www.gespin2023.nl/documents/talks_and_posters/GeSpIn_2023_papers/GeSpIn_2023_paper_1692.pdf}
}

BibTeX

@inproceedings{Babbili:et:al:2023,
  author    = {Babbili, Shaduan and B{\"o}nisch, Kevin and Heinrich, Yannick
               and Stephan, Philipp and Abrami, Giuseppe and Mehler, Alexander},
  title     = {{Viki LibraRy}: A Virtual Reality Library for Collaborative Browsing
               and Navigation through Hypertext},
  year      = {2023},
  isbn      = {9798400702327},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3603163.3609079},
  doi       = {10.1145/3603163.3609079},
  abstract  = {We present Viki LibraRy, a virtual-reality-based system for generating
               and exploring online information as a spatial hypertext. It creates
               a virtual library based on Wikipedia in which Rooms are used to
               make data available via a RESTful backend. In these Rooms, users
               can browse through all articles of the corresponding Wikipedia
               category in the form of Books. In addition, users can access different
               Rooms, through virtual portals. Beyond that, the explorations
               can be done alone or collaboratively, using Ubiq.},
  booktitle = {Proceedings of the 34th ACM Conference on Hypertext and Social Media},
  articleno = {6},
  numpages  = {3},
  keywords  = {virtual reality simulation, virtual reality, virtual hypertext, virtual museum},
  location  = {Rome, Italy},
  series    = {HT '23},
  pdf       = {https://dl.acm.org/doi/pdf/10.1145/3603163.3609079}
}

BibTeX

@inproceedings{Gagel:et:al:2023,
  author    = {Gagel, Julian and Hustedt, Jasper and L{\"u}ttig, Timo and Berg, Theresa
               and Abrami, Giuseppe and Mehler, Alexander},
  title     = {News in Time and Space: Global Event Exploration in Virtual Reality},
  year      = {2023},
  isbn      = {9798400702327},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3603163.3609080},
  doi       = {10.1145/3603163.3609080},
  abstract  = {We present News in Time and Space (NiTS), a virtual reality application
               for visualization, filtering and interaction with geo-referenced
               events based on GDELT. It can be used both via VR glasses and
               as a desktop solution for shared use by multiple users with Ubiq.
               The aim of NiTS is to provide overviews of global events and trends
               in order to create a resource for their monitoring and analysis.},
  booktitle = {Proceedings of the 34th ACM Conference on Hypertext and Social Media},
  articleno = {7},
  numpages  = {3},
  keywords  = {virtual hypertext, human data interaction, spatial computing, virtual reality simulation, geographic information systems, virtual reality},
  location  = {Rome, Italy},
  series    = {HT '23},
  pdf       = {https://dl.acm.org/doi/pdf/10.1145/3603163.3609080}
}

BibTeX

@inproceedings{Abrami:et:al:2023,
  author    = {Abrami, Giuseppe and Mehler, Alexander and Bagci, Mevl{\"u}t and Schrottenbacher, Patrick
               and Henlein, Alexander and Spiekermann, Christian and Engel, Juliane
               and Schreiber, Jakob},
  title     = {{Va.Si.Li-Lab} as a Collaborative Multi-User Annotation Tool in
               Virtual Reality and Its Potential Fields of Application},
  year      = {2023},
  isbn      = {9798400702327},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3603163.3609076},
  doi       = {10.1145/3603163.3609076},
  abstract  = {During the last thirty years a variety of hypertext approaches
               and virtual environments -- some virtual hypertext environments
               -- have been developed and discussed. Although the development
               of virtual and augmented reality technologies is rapid and improving,
               and many technologies can be used at affordable conditions, their
               usability for hypertext systems has not yet been explored. At
               the same time, even for virtual three-dimensional virtual and
               augmented environments, there is no generally accepted concept
               that is similar or nearly as elegant as hypertext. This gap will
               have to be filled in the next years and a good concept should
               be developed; in this article we aim to contribute in this direction
               and also introduce a prototype for a possible implementation of
               criteria for virtual hypertext simulations.},
  booktitle = {Proceedings of the 34th ACM Conference on Hypertext and Social Media},
  articleno = {22},
  numpages  = {9},
  keywords  = {VaSiLiLab, virtual hypertext, virtual reality, virtual reality simulation, authoring system, gemdis},
  location  = {Rome, Italy},
  series    = {HT '23},
  pdf       = {https://dl.acm.org/doi/pdf/10.1145/3603163.3609076}
}

BibTeX

@inproceedings{Henlein:et:al:2023b,
  title     = {Semantic Scene Builder: Towards a Context Sensitive Text-to-3D Scene Framework},
  author    = {Henlein, Alexander and Kett, Attila and Baumartz, Daniel and Abrami, Giuseppe
               and Mehler, Alexander and Bastian, Johannes and Blecher, Yannic and Budgenhagen, David
               and Christof, Roman and Ewald, Tim-Oliver and Fauerbach, Tim and Masny, Patrick
               and Mende, Julian and Schn{\"u}re, Paul and Viel, Marc},
  booktitle = {Digital Human Modeling and Applications in Health, Safety, Ergonomics
               and Risk Management},
  editor    = {Duffy, Vincent G.},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  year      = {2023},
  pages     = {461--479},
  isbn      = {978-3-031-35748-0},
  doi       = {10.1007/978-3-031-35748-0_32},
  keywords  = {gemdis},
  abstract  = {We introduce Semantic Scene Builder (SeSB), a VR-based text-to-3D
               scene framework using SemAF (Semantic Annotation Framework) as
               a scheme for annotating discourse structures. SeSB integrates
               a variety of tools and resources by using SemAF and UIMA as a
               unified data structure to generate 3D scenes from textual descriptions.
               Based on VR, SeSB allows its users to change annotations through
               body movements instead of symbolic manipulations: from annotations
               in texts to corrections in editing steps to adjustments in generated
               scenes, all this is done by grabbing and moving objects. We evaluate
               SeSB in comparison with a state-of-the-art open source text-to-scene
               method (the only one which is publicly available) and find that
               our approach not only performs better, but also allows for modeling
               a greater variety of scenes.}
}

BibTeX

@inproceedings{Mehler:et:al:2023:a,
  abstract  = {Simulation-based learning is a method in which learners learn
               to master real-life scenarios and tasks from simulated application
               contexts. It is particularly suitable for the use of VR technologies,
               as these allow immersive experiences of the targeted scenarios.
               VR methods are also relevant for studies on online learning, especially
               in groups, as they provide access to a variety of multimodal learning
               and interaction data. However, VR leads to a trade-off between
               technological conditions of the observability of such data and
               the openness of learner behavior. We present Va.Si.Li-Lab, a VR-Lab
               for Simulation-based Learning developed to address this trade-off.
               Va.Si.Li-Lab uses a graph-theoretical model based on hypergraphs
               to represent the data diversity of multimodal learning and interaction.
               We develop this data model in relation to mono- and multimodal,
               intra- and interpersonal data and interleave it with ISO-Space
               to describe distributed multiple documents from the perspective
               of their interactive generation. The paper adds three use cases
               to motivate the broad applicability of Va.Si.Li-Lab and its data
               model.},
  address   = {Cham},
  author    = {Mehler, Alexander and Bagci, Mevl{\"u}t and Henlein, Alexander
               and Abrami, Giuseppe and Spiekermann, Christian and Schrottenbacher, Patrick
               and Konca, Maxim and L{\"u}cking, Andy and Engel, Juliane and Quintino, Marc
               and Schreiber, Jakob and Saukel, Kevin and Zlatkin-Troitschanskaia, Olga},
  booktitle = {Digital Human Modeling and Applications in Health, Safety, Ergonomics
               and Risk Management},
  editor    = {Duffy, Vincent G.},
  isbn      = {978-3-031-35741-1},
  pages     = {539--565},
  publisher = {Springer Nature Switzerland},
  title     = {A Multimodal Data Model for Simulation-Based Learning with {Va.Si.Li-Lab}},
  year      = {2023},
  doi       = {10.1007/978-3-031-35741-1_39},
  keywords  = {gemdis}
}

BibTeX

@inproceedings{marreddy:et:al:2022multi,
  title     = {Multi-task text classification using graph convolutional networks
               for large-scale low resource language},
  author    = {Marreddy, Mounika and Oota, Subba Reddy and Vakada, Lakshmi Sireesha
               and Chinni, Venkata Charan and Mamidi, Radhika},
  booktitle = {2022 International Joint Conference on Neural Networks (IJCNN)},
  pages     = {1--8},
  year      = {2022},
  organization = {IEEE}
}

BibTeX

@inproceedings{Ebert:et:al:2022,
  abstract  = {``Behavioromics'' is a term that has been invented to cover the
               study of multimodal interaction from various disciplines and points
               of view. These disciplines and points of view, however, lack a
               platform for exchange. The workshop session on ``Semantic, artificial
               and computational interaction studies'' provides such a platform.
               We motivate behavioromics, sketch its historical background, and
               summarize this year's contributions.},
  address   = {Cham},
  author    = {Ebert, Cornelia and L{\"u}cking, Andy and Mehler, Alexander},
  booktitle = {HCI International 2022 - Late Breaking Papers. Multimodality in
               Advanced Interaction Environments},
  editor    = {Kurosu, Masaaki and Yamamoto, Sakae and Mori, Hirohiko and Schmorrow, Dylan D.
               and Fidopiastis, Cali M. and Streitz, Norbert A. and Konomi, Shin'ichi},
  isbn      = {978-3-031-17618-0},
  pages     = {36--47},
  publisher = {Springer Nature Switzerland},
  title     = {Introduction to the 2nd Edition of ``Semantic, Artificial and
               Computational Interaction Studies''},
  doi       = {10.1007/978-3-031-17618-0_3},
  year      = {2022}
}

BibTeX

@inproceedings{Ahmed:et:al:2022,
  author    = {Ahmed, Sajawel and van der Goot, Rob and Rehman, Misbahur and Kruse, Carl
               and {\"O}zsoy, {\"O}mer and Mehler, Alexander and Roig, Gemma},
  title     = {Tafsir Dataset: A Novel Multi-Task Benchmark for Named Entity
               Recognition and Topic Modeling in Classical {A}rabic Literature},
  booktitle = {Proceedings of the 29th International Conference on Computational Linguistics},
  publisher = {International Committee on Computational Linguistics},
  address   = {Gyeongju, Republic of Korea},
  year      = {2022},
  month     = {oct},
  pages     = {3753--3768},
  url       = {https://aclanthology.org/2022.coling-1.330},
  abstract  = {Various historical languages, which used to be lingua franca of
               science and arts, deserve the attention of current NLP research.
               In this work, we take the first data-driven steps towards this
               research line for Classical Arabic (CA) by addressing named entity
               recognition (NER) and topic modeling (TM) on the example of CA
               literature. We manually annotate the encyclopedic work of Tafsir
               Al-Tabari with span-based NEs, sentence-based topics, and span-based
               subtopics, thus creating the Tafsir Dataset with over 51,000 sentences,
               the first large-scale multi-task benchmark for CA. Next, we analyze
               our newly generated dataset, which we make open-source available,
               with current language models (lightweight BiLSTM, transformer-based
               MaChAmP) along a novel script compression method, thereby achieving
               state-of-the-art performance for our target task CA-NER. We also
               show that CA-TM from the perspective of historical topic models,
               which are central to Arabic studies, is very challenging. With
               this interdisciplinary work, we lay the foundations for future
               research on automatic analysis of CA literature.}
}

Jonathan Ginzburg and Andy Lücking. 2022. The Integrated Model of Memory: A Dialogical Perspective. Proceedings of SemDial 2022, 6–17.

BibTeX

@inproceedings{Ginzburg:Luecking:2022:a,
  title     = {The Integrated Model of Memory: {A} Dialogical Perspective},
  author    = {Ginzburg, Jonathan and L{\"u}cking, Andy},
  booktitle = {Proceedings of SemDial 2022},
  series    = {SemDial 2022 -- DubDial},
  location  = {Dublin, Ireland},
  year      = {2022},
  editor    = {Gregoromichelaki, Eleni and Hough, Julian and Kelleher, John D.},
  pages     = {6--17},
  url       = {https://www.semdial.org/anthology/papers/Z/Z22/Z22-3004/},
  pdf       = {http://semdial.org/anthology/Z22-Ginzburg_semdial_0004.pdf}
}

Andy Lücking and Jonathan Ginzburg. 2022. How to repair a slip of the tongue? Proceedings of SemDial 2022, 35–46.

BibTeX

@inproceedings{Luecking:Ginzburg:2022:a,
  title     = {How to repair a slip of the tongue?},
  author    = {Lücking, Andy and Ginzburg, Jonathan},
  booktitle = {Proceedings of SemDial 2022},
  series    = {SemDial 2022 -- DubDial},
  location  = {Dublin, Ireland},
  year      = {2022},
  editor    = {Gregoromichelaki, Eleni and Hough, Julian and Kelleher, John D.},
  pages     = {35--46},
  url       = {https://www.semdial.org/anthology/papers/Z/Z22/Z22-3007/},
  pdf       = {http://semdial.org/anthology/Z22-Lücking_semdial_0007.pdf}
}

BibTeX

@inproceedings{Henlein:Mehler:2022,
  title     = {What do Toothbrushes do in the Kitchen? How Transformers Think
               our World is Structured},
  author    = {Henlein, Alexander and Mehler, Alexander},
  booktitle = {Proceedings of the 2022 Conference of the North American Chapter
               of the Association for Computational Linguistics: Human Language
               Technologies},
  year      = {2022},
  address   = {Seattle, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.naacl-main.425},
  doi       = {10.18653/v1/2022.naacl-main.425},
  pages     = {5791--5807},
  abstract  = {Transformer-based models are now predominant in NLP. They outperform
               approaches based on static models in many respects. This success
               has in turn prompted research that reveals a number of biases
               in the language models generated by transformers. In this paper
               we utilize this research on biases to investigate to what extent
               transformer-based language models allow for extracting knowledge
               about object relations (X occurs in Y; X consists of Z; action
               A involves using X). To this end, we compare contextualized models
               with their static counterparts. We make this comparison dependent
               on the application of a number of similarity measures and classifiers.
               Our results are threefold: Firstly, we show that the models combined
               with the different similarity measures differ greatly in terms
               of the amount of knowledge they allow for extracting. Secondly,
               our results suggest that similarity measures perform much worse
               than classifier-based approaches. Thirdly, we show that, surprisingly,
               static models perform almost as well as contextualized models
               {--} in some cases even better.}
}

Giuseppe Abrami, Mevlüt Bagci, Leon Hammerla and Alexander Mehler. 2022. German Parliamentary Corpus (GerParCor). Proceedings of the Language Resources and Evaluation Conference, 1900–1906.

BibTeX

@inproceedings{Abrami:Bagci:Hammerla:Mehler:2022,
  author    = {Abrami, Giuseppe and Bagci, Mevlüt and Hammerla, Leon and Mehler, Alexander},
  editor    = {Calzolari, Nicoletta and B{\'e}chet, Fr{\'e}d{\'e}ric and Blache, Philippe
               and Choukri, Khalid and Cieri, Christopher and Declerck, Thierry and Goggi, Sara
               and Isahara, Hitoshi and Maegaard, Bente and Mariani, Joseph and Mazo, H{\'e}l{\`e}ne
               and Odijk, Jan and Piperidis, Stelios},
  title     = {German Parliamentary Corpus (GerParCor)},
  booktitle = {Proceedings of the Language Resources and Evaluation Conference},
  year      = {2022},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  pages     = {1900--1906},
  abstract  = {Parliamentary debates represent a large and partly unexploited
               treasure trove of publicly accessible texts. In the German-speaking
               area, there is a certain deficit of uniformly accessible and annotated
               corpora covering all German-speaking parliaments at the national
               and federal level. To address this gap, we introduce the German
               Parliamentary Corpus (GerParCor). GerParCor is a genre-specific
               corpus of (predominantly historical) German-language parliamentary
               protocols from three centuries and four countries, including state
               and federal level data. In addition, GerParCor contains conversions
               of scanned protocols and, in particular, of protocols in Fraktur
               converted via an OCR process based on Tesseract. All protocols
               were preprocessed by means of the NLP pipeline of spaCy3 and automatically
               annotated with metadata regarding their session date. GerParCor
               is made available in the XMI format of the UIMA project. In this
               way, GerParCor can be used as a large corpus of historical texts
               in the field of political communication for various tasks in NLP.},
  url       = {https://aclanthology.org/2022.lrec-1.202},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2022/06/GerParCor_LREC_2022.pdf},
  keywords  = {gerparcor},
  pdf       = {http://www.lrec-conf.org/proceedings/lrec2022/pdf/2022.lrec-1.202.pdf}
}

BibTeX

@inproceedings{Luecking:Stoeckel:Abrami:Mehler:2022,
  title     = {I still have Time(s): Extending {HeidelTime} for {German} Texts},
  author    = {L{\"u}cking, Andy and Stoeckel, Manuel and Abrami, Giuseppe and Mehler, Alexander},
  year      = {2022},
  booktitle = {Proceedings of the 13th Language Resources and Evaluation Conference},
  location  = {Marseille, France},
  series    = {LREC 2022},
  pdf       = {https://aclanthology.org/2022.lrec-1.505.pdf},
  url       = {https://aclanthology.org/2022.lrec-1.505}
}

BibTeX

@inproceedings{Marreddy:et:al:2011,
  title     = {Clickbait detection in {Telugu}: Overcoming {NLP} challenges in resource-poor
               languages using benchmarked techniques},
  author    = {Marreddy, Mounika and Oota, Subba Reddy and Vakada, Lakshmi Sireesha
               and Chinni, Venkata Charan and Mamidi, Radhika},
  booktitle = {2021 International Joint Conference on Neural Networks (IJCNN)},
  pages     = {1--8},
  year      = {2021},
  organization = {IEEE},
  doi       = {10.1109/IJCNN52387.2021.9534382},
  url       = {https://ieeexplore.ieee.org/document/9534382},
  abstract  = {Clickbait headlines have become a nudge in social media and news
               websites. The methods to identify clickbaits are largely being
               developed for English. There is a need for the same in other
               languages as well with the increase in the usage of social media
               platforms in different languages. In this work, we present
               an annotated clickbait dataset of 112,657 headlines that can be
               used for building an automated clickbait detection system for
               Telugu, a resource-poor language. Our contribution in this paper
               includes (i) generation of the latest pre-trained language models,
               including RoBERTa, ALBERT, and ELECTRA trained on a large Telugu
               corpora of 8,015,588 sentences that we had collected, (ii) data
               analysis and benchmarking the performance of different approaches
               ranging from hand-crafted features to state-of-the-art models.
               We show that the pre-trained language models trained on Telugu
               outperform the existing pre-trained models viz. BERT-Mulingual-Case,
               XLM-MLM, and XLM-R on clickbait task. On a large Telugu clickbait
               dataset of 112,657 samples, the Light Gradient Boosted Machines
               (LGBM) model achieves an F1-score of 0.94 for clickbait headlines.
               For Non-Clickbait headlines, F1-score of 0.93 is obtained which
               is similar to that of Clickbait class. We open-source our dataset,
               pre-trained models, and code.}
}

Jonathan Ginzburg and Andy Lücking. 2021. Requesting clarifications with speech and gestures. Proceedings of the 1st Workshop on Multimodal Semantic Representations, 21–31.

BibTeX

@inproceedings{Ginzburg:Luecking:2021-clarifications,
  author    = {Ginzburg, Jonathan and L{\"u}cking, Andy},
  title     = {Requesting clarifications with speech and gestures},
  booktitle = {Proceedings of the 1st Workshop on Multimodal Semantic Representations},
  series    = {MMSR},
  location  = {Groningen, Netherlands (Online)},
  publisher = {Association for Computational Linguistics},
  year      = {2021},
  pages     = {21--31},
  url       = {https://aclanthology.org/2021.mmsr-1.3},
  pdf       = {https://aclanthology.org/2021.mmsr-1.3.pdf},
  abstract  = {In multimodal natural language interaction both speech and non-speech
               gestures are involved in the basic mechanism of grounding and
               repair. We discuss a couple of multimodal clarification requests
               and argue that gestures, as well as speech expressions, underlie
               comparable parallelism constraints. In order to make this precise,
               we slightly extend the formal dialogue framework KoS to cover
               also gestural counterparts of verbal locutionary propositions.}
}

Alexander Mehler, Daniel Baumartz and Tolga Uslu. 2021. SemioGraphs: Visualizing Topic Networks as Multi-Codal Graphs. International Quantitative Linguistics Conference (QUALICO 2021).

BibTeX

@inproceedings{Mehler:Uslu:Baumartz:2021,
  author    = {Mehler, Alexander and Baumartz, Daniel and Uslu, Tolga},
  title     = {{SemioGraphs}: Visualizing Topic Networks as Multi-Codal Graphs},
  booktitle = {International Quantitative Linguistics Conference (QUALICO 2021)},
  series    = {QUALICO 2021},
  location  = {Tokyo, Japan},
  year      = {2021},
  poster    = {https://www.texttechnologylab.org/files/Qualico_2021_Semiograph_Poster.pdf}
}

Andy Lücking and Jonathan Ginzburg. 2021. Saying and shaking ‘No’. Proceedings of the 28th International Conference on Head-Driven Phrase Structure Grammar, Online (Frankfurt/Main), 283–299.

BibTeX

@inproceedings{Luecking:Ginzburg:2021:a,
  author    = {L{\"u}cking, Andy and Ginzburg, Jonathan},
  title     = {Saying and shaking `No'},
  booktitle = {Proceedings of the 28th International Conference on {Head-Driven
               Phrase Structure Grammar}, Online (Frankfurt/Main)},
  editor    = {M{\"u}ller, Stefan and Melnik, Nurit},
  issn      = {1535-1793},
  doi       = {10.21248/hpsg.2021.15},
  publisher = {University Library},
  address   = {Frankfurt/Main},
  pages     = {283--299},
  year      = {2021}
}

BibTeX

@inproceedings{Ginzburg:Luecking:2021:b,
  author    = {Ginzburg, Jonathan and L{\"u}cking, Andy},
  title     = {I thought pointing is rude: {A} dialogue-semantic analysis of
               pointing at the addressee},
  booktitle = {Proceedings of {Sinn und Bedeutung} 25},
  series    = {SuB 25},
  year      = {2021},
  pages     = {276--291},
  editor    = {Grosz, Patrick and Mart{\'i}, Luisa and Pearson, Hazel and Sudo, Yasutada
               and Zobel, Sarah},
  note      = {Special Session: Gestures and Natural Language Semantics},
  location  = {University College London (Online)},
  url       = {https://ojs.ub.uni-konstanz.de/sub/index.php/sub/article/view/937}
}

BibTeX

@inproceedings{Fischer:et:al:2021,
  author    = {Fischer, Pascal and Smajic, Alen and Abrami, Giuseppe and Mehler, Alexander},
  title     = {{Multi-Type-TD-TSR} -- Extracting Tables from Document Images using
               a Multi-stage Pipeline for Table Detection and Table Structure
               Recognition: from {OCR} to Structured Table Representations},
  booktitle = {Proceedings of the 44th German Conference on Artificial Intelligence},
  series    = {KI2021},
  location  = {Berlin, Germany},
  year      = {2021},
  url       = {https://www.springerprofessional.de/multi-type-td-tsr-extracting-tables-from-document-images-using-a/19711570},
  pdf       = {https://arxiv.org/pdf/2105.11021.pdf}
}

BibTeX

@inproceedings{Klement:et:al:2021,
  author    = {Klement, Mark and Henlein, Alexander and Mehler, Alexander},
  title     = {{VoxML} Annotation Tool Review and Suggestions for Improvement},
  booktitle = {Proceedings of the Seventeenth Joint ACL - ISO Workshop on Interoperable
               Semantic Annotation (ISA-17, Note for special track on visual
               information annotation)},
  series    = {ISA-17},
  location  = {Groningen, Netherlands},
  month     = {jun},
  year      = {2021},
  pdf       = {https://sigsem.uvt.nl/isa17/32_Klement-Paper.pdf}
}

BibTeX

@inproceedings{Abrami:et:al:2021,
  author    = {Abrami, Giuseppe and Henlein, Alexander and Lücking, Andy and Kett, Attila
               and Adeberg, Pascal and Mehler, Alexander},
  title     = {Unleashing annotations with {TextAnnotator}: Multimedia, multi-perspective
               document views for ubiquitous annotation},
  booktitle = {Proceedings of the 17th Joint ACL - ISO Workshop on Interoperable
               Semantic Annotation},
  series    = {ISA-17},
  publisher = {Association for Computational Linguistics},
  address   = {Groningen, The Netherlands (online)},
  month     = {jun},
  editor    = {Bunt, Harry},
  year      = {2021},
  url       = {https://aclanthology.org/2021.isa-1.7},
  pages     = {65--75},
  keywords  = {textannotator, biofid},
  pdf       = {https://iwcs2021.github.io/proceedings/isa/pdf/2021.isa-1.7.pdf},
  abstract  = {We argue that mainly due to technical innovation in the landscape
               of annotation tools, a conceptual change in annotation models
               and processes is also on the horizon. It is diagnosed that these
               changes are bound up with multi-media and multi-perspective facilities
               of annotation tools, in particular when considering virtual reality
               (VR) and augmented reality (AR) applications, their potential
               ubiquitous use, and the exploitation of externally trained natural
               language pre-processing methods. Such developments potentially
               lead to a dynamic and exploratory heuristic construction of the
               annotation process. With TextAnnotator an annotation suite is
               introduced which focuses on multi-mediality and multi-perspectivity
               with an interoperable set of task-specific annotation modules
               (e.g., for word classification, rhetorical structures, dependency
               trees, semantic roles, and more) and their linkage to VR and mobile
               implementations. The basic architecture and usage of TextAnnotator
               is described and related to the above mentioned shifts in the
               field.}
}

BibTeX

@inproceedings{Ginzburg:Luecking:2020:a,
  author    = {Ginzburg, Jonathan and L{\"u}cking, Andy},
  title     = {On Laughter and Forgetting and Reconversing: {A} neurologically-inspired
               model of conversational context},
  booktitle = {Proceedings of the 24th Workshop on the Semantics and Pragmatics of Dialogue},
  series    = {SemDial/WatchDial},
  year      = {2020},
  location  = {Brandeis University, Waltham, Massachusetts (Online)},
  url       = {https://www.semdial.org/anthology/papers/Z/Z20/Z20-3008/},
  pdf       = {http://semdial.org/anthology/Z20-Ginzburg_semdial_0008.pdf}
}

Andy Lücking and Jonathan Ginzburg. 2020. Towards the score of communication. Proceedings of the 24th Workshop on the Semantics and Pragmatics of Dialogue.

BibTeX

@inproceedings{Luecking:Ginzburg:2020,
  author    = {L{\"u}cking, Andy and Ginzburg, Jonathan},
  title     = {Towards the score of communication},
  booktitle = {Proceedings of the 24th Workshop on the Semantics and Pragmatics of Dialogue},
  series    = {SemDial/WatchDial},
  year      = {2020},
  location  = {Brandeis University, Waltham, Massachusetts (Online)},
  url       = {https://www.semdial.org/anthology/papers/Z/Z20/Z20-3016/},
  pdf       = {http://semdial.org/anthology/Z20-Luecking_semdial_0016.pdf}
}

Giuseppe Abrami, Alexander Mehler and Manuel Stoeckel. 2020. TextAnnotator: A web-based annotation suite for texts. Proceedings of the Digital Humanities 2020.

BibTeX

@inproceedings{Abrami:Mehler:Stoeckel:2020,
  author    = {Abrami, Giuseppe and Mehler, Alexander and Stoeckel, Manuel},
  title     = {{TextAnnotator}: A web-based annotation suite for texts},
  booktitle = {Proceedings of the Digital Humanities 2020},
  series    = {DH 2020},
  location  = {Ottawa, Canada},
  year      = {2020},
  url       = {https://dh2020.adho.org/wp-content/uploads/2020/07/547_TextAnnotatorAwebbasedannotationsuitefortexts.html},
  doi       = {10.17613/tenm-4907},
  abstract  = {The TextAnnotator is a tool for simultaneous and collaborative
               annotation of texts with visual annotation support, integration
               of knowledge bases and, by pipelining the TextImager, a rich variety
               of pre-processing and automatic annotation tools. It includes
               a variety of modules for the annotation of texts, which contains
               the annotation of argumentative, rhetorical, propositional and
               temporal structures as well as a module for named entity linking
               and rapid annotation of named entities. Especially the modules
               for annotation of temporal, argumentative and propositional structures
               are currently unique in web-based annotation tools. The TextAnnotator,
               which allows the annotation of texts as a platform, is divided
               into a front- and a backend component. The backend is a web service
               based on WebSockets, which integrates the UIMA Database Interface
               to manage and use texts. Texts are made accessible by using the
               ResourceManager and the AuthorityManager, based on user and group
               access permissions. Different views of a document can be created
               and used depending on the scenario. Once a document has been opened,
               access is gained to the annotations stored within annotation views
               in which these are organized. Any annotation view can be assigned
               with access permissions and by default, each user obtains his
               or her own user view for every annotated document. In addition,
               with sufficient access permissions, all annotation views can also
               be used and curated. This allows the possibility to calculate
               an Inter-Annotator-Agreement for a document, which shows an agreement
               between the annotators. Annotators without sufficient rights cannot
               display this value so that the annotators do not influence each
               other. This contribution is intended to reflect the current state
               of development of TextAnnotator, demonstrate the possibilities
               of an instantaneous Inter-Annotator-Agreement and trigger a discussion
               about further functions for the community.},
  keywords  = {textannotator, biofid},
  poster    = {https://hcommons.org/deposits/download/hc:31816/CONTENT/dh2020_textannotator_poster.pdf}
}

BibTeX

@inproceedings{Abrami:Stoeckel:Mehler:2020,
  author    = {Abrami, Giuseppe and Stoeckel, Manuel and Mehler, Alexander},
  title     = {{TextAnnotator}: A {UIMA} Based Tool for the Simultaneous and Collaborative
               Annotation of Texts},
  booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference},
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  pages     = {891--900},
  isbn      = {979-10-95546-34-4},
  abstract  = {The annotation of texts and other material in the field of digital
               humanities and Natural Language Processing (NLP) is a common task
               of research projects. At the same time, the annotation of corpora
               is certainly the most time- and cost-intensive component in research
               projects and often requires a high level of expertise according
               to the research interest. However, for the annotation of texts,
               a wide range of tools is available, both for automatic and manual
               annotation. Since the automatic pre-processing methods are not
               error-free and there is an increasing demand for the generation
               of training data, also with regard to machine learning, suitable
               annotation tools are required. This paper defines criteria of
               flexibility and efficiency of complex annotations for the assessment
               of existing annotation tools. To extend this list of tools, the
               paper describes TextAnnotator, a browser-based, multi-annotation
               system, which has been developed to perform platform-independent
               multimodal annotations and annotate complex textual structures.
               The paper illustrates the current state of development of TextAnnotator
               and demonstrates its ability to evaluate annotation quality (inter-annotator
               agreement) at runtime. In addition, it will be shown how annotations
               of different users can be performed simultaneously and collaboratively
               on the same document from different platforms using UIMA as the
               basis for annotation.},
  url       = {https://www.aclweb.org/anthology/2020.lrec-1.112},
  keywords  = {textannotator, biofid},
  pdf       = {http://www.lrec-conf.org/proceedings/lrec2020/pdf/2020.lrec-1.112.pdf}
}

BibTeX

@inproceedings{Abrami:Henlein:Kett:Mehler:2020,
  author    = {Abrami, Giuseppe and Henlein, Alexander and Kett, Attila and Mehler, Alexander},
  title     = {{Text2SceneVR}: Generating Hypertexts with {VAnnotatoR} as a Pre-processing
               Step for {Text2Scene} Systems},
  booktitle = {Proceedings of the 31st ACM Conference on Hypertext and Social Media},
  series    = {HT '20},
  year      = {2020},
  location  = {Virtual Event, USA},
  isbn      = {9781450370981},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3372923.3404791},
  doi       = {10.1145/3372923.3404791},
  pages     = {177--186},
  numpages  = {10},
  pdf       = {https://dl.acm.org/doi/pdf/10.1145/3372923.3404791}
}

BibTeX

@inproceedings{Stoeckel:et:al:2020,
  author    = {Stoeckel, Manuel and Henlein, Alexander and Hemati, Wahed and Mehler, Alexander},
  title     = {Voting for {POS} tagging of {Latin} texts: Using the flair of {FLAIR}
               to better Ensemble Classifiers by Example of {Latin}},
  booktitle = {Proceedings of LT4HALA 2020 - 1st Workshop on Language Technologies
               for Historical and Ancient Languages},
  month     = {may},
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association (ELRA)},
  pages     = {130--135},
  abstract  = {Despite the great importance of the Latin language in the past,
               there are relatively few resources available today to develop
               modern NLP tools for this language. Therefore, the EvaLatin Shared
               Task for Lemmatization and Part-of-Speech (POS) tagging was published
               in the LT4HALA workshop. In our work, we dealt with the second
               EvaLatin task, that is, POS tagging. Since most of the available
               Latin word embeddings were trained on either few or inaccurate
               data, we trained several embeddings on better data in the first
               step. Based on these embeddings, we trained several state-of-the-art
               taggers and used them as input for an ensemble classifier called
               LSTMVoter. We were able to achieve the best results for both the
               cross-genre and the cross-time task (90.64\% and 87.00\%) without
               using additional annotated data (closed modality). In the meantime,
               we further improved the system and achieved even better results
               (96.91\% on classical, 90.87\% on cross-genre and 87.35\% on cross-time).},
  url       = {https://www.aclweb.org/anthology/2020.lt4hala-1.21},
  pdf       = {http://www.lrec-conf.org/proceedings/lrec2020/workshops/LT4HALA/pdf/2020.lt4hala-1.21.pdf}
}

BibTeX

@inproceedings{Henlein:et:al:2020,
  author    = {Henlein, Alexander and Abrami, Giuseppe and Kett, Attila and Mehler, Alexander},
  title     = {Transfer of {ISOSpace} into a {3D} Environment for Annotations and Applications},
  booktitle = {Proceedings of the 16th Joint ACL - ISO Workshop on Interoperable
               Semantic Annotation},
  month     = {may},
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  pages     = {32--35},
  abstract  = {People's visual perception is very pronounced and therefore it
               is usually no problem for them to describe the space around them
               in words. Conversely, people also have no problems imagining a
               concept of a described space. In recent years many efforts have
               been made to develop a linguistic concept for spatial and spatial-temporal
               relations. However, the systems have not really caught on so far,
               which in our opinion is due to the complex models on which they
               are based and the lack of available training data and automated
               taggers. In this paper we describe a project to support spatial
               annotation, which could facilitate annotation by its many functions,
               but also enrich it with many more information. This is to be achieved
               by an extension by means of a VR environment, with which spatial
               relations can be better visualized and connected with real objects.
               And we want to use the available data to develop a new state-of-the-art
               tagger and thus lay the foundation for future systems such as
               improved text understanding for Text2Scene.},
  url       = {https://www.aclweb.org/anthology/2020.isa-1.4},
  pdf       = {http://www.lrec-conf.org/proceedings/lrec2020/workshops/ISA16/pdf/2020.isa-1.4.pdf}
}

BibTeX

@inproceedings{Hildebrand:Hemati:Mehler:2020,
  author    = {Hildebrand, Jonathan and Hemati, Wahed and Mehler, Alexander},
  title     = {Recognizing Sentence-level Logical Document Structures with the
               Help of Context-free Grammars},
  booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference},
  month     = {may},
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  pages     = {5282--5290},
  abstract  = {Current sentence boundary detectors split documents into sequentially
               ordered sentences by detecting their beginnings and ends. Sentences,
               however, are more deeply structured even on this side of constituent
               and dependency structure: they can consist of a main sentence
               and several subordinate clauses as well as further segments (e.g.
               inserts in parentheses); they can even recursively embed whole
               sentences and then contain multiple sentence beginnings and ends.
               In this paper, we introduce a tool that segments sentences into
               tree structures to detect this type of recursive structure. To
               this end, we retrain different constituency parsers with the help
               of modified training data to transform them into sentence segmenters.
               With these segmenters, documents are mapped to sequences of sentence-related
               ``logical document structures''. The resulting segmenters aim to
               improve downstream tasks by providing additional structural information.
               In this context, we experiment with German dependency parsing.
               We show that for certain sentence categories, which can be determined
               automatically, improvements in German dependency parsing can be
               achieved using our segmenter for preprocessing. The assumption
               suggests that improvements in other languages and tasks can be
               achieved.},
  url       = {https://www.aclweb.org/anthology/2020.lrec-1.650},
  pdf       = {http://www.lrec-conf.org/proceedings/lrec2020/pdf/2020.lrec-1.650.pdf}
}

BibTeX

@inproceedings{Henlein:Mehler:2020,
  author    = {Henlein, Alexander and Mehler, Alexander},
  title     = {On the Influence of Coreference Resolution on Word Embeddings
               in Lexical-semantic Evaluation Tasks},
  booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference},
  month     = {may},
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  pages     = {27--33},
  abstract  = {Coreference resolution (CR) aims to find all spans of a text that
               refer to the same entity. The F1-Scores on these task have been
               greatly improved by new developed End2End-approaches and transformer
               networks. The inclusion of CR as a pre-processing step is expected
               to lead to improvements in downstream tasks. The paper examines
               this effect with respect to word embeddings. That is, we analyze
               the effects of CR on six different embedding methods and evaluate
               them in the context of seven lexical-semantic evaluation tasks
               and instantiation/hypernymy detection. Especially in the last
               tasks we hoped for a significant increase in performance. We show
               that all word embedding approaches do not benefit significantly
               from pronoun substitution. The measurable improvements are only
               marginal (around 0.5\% in most test cases). We explain this result
               with the loss of contextual information, reduction of the relative
               occurrence of rare words and the lack of pronouns to be replaced.},
  url       = {https://www.aclweb.org/anthology/2020.lrec-1.4},
  pdf       = {http://www.lrec-conf.org/proceedings/lrec2020/pdf/2020.lrec-1.4.pdf}
}

BibTeX

@inproceedings{Kuehn:Abrami:Mehler:2020,
  author    = {K{\"{u}}hn, Vincent and Abrami, Giuseppe and Mehler, Alexander},
  title     = {WikNectVR: {A} Gesture-Based Approach for Interacting in Virtual
               Reality Based on WikNect and Gestural Writing},
  editor    = {Chen, Jessie Y. C. and Fragomeni, Gino},
  booktitle = {Virtual, Augmented and Mixed Reality. Design and Interaction -
               12th International Conference, {VAMR} 2020, Held as Part of the
               22nd {HCI} International Conference, {HCII} 2020, Copenhagen,
               Denmark, July 19-24, 2020, Proceedings, Part {I}},
  series    = {Lecture Notes in Computer Science},
  volume    = {12190},
  pages     = {299--312},
  publisher = {Springer},
  year      = {2020},
  doi       = {10.1007/978-3-030-49695-1_20},
  url       = {https://doi.org/10.1007/978-3-030-49695-1_20},
  biburl    = {https://dblp.org/rec/conf/hci/KuhnAM20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Tue, 14 Jul 2020 10:55:57 +0200}
}

BibTeX

@inproceedings{Luecking:Ginzburg:2019,
  author    = {L{\"u}cking, Andy and Ginzburg, Jonathan},
  title     = {Not few but all quantifiers can be negated: towards a referentially
               transparent semantics of quantified noun phrases},
  booktitle = {Proceedings of the Amsterdam Colloquium 2019},
  series    = {AC'19},
  location  = {University of Amsterdam},
  year      = {2019},
  pages     = {269--278},
  url       = {http://events.illc.uva.nl/AC/AC2019/},
  pdf       = {http://events.illc.uva.nl/AC/AC2019/uploaded_files/inlineitem/L_cking_and_Ginzburg_Not_few_but_all_quantifiers_ca.pdf}
}

Stefan Schweter and Sajawel Ahmed. 2019. Deep-EOS: General-Purpose Neural Networks for Sentence Boundary Detection. Proceedings of the 15th Conference on Natural Language Processing (KONVENS).

BibTeX

@inproceedings{Schweter:Ahmed:2019,
  author    = {Schweter, Stefan and Ahmed, Sajawel},
  title     = {{Deep-EOS}: General-Purpose Neural Networks for Sentence Boundary Detection},
  booktitle = {Proceedings of the 15th Conference on Natural Language Processing (KONVENS)},
  location  = {Erlangen, Germany},
  year      = {2019}
}

BibTeX

@inproceedings{Stoeckel:Hemati:Mehler:2019,
  author    = {Stoeckel, Manuel and Hemati, Wahed and Mehler, Alexander},
  title     = {When Specialization Helps: Using Pooled Contextualized Embeddings
               to Detect Chemical and Biomedical Entities in {Spanish}},
  booktitle = {Proceedings of The 5th Workshop on BioNLP Open Shared Tasks},
  month     = {nov},
  year      = {2019},
  address   = {Hong Kong, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://www.aclweb.org/anthology/D19-5702},
  doi       = {10.18653/v1/D19-5702},
  pages     = {11--15},
  abstract  = {The recognition of pharmacological substances, compounds and proteins
               is an essential preliminary work for the recognition of relations
               between chemicals and other biomedically relevant units. In this
               paper, we describe an approach to Task 1 of the PharmaCoNER Challenge,
               which involves the recognition of mentions of chemicals and drugs
               in Spanish medical texts. We train a state-of-the-art BiLSTM-CRF
               sequence tagger with stacked Pooled Contextualized Embeddings,
               word and sub-word embeddings using the open-source framework FLAIR.
               We present a new corpus composed of articles and papers from Spanish
               health science journals, termed the Spanish Health Corpus, and
               use it to train domain-specific embeddings which we incorporate
               in our model training. We achieve a result of 89.76{\%} F1-score
               using pre-trained embeddings and are able to improve these results
               to 90.52{\%} F1-score using specialized embeddings.}
}

BibTeX

@inproceedings{Ahmed:Stoeckel:Driller:Pachzelt:Mehler:2019,
  author    = {Ahmed, Sajawel and Stoeckel, Manuel and Driller, Christine and Pachzelt, Adrian
               and Mehler, Alexander},
  title     = {{BIOfid} Dataset: Publishing a {German} Gold Standard for Named Entity
               Recognition in Historical Biodiversity Literature},
  booktitle = {Proceedings of the 23rd Conference on Computational Natural Language
               Learning (CoNLL)},
  year      = {2019},
  address   = {Hong Kong, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://www.aclweb.org/anthology/K19-1081},
  doi       = {10.18653/v1/K19-1081},
  pages     = {871--880},
  abstract  = {The Specialized Information Service Biodiversity Research (BIOfid)
               has been launched to mobilize valuable biological data from printed
               literature hidden in German libraries for over the past 250 years.
               In this project, we annotate German texts converted by OCR from
               historical scientific literature on the biodiversity of plants,
               birds, moths and butterflies. Our work enables the automatic extraction
               of biological information previously buried in the mass of papers
               and volumes. For this purpose, we generated training data for
               the tasks of Named Entity Recognition (NER) and Taxa Recognition
               (TR) in biological documents. We use this data to train a number
               of leading machine learning tools and create a gold standard for
               TR in biodiversity literature. More specifically, we perform a
               practical analysis of our newly generated BIOfid dataset through
               various downstream-task evaluations and establish a new state
               of the art for TR with 80.23{\%} F-score. In this sense, our paper
               lays the foundations for future work in the field of information
               extraction in biology texts.},
  keywords  = {biofid}
}

Alexander Mehler and Giuseppe Abrami. October 10–11, 2019. VAnnotatoR: A framework for the multimodal reconstruction of historical situations and spaces. Proceedings of the Time Machine Conference.

BibTeX

@inproceedings{Mehler:Abrami:2019,
  author    = {Mehler, Alexander and Abrami, Giuseppe},
  title     = {{VAnnotatoR}: A framework for the multimodal reconstruction of
               historical situations and spaces},
  booktitle = {Proceedings of the Time Machine Conference},
  year      = {2019},
  month     = {oct},
  eventdate = {2019-10-10/2019-10-11},
  address   = {Dresden, Germany},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2019/09/TimeMachineConference.pdf}
}

Alex Hunziker, Hasanagha Mammadov, Wahed Hemati and Alexander Mehler. 2019. Corpus2Wiki: A MediaWiki-based Tool for Automatically Generating Wikiditions in Digital Humanities. INF-DH-2019.

BibTeX

@inproceedings{Hunziker:et:al:2019,
  author    = {Hunziker, Alex and Mammadov, Hasanagha and Hemati, Wahed and Mehler, Alexander},
  title     = {{Corpus2Wiki}: A {MediaWiki}-based Tool for Automatically Generating
               Wikiditions in Digital Humanities},
  booktitle = {INF-DH-2019},
  year      = {2019},
  editor    = {Burghardt, Manuel and M{\"u}ller-Birn, Claudia},
  publisher = {Gesellschaft f{\"u}r Informatik e.V.},
  address   = {Bonn}
}

Armin Hoenen. June, 2019. Rooting through Direction – New and Old Approaches. DHd 2019.

BibTeX

@inproceedings{Hoenen:2019dhd,
  author    = {Hoenen, Armin},
  title     = {Rooting through Direction -- New and Old Approaches},
  booktitle = {DHd 2019},
  url       = {https://zenodo.org/record/2596095#.XKtQb3Wg-vo},
  year      = {2019},
  month     = {jun}
}

Armin Hoenen. 2019. Interpreting and Post-Correcting the Minimum Spanning Tree. DGfS 2019.

BibTeX

@inproceedings{Hoenen:2019dgfs,
  author    = {Hoenen, Armin},
  title     = {Interpreting and Post-Correcting the Minimum Spanning Tree},
  booktitle = {DGfS 2019},
  url       = {http://www.dgfs2019.uni-bremen.de/abstracts/poster/Hoenen.pdf},
  year      = {2019}
}

Armin Hoenen. 2019. eLearning the URLCoFi – Digital Didactics for Humanists. AIUCD 2019.

BibTeX

@inproceedings{Hoenen:2019aiucd,
  author    = {Hoenen, Armin},
  title     = {{eLearning} the {URLCoFi} -- Digital Didactics for Humanists},
  booktitle = {AIUCD 2019},
  url       = {http://aiucd2019.uniud.it/wp-content/uploads/2019/01/BoA-2019_PROVV.pdf},
  year      = {2019}
}

Andy Lücking, Robin Cooper, Staffan Larsson and Jonathan Ginzburg. May, 2019. Distribution is not enough – Going Firther. Proceedings of Natural Language and Computer Science.

BibTeX

@inproceedings{Luecking:Cooper:Larsson:Ginzburg:2019,
  author    = {L{\"u}cking, Andy and Cooper, Robin and Larsson, Staffan and Ginzburg, Jonathan},
  title     = {Distribution is not enough -- Going {Firther}},
  booktitle = {Proceedings of Natural Language and Computer Science},
  maintitle = {The 13th International Conference on Computational
               Semantics (IWCS 2019)},
  series    = {NLCS 6},
  location  = {Gothenburg, Sweden},
  month     = {may},
  year      = {2019},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2019/05/Distribution_is_not_enough.pdf}
}

BibTeX

@inproceedings{Abrami:et:al:2019,
  author    = {Abrami, Giuseppe and Mehler, Alexander and L{\"u}cking, Andy and Rieb, Elias
               and Helfrich, Philipp},
  title     = {{TextAnnotator}: A flexible framework for semantic annotations},
  booktitle = {Proceedings of the Fifteenth Joint ACL - ISO Workshop on Interoperable
               Semantic Annotation, (ISA-15)},
  series    = {ISA-15},
  location  = {Gothenburg, Sweden},
  month     = {may},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/TextAnnotator_IWCS_Göteborg.pdf},
  year      = {2019},
  keywords  = {textannotator, biofid},
  abstract  = {Modern annotation tools should meet at least the following general
               requirements: they can handle diverse data and annotation levels
               within one tool, and they support the annotation process with
               automatic (pre-)processing outcomes as much as possible. We developed
               a framework that meets these general requirements and that enables
               versatile and browser-based annotations of texts, the TextAnnotator.
               It combines NLP methods of pre-processing with methods of flexible
               post-processing. In fact, machine learning (ML) requires a lot
               of training and test data, but is usually far from achieving perfect
               results. Producing high-level annotations for ML and post-correcting
               its results are therefore necessary. This is the purpose of TextAnnotator,
               which is entirely implemented in ExtJS and provides a range of
               interactive visualizations of annotations. In addition, it allows
               for flexibly integrating knowledge resources, e.g. in the course
               of post-processing named entity recognition. The paper describes
               TextAnnotator's architecture together with three use cases: annotating
               temporal structures, argument structures and named entity linking.}
}

BibTeX

@inproceedings{Uslu:Mehler:Baumartz:2019,
  author    = {Uslu, Tolga and Mehler, Alexander and Baumartz, Daniel},
  title     = {Computing Classifier-based Embeddings with the Help of {text2ddc}},
  booktitle = {Proceedings of the 20th International Conference on Computational
               Linguistics and Intelligent Text Processing, ({CICLing} 2019)},
  series    = {{CICLing} 2019},
  location  = {La Rochelle, France},
  year      = {2019}
}

Tolga Uslu, Alexander Mehler, Clemens Schulz and Daniel Baumartz. 2019. BigSense: a Word Sense Disambiguator for Big Data. Proceedings of the Digital Humanities 2019, (DH2019).

BibTeX

@inproceedings{Uslu:Mehler:Schulz:Baumartz:2019,
  author    = {Uslu, Tolga and Mehler, Alexander and Schulz, Clemens and Baumartz, Daniel},
  title     = {{BigSense}: a Word Sense Disambiguator for Big Data},
  booktitle = {Proceedings of the Digital Humanities 2019, ({DH2019})},
  series    = {{DH2019}},
  location  = {Utrecht, Netherlands},
  year      = {2019},
  url       = {https://dev.clariah.nl/files/dh2019/boa/0199.html}
}

BibTeX

@inproceedings{Abrami:Mehler:Spiekermann:2019,
  author    = {Abrami, Giuseppe and Mehler, Alexander and Spiekermann, Christian},
  title     = {Graph-based Format for Modeling Multimodal Annotations in Virtual
               Reality by Means of {VAnnotatoR}},
  booktitle = {Proceedings of the 21st International Conference on Human-Computer
               Interaction, HCII 2019},
  series    = {HCII 2019},
  location  = {Orlando, Florida, USA},
  editor    = {Stephanidis, Constantine and Antona, Margherita},
  month     = {jul},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {351--358},
  abstract  = {Projects in the field of Natural Language Processing (NLP), the
               Digital Humanities (DH) and related disciplines dealing with machine
               learning of complex relationships between data objects need annotations
               to obtain sufficiently rich training and test sets. The visualization
               of such data sets and their underlying Human Computer Interaction
               (HCI) are perennial problems of computer science. However, despite
               some success stories, the clarity of information presentation
               and the flexibility of the annotation process may decrease with
               the complexity of the underlying data objects and their relationships.
               In order to face this problem, the so-called VAnnotatoR was developed,
               as a flexible annotation tool using 3D glasses and augmented reality
               devices, which enables annotation and visualization in three-dimensional
               virtual environments. In addition, multimodal objects are annotated
               and visualized within a graph-based approach.},
  isbn      = {978-3-030-30712-7},
  doi       = {10.1007/978-3-030-30712-7_44},
  pdf       = {https://link.springer.com/content/pdf/10.1007\%2F978-3-030-30712-7_44.pdf},
  year      = {2019}
}

BibTeX

@inproceedings{Mehler:Uslu:Gleim:Baumartz:2019,
  author    = {Mehler, Alexander and Uslu, Tolga and Gleim, R{\"u}diger and Baumartz, Daniel},
  title     = {{text2ddc meets Literature -- Ein Verfahren f{\"u}r die Analyse und
               Visualisierung thematischer Makrostrukturen}},
  booktitle = {Proceedings of the 6th Digital Humanities Conference in the German-speaking
               Countries, DHd 2019},
  series    = {DHd 2019},
  location  = {Frankfurt, Germany},
  year      = {2019},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/Preprint_DHd2019_text2ddc_meets_Literature.pdf},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/DHD_Poster___text2ddc_meets_Literature_Poster.pdf}
}

BibTeX

@inproceedings{Abrami:Spiekermann:Mehler:2019,
  author    = {Abrami, Giuseppe and Spiekermann, Christian and Mehler, Alexander},
  title     = {{VAnnotatoR: Ein Werkzeug zur Annotation multimodaler Netzwerke
               in dreidimensionalen virtuellen Umgebungen}},
  booktitle = {Proceedings of the 6th Digital Humanities Conference in the German-speaking
               Countries, DHd 2019},
  series    = {DHd 2019},
  location  = {Frankfurt, Germany},
  year      = {2019},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/Preprint_VAnnotatoR_DHd2019.pdf},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/DHDVAnnotatoRPoster.pdf}
}

BibTeX

@inproceedings{Hemati:Mehler:Uslu:Abrami:2019,
  author    = {Hemati, Wahed and Mehler, Alexander and Uslu, Tolga and Abrami, Giuseppe},
  title     = {{Der TextImager als Front- und Backend für das verteilte NLP von
               Big Digital Humanities Data}},
  booktitle = {Proceedings of the 6th Digital Humanities Conference in the German-speaking
               Countries, DHd 2019},
  series    = {DHd 2019},
  location  = {Frankfurt, Germany},
  year      = {2019},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/Der-TextImager-als-Fron-und-Backend.pdf},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/DHD19_TextImager.pdf}
}

BibTeX

@inproceedings{HOENEN18.285,
  author    = {Armin Hoenen},
  title     = {Multi Modal Distance -- An Approach to Stemma Generation With Weighting},
  booktitle = {Proceedings of the Eleventh International Conference on Language
               Resources and Evaluation (LREC 2018)},
  year      = {2018},
  month     = {may},
  eventdate = {2018-05-07/2018-05-12},
  address   = {Miyazaki, Japan},
  editor    = {Nicoletta Calzolari and Khalid Choukri and Christopher Cieri
               and Thierry Declerck and Sara Goggi and Koiti Hasida and Hitoshi Isahara
               and Bente Maegaard and Joseph Mariani and H{\'e}l{\`e}ne Mazo and Asuncion Moreno
               and Jan Odijk and Stelios Piperidis and Takenobu Tokunaga},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {979-10-95546-00-9},
  url       = {http://www.lrec-conf.org/proceedings/lrec2018/pdf/285.pdf},
  language  = {english}
}

BibTeX

@inproceedings{HOENEN18.314,
  author    = {Armin Hoenen},
  title     = {{From Manuscripts to Archetypes through Iterative Clustering}},
  booktitle = {Proceedings of the Eleventh International Conference on Language
               Resources and Evaluation (LREC 2018)},
  year      = {2018},
  month     = {may},
  eventdate = {2018-05-07/2018-05-12},
  address   = {Miyazaki, Japan},
  editor    = {Nicoletta Calzolari and Khalid Choukri and Christopher Cieri
               and Thierry Declerck and Sara Goggi and Koiti Hasida and Hitoshi Isahara
               and Bente Maegaard and Joseph Mariani and Hélène Mazo and Asuncion Moreno
               and Jan Odijk and Stelios Piperidis and Takenobu Tokunaga},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {979-10-95546-00-9},
  url       = {http://www.lrec-conf.org/proceedings/lrec2018/pdf/314.pdf},
  language  = {english}
}

BibTeX

@inproceedings{HOENEN18.349,
  author    = {Armin Hoenen and Niko Schenk},
  title     = {{Knowing the Author by the Company His Words Keep}},
  booktitle = {Proceedings of the Eleventh International Conference on Language
               Resources and Evaluation (LREC 2018)},
  year      = {2018},
  month     = {may},
  eventdate = {2018-05-07/2018-05-12},
  address   = {Miyazaki, Japan},
  editor    = {Nicoletta Calzolari and Khalid Choukri and Christopher Cieri
               and Thierry Declerck and Sara Goggi and Koiti Hasida and Hitoshi Isahara
               and Bente Maegaard and Joseph Mariani and Hélène Mazo and Asuncion Moreno
               and Jan Odijk and Stelios Piperidis and Takenobu Tokunaga},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {979-10-95546-00-9},
  url       = {http://www.lrec-conf.org/proceedings/lrec2018/pdf/349.pdf},
  language  = {english}
}

Armin Hoenen. 2018. Attempts at Visualization of Etymological Information. Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018).

BibTeX

@inproceedings{HOENEN18.9,
  author    = {Armin Hoenen},
  title     = {Attempts at Visualization of Etymological Information},
  booktitle = {Proceedings of the Eleventh International Conference on Language
               Resources and Evaluation (LREC 2018)},
  year      = {2018},
  month     = {may},
  eventdate = {2018-05-07/2018-05-12},
  location  = {Miyazaki, Japan},
  editor    = {Kernerman, Ilan and Krek, Simon},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Paris, France},
  isbn      = {979-10-95546-28-3},
  language  = {english},
  url       = {http://lrec-conf.org/workshops/lrec2018/W33/pdf/book_of_proceedings.pdf}
}

Armin Hoenen and Lela Samushia. 2018. Principles Aiding in Reading Abbreviations in Old Georgian and Latin. DHd 2018.

BibTeX

@inproceedings{Hoenen:Samushia:2018dhd,
  author    = {Hoenen, Armin and Samushia, Lela},
  title     = {{Principles Aiding in Reading Abbreviations
               in Old Georgian and Latin}},
  booktitle = {DHd 2018},
  year      = {2018},
  url       = {http://dhd2018.uni-koeln.de/wp-content/uploads/boa-DHd2018-web-ISBN.pdf}
}

Armin Hoenen. 2018. Wikipedia Mention Graphs by Example. EADH 2018.

BibTeX

@inproceedings{Hoenen:2018eadh,
  author    = {Hoenen, Armin},
  title     = {{Wikipedia Mention Graphs by Example}},
  booktitle = {EADH 2018},
  year      = {2018},
  url       = {https://eadh2018.exordo.com/files/papers/37/final_draft/MentionGraphsEADH.pdf}
}

Armin Hoenen. 2018. Annotated Timelines and Stacked Area Plots for Visualization in Lexicography. Elexis workshop at EADH 2018.

BibTeX

@inproceedings{Hoenen:2018elexis,
  author    = {Hoenen, Armin},
  title     = {{Annotated Timelines and Stacked Area Plots
               for Visualization in Lexicography}},
  booktitle = {Elexis workshop at EADH 2018},
  year      = {2018},
  url       = {https://lexdhai.insight-centre.org/Lex_DH__AI_2018_paper_2.pdf}
}

Eleanor Rutherford, Wahed Hemati and Alexander Mehler. 2018. Corpus2Wiki: A MediaWiki based Annotation & Visualisation Tool for the Digital Humanities. INF-DH-2018.

BibTeX

@inproceedings{Rutherford:et:al:2018,
  author    = {Rutherford, Eleanor and Hemati, Wahed and Mehler, Alexander},
  title     = {{Corpus2Wiki}: A {MediaWiki} based Annotation \& Visualisation Tool
               for the Digital Humanities},
  booktitle = {INF-DH-2018},
  year      = {2018},
  editor    = {Burghardt, Manuel and Müller-Birn, Claudia},
  publisher = {Gesellschaft für Informatik e.V.},
  address   = {Bonn}
}

BibTeX

@inproceedings{Abrami:et:al:2018,
  author    = {Abrami, Giuseppe and Mehler, Alexander
               and Helfrich, Philipp and Rieb, Elias},
  title     = {{TextAnnotator}: A Browser-based Framework for Annotating
               Textual Data in Digital Humanities},
  booktitle = {Proceedings of the Digital Humanities Austria 2018},
  year      = {2018},
  location  = {Salzburg, Austria},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/TA__A_Browser_based_Framework_for_Annotating_Textual_Data_in_Digital_Humanities.pdf}
}

BibTeX

@inproceedings{Ahmed:Mehler:2018,
  author    = {Ahmed, Sajawel and Mehler, Alexander},
  title     = {{Resource-Size matters: Improving Neural Named Entity Recognition
               with Optimized Large Corpora}},
  booktitle = {Proceedings of the 17th IEEE International Conference
               on Machine Learning and Applications (ICMLA)},
  year      = {2018},
  location  = {Orlando, Florida, USA},
  pdf       = {https://arxiv.org/pdf/1807.10675.pdf},
  abstract  = {This study improves the performance of neural named entity recognition
               by a margin of up to 11\% in terms of F-score on the example of
               a low-resource language like German, thereby outperforming existing
               baselines and establishing a new state-of-the-art on each single
               open-source dataset (CoNLL 2003, GermEval 2014 and Tübingen Treebank
               2018). Rather than designing deeper and wider hybrid neural architectures,
               we gather all available resources and perform a detailed optimization
               and grammar-dependent morphological processing consisting of lemmatization
               and part-of-speech tagging prior to exposing the raw data to any
               training process. We test our approach in a threefold monolingual
               experimental setup of a) single, b) joint, and c) optimized training
               and shed light on the dependency of downstream-tasks on the size
               of corpora used to compute word embeddings.}
}

BibTeX

@inproceedings{Weiland:et:al:2018,
  author    = {Weiland, Claus and Driller, Christine and Koch, Markus
               and Schmidt, Marco and Abrami, Giuseppe and Ahmed, Sajawel
               and Mehler, Alexander and Pachzelt, Adrian and Kasperek, Gerwin
               and Hausinger, Angela and Hörnschemeyer, Thomas},
  title     = {{BioFID}, a platform to enhance accessibility of biodiversity data},
  booktitle = {Proceedings of the 10th International Conference on Ecological Informatics},
  year      = {2018},
  location  = {Jena, Germany},
  url       = {https://www.researchgate.net/profile/Marco_Schmidt3/publication/327940813_BIOfid_a_Platform_to_Enhance_Accessibility_of_Biodiversity_Data/links/5bae3e3e92851ca9ed2cd60f/BIOfid-a-Platform-to-Enhance-Accessibility-of-Biodiversity-Data.pdf?origin=publication_detail}
}

BibTeX

@inproceedings{Kett:et:al:2018,
  author    = {Kett, Attila and Abrami, Giuseppe
               and Mehler, Alexander and Spiekermann, Christian},
  title     = {{Resources2City Explorer}: A System for Generating Interactive
               Walkable Virtual Cities out of File Systems},
  booktitle = {Proceedings of the 31st ACM User Interface Software and Technology Symposium},
  year      = {2018},
  location  = {Berlin, Germany},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2018/10/UIST2018Final.pdf},
  abstract  = {We present Resources2City Explorer (R2CE), a tool for representing
               file systems as interactive, walkable virtual cities. R2CE visualizes
               file systems based on concepts of spatial, 3D information processing.
               For this purpose, it extends the range of functions of conventional
               file browsers considerably. Visual elements in a city generated
               by R2CE represent (relations of) objects of the underlying file
               system. The paper describes the functional spectrum of R2CE and
               illustrates it by visualizing a sample of 940 files.}
}

BibTeX

@inproceedings{Luecking:Ginzburg:2018,
  author    = {L{\"u}cking, Andy and Ginzburg, Jonathan},
  title     = {`Most people but not {Bill}': integrating sets, individuals and
               negation into a cognitively plausible account of noun phrase interpretation},
  booktitle = {Proceedings of Cognitive Structures: Linguistic, Philosophical
               and Psychological Perspectives},
  series    = {CoSt'18},
  year      = {2018}
}

Tolga Uslu and Alexander Mehler. 2018. PolyViz: a Visualization System for a Special Kind of Multipartite Graphs. Proceedings of the IEEE VIS 2018.

BibTeX

@inproceedings{Uslu:Mehler:2018,
  author    = {Uslu, Tolga and Mehler, Alexander},
  title     = {{PolyViz}: a Visualization System for a Special Kind
               of Multipartite Graphs},
  booktitle = {Proceedings of the IEEE VIS 2018},
  series    = {IEEE VIS 2018},
  year      = {2018},
  location  = {Berlin, Germany},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/07/polyviz-visualization-system.pdf}
}

BibTeX

@inproceedings{Baumartz:Uslu:Mehler:2018,
  author    = {Baumartz, Daniel and Uslu, Tolga and Mehler, Alexander},
  title     = {{LTV}: Labeled Topic Vector},
  booktitle = {Proceedings of {COLING 2018}, the 27th International
               Conference on Computational Linguistics: System Demonstrations,
               August 20-26},
  year      = {2018},
  publisher = {The COLING 2018 Organizing Committee},
  address   = {Santa Fe, New Mexico, USA},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/06/coling2018.pdf},
  abstract  = {In this paper, we present LTV, a website and an API that generate
               labeled topic classifications based on the Dewey Decimal Classification
               (DDC), an international standard for topic classification in libraries.
               We introduce nnDDC, a largely language-independent neural network-based
               classifier for DDC-related topic classification, which we optimized
               using a wide range of linguistic features to achieve an F-score
               of 87.4\%. To show that our approach is language-independent,
               we evaluate nnDDC using up to 40 different languages. We derive
               a topic model based on nnDDC, which generates probability distributions
               over semantic units for any input on sense-, word- and text-level.
               Unlike related approaches, however, these probabilities are estimated
               by means of nnDDC so that each dimension of the resulting vector
               representation is uniquely labeled by a DDC class. In this way,
               we introduce a neural network-based Classifier-Induced Semantic
               Space (nnCISS).}
}

BibTeX

@inproceedings{Mehler:Abrami:Spiekermann:Jostock:2018,
  author    = {Mehler, Alexander and Abrami, Giuseppe and Spiekermann, Christian
               and Jostock, Matthias},
  title     = {{VAnnotatoR}: {A} Framework for Generating Multimodal Hypertexts},
  booktitle = {Proceedings of the 29th ACM Conference on Hypertext and Social Media},
  series    = {HT '18},
  year      = {2018},
  location  = {Baltimore, Maryland},
  publisher = {ACM},
  address   = {New York, NY, USA},
  pdf       = {http://delivery.acm.org/10.1145/3210000/3209572/p150-mehler.pdf}
}

BibTeX

@inproceedings{Hemati:Mehler:Uslu:Baumartz:Abrami:2018,
  author    = {Hemati, Wahed and Mehler, Alexander and Uslu, Tolga
               and Baumartz, Daniel and Abrami, Giuseppe},
  title     = {Evaluating and Integrating Databases in the Area of {NLP}},
  booktitle = {International Quantitative Linguistics Conference (QUALICO 2018)},
  year      = {2018},
  location  = {Wroclaw, Poland},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/04/Hemat-Mehler-Uslu-Baumartz-Abrami-Qualico-2018.pdf},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2018/10/qualico2018_databases_poster_hemati_mehler_uslu_baumartz_abrami.pdf}
}

BibTeX

@inproceedings{Abrami:Boden:Gleiss:2018,
  author    = {Abrami, Giuseppe and Boden, Gertrud and Glei{\ss}, Lisa},
  title     = {{World of the Khwe Bushmen: Accessing Khwe Cultural Heritage data
               by means of a digital ontology based on OWLnotator}},
  booktitle = {Proceedings of the Digital Humanities 2018},
  series    = {DH2018},
  location  = {Mexico City, Mexico},
  year      = {2018}
}

Tolga Uslu, Alexander Mehler and Dirk Meyer. 2018. LitViz: Visualizing Literary Data by Means of text2voronoi. Proceedings of the Digital Humanities 2018.

BibTeX

@inproceedings{Uslu:Mehler:Meyer:2018,
  author    = {Tolga Uslu and Alexander Mehler and Dirk Meyer},
  title     = {{LitViz}: Visualizing Literary Data by Means of {text2voronoi}},
  booktitle = {Proceedings of the Digital Humanities 2018},
  series    = {DH2018},
  location  = {Mexico City, Mexico},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/LitViz.pdf},
  year      = {2018}
}

BibTeX

@inproceedings{Spiekerman:Abrami:Mehler:2018,
  author    = {Spiekermann, Christian and Abrami, Giuseppe and Mehler, Alexander},
  title     = {{VAnnotatoR}: a Gesture-driven Annotation Framework
               for Linguistic and Multimodal Annotation},
  booktitle = {Proceedings of the Annotation, Recognition and Evaluation
               of Actions (AREA 2018) Workshop},
  series    = {AREA},
  year      = {2018},
  location  = {Miyazaki, Japan},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/VAnnotatoR.pdf}
}

BibTeX

@inproceedings{Uslu:et:al:2018:a,
  author    = {Uslu, Tolga and Miebach, Lisa and Wolfsgruber, Steffen
               and Wagner, Michael and Fließbach, Klaus and Gleim, Rüdiger
               and Hemati, Wahed and Henlein, Alexander and Mehler, Alexander},
  title     = {{Automatic Classification in Memory Clinic Patients and in Depressive Patients}},
  booktitle = {Proceedings of Resources and ProcessIng of linguistic,
               para-linguistic and extra-linguistic Data from people with
               various forms of cognitive/psychiatric impairments (RaPID-2)},
  series    = {RaPID},
  year      = {2018},
  location  = {Miyazaki, Japan}
}

BibTeX

@inproceedings{Uslu:Mehler:Niekler:Baumartz:2018,
  author    = {Tolga Uslu and Alexander Mehler and Andreas Niekler and Daniel Baumartz},
  title     = {Towards a {DDC}-based Topic Network Model of {Wikipedia}},
  booktitle = {Proceedings of 2nd International Workshop on Modeling, Analysis,
               and Management of Social Networks and their Applications (SOCNET
               2018), February 28, 2018},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/TowardsDDC.pdf},
  year      = {2018}
}

BibTeX

@inproceedings{Uslu:et:al:2018,
  author    = {Tolga Uslu and Alexander Mehler and Daniel Baumartz and Alexander Henlein
               and Wahed Hemati},
  title     = {{fastSense}: An Efficient Word Sense Disambiguation Classifier},
  booktitle = {Proceedings of the 11th edition of the Language Resources and
               Evaluation Conference, May 7 - 12},
  series    = {LREC 2018},
  address   = {Miyazaki, Japan},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/fastSense.pdf},
  year      = {2018}
}

BibTeX

@inproceedings{Gleim:Mehler:Song:2018,
  author    = {R{\"u}diger Gleim and Alexander Mehler and Sung Y. Song},
  title     = {{WikiDragon}: A {Java} Framework For Diachronic Content And Network
               Analysis Of {MediaWikis}},
  booktitle = {Proceedings of the 11th edition of the Language Resources and
               Evaluation Conference, May 7 - 12},
  series    = {LREC 2018},
  address   = {Miyazaki, Japan},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/WikiDragon.pdf},
  year      = {2018}
}

BibTeX

@inproceedings{Helfrich:et:al:2018,
  author    = {Philipp Helfrich and Elias Rieb and Giuseppe Abrami and Andy L{\"u}cking
               and Alexander Mehler},
  title     = {{TreeAnnotator}: Versatile Visual Annotation of Hierarchical Text Relations},
  booktitle = {Proceedings of the 11th edition of the Language Resources and
               Evaluation Conference, May 7 - 12},
  series    = {LREC 2018},
  address   = {Miyazaki, Japan},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/TreeAnnotator.pdf},
  year      = {2018}
}

BibTeX

@inproceedings{Abrami:Mehler:2018,
  author    = {Abrami, Giuseppe and Mehler, Alexander},
  title     = {A {UIMA} Database Interface for Managing {NLP}-related Text Annotations},
  booktitle = {Proceedings of the Eleventh International Conference
               on Language Resources and Evaluation ({LREC} 2018)},
  series    = {LREC 2018},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Cieri, Christopher
               and Declerck, Thierry and Goggi, Sara and Hasida, Koiti
               and Isahara, Hitoshi and Maegaard, Bente and Mariani, Joseph
               and Mazo, H{\'e}l{\`e}ne and Moreno, Asuncion and Odijk, Jan
               and Piperidis, Stelios and Tokunaga, Takenobu},
  year      = {2018},
  month     = {may},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Miyazaki, Japan},
  keywords  = {UIMA},
  url       = {https://aclanthology.org/L18-1212},
  pdf       = {https://aclanthology.org/L18-1212.pdf}
}

BibTeX

@inproceedings{Hemati:Mehler:Uslu:2017,
  author    = {Wahed Hemati and Alexander Mehler and Tolga Uslu},
  title     = {{CRFVoter}: Chemical Entity Mention, Gene and Protein Related
               Object recognition using a conglomerate of {CRF} based tools},
  booktitle = {BioCreative V.5. Proceedings},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/CRFVoter.pdf},
  year      = {2017}
}

Wahed Hemati, Tolga Uslu and Alexander Mehler. 2017. TextImager as an interface to BeCalm. BioCreative V.5. Proceedings.

BibTeX

@inproceedings{Hemati:Uslu:Mehler:2017,
  author    = {Hemati, Wahed and Uslu, Tolga and Mehler, Alexander},
  title     = {{TextImager} as an interface to {BeCalm}},
  booktitle = {BioCreative V.5. Proceedings},
  year      = {2017},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/TextImager_BeCalm.pdf}
}

BibTeX

@inproceedings{Mehler:et:al:2017:a,
  author    = {Alexander Mehler and Giuseppe Abrami and Steffen Bruendel and Lisa Felder
               and Thomas Ostertag and Christian Spiekermann},
  title     = {{Stolperwege}: An App for a Digital Public History of the {Holocaust}},
  booktitle = {Proceedings of the 28th ACM Conference on Hypertext and Social Media},
  series    = {HT '17},
  pages     = {319--320},
  address   = {New York, NY, USA},
  publisher = {ACM},
  abstract  = {We present the Stolperwege app, a web-based framework for ubiquitous
               modeling of historical processes. Starting from the art project
               Stolpersteine of Gunter Demnig, it allows for virtually connecting
               these stumbling blocks with information about the biographies
               of victims of Nazism. According to the practice of public history,
               the aim of Stolperwege is to deepen public knowledge of the Holocaust
               in the context of our everyday environment. Stolperwege uses an
               information model that allows for modeling social networks of
               agents starting from information about portions of their life.
               The paper exemplifies how Stolperwege is informationally enriched
               by means of historical maps and 3D animations of (historical)
               buildings.},
  acmid     = {3078748},
  doi       = {10.1145/3078714.3078748},
  isbn      = {978-1-4503-4708-2},
  keywords  = {3d, geocaching, geotagging, historical maps,
                   historical processes, public history of the holocaust,
                   ubiquitous computing},
  location  = {Prague, Czech Republic},
  numpages  = {2},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2017/07/poster_ht2017.pdf},
  url       = {http://doi.acm.org/10.1145/3078714.3078748},
  year      = {2017}
}

BibTeX

@inproceedings{Mehler:Gleim:Hemati:Uslu:2017,
  author    = {Mehler, Alexander and Gleim, Rüdiger
               and Hemati, Wahed and Uslu, Tolga},
  title     = {{Skalenfreie online soziale Lexika am Beispiel von Wiktionary}},
  booktitle = {Proceedings of 53rd Annual Conference of the Institut
               für Deutsche Sprache (IDS), March 14-16, Mannheim, Germany},
  editor    = {Engelberg, Stefan and Lobin, Henning
               and Steyer, Kathrin and Wolfer, Sascha},
  year      = {2017},
  publisher = {De Gruyter},
  address   = {Berlin},
  note      = {In German. Title translates into: Scale-free online-social
               Lexika by Example of Wiktionary},
  abstract  = {In English: The paper deals with characteristics of the structural,
               thematic and participatory dynamics of collaboratively generated
               lexical networks. This is done by example of Wiktionary. Starting
               from a network-theoretical model in terms of so-called multi-layer
               networks, we describe Wiktionary as a scale-free lexicon. Systems
               of this sort are characterized by the fact that their content-related
               dynamics is determined by the underlying dynamics of collaborating
               authors. This happens in a way that social structure imprints
               on content structure. According to this conception, the unequal
               distribution of the activities of authors results in a correspondingly
               unequal distribution of the information units documented within
               the lexicon. The paper focuses on foundations for describing such
               systems starting from a parameter space which requires to deal
               with Wiktionary as an issue in big data analysis. In German: Der
               Beitrag thematisiert Eigenschaften der strukturellen, thematischen
               und partizipativen Dynamik kollaborativ erzeugter lexikalischer
               Netzwerke am Beispiel von Wiktionary. Ausgehend von einem netzwerktheoretischen
               Modell in Form so genannter Mehrebenennetzwerke wird Wiktionary
               als ein skalenfreies Lexikon beschrieben. Systeme dieser Art zeichnen
               sich dadurch aus, dass ihre inhaltliche Dynamik durch die zugrundeliegende
               Kollaborationsdynamik bestimmt wird, und zwar so, dass sich die
               soziale Struktur der entsprechenden inhaltlichen Struktur aufprägt.
               Dieser Auffassung gemäß führt die Ungleichverteilung der Aktivitäten
               von Lexikonproduzenten zu einer analogen Ungleichverteilung der
               im Lexikon dokumentierten Informationseinheiten. Der Beitrag thematisiert
               Grundlagen zur Beschreibung solcher Systeme ausgehend von einem
               Parameterraum, welcher die netzwerkanalytische Betrachtung von
               Wiktionary als Big-Data-Problem darstellt.}
}

Armin Hoenen, Steffen Eger and Ralf Gehrke. 2017. How Many Stemmata with Root Degree k? Proceedings of the 15th Meeting on the Mathematics of Language, 11–21.

BibTeX

@inproceedings{Hoenen:Eger:Gehrke:2017,
  author    = {Hoenen, Armin and Eger, Steffen and Gehrke, Ralf},
  title     = {{How Many Stemmata with Root Degree k?}},
  booktitle = {Proceedings of the 15th Meeting on the Mathematics of Language},
  pages     = {11--21},
  year      = {2017},
  publisher = {Association for Computational Linguistics},
  location  = {London, UK},
  url       = {http://aclweb.org/anthology/W17-3402},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/How_Many_Stemmata_with_Root_Degree_k.pdf}
}

BibTeX

@inproceedings{Hoenen:2017:b,
  author    = {Hoenen, Armin},
  title     = {{Using Word Embeddings for Computing Distances Between Texts and
               for Authorship Attribution}},
  booktitle = {International Conference on Applications of Natural Language to
               Information Systems},
  pages     = {274--277},
  publisher = {Springer},
  doi       = {10.1007/978-3-319-59569-6_33},
  url       = {https://link.springer.com/chapter/10.1007/978-3-319-59569-6_33},
  year      = {2017}
}

BibTeX

@inproceedings{Uslu:Hemati:Mehler:Baumartz:2017,
  author    = {Uslu, Tolga and Hemati, Wahed
               and Mehler, Alexander and Baumartz, Daniel},
  title     = {{TextImager} as a Generic Interface to {R}},
  booktitle = {Software Demonstrations of the 15th Conference of the
               European Chapter of the Association for Computational
               Linguistics (EACL 2017)},
  year      = {2017},
  location  = {Valencia, Spain},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/TextImager.pdf}
}

Andy Lücking. 2017. Indexicals as Weak Descriptors. Proceedings of the 12th International Conference on Computational Semantics.

BibTeX

@inproceedings{Luecking:2017:c,
  author    = {L{\"u}cking, Andy},
  title     = {Indexicals as Weak Descriptors},
  booktitle = {Proceedings of the 12th International Conference on Computational Semantics},
  series    = {IWCS 2017},
  address   = {Montpellier (France)},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/descriptive-indexicals_rev.pdf},
  year      = {2017}
}

Steffen Eger, Armin Hoenen and Alexander Mehler. 2016. Language classification from bilingual word embedding graphs. Proceedings of COLING 2016.

BibTeX

@inproceedings{Eger:Hoenen:Mehler:2016,
  author    = {Eger, Steffen and Hoenen, Armin and Mehler, Alexander},
  title     = {Language classification from bilingual word
               embedding graphs},
  booktitle = {Proceedings of COLING 2016},
  year      = {2016},
  publisher = {ACL},
  location  = {Osaka},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2016/10/eger_hoenen_mehler_COLING2016.pdf}
}

Wahed Hemati, Tolga Uslu and Alexander Mehler. 2016. TextImager: a Distributed UIMA-based System for NLP. Proceedings of the COLING 2016 System Demonstrations.

BibTeX

@inproceedings{Hemati:Uslu:Mehler:2016,
  author    = {Wahed Hemati and Tolga Uslu and Alexander Mehler},
  title     = {{TextImager}: a Distributed {UIMA}-based System for {NLP}},
  booktitle = {Proceedings of the COLING 2016 System Demonstrations},
  location  = {Osaka, Japan},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/TextImager2016.pdf},
  year      = {2016}
}

BibTeX

@inproceedings{Luecking:2016:b,
  author    = {L{\"u}cking, Andy},
  title     = {Modeling Co-Verbal Gesture Perception in Type Theory with Records},
  booktitle = {Proceedings of the 2016 Federated Conference on Computer Science
               and Information Systems},
  editor    = {M. Ganzha and L. Maciaszek and M. Paprzycki},
  volume    = {8},
  series    = {Annals of Computer Science and Information Systems},
  pages     = {383--392},
  address   = {Gdansk, Poland},
  publisher = {IEEE},
  note      = {Best Paper Award},
  doi       = {10.15439/2016F83},
  pdf       = {http://annals-csis.org/Volume_8/pliks/83.pdf},
  url       = {http://annals-csis.org/Volume_8/drp/83.html},
  year      = {2016}
}

BibTeX

@inproceedings{Mehler:Uslu:Hemati:2016,
  author    = {Mehler, Alexander and Uslu, Tolga and Hemati, Wahed},
  title     = {Text2voronoi: An Image-driven Approach to
               Differential Diagnosis},
  booktitle = {Proceedings of the 5th Workshop on Vision and Language
               (VL'16) hosted by the 54th Annual Meeting of the Association
               for Computational Linguistics (ACL), Berlin},
  year      = {2016},
  pdf       = {https://aclweb.org/anthology/W/W16/W16-3212.pdf}
}

Steffen Eger and Alexander Mehler. 2016. On the linearity of semantic change: Investigating meaning variation via dynamic graph models. Proceedings of ACL 2016.

BibTeX

@inproceedings{Eger:Mehler:2016,
  author    = {Eger, Steffen and Mehler, Alexander},
  title     = {On the linearity of semantic change: {I}nvestigating
               meaning variation via dynamic graph models},
  booktitle = {Proceedings of ACL 2016},
  year      = {2016},
  location  = {Berlin},
  pdf       = {https://www.aclweb.org/anthology/P/P16/P16-2009.pdf}
}

BibTeX

@inproceedings{Hoenen:2016DH,
  author    = {Hoenen, Armin},
  title     = {{Silva Portentosissima – Computer-Assisted Reflections on Bifurcativity
               in Stemmas}},
  booktitle = {Digital Humanities 2016: Conference Abstracts. Jagiellonian University
               \& Pedagogical University},
  series    = {DH 2016},
  pages     = {557--560},
  abstract  = {In 1928, the philologue Joseph Bédier explored contemporary stemmas
               and found them to contain a suspiciously large amount of bifurcations.
               In this paper, the argument is investigated that, with a large
               amount of lost manuscripts, the amount of bifurcations in the
               true stemmas would naturally be high because the probability for
               siblings to survive becomes very low is assessed via a computer
               simulation.},
  location  = {Kraków},
  url       = {http://dh2016.adho.org/abstracts/311},
  year      = {2016}
}

Alexander Mehler, Benno Wagner and Rüdiger Gleim. 2016. Wikidition: Towards A Multi-layer Network Model of Intertextuality. Proceedings of DH 2016, 12-16 July.

BibTeX

@inproceedings{Mehler:Wagner:Gleim:2016,
  author    = {Mehler, Alexander and Wagner, Benno and Gleim, R{\"u}diger},
  title     = {Wikidition: Towards A Multi-layer Network Model of Intertextuality},
  booktitle = {Proceedings of DH 2016, 12-16 July},
  series    = {DH 2016},
  abstract  = {The paper presents Wikidition, a novel text mining tool for generating
               online editions of text corpora. It explores lexical, sentential
               and textual relations to span multi-layer networks (linkification)
               that allow for browsing syntagmatic and paradigmatic relations
               among the constituents of its input texts. In this way, relations
               of text reuse can be explored together with lexical relations
               within the same literary memory information system. Beyond that,
               Wikidition contains a module for automatic lexiconisation to extract
               author specific vocabularies. Based on linkification and lexiconisation,
               Wikidition does not only allow for traversing input corpora on
               different (lexical, sentential and textual) levels. Rather, its
               readers can also study the vocabulary of authors on several levels
               of resolution including superlemmas, lemmas, syntactic words and
               wordforms. We exemplify Wikidition by a range of literary texts
               and evaluate it by means of the apparatus of quantitative network
               analysis.},
  location  = {Kraków},
  url       = {http://dh2016.adho.org/abstracts/250},
  year      = {2016}
}

BibTeX

@inproceedings{vorderBrueck:Mehler:2016,
  author    = {vor der Br{\"u}ck, Tim and Mehler, Alexander},
  title     = {{TLT-CRF}: A Lexicon-supported Morphological Tagger for {Latin}
               Based on Conditional Random Fields},
  booktitle = {Proceedings of the 10th International Conference on Language Resources
               and Evaluation},
  series    = {LREC 2016},
  location  = {Portoro\v{z} (Slovenia)},
  pdf       = {http://www.texttechnologylab.org/wp-content/uploads/2016/04/lrec2016_tagger.pdf},
  year      = {2016}
}

BibTeX

@inproceedings{Eger:Mehler:Gleim:2016,
  author    = {Eger, Steffen and Gleim, R{\"u}diger and Mehler, Alexander},
  title     = {Lemmatization and Morphological Tagging in {German} and {Latin}:
               A comparison and a survey of the state-of-the-art},
  booktitle = {Proceedings of the 10th International Conference on Language Resources
               and Evaluation},
  series    = {LREC 2016},
  location  = {Portoro\v{z} (Slovenia)},
  pdf       = {http://www.texttechnologylab.org/wp-content/uploads/2016/04/lrec_eger_gleim_mehler.pdf},
  year      = {2016}
}

BibTeX

@inproceedings{Luecking:Mehler:Walther:Mauri:Kurfuerst:2016,
  author    = {L{\"u}cking, Andy and Mehler, Alexander and Walther, D{\'e}sir{\'e}e
               and Mauri, Marcel and Kurf{\"u}rst, Dennis},
  title     = {Finding Recurrent Features of Image Schema Gestures: the {FIGURE} corpus},
  booktitle = {Proceedings of the 10th International Conference on Language Resources
               and Evaluation},
  series    = {LREC 2016},
  location  = {Portoro\v{z} (Slovenia)},
  pdf       = {http://www.texttechnologylab.org/wp-content/uploads/2016/04/lrec2016-gesture-study-final-version-short.pdf},
  year      = {2016}
}

BibTeX

@inproceedings{Luecking:Hoenen:Mehler:2016,
  author    = {L{\"u}cking, Andy and Hoenen, Armin and Mehler, Alexander},
  title     = {{TGermaCorp} -- A (Digital) Humanities Resource for (Computational) Linguistics},
  booktitle = {Proceedings of the 10th International Conference on Language Resources
               and Evaluation},
  series    = {LREC 2016},
  islrn     = {536-382-801-278-5},
  location  = {Portoro\v{z} (Slovenia)},
  pdf       = {http://www.texttechnologylab.org/wp-content/uploads/2016/04/lrec2016-ttgermacorp-final.pdf},
  year      = {2016}
}

BibTeX

@inproceedings{Wagner:Mehler:Biber:2016,
  author    = {Wagner, Benno and Mehler, Alexander and Biber, Hanno},
  title     = {{Transbiblionome Daten in der Literaturwissenschaft. Texttechnologische
               Erschlie{\ss}ung und digitale Visualisierung intertextueller Beziehungen
               digitaler Korpora}},
  booktitle = {DHd 2016},
  url       = {http://www.dhd2016.de/abstracts/sektionen-005.html#index.xml-body.1_div.4},
  year      = {2016}
}

Armin Hoenen. 2016. Wikipedia Titles As Noun Tag Predictors. Proceedings of the 10th International Conference on Language Resources and Evaluation.

BibTeX

@inproceedings{Hoenen:2016x,
  author    = {Hoenen, Armin},
  title     = {{Wikipedia} Titles As Noun Tag Predictors},
  booktitle = {Proceedings of the 10th International Conference on Language Resources
               and Evaluation},
  series    = {LREC 2016},
  location  = {Portoro\v{z} (Slovenia)},
  pdf       = {http://www.lrec-conf.org/proceedings/lrec2016/pdf/18_Paper.pdf},
  year      = {2016}
}

Armin Hoenen. 2016. Das erste dynamische Stemma, Pionier des digitalen Zeitalters? Accepted in the Proceedings of the Jahrestagung der Digital Humanities im deutschsprachigen Raum.

BibTeX

@inproceedings{Hoenen:2016y,
  author    = {Hoenen, Armin},
  title     = {Das erste dynamische {Stemma}, {Pionier} des digitalen {Zeitalters}?},
  booktitle = {Proceedings of the Jahrestagung der Digital Humanities
               im deutschsprachigen Raum},
  note      = {accepted},
  url       = {http://www.dhd2016.de/abstracts/posters-060.html},
  year      = {2016}
}

Armin Hoenen and Franziska Mader. 2015. A New LMF Schema Application by Example of an Austrian Lexicon Applied to the Historical Corpus of the Writer Hugo von Hofmannsthal. Historical Corpora.

BibTeX

@inproceedings{Hoenen:Mader:2015,
  author    = {Hoenen, Armin and Mader, Franziska},
  title     = {A New {LMF} Schema Application by Example of an {Austrian} Lexicon
               Applied to the Historical Corpus of the Writer {Hugo von Hofmannsthal}},
  booktitle = {Historical Corpora},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/HoenenMader2013-a-new-lmf-schema-application.pdf},
  website   = {http://www.narr-shop.de/historical-corpora.html},
  year      = {2015}
}

Tim vor der Brück, Steffen Eger and Alexander Mehler. 2015. Complex Decomposition of the Negative Distance Kernel. IEEE International Conference on Machine Learning and Applications.

BibTeX

@inproceedings{vor:der:Bruck:Eger:Mehler:2015,
  title     = {Complex Decomposition of the Negative Distance Kernel},
  author    = {vor der Br{\"u}ck, Tim
               and Eger, Steffen
               and Mehler, Alexander},
  year      = {2015},
  booktitle = {IEEE International Conference on Machine Learning
               and Applications},
  location  = {Miami, Florida, USA}
}

Steffen Eger. 2015. Do we need bigram alignment models? On the effect of alignment quality on transduction accuracy in G2P. Proceedings of EMNLP.

BibTeX

@inproceedings{Eger:2015_EMNLP,
  author    = {Eger, Steffen},
  title     = {Do we need bigram alignment models? On the effect of alignment
               quality on transduction accuracy in {G2P}},
  booktitle = {Proceedings of {EMNLP}},
  year      = {2015},
  pdf       = {https://www.aclweb.org/anthology/D15-1139}
}

Tim vor der Brück and Steffen Eger. 2015. Deriving a primal form for the quadratic power kernel. Proceedings of the 38th German Conference on Artificial Intelligence (KI).

BibTeX

@inproceedings{vorDerBrueck:Eger:2015,
  author    = {vor der Br{\"u}ck, Tim and Eger, Steffen},
  title     = {Deriving a primal form for the quadratic power kernel},
  booktitle = {Proceedings of the 38th German Conference on Artificial Intelligence ({KI})},
  year      = {2015}
}

Steffen Eger. 2015. Improving G2P from Wiktionary and other (web) resources. Proceedings of Interspeech.

BibTeX

@inproceedings{Eger:2015_Interspeech,
  author    = {Eger, Steffen},
  title     = {Improving {G2P} from {Wiktionary} and other (web) resources},
  booktitle = {Proceedings of Interspeech},
  pdf       = {https://pdfs.semanticscholar.org/bba8/30015d9cbfc40b975c25d0ec186280da6ab0.pdf},
  year      = {2015}
}

BibTeX

@inproceedings{Eger:vor:der:Brueck:Mehler:2015,
  author    = {Eger, Steffen and vor der Br{\"u}ck, Tim and Mehler, Alexander},
  title     = {Lexicon-assisted tagging and lemmatization in {Latin}: A comparison
               of six taggers and two lemmatization methods},
  booktitle = {Proceedings of the 9th Workshop on Language Technology for Cultural
               Heritage, Social Sciences, and Humanities ({LaTeCH 2015})},
  address   = {Beijing, China},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Lexicon-assisted_tagging.pdf},
  year      = {2015}
}

Steffen Eger. 2015. Multiple Many-To-Many Sequence Alignment For Combining String-Valued Variables: A G2P Experiment. ACL.

BibTeX

@inproceedings{Eger:2015_ACL,
  author    = {Eger, Steffen},
  title     = {Multiple Many-To-Many Sequence Alignment For Combining String-Valued
               Variables: A {G2P} Experiment},
  booktitle = {{ACL}},
  publisher = {Association for Computational Linguistics},
  year      = {2015}
}

Steffen Eger. 2015. Designing and comparing G2P-type lemmatizers for a morphology-rich language. Fourth International Workshop on Systems and Frameworks for Computational Morphology.

BibTeX

@inproceedings{Eger:2015_SFCM,
  author    = {Eger, Steffen},
  title     = {Designing and comparing {G2P}-type lemmatizers for a morphology-rich language},
  booktitle = {Fourth International Workshop on Systems and
               Frameworks for Computational Morphology},
  year      = {2015}
}

BibTeX

@inproceedings{Eger:Schenk:Mehler:2015,
  author    = {Eger, Steffen and Schenk, Niko and Mehler, Alexander},
  title     = {Towards Semantic Language Classification: Inducing and Clustering
               Semantic Association Networks from {Europarl}},
  booktitle = {Proceedings of the Fourth Joint Conference on Lexical and Computational
               Semantics},
  pages     = {127--136},
  publisher = {Association for Computational Linguistics},
  month     = jun,
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/starsem2015-corrected-version.pdf},
  url       = {http://www.aclweb.org/anthology/S15-1014},
  year      = {2015}
}

Armin Hoenen. 2015. Das artifizielle Manuskriptkorpus TASCFE. Accepted in the Proceedings of the Jahrestagung der Digital Humanities im deutschsprachigen Raum.

BibTeX

@inproceedings{Hoenen:2015,
  author    = {Hoenen, Armin},
  title     = {Das artifizielle {Manuskriptkorpus} {TASCFE}},
  booktitle = {Proceedings of the Jahrestagung der Digital Humanities
               im deutschsprachigen Raum},
  note      = {accepted},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Hoenen_tascfeDH2015.pdf},
  year      = {2015}
}

BibTeX

@inproceedings{Gleim:Mehler:2015,
  author    = {Gleim, R{\"u}diger and Mehler, Alexander},
  title     = {{TTLab Preprocessor} -- {Eine} generische {Web-Anwendung} f{\"u}r die {Vorverarbeitung}
               von {Texten} und deren {Evaluation}},
  booktitle = {Proceedings of the Jahrestagung der Digital Humanities
               im deutschsprachigen Raum},
  note      = {accepted},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Gleim_Mehler_PrePro_DHGraz2015.pdf},
  year      = {2015}
}

BibTeX

@inproceedings{Abrami:Mehler:Zeunert:2015:a,
  author    = {Abrami, Giuseppe and Mehler, Alexander and Zeunert, Susanne},
  title     = {Ontologiegest{\"u}tzte geisteswissenschaftliche {Annotationen} mit dem {OWLnotator}},
  booktitle = {Proceedings of the Jahrestagung der Digital Humanities im deutschsprachigen Raum},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Abrami_Mehler_Zeunert_DHd_2015_abstract.pdf},
  year      = {2015}
}

BibTeX

@inproceedings{Hoenen:2015a,
  author    = {Hoenen, Armin},
  title     = {{Lachmannian} Archetype Reconstruction for Ancient Manuscript Corpora},
  booktitle = {Proceedings of the 2015 Conference of the North American Chapter
               of the Association for Computational Linguistics: Human Language
               Technologies ({NAACL HLT})},
  note      = {Citation: Trovato is published in 2014 not in 2009.},
  abstract  = {Two goals are targeted by computer philology for ancient manuscript
               corpora: firstly, making an edition, that is roughly speaking
               one text version representing the whole corpus, which contains
               variety induced through copy errors and other processes and secondly,
               producing a stemma. A stemma is a graph-based visualization of
               the copy history with manuscripts as nodes and copy events as
               edges. Its root, the so-called archetype is the supposed original
               text or urtext from which all subsequent copies are made. Our
               main contribution is to present one of the first computational
               approaches to automatic archetype reconstruction and to introduce
               the first text-based evaluation for automatically produced archetypes.
               We compare a philologically generated archetype with one generated
               by bio-informatic software.},
  website   = {http://www.aclweb.org/anthology/N15-1127},
  year      = {2015}
}

Armin Hoenen. 2015. Simulating Misreading. Proceedings of the 20th International Conference on Applications of Natural Language to Information Systems (NLDB).

BibTeX

@inproceedings{Hoenen:2015b,
  author    = {Hoenen, Armin},
  title     = {Simulating Misreading},
  booktitle = {Proceedings of the 20th International Conference on Applications
               of Natural Language to Information Systems ({NLDB})},
  abstract  = {Physical misreading (as opposed to interpretational misreading)
               is an unnoticed substitution in silent reading. Especially for
               legally important documents or instruction manuals, this can lead
               to serious consequences. We present a prototype of an automatic
               highlighter targeting words which can most easily be misread in
               a given text using a dynamic orthographic neighbour concept. We
               propose measures of fit of a misread token based on Natural Language
               Processing and detect a list of short most easily misread tokens
               in the English language. We design a highlighting scheme for avoidance
               of misreading.},
  doi       = {10.1007/978-3-319-19581-0_34},
  website   = {http://link.springer.com/chapter/10.1007/978-3-319-19581-0_34},
  year      = {2015}
}

BibTeX

@inproceedings{Abrami:Freiberg:Warner:2015,
  author    = {Abrami, Giuseppe and Freiberg, Michael and Warner, Paul},
  title     = {Managing and Annotating Historical Multimodal Corpora with the
               {eHumanities Desktop} -- An outline of the current state of the {LOEWE}
               project Illustrations of {Goethe's} {Faust}},
  booktitle = {Historical Corpora},
  pages     = {353--363},
  abstract  = {Text corpora are structured sets of text segments that can be
               annotated or interrelated. Expanding on this, we can define a
               database of images as an iconographic multimodal corpus with annotated
               images and the relations between images as well as between images
               and texts. The Goethe-Museum in Frankfurt holds a significant
               collection of art work and texts relating to Goethe’s Faust from
               the early 19th century until the present. In this project we create
               a database containing digitized items from this collection, and
               extend a tool, the ImageDB in the eHumanities Desktop, to annotate
               and provide relations between resources. This article gives an
               overview of the project and provides some technical details. Furthermore
               we show newly implemented features, explain the challenge of creating
               an ontology on multimodal corpora and give a forecast for future
               work.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/AbramiFreibergWarner_HC_2012.pdf},
  website   = {http://www.narr-shop.de/historical-corpora.html},
  year      = {2015}
}

BibTeX

@inproceedings{Islam:Rahman:Mehler:2014,
  author    = {Islam, Md. Zahurul and Rahman, Md. Rashedur and Mehler, Alexander},
  title     = {Readability Classification of {Bangla} Texts},
  booktitle = {15th International Conference on Intelligent Text Processing and
               Computational Linguistics ({CICLing}), Kathmandu, Nepal},
  abstract  = {Readability classification is an important application of Natural
               Language Processing. It aims at judging the quality of documents
               and to assist writers to identify possible problems. This paper
               presents a readability classifier for Bangla textbooks using information-theoretic
               and lexical features. All together 18 features are explored to
               achieve an F-score of 86.46\%.},
  year      = {2014}
}

Tim vor der Brück, Alexander Mehler and Md. Zahurul Islam. 2014. ColLex.EN: Automatically Generating and Evaluating a Full-form Lexicon for English. Proceedings of LREC 2014.

BibTeX

@inproceedings{vor:der:Brueck:Mehler:Islam:2014,
  author    = {vor der Br{\"u}ck, Tim and Mehler, Alexander and Islam, Md. Zahurul},
  title     = {{ColLex.EN}: Automatically Generating and Evaluating a Full-form
               Lexicon for {English}},
  booktitle = {Proceedings of LREC 2014},
  address   = {Reykjavik, Iceland},
  abstract  = {Currently, a large number of different lexica is available for
               English. However, substantial and freely available fullform lexica
               with a high number of named entities are rather rare even in the
               case of this lingua franca. Existing lexica are often limited
               in several respects as explained in Section 2. What is missing
               so far is a freely available substantial machine-readable lexical
               resource of English that contains a high number of word forms
               and a large collection of named entities. In this paper, we describe
               a procedure to generate such a resource by example of English.
               This lexicon, henceforth called ColLex.EN (for Collecting Lexica
               for English), will be made freely available to the public.
               In this paper, we describe how ColLex.EN was collected from existing
               lexical resources and specify the statistical procedures that
               we developed to extend and adjust it. No manual modifications
               were done on the generated word forms and lemmas. Our fully automatic
               procedure has the advantage that whenever new versions of the
               source lexica are available, a new version of ColLex.EN can be
               automatically generated with low effort.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/vdbrueck_mehler_islam_collex_lrec.pdf},
  website   = {http://aclanthology.info/papers/collex-en-automatically-generating-and-evaluating-a-full-form-lexicon-for-english},
  year      = {2014}
}

Armin Hoenen. 2014. Simulation of Scribal Letter Substitution. Analysis of Ancient and Medieval Texts and Manuscripts: Digital Approaches.

BibTeX

@inproceedings{Hoenen:2014,
  author    = {Hoenen, Armin},
  title     = {Simulation of Scribal Letter Substitution},
  booktitle = {Analysis of Ancient and Medieval Texts and Manuscripts: Digital Approaches},
  editor    = {Andrews, T. L. and Mac{\'e}, C.},
  owner     = {hoenen},
  website   = {http://www.brepols.net/Pages/ShowProduct.aspx?prod_id=IS-9782503552682-1},
  year      = {2014}
}

BibTeX

@inproceedings{Luecking:2013:e,
  author    = {L{\"u}cking, Andy},
  title     = {Multimodal Propositions? From Semiotic to Semantic Considerations
               in the Case of Gestural Deictics},
  booktitle = {Poster Abstracts of the Proceedings of the 17th Workshop on the
               Semantics and Pragmatics of Dialogue},
  editor    = {Fernandez, Raquel and Isard, Amy},
  series    = {SemDial 2013},
  pages     = {221--223},
  address   = {Amsterdam},
  month     = dec,
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/dialdam2013.pdf},
  year      = {2013}
}

BibTeX

@inproceedings{Islam:Hoenen:2013,
  author    = {Islam, Md. Zahurul and Hoenen, Armin},
  title     = {Source and Translation Classification using Most Frequent Words},
  booktitle = {Proceedings of the 6th International Joint Conference on Natural
               Language Processing (IJCNLP)},
  abstract  = {Recently, translation scholars have made some general claims about
               translation properties. Some of these are source language independent
               while others are not. Koppel and Ordan (2011) performed empirical
               studies to validate both types of properties using English source
               texts and other texts translated into English. Obviously, corpora
               of this sort, which focus on a single language, are not adequate
               for claiming universality of translation properties. In this
               paper, we are validating both types of translation properties
               using original and translated texts from six European languages.},
  pdf       = {http://www.aclweb.org/anthology/I/I13/I13-1185.pdf},
  website   = {http://aclanthology.info/papers/source-and-translation-classification-using-most-frequent-words},
  year      = {2013}
}

Andy Lücking. 2013. Interfacing Speech and Co-Verbal Gesture: Exemplification. Proceedings of the 35th Annual Conference of the German Linguistic Society, 284–286.

BibTeX

@inproceedings{Luecking:2013:b,
  author    = {L{\"u}cking, Andy},
  title     = {Interfacing Speech and Co-Verbal Gesture: Exemplification},
  booktitle = {Proceedings of the 35th Annual Conference of the German Linguistic Society},
  series    = {DGfS 2013},
  pages     = {284--286},
  address   = {Potsdam, Germany},
  year      = {2013}
}

BibTeX

@inproceedings{Islam:Mehler:2013:a,
  title     = {Automatic Readability Classification of Crowd-Sourced Data
               based on Linguistic and Information-Theoretic Features},
  author    = {Islam, Md. Zahurul
               and Mehler, Alexander},
  year      = {2013},
  booktitle = {14th International Conference on Intelligent Text Processing
               and Computational Linguistics},
  pdf       = {http://www.cys.cic.ipn.mx/ojs/index.php/CyS/article/download/1516/1497},
  website   = {http://www.redalyc.org/articulo.oa?id=61527437002},
  abstract  = {This paper presents a classifier of text readability based
               on information-theoretic features. The classifier was developed
               based on a linguistic approach to readability that explores
               lexical, syntactic and semantic features. For this evaluation
               we extracted a corpus of 645 articles from Wikipedia together
               with their quality judgments. We show that information-theoretic
               features perform as well as their linguistic counterparts even
               if we explore several linguistic levels at once.},
  owner     = {zahurul},
  timestamp = {2013.01.22}
}

Md. Zahurul Islam and Rashedur Rahman. 2013. English to Bangla Name Transliteration System (Abstract). The 23rd Meeting of Computational Linguistics in the Netherlands (CLIN 2013).

BibTeX

@inproceedings{Islam:Rahman:2013,
  author    = {Islam, Md. Zahurul and Rahman, Rashedur},
  title     = {{English} to {Bangla} Name Transliteration System (Abstract)},
  booktitle = {The 23rd Meeting of Computational Linguistics in the Netherlands (CLIN 2013)},
  abstract  = {Machine translation systems always struggle transliterating names
               and unknown words during the translation process. It becomes more
               problematic when the source and the target language use different
               scripts for writing. To handle this problem, transliteration systems
               are becoming popular as additional modules of the MT systems.
               In this abstract, we are presenting an English to Bangla name
               transliteration system that outperforms Google’s transliteration
               system. The transliteration system is the same as the phrase based
               statistical machine translation system, but it works on character
               level rather than on phrase level. The performance of a statistical
               system is directly correlated with the size of the training corpus.
               In this work, 2200 names are extracted from the Wikipedia cross
               lingual links and from Geonames. Also 3694 names are manually
               transliterated and added to the data. 4716 names are used for
               training, 590 for tuning and 588 names are used for testing. If
               we consider only the candidate transliterations, the system gives
               64.28\% accuracy. The performance increases to more than 90\%,
               if we consider only the top 5 transliterations. To compare with
               the Google’s English to Bangla transliteration system, a list
               of 100 names are randomly selected from the test data and translated
               by both systems. Our system gives 63\% accuracy where the Google’s
               transliteration system does not transliterate a single name correctly.
               We have found significant improvement in terms of BLEU and TER
               score when we add the transliteration module with an English to
               Bangla machine transliteration system.},
  owner     = {zahurul},
  timestamp = {2013.01.22},
  website   = {https://www.academia.edu/3955036/English_to_Bangla_Name_Transliteration_System},
  year      = {2013}
}

BibTeX

@inproceedings{Beckage:et:al:2013,
  author    = {Beckage, Nicole and Vitevitch, Michael S. and Mehler, Alexander and Colunga, Eliana},
  title     = {Using Complex Network Analysis in the Cognitive Sciences},
  booktitle = {Proceedings of the 35th Annual Meeting of the Cognitive Science
               Society, CogSci 2013, Berlin, Germany, July 31 -- August 3, 2013},
  editor    = {Knauff, Markus and Pauen, Michael and Sebanz, Natalie and Wachsmuth, Ipke},
  publisher = {Cognitive Science Society},
  year      = {2013}
}

Steffen Eger. 2012. S-Restricted Monotone Alignments: Algorithm, Search Space, and Applications. Proceedings of COLING 2012, 781–798.

BibTeX

@inproceedings{Eger:2012:b,
  author    = {Eger, Steffen},
  title     = {S-Restricted Monotone Alignments: Algorithm, Search Space, and Applications},
  booktitle = {Proceedings of COLING 2012},
  pages     = {781--798},
  address   = {Mumbai, India},
  publisher = {The COLING 2012 Organizing Committee},
  abstract  = {We present a simple and straightforward alignment algorithm for
               monotone many-to-many alignments in grapheme-to-phoneme conversion
               and related fields such as morphology, and discuss a few noteworthy
               extensions. Moreover, we specify combinatorial formulas for monotone
               many-to-many alignments and decoding in G2P which indicate that
               exhaustive enumeration is generally possible, so that some limitations
               of our approach can easily be overcome. Finally, we present a
               decoding scheme, within the monotone many-to-many alignment paradigm,
               that relates the decoding problem to restricted integer compositions
               and that is, putatively, superior to alternatives suggested in
               the literature.},
  pdf       = {http://aclweb.org/anthology/C/C12/C12-1048.pdf},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.370.5941},
  year      = {2012}
}

BibTeX

@inproceedings{Eger:2012:c,
  author    = {Eger, Steffen},
  title     = {Lexical semantic typologies from bilingual corpora -- A framework},
  booktitle = {{*SEM} 2012: The First Joint Conference on Lexical and Computational
               Semantics -- Volume 1: Proceedings of the main conference and
               the shared task, and Volume 2: Proceedings of the Sixth International
               Workshop on Semantic Evaluation (SemEval 2012)},
  pages     = {90--94},
  address   = {Montreal, Canada},
  publisher = {Association for Computational Linguistics},
  abstract  = {We present a framework, based on Sejane and Eger (2012), for inducing
               lexical semantic typologies for groups of languages. Our framework
               rests on lexical semantic association networks derived from encoding,
               via bilingual corpora, each language in a common reference language,
               the tertium comparationis, so that distances between languages
               can easily be determined.},
  pdf       = {http://www.aclweb.org/anthology/S12-1015},
  website   = {http://dl.acm.org/citation.cfm?id=2387653},
  year      = {2012}
}

BibTeX

@inproceedings{Mehler:Stegbauer:Gleim:2012:b,
  author    = {Mehler, Alexander and Stegbauer, Christian and Gleim, R{\"u}diger},
  title     = {Latent Barriers in Wiki-based Collaborative Writing},
  booktitle = {Proceedings of the Wikipedia Academy: Research and Free Knowledge.
               June 29 -- July 1 2012},
  address   = {Berlin},
  month     = jul,
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/12_Paper_Alexander_Mehler_Christian_Stegbauer_Ruediger_Gleim.pdf},
  year      = {2012}
}

BibTeX

@inproceedings{vor:der:Brueck:Wang:2012,
  author    = {vor der Br{\"u}ck, Tim and Wang, Yu-Fang},
  title     = {Synonymy Extraction from Semantic Networks Using String and Graph Kernel Methods},
  booktitle = {Proceedings of the 20th European Conference on Artificial Intelligence (ECAI)},
  pages     = {822--827},
  address   = {Montpellier, France},
  abstract  = {Synonyms are a highly relevant information source for natural
               language processing. Automatic synonym extraction methods have
               in common that they are either applied on the surface representation
               of the text or on a syntactical structure derived from it. In
               this paper, however, we present a semantic synonym extraction
               approach that operates directly on semantic networks (SNs), which
               were derived from text by a deep syntactico-semantic analysis.
               Synonymy hypotheses are extracted from the SNs by graph matching.
               These hypotheses are then validated by a support vector machine
               (SVM) employing a combined graph and string kernel. Our method
               was compared to several other approaches and the evaluation has
               shown that our results are considerably superior.},
  pdf       = {http://www.vdb1.de/papers/ECAI_535.pdf},
  website   = {http://ebooks.iospress.nl/publication/7076},
  year      = {2012}
}

BibTeX

@inproceedings{Islam:Mehler:Rahman:2012,
  author    = {Islam, Md. Zahurul and Mehler, Alexander and Rahman, Rashedur},
  title     = {Text Readability Classification of Textbooks of a Low-Resource Language},
  booktitle = {26th Pacific Asia Conference on Language, Information,
               and Computation ({PACLIC} 26)},
  note      = {accepted},
  abstract  = {There are many languages considered to be low-density languages,
               either because the population speaking the language is not very
               large, or because insufficient digitized text material is available
               in the language even though millions of people speak the language.
               Bangla is one of the latter ones. Readability classification is
               an important Natural Language Processing (NLP) application that
               can be used to judge the quality of documents and assist writers
               to locate possible problems. This paper presents a readability
               classifier of Bangla textbook documents based on information-theoretic
               and lexical features. The features proposed in this paper result
               in an F-score that is 50\% higher than that for traditional readability
               formulas.},
  owner     = {zahurul},
  pdf       = {http://www.aclweb.org/anthology/Y12-1059},
  timestamp = {2012.08.14},
  website   = {http://www.researchgate.net/publication/256648250_Text_Readability_Classification_of_Textbooks_of_a_Low-Resource_Language},
  year      = {2012}
}

BibTeX

@inproceedings{Mehler:Luecking:2012:d,
  author    = {Mehler, Alexander and L{\"u}cking, Andy},
  title     = {Pathways of Alignment between Gesture and Speech: Assessing Information
               Transmission in Multimodal Ensembles},
  booktitle = {Proceedings of the International Workshop on Formal and Computational
               Approaches to Multimodal Communication under the auspices of ESSLLI
               2012, Opole, Poland, 6--10 August},
  editor    = {Gianluca Giorgolo and Katya Alahverdzhieva},
  abstract  = {We present an empirical account of multimodal ensembles based
               on Hjelmslev’s notion of selection. This is done to get measurable
               evidence for the existence of speech-and-gesture ensembles. Utilizing
               information theory, we show that there is an information transmission
               that makes a gestures’ representation technique predictable when
               merely knowing its lexical affiliate – in line with the notion
               of the primacy of language. Thus, there is evidence for a one-way
               coupling – going from words to gestures – that leads to speech-and-gesture
               alignment and underlies the constitution of multimodal ensembles.},
  keywords  = {wikinect},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2016/06/Mehler_Luecking_FoCoMC2012-2.pdf},
  website   = {http://www.researchgate.net/publication/268368670_Pathways_of_Alignment_between_Gesture_and_Speech_Assessing_Information_Transmission_in_Multimodal_Ensembles},
  year      = {2012}
}

BibTeX

@inproceedings{Luecking:2012,
  author    = {Lücking, Andy},
  title     = {Towards a Conceptual, Unification-based Speech-Gesture Interface},
  booktitle = {Proceedings of the International Workshop on Formal and Computational
               Approaches to Multimodal Communication under the auspices of ESSLLI
               2012, Opole, Poland, 6--10 August},
  editor    = {Gianluca Giorgolo and Katya Alahverdzhieva},
  abstract  = {A framework for grounding the semantics of co-verbal iconic gestures
               is presented. A resemblance account to iconicity is discarded
               in favor of an exemplification approach. It is sketched how exemplification
               can be captured within a unification-based grammar that provides
               a conceptual interface. Gestures modeled as vector sequences are
               the exemplificational base. Some hypotheses that follow from the
               general account are pointed at and remaining challenges are discussed.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/FoCoMoC2012-1.pdf},
  year      = {2012}
}

BibTeX

@inproceedings{Mehler:Luecking:2012:c,
  author    = {Mehler, Alexander and Lücking, Andy},
  title     = {{WikiNect}: Towards a Gestural Writing System for Kinetic Museum Wikis},
  booktitle = {Proceedings of the International Workshop On User Experience in
               e-Learning and Augmented Technologies in Education (UXeLATE 2012)
               in Conjunction with ACM Multimedia 2012, 29 October -- 2 November,
               Nara, Japan},
  pages     = {7--12},
  abstract  = {We introduce WikiNect as a kinetic museum information system that
               allows museum visitors to give on-site feedback about exhibitions.
               To this end, WikiNect integrates three approaches to Human-Computer
               Interaction (HCI): games with a purpose, wiki-based collaborative
               writing and kinetic text-technologies. Our aim is to develop kinetic
               technologies as a new paradigm of HCI. They dispense with classical
               interfaces (e.g., keyboards) in that they build on non-contact
               modes of communication like gestures or facial expressions as
               input displays. In this paper, we introduce the notion of gestural
               writing as a kinetic text-technology that underlies WikiNect to
               enable museum visitors to communicate their feedback. The basic
               idea is to explore sequences of gestures that share the semantic
               expressivity of verbally manifested speech acts. Our task is to
               identify such gestures that are learnable on-site in the usage
               scenario of WikiNect. This is done by referring to so-called transient
               gestures as part of multimodal ensembles, which are candidate
               gestures of the desired functionality.},
  keywords  = {wikinect},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/UXeLATE2012-copyright.pdf},
  website   = {http://www.researchgate.net/publication/262319200_WikiNect_towards_a_gestural_writing_system_for_kinetic_museum_wikis},
  year      = {2012}
}

BibTeX

@inproceedings{Gleim:Mehler:Ernst:2012,
  author    = {Gleim, Rüdiger and Mehler, Alexander and Ernst, Alexandra},
  title     = {{SOA} implementation of the {eHumanities} Desktop},
  booktitle = {Proceedings of the Workshop on Service-oriented Architectures
               (SOAs) for the Humanities: Solutions and Impacts, Digital Humanities
               2012, Hamburg, Germany},
  abstract  = {The eHumanities Desktop is a system which allows users to upload,
               organize and share resources using a web interface. Furthermore
               resources can be processed, annotated and analyzed in various
               ways. Registered users can organize themselves in groups and collaboratively
               work on their data. The eHumanities Desktop is platform independent
               and runs in a web browser. This paper presents the system focusing
               on its service orientation and process management.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/dhc2012.pdf},
  year      = {2012}
}

BibTeX

@inproceedings{Mehler:Stegbauer:2012,
  author    = {Mehler, Alexander and Stegbauer, Christian},
  title     = {On the Self-similarity of Intertextual Structures in {Wikipedia}},
  booktitle = {Proceedings of the HotSocial '12: The First ACM International
               Workshop on Hot Topics on Interdisciplinary Social Networks Research},
  editor    = {Xiaoming Fu and Peter Gloor and Jie Tang},
  pages     = {65--68},
  address   = {Beijing, China},
  pdf       = {http://wan.poly.edu/KDD2012/forms/workshop/HotSocial12/doc/p64_mehler.pdf},
  website   = {http://dl.acm.org/citation.cfm?id=2392633&bnc=1},
  year      = {2012}
}

BibTeX

@inproceedings{Islam:Mehler:2012:a,
  author    = {Islam, Md. Zahurul and Mehler, Alexander},
  title     = {Customization of the {Europarl} Corpus for Translation Studies},
  booktitle = {Proceedings of the 8th International Conference on Language Resources
               and Evaluation (LREC)},
  abstract  = {Currently, the area of translation studies lacks corpora by which
               translation scholars can validate their theoretical claims, for
               example, regarding the scope of the characteristics of the translation
               relation. In this paper, we describe a customized resource in
               the area of translation studies that mainly addresses research
               on the properties of the translation relation. Our experimental
               results show that the Type-Token-Ratio (TTR) is not a universally
               valid indicator of the simplification of translation.},
  owner     = {zahurul},
  pdf       = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/729_Paper.pdf},
  timestamp = {2012.02.02},
  year      = {2012}
}

BibTeX

@inproceedings{Luecking:Mehler:2012,
  author    = {Lücking, Andy and Mehler, Alexander},
  title     = {What's the Scope of the Naming Game? Constraints on Semantic Categorization},
  booktitle = {Proceedings of the 9th International Conference on the Evolution of Language},
  pages     = {196--203},
  address   = {Kyoto, Japan},
  abstract  = {The Naming Game (NG) has become a vivid research paradigm for
               simulation studies on language evolution and the establishment
               of naming conventions. Recently, NGs were used for reconstructing
               the creation of linguistic categories, most notably for color
               terms. We recap the functional principle of NGs and the latter
               Categorization Games (CGs) and evaluate them in the light of semantic
               data of linguistic categorization outside the domain of colors.
               This comparison reveals two specifics of the CG paradigm: Firstly,
               the emerging categories draw basically on the predefined topology
               of the learning domain. Secondly, the kind of categories that
               can be learnt in CGs is bound to context-independent intersective
               categories. This suggests that the NG and the CG focus on a special
               aspect of natural language categorization, which disregards context-sensitive
               categories used in a non-compositional manner.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Evolang2012-AL_AM.pdf},
  url       = {http://kyoto.evolang.org/},
  website   = {https://www.researchgate.net/publication/267858061_WHAT'S_THE_SCOPE_OF_THE_NAMING_GAME_CONSTRAINTS_ON_SEMANTIC_CATEGORIZATION},
  year      = {2012}
}

BibTeX

@inproceedings{Sukhareva:Islam:Hoenen:Mehler:2012,
  author    = {Sukhareva, Maria and Islam, Md. Zahurul and Hoenen, Armin and Mehler, Alexander},
  title     = {A Three-step Model of Language Detection in Multilingual Ancient Texts},
  booktitle = {Proceedings of Workshop on Annotation of Corpora for Research in the Humanities},
  address   = {Heidelberg, Germany},
  year      = {2012},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/sukhareva_islam_hoenen_mehler_2011.pdf},
  website   = {https://www.academia.edu/2236625/A_Three-step_Model_of_Language_Detection_in_Multilingual_Ancient_Texts},
  abstract  = {Ancient corpora contain various multilingual patterns. This imposes
               numerous problems on their manual annotation and automatic processing.
               We introduce a lexicon building system, called Lexicon Expander,
               that has an integrated language detection module, Language Detection
               (LD) Toolkit. The Lexicon Expander post-processes the output of
               the LD Toolkit which leads to the improvement of f-score and accuracy
               values. Furthermore, the functionality of the Lexicon Expander
               also includes manual editing of lexical entries and automatic
               morphological expansion by means of a morphological grammar.}
}

BibTeX

@inproceedings{Luecking:Ptock:Bergmann:2011,
  author    = {Lücking, Andy and Ptock, Sebastian and Bergmann, Kirsten},
  title     = {Staccato: Segmentation Agreement Calculator},
  booktitle = {Gesture in Embodied Communication and Human-Computer Interaction.
               Proceedings of the 9th International Gesture Workshop},
  editor    = {Eleni Efthimiou and Georgios Kouroupetroglou},
  series    = {GW 2011},
  pages     = {50--53},
  address   = {Athens, Greece},
  publisher = {National and Kapodistrian University of Athens},
  month     = may,
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/LueckingEA_final.pdf},
  year      = {2011}
}

Alexander Mehler and Andy Lücking. September, 2011. A Graph Model of Alignment in Multilog. Proceedings of IEEE Africon 2011.

BibTeX

@inproceedings{Mehler:Luecking:2011,
  author    = {Mehler, Alexander and Lücking, Andy},
  title     = {A Graph Model of Alignment in Multilog},
  booktitle = {Proceedings of IEEE Africon 2011},
  series    = {IEEE Africon},
  address   = {Zambia},
  organization = {IEEE},
  month     = sep,
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/africon2011-paper-Alexander_Mehler_Andy_Luecking.pdf},
  website   = {https://www.researchgate.net/publication/267941012_A_Graph_Model_of_Alignment_in_Multilog},
  year      = {2011}
}

BibTeX

@inproceedings{Stegbauer:Mehler:2011,
  author    = {Stegbauer, Christian and Mehler, Alexander},
  title     = {Positionssensitive {Dekomposition} von {Potenzgesetzen} am {Beispiel}
               von {Wikipedia}-basierten {Kollaborationsnetzwerken}},
  booktitle = {Proceedings of the 4th Workshop Digital Social Networks at INFORMATIK
               2011: Informatik schafft Communities, Oct 4--7, 2011, Berlin},
  pdf       = {http://www.user.tu-berlin.de/komm/CD/paper/090423.pdf},
  specialnote = {Best Paper Award},
  specialnotewebsite = {http://www.digitale-soziale-netze.de/gi-workshop/index.php?site=review2011},
  year      = {2011}
}

BibTeX

@inproceedings{Lux:Laussmann:Mehler:Menssen:2011,
  author    = {Lux, Markus and Lau{\ss}mann, Jan and Mehler, Alexander and Men{\ss}en, Christian},
  title     = {An Online Platform for Visualizing Time Series in Linguistic Networks},
  booktitle = {Proceedings of the Demonstrations Session of the 2011 IEEE / WIC
               / ACM International Conferences on Web Intelligence and Intelligent
               Agent Technology, 22--27 August 2011, Lyon, France},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/wi-iat-poster-2011.pdf},
  website   = {http://dl.acm.org/citation.cfm?id=2052396},
  year      = {2011}
}

BibTeX

@inproceedings{Mehler:Luecking:Menke:2011,
  author    = {Mehler, Alexander and Lücking, Andy and Menke, Peter},
  title     = {From Neural Activation to Symbolic Alignment: A Network-Based
               Approach to the Formation of Dialogue Lexica},
  booktitle = {Proceedings of the International Joint Conference on Neural Networks
               (IJCNN 2011), San Jose, California, July 31 -- August 5},
  doi       = {10.1109/IJCNN.2011.6033266},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/neural-align-final.pdf},
  website   = {http://dx.doi.org/10.1109/IJCNN.2011.6033266},
  year      = {2011}
}

Andy Lücking, Olga Abramov, Alexander Mehler and Peter Menke. 2011. The Bielefeld Jigsaw Map Game (JMG) Corpus. Abstracts of the Corpus Linguistics Conference 2011.

BibTeX

@inproceedings{Luecking:Abramov:Mehler:Menke:2011,
  author    = {Lücking, Andy and Abramov, Olga and Mehler, Alexander and Menke, Peter},
  title     = {The {Bielefeld} Jigsaw Map Game ({JMG}) Corpus},
  booktitle = {Abstracts of the Corpus Linguistics Conference 2011},
  series    = {CL2011},
  address   = {Birmingham},
  pdf       = {http://www.birmingham.ac.uk/documents/college-artslaw/corpus/conference-archives/2011/Paper-137.pdf},
  website   = {http://www.birmingham.ac.uk/research/activity/corpus/publications/conference-archives/2011-birmingham.aspx},
  year      = {2011}
}

BibTeX

@inproceedings{Gleim:Hoenen:Diewald:Mehler:Ernst:2011,
  author    = {Gleim, Rüdiger and Hoenen, Armin and Diewald, Nils and Mehler, Alexander
               and Ernst, Alexandra},
  title     = {Modeling, Building and Maintaining Lexica for Corpus Linguistic
               Studies by Example of {Late Latin}},
  booktitle = {Corpus Linguistics 2011, 20--22 July, Birmingham},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Paper-48.pdf},
  year      = {2011}
}

Peter Menke and Alexander Mehler. 2011. From experiments to corpora: The Ariadne Corpus Management System. Corpus Linguistics 2011, 20-22 July, Birmingham.

BibTeX

@inproceedings{Menke:Mehler:2011,
  author    = {Menke, Peter and Mehler, Alexander},
  title     = {From experiments to corpora: The {Ariadne} Corpus Management System},
  booktitle = {Corpus Linguistics 2011, 20--22 July, Birmingham},
  website   = {https://www.researchgate.net/publication/260186214_From_Experiments_to_Corpora_The_Ariadne_Corpus_Management_System},
  year      = {2011}
}

BibTeX

@inproceedings{Mehler:Luecking:Menke:2011:a,
  author    = {Mehler, Alexander and Lücking, Andy and Menke, Peter},
  title     = {Assessing Lexical Alignment in Spontaneous Direction Dialogue
               Data by Means of a Lexicon Network Model},
  booktitle = {Proceedings of 12th International Conference on Intelligent Text
               Processing and Computational Linguistics (CICLing), February 20--26,
               Tokyo},
  series    = {CICLing'11},
  pages     = {368--379},
  address   = {Berlin/New York},
  publisher = {Springer},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/titan-cicling-camera-ready.pdf},
  website   = {http://www.springerlink.com/content/g7p2250025u20010/},
  year      = {2011}
}

Gerd Doeben-Henisch, Giuseppe Abrami, Marcus Pfaff and Marvin Struwe. Sept., 2011. Conscious learning semiotics systems to assist human persons (CLS2H). AFRICON, 2011, 1–7.

BibTeX

@inproceedings{Doebenhenisch:Abrami:Pfaff:Struwe:2011,
  author    = {Doeben-Henisch, Gerd and Abrami, Giuseppe and Pfaff, Marcus and Struwe, Marvin},
  title     = {Conscious learning semiotics systems to assist human persons ({CLS2H})},
  booktitle = {AFRICON, 2011},
  pages     = {1--7},
  abstract  = {Challenged by the growing societal demand for Ambient Assistive
               Living (AAL) technologies, we are dedicated to develop intelligent
               technical devices which are able to communicate with human persons
               in a truly human-like manner. The core of the project is a simulation
               environment which enables the development of conscious learning
               semiotic agents which will be able to assist human persons in
               their daily life. We are reporting first results and future perspectives.},
  doi       = {10.1109/AFRCON.2011.6072043},
  issn      = {2153-0025},
  keywords  = {ambient assistive living;conscious learning semiotic
                   agents;conscious learning semiotics systems;human
                   persons;intelligent technical devices;simulation
                   environment;learning (artificial
                   intelligence);multi-agent systems},
  month     = sep,
  pdf       = {http://www.doeben-henisch.de/gdhnp/csg/africon2011.pdf},
  website   = {http://www.researchgate.net/publication/261451874_Conscious_Learning_Semiotics_Systems_to_Assist_Human_Persons_(CLS(2)H)},
  year      = {2011}
}

BibTeX

@inproceedings{Dong:vor:der:Brueck:2011,
  author    = {Dong, Tiansi and vor der Brück, Tim},
  title     = {Qualitative Spatial Knowledge Acquisition Based on the Connection Relation},
  booktitle = {Proceedings of the 3rd International Conference on Advanced Cognitive
               Technologies and Applications (COGNITIVE)},
  editor    = {Terry Bossomaier and Pascal Lorenz},
  address   = {Rome, Italy},
  pages     = {70--75},
  year      = {2011},
  pdf       = {http://www.thinkmind.org/download.php?articleid=cognitive_2011_3_40_40123},
  website   = {http://www.thinkmind.org/index.php?view=article&articleid=cognitive_2011_3_40_40123},
  abstract  = {Research in cognitive psychology shows that the connection relation
               is the primitive spatial relation. This paper proposes a novel
               spatial knowledge representation of indoor environments based
               on the connection relation, and demonstrates how deictic orientation
               relations can be acquired from a map, which is constructed purely
               on connection relations between extended objects. Without loss
               of generality, we restrict indoor environments to be constructed
               by a set of rectangles, each representing either a room or a corridor.
               The term fiat cell is coined to represent a subjective partition
               along a corridor. Spatial knowledge includes rectangles, sides
               information of rectangles, connection relations among rectangles,
               and fiat cells of rectangles. Efficient algorithms are given for
               identifying one shortest path between two locations, transforming
               paths into fiat paths, and acquiring deictic orientations.}
}

BibTeX

@inproceedings{Zahurul:Mittmann:Mehler:2011,
  author    = {Islam, Md. Zahurul and Mittmann, Roland and Mehler, Alexander},
  title     = {Multilingualism in Ancient Texts: Language Detection by Example
               of {Old High German} and {Old Saxon}},
  booktitle = {GSCL conference on Multilingual Resources and Multilingual Applications
               (GSCL 2011), 28--30 September, Hamburg, Germany},
  abstract  = {In this paper, we present an approach to language detection in
               streams of multilingual ancient texts. We introduce a supervised
               classifier that detects, amongst others, Old High German (OHG)
               and Old Saxon (OS). We evaluate our model by means of three experiments
               that show that language detection is possible even for dead languages.
               Finally, we present an experiment in unsupervised language detection
               as a tertium comparationis for our supervised classifier.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Multilingualism_in_Ancient_Texts_Language_Detection_by_Example_of_Old_High_German_and_Old_Saxon.pdf},
  timestamp = {2011.08.25},
  year      = {2011}
}

Veronika Ries and Andy Lücking. 2011. Multilingual Resources and Multilingual Applications: Proceedings of the German Society for Computational Linguistics 2011, 207–210.

BibTeX

@inproceedings{Ries:Luecking:2011,
  author    = {Ries, Veronika and Lücking, Andy},
  booktitle = {Multilingual Resources and Multilingual Applications: Proceedings
               of the German Society for Computational Linguistics 2011},
  year      = {2011},
  pages     = {207--210},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Ries_Luecking.pdf},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/SoSaBiEC-poster.pdf},
  reviewnote = {TODO: the required field `title' is missing from this entry;
               supply it from the linked PDF or the GSCL 2011 proceedings}
}

Rüdiger Gleim and Alexander Mehler. 2010. Computational Linguistics for Mere Mortals – Powerful but Easy-to-use Linguistic Processing for Scientists in the Humanities. Proceedings of LREC 2010.

BibTeX

@inproceedings{Gleim:Mehler:2010:b,
  author    = {Gleim, Rüdiger and Mehler, Alexander},
  title     = {Computational Linguistics for Mere Mortals – Powerful but Easy-to-use
               Linguistic Processing for Scientists in the Humanities},
  booktitle = {Proceedings of LREC 2010},
  publisher = {ELDA},
  address   = {Malta},
  year      = {2010},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/gleim_mehler_2010.pdf},
  abstract  = {Delivering linguistic resources and easy-to-use methods to a broad
               public in the humanities is a challenging task. On the one hand
               users rightly demand easy to use interfaces but on the other hand
               want to have access to the full flexibility and power of the functions
               being offered. Even though a growing number of excellent systems
               exist which offer convenient means to use linguistic resources
               and methods, they usually focus on a specific domain, as for example
               corpus exploration or text categorization. Architectures which
               address a broad scope of applications are still rare. This article
               introduces the eHumanities Desktop, an online system for corpus
               management, processing and analysis which aims at bridging the
               gap between powerful command line tools and intuitive user interfaces.}
}

Tim vor der Brück and Holger Stenzhorn. 2010. Logical Ontology Validation Using an Automatic Theorem Prover. Proceedings of the 19th European Conference on Artificial Intelligence (ECAI), 491–496.

BibTeX

@inproceedings{vor:der:Brueck:Stenzhorn:2010,
  author    = {vor der Brück, Tim and Stenzhorn, Holger},
  title     = {Logical Ontology Validation Using an Automatic Theorem Prover},
  booktitle = {Proceedings of the 19th European Conference on Artificial Intelligence (ECAI)},
  pages     = {491--496},
  address   = {Lisbon, Portugal},
  abstract  = {Ontologies are utilized for a wide range of tasks, like information
               retrieval/extraction or text generation, and in a multitude of
               domains, such as biology, medicine or business and commerce. To
               be actually usable in such real-world scenarios, ontologies usually
               have to encompass a large number of factual statements. However,
               with increasing size, it becomes very difficult to ensure their
               complete correctness. This is particularly true in the case when
               an ontology is not hand-crafted but constructed (semi)automatically
               through text mining, for example. As a consequence, when inference
               mechanisms are applied on these ontologies, even minimal inconsistencies
               oftentimes lead to serious errors and are hard to trace back
               and find. This paper addresses this issue and describes a method
               to validate ontologies using an automatic theorem prover and MultiNet
               axioms. This logic-based approach allows to detect many inconsistencies,
               which are difficult or even impossible to identify through statistical
               methods or by manual investigation in reasonable time. To make
               this approach accessible for ontology developers, a graphical
               user interface is provided that highlights erroneous axioms directly
               in the ontology for quicker fixing.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/ECAI-216.pdf},
  year      = {2010}
}

BibTeX

@inproceedings{vor:der:Brueck:2010:a,
  author    = {vor der Brück, Tim},
  title     = {Learning Deep Semantic Patterns for Hypernymy Extraction Following
               the Minimum Description Length Principle},
  booktitle = {Proceedings of the 29th International Conference on Lexis and Grammar (LGC)},
  address   = {Belgrade, Serbia},
  pages     = {39--49},
  year      = {2010},
  abstract  = {Current approaches of hypernymy acquisition are mostly based on
               syntactic or surface representations and extract hypernymy relations
               between surface word forms and not word readings. In this paper
               we present a purely semantic approach for hypernymy extraction
               based on semantic networks (SNs). This approach employs a set
               of patterns sub0 (a1,a2) <-- premise where the premise part of
               a pattern is given by a SN. Furthermore this paper describes how
               the patterns can be derived by relational statistical learning
               following the Minimum Description Length principle (MDL). The
               evaluation demonstrates the usefulness of the learned patterns
               and also of the entire hypernymy extraction system.}
}

Tim vor der Brück. 2010. Learning Semantic Network Patterns for Hypernymy Extraction. Proceedings of the 6th Workshop on Ontologies and Lexical Resources (OntoLex), 38–47.

BibTeX

@inproceedings{vor:der:Brueck:2010:b,
  author    = {vor der Brück, Tim},
  title     = {Learning Semantic Network Patterns for Hypernymy Extraction},
  booktitle = {Proceedings of the 6th Workshop on Ontologies and Lexical Resources (OntoLex)},
  address   = {Beijing, China},
  pages     = {38--47},
  year      = {2010},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.358.533},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/ontolex_brueck_13_2010.pdf}
}

BibTeX

@inproceedings{vor:der:Brueck:Hartrumpf:Eichhorn:2010:a,
  author    = {Hartrumpf, Sven and vor der Brück, Tim and Eichhorn, Christian},
  title     = {Detecting Duplicates with Shallow and Parser-based Methods},
  booktitle = {Proceedings of the 6th International Conference on Natural Language
               Processing and Knowledge Engineering (NLPKE)},
  address   = {Beijing, China},
  pages     = {142--149},
  year      = {2010},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/semdupl-ieee.pdf},
  website   = {http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=5587838&abstractAccess=no&userType=inst},
  abstract  = {Identifying duplicate texts is important in many areas like plagiarism
               detection, information retrieval, text summarization, and question
               answering. Current approaches are mostly surface-oriented (or
               use only shallow syntactic representations) and see each text
               only as a token list. In this work however, we describe a deep,
               semantically oriented method based on semantic networks which
               are derived by a syntactico-semantic parser. Semantically identical
               or similar semantic networks for each sentence of a given base
               text are efficiently retrieved by using a specialized semantic
               network index. In order to detect many kinds of paraphrases the
               current base semantic network is varied by applying inferences:
               lexico-semantic relations, relation axioms, and meaning postulates.
               Some important phenomena occurring in difficult-to-detect duplicates
               are discussed. The deep approach profits from background knowledge,
               whose acquisition from corpora like Wikipedia is explained briefly.
               This deep duplicate recognizer is combined with two shallow duplicate
               recognizers in order to guarantee high recall for texts which
               are not fully parsable. The evaluation shows that the combined
               approach preserves recall and increases precision considerably,
               in comparison to traditional shallow methods. For the evaluation,
               a standard corpus of German plagiarisms was extended by four diverse
               components with an emphasis on duplicates (and not just plagiarisms),
               e.g., news feed articles from different web sources and two translations
               of the same short story.}
}

BibTeX

@inproceedings{vor:der:Brueck:Hartrumpf:Eichhorn:2010:b,
  author    = {Hartrumpf, Sven and vor der Brück, Tim and Eichhorn, Christian},
  title     = {Semantic Duplicate Identification with Parsing and Machine Learning},
  booktitle = {Proceedings of the 13th International Conference on Text, Speech
               and Dialogue (TSD 2010)},
  editor    = {Petr Sojka and Aleš Horák and Ivan Kopeček and Karel Pala},
  volume    = {6231},
  series    = {Lecture Notes in Artificial Intelligence},
  pages     = {84--92},
  address   = {Brno, Czech Republic},
  publisher = {Springer},
  abstract  = {Identifying duplicate texts is important in many areas like plagiarism
               detection, information retrieval, text summarization, and question
               answering. Current approaches are mostly surface-oriented (or
               use only shallow syntactic representations) and see each text
               only as a token list. In this work however, we describe a deep,
               semantically oriented method based on semantic networks which
               are derived by a syntacticosemantic parser. Semantically identical
               or similar semantic networks for each sentence of a given base
               text are efficiently retrieved by using a specialized index. In
               order to detect many kinds of paraphrases the semantic networks
               of a candidate text are varied by applying inferences: lexico-semantic
               relations, relation axioms, and meaning postulates. Important
               phenomena occurring in difficult duplicates are discussed. The
               deep approach profits from background knowledge, whose acquisition
               from corpora is explained briefly. The deep duplicate recognizer
               is combined with two shallow duplicate recognizers in order to
               guarantee a high recall for texts which are not fully parsable.
               The evaluation shows that the combined approach preserves recall
               and increases precision considerably in comparison to traditional
               shallow methods.},
  doi       = {10.1007/978-3-642-15760-8_12},
  month     = sep,
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/semdupl-paper.pdf},
  website   = {http://link.springer.com/chapter/10.1007/978-3-642-15760-8_12},
  year      = {2010}
}

Andy Lücking. July, 2010. A Semantic Account for Iconic Gestures. Gesture: Evolution, Brain, and Linguistic Structures, 210.

BibTeX

@inproceedings{Luecking:2010,
  author    = {Lücking, Andy},
  title     = {A Semantic Account for Iconic Gestures},
  booktitle = {Gesture: Evolution, Brain, and Linguistic Structures},
  pages     = {210},
  address   = {Europa Universit{\"a}t Viadrina Frankfurt/Oder},
  organization = {4th Conference of the International Society for
                   Gesture Studies (ISGS)},
  keywords  = {own},
  month     = jul,
  pdf       = {https://pub.uni-bielefeld.de/download/2318565/2319962},
  website   = {http://pub.uni-bielefeld.de/publication/2318565},
  year      = {2010}
}

BibTeX

@inproceedings{Luecking:et:al:2010,
  author    = {Lücking, Andy and Bergmann, Kirsten and Hahn, Florian and Kopp, Stefan
               and Rieser, Hannes},
  title     = {The {Bielefeld} Speech and Gesture Alignment Corpus ({SaGA})},
  booktitle = {Multimodal Corpora: Advances in Capturing, Coding and Analyzing Multimodality},
  pages     = {92--98},
  address   = {Malta},
  organization = {7th International Conference for Language Resources
               and Evaluation (LREC 2010)},
  abstract  = {People communicate multimodally. Most prominently, they co-produce
               speech and gesture. How do they do that? Studying the interplay
               of both modalities has to be informed by empirically observed
               communication behavior. We present a corpus built of speech and
               gesture data gained in a controlled study. We describe 1) the
               setting underlying the data; 2) annotation of the data; 3) reliability
               evaluation methods and results; and 4) applications of the corpus
               in the research domain of speech and gesture alignment.},
  keywords  = {own},
  month     = {may},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/saga-corpus.pdf},
  website   = {http://pub.uni-bielefeld.de/publication/2001935},
  year      = {2010}
}

BibTeX

@inproceedings{Zahurul:Tiedemann:Eisele:2010,
  author    = {Islam, Md. Zahurul and Tiedemann, Jörg and Eisele, Andreas},
  title     = {{English} to {Bangla} Phrase-Based Machine Translation},
  booktitle = {The 14th Annual Conference of The European Association for Machine
               Translation. Saint-Raphaël, France, 27--28 May},
  owner     = {zahurul},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/English_to_Bangla_Phrase–Based_Machine_Translation.pdf},
  timestamp = {2011.08.02},
  year      = {2010}
}

Ulli Waltinger. May, 2010. GermanPolarityClues: A Lexical Resource for German Sentiment Analysis. Proceedings of the Seventh conference on International Language Resources and Evaluation (LREC '10).

BibTeX

@inproceedings{Waltinger:2010:a,
  author    = {Waltinger, Ulli},
  title     = {{GermanPolarityClues}: A Lexical Resource for {German} Sentiment Analysis},
  booktitle = {Proceedings of the Seventh conference on International Language
               Resources and Evaluation (LREC '10)},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Maegaard, Bente
               and Mariani, Joseph and Odijk, Jan and Piperidis, Stelios
               and Rosner, Mike and Tapias, Daniel},
  address   = {Valletta, Malta},
  publisher = {European Language Resources Association (ELRA)},
  date_0    = {2010-05},
  isbn      = {2-9517408-6-7},
  language  = {english},
  month     = {may},
  pdf       = {http://www.ulliwaltinger.de/pdf/91_Paper.pdf},
  website   = {http://www.ulliwaltinger.de/sentiment/},
  year      = {2010}
}

BibTeX

@inproceedings{Mehler:Weiss:Menke:Luecking:2010,
  author    = {Mehler, Alexander and Wei{\ss}, Petra and Menke, Peter and Lücking, Andy},
  title     = {Towards a Simulation Model of Dialogical Alignment},
  booktitle = {Proceedings of the 8th International Conference on the Evolution
               of Language (Evolang8), 14--17 April 2010, Utrecht},
  pages     = {238--245},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Alexander_Mehler_Petra_Weiss_Peter_Menke_Andy_Luecking.pdf},
  website   = {http://www.let.uu.nl/evolang2010.nl/},
  year      = {2010}
}

Fiorella Foscarini, Yunhyong Kim, Christopher A. Lee, Alexander Mehler, Gillian Oliver and Seamus Ross. 2010. On the Notion of Genre in Digital Preservation. Automation in Digital Preservation.

BibTeX

@inproceedings{Foscarini:Kim:Lee:Mehler:Oliver:Ross:2010,
  author    = {Foscarini, Fiorella and Kim, Yunhyong and Lee, Christopher A. and
               Mehler, Alexander and Oliver, Gillian and Ross, Seamus},
  title     = {On the Notion of Genre in Digital Preservation},
  year      = {2010},
  booktitle = {Automation in Digital Preservation},
  editor    = {Chanod, Jean-Pierre and Dobreva, Milena and
               Rauber, Andreas and Ross, Seamus},
  series    = {Dagstuhl Seminar Proceedings},
  number    = {10291},
  publisher = {Schloss Dagstuhl - Leibniz-Zentrum fuer Informatik, Germany},
  address   = {Dagstuhl, Germany},
  issn      = {1862-4405},
  annote    = {Keywords: Digital preservation, genre analysis, context modeling,
               diplomatics, information retrieval},
  website   = {http://drops.dagstuhl.de/opus/volltexte/2010/2763},
  pdf       = {http://drops.dagstuhl.de/opus/volltexte/2010/2763/pdf/10291.MehlerAlexander.Paper.2763.pdf}
}

BibTeX

@inproceedings{Mehler:Gleim:Waltinger:Diewald:2010,
  author    = {Mehler, Alexander and Gleim, Rüdiger and Waltinger, Ulli and Diewald, Nils},
  title     = {Time Series of Linguistic Networks by Example of the {Patrologia Latina}},
  booktitle = {Proceedings of INFORMATIK 2010: Service Science, September 27
               -- October 01, 2010, Leipzig},
  editor    = {F{\"a}hnrich, Klaus-Peter and Franczyk, Bogdan},
  volume    = {2},
  series    = {Lecture Notes in Informatics},
  pages     = {609--616},
  publisher = {GI},
  pdf       = {http://subs.emis.de/LNI/Proceedings/Proceedings176/586.pdf},
  year      = {2010}
}

BibTeX

@inproceedings{Gleim:Warner:Mehler:2010,
  author    = {Gleim, Rüdiger and Warner, Paul and Mehler, Alexander},
  title     = {{eHumanities Desktop} -- An Architecture for Flexible Annotation
               in Iconographic Research},
  booktitle = {Proceedings of the 6th International Conference on Web Information
               Systems and Technologies (WEBIST '10), April 7--10, 2010, Valencia},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/gleim_warner_mehler_2010.pdf},
  website   = {https://www.researchgate.net/publication/220724277_eHumanities_Desktop_-_An_Architecture_for_Flexible_Annotation_in_Iconographic_Research},
  year      = {2010}
}

Peter Menke and Alexander Mehler. 2010. The Ariadne System: A flexible and extensible framework for the modeling and storage of experimental data in the humanities. Proceedings of LREC 2010.

BibTeX

@inproceedings{Menke:Mehler:2010,
  author    = {Menke, Peter and Mehler, Alexander},
  title     = {The {Ariadne} System: A flexible and extensible framework for the
               modeling and storage of experimental data in the humanities},
  booktitle = {Proceedings of LREC 2010},
  address   = {Malta},
  publisher = {ELDA},
  abstract  = {This paper introduces the Ariadne Corpus Management System. First,
               the underlying data model is presented which enables users to
               represent and process heterogeneous data sets within a single,
               consistent framework. Secondly, a set of automatized procedures
               is described that offers assistance to researchers in various
               data-related use cases. Finally, an approach to easy yet powerful
               data retrieval is introduced in form of a specialised querying
               language for multimodal data.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/menke_mehler_2010.pdf},
  website   = {http://arnetminer.org/publication/the-ariadne-system-a-flexible-and-extensible-framework-for-the-modeling-and-storage-of-experimental-data-in-the-humanities-2839925.html},
  year      = {2010}
}

BibTeX

@inproceedings{Eger:Sejane:2010,
  author    = {Eger, Steffen and Sejane, Ineta},
  title     = {Computing Semantic Similarity from Bilingual Dictionaries},
  booktitle = {Proceedings of the 10th International Conference on the Statistical
               Analysis of Textual Data (JADT-2010)},
  pages     = {1217--1225},
  address   = {Rome, Italy},
  publisher = {JADT-2010},
  pdf       = {http://www.ledonline.it/ledonline/JADT-2010/allegati/JADT-2010-1217-1226_167-Eger.pdf},
  year      = {2010}
}

BibTeX

@inproceedings{vor:der:Brueck:Helbig:2010:a,
  author    = {vor der Brück, Tim and Helbig, Hermann},
  title     = {Validating Meronymy Hypotheses with Support Vector Machines
               and Graph Kernels},
  year      = {2010},
  booktitle = {Proceedings of the Ninth International Conference on
               Machine Learning and Applications (ICMLA)},
  pages     = {243--250},
  publisher = {IEEE Press},
  address   = {Washington, D.C.},
  website   = {http://www.computer.org/csdl/proceedings/icmla/2010/4300/00/4300a243-abs.html},
  abstract  = {There is a substantial body of work on the extraction of relations
               from texts, most of which is based on pattern matching or on applying
               tree kernel functions to syntactic structures. Whereas pattern
               application is usually more efficient, tree kernels can be superior
               when assessed by the F-measure. In this paper, we introduce a
               hybrid approach to extracting meronymy relations, which is based
               on both patterns and kernel functions. In a first step, meronymy
               relation hypotheses are extracted from a text corpus by applying
               patterns. In a second step these relation hypotheses are validated
               by using several shallow features and a graph kernel approach.
               In contrast to other meronymy extraction and validation methods
               which are based on surface or syntactic representations we use
               a purely semantic approach based on semantic networks. This involves
               analyzing each sentence of the Wikipedia corpus by a deep syntactico-semantic
               parser and converting it into a semantic network. Meronymy relation
               hypotheses are extracted from the semantic networks by means of
               an automated theorem prover, which employs a set of logical axioms
               and patterns in the form of semantic networks. The meronymy candidates
               are then validated by means of a graph kernel approach based on
               common walks. The evaluation shows that this method achieves considerably
               higher accuracy, recall, and F-measure than a method using purely
               shallow validation.}
}

BibTeX

@inproceedings{Mehler:Gleim:Waltinger:Ernst:Esch:Feith:2009,
  author    = {Mehler, Alexander and Gleim, Rüdiger and Waltinger, Ulli and Ernst, Alexandra
               and Esch, Dietmar and Feith, Tobias},
  title     = {{eHumanities Desktop} -- eine webbasierte {Arbeitsumgebung} für die
               geisteswissenschaftliche {Fachinformatik}},
  booktitle = {Proceedings of the Symposium "Sprachtechnologie und eHumanities",
               26.--27. Februar, Duisburg-Essen University},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_gleim_waltinger_ernst_esch_feith_2009.pdf},
  website   = {http://duepublico.uni-duisburg-essen.de/servlets/DocumentServlet?id=37041},
  year      = {2009}
}

BibTeX

@inproceedings{Wagner:Mehler:Wolff:Dotzler:2009,
  author    = {Wagner, Benno and Mehler, Alexander and Wolff, Christian and Dotzler, Bernhard},
  title     = {Bausteine eines {Literary Memory Information System} ({LiMeS}) am
               {Beispiel} der {Kafka-Forschung}},
  booktitle = {Proceedings of the Symposium "Sprachtechnologie und eHumanities",
               26.--27. Februar, Duisburg-Essen University},
  abstract  = {In dem Paper beschreiben wir Bausteine eines Literary Memory Information
               System (LiMeS), das die literaturwissenschaftliche Erforschung
               von so genannten Matrixtexten – das sind Prim{\"a}rtexte eines
               bestimmten literarischen Gesamtwerks – unter dem Blickwinkel gro{\ss}er
               Mengen so genannter Echotexte (Topia 1984; Wagner/Reinhard 2007)
               – das sind Subtexte im Sinne eines literaturwissenschaftlichen
               Intertextualit{\"a}tsbegriffs – ermöglicht. Den Ausgangspunkt
               dieses computerphilologischen Informationssystems bildet ein Text-Mining-Modell
               basierend auf dem Intertextualit{\"a}tsbegriff in Verbindung mit
               dem Begriff des Semantic Web (Mehler, 2004b, 2005a, b, Wolff 2005).
               Wir zeigen, inwiefern dieses Modell über bestehende Informationssystemarchitekturen
               hinausgeht und schlie{\ss}en einen Brückenschlag zur derzeitigen
               Entwicklung von Arbeitsumgebungen in der geisteswissenschaftlichen
               Fachinformatik in Form eines eHumanities Desktop.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/wagner_mehler_wolff_dotzler_2009.pdf},
  website   = {http://epub.uni-regensburg.de/6795/},
  year      = {2009}
}

BibTeX

@inproceedings{Waltinger:Mehler:Wegner:2009,
  author    = {Waltinger, Ulli and Mehler, Alexander and Wegner, Armin},
  title     = {A Two-Level Approach to Web Genre Classification},
  booktitle = {Proceedings of the 5th International Conference on Web Information
               Systems and Technologies (WEBIST '09), March 23--26, 2009, Lisboa},
  abstract  = {This paper presents an approach of two-level categorization of
               web pages. In contrast to related approaches the model additionally
               explores and categorizes functionally and thematically demarcated
               segments of the hypertext types to be categorized. By classifying
               these segments conclusions can be drawn about the type of the
               corresponding compound web document.},
  pdf       = {http://www.ulliwaltinger.de/pdf/Webist_2009_TwoLevel_Genre_Classification_WaltingerMehlerWegner.pdf},
  year      = {2009}
}

BibTeX

@inproceedings{Gleim:Mehler:Waltinger:Menke:2009,
  author    = {Gleim, Rüdiger and Mehler, Alexander and Waltinger, Ulli and Menke, Peter},
  title     = {{eHumanities Desktop} -- An extensible Online System for Corpus Management
               and Analysis},
  booktitle = {5th Corpus Linguistics Conference, University of Liverpool},
  abstract  = {This paper presents the eHumanities Desktop - an online system
               for corpus management and analysis in support of computing in
               the humanities. Design issues and the overall architecture are
               described, as well as an outline of the applications offered by
               the system.},
  pdf       = {http://www.ulliwaltinger.de/pdf/eHumanitiesDesktop-AnExtensibleOnlineSystem-CL2009.pdf},
  website   = {http://www.ulliwaltinger.de/ehumanities-desktop-an-extensible-online-system-for-corpus-management-and-analysis/},
  year      = {2009}
}

BibTeX

@inproceedings{Mehler:Luecking:2009,
  author    = {Mehler, Alexander and Lücking, Andy},
  title     = {A Structural Model of Semiotic Alignment: The Classification of
               Multimodal Ensembles as a Novel Machine Learning Task},
  booktitle = {Proceedings of IEEE Africon 2009, September 23--25, Nairobi, Kenya},
  publisher = {IEEE},
  abstract  = {In addition to the well-known linguistic alignment processes in
               dyadic communication – e.g., phonetic, syntactic, semantic alignment
               – we provide evidence for a genuine multimodal alignment process,
               namely semiotic alignment. Communicative elements from different
               modalities 'routinize into' cross-modal 'super-signs', which we
               call multimodal ensembles. Computational models of human communication
               are in need of expressive models of multimodal ensembles. In this
               paper, we exemplify semiotic alignment by means of empirical examples
               of the building of multimodal ensembles. We then propose a graph
               model of multimodal dialogue that is expressive enough to capture
               multimodal ensembles. In line with this model, we define a novel
               task in machine learning with the aim of training classifiers
               that can detect semiotic alignment in dialogue. This model is
               in support of approaches which need to gain insights into realistic
               human-machine communication.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_luecking_2009.pdf},
  website   = {http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?reload=true&arnumber=5308098},
  year      = {2009}
}

Tim vor der Brück. 2009. Hypernymy Extraction Based on Shallow and Deep Patterns. From Form To Meaning: Processing Texts Automatically, Proceedings of the Biennial GSCL Conference 2009, 41–52.

BibTeX

@inproceedings{vor:der:Brueck:2009:b,
  author    = {vor der Brück, Tim},
  title     = {Hypernymy Extraction Based on Shallow and Deep Patterns},
  booktitle = {From Form To Meaning: Processing Texts Automatically, Proceedings
               of the Biennial GSCL Conference 2009},
  editor    = {Chiarcos, Christian and {Eckart de Castilho}, Richard},
  pages     = {41--52},
  address   = {Potsdam, Germany},
  abstract  = {There exist various approaches to construct taxonomies by text
               mining. Usually these approaches are based on supervised learning
               and extract in a first step several patterns. These patterns are
               then applied to previously unseen texts and used to recognize
               hypernym/hyponym pairs. Normally these approaches are only based
               on a surface representation or a syntactic tree structure, i.e.,
               a constituency or dependency tree derived by a syntactical parser.
               In this work we present an approach which, additionally to shallow
               patterns, directly operates on semantic networks which are derived
               by a deep linguistic syntactico-semantic analysis. Furthermore,
               the shallow approach heavily depends on semantic information,
               too. It is shown that recall and precision can be improved considerably
               than by relying on shallow patterns alone.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/gscl09_12_brueck.pdf},
  year      = {2009}
}

BibTeX

@inproceedings{Bouma:Duarte:Zahurul:2009,
  author    = {Bouma, Gosse and Duarte, Sergio and Islam, Md. Zahurul},
  title     = {Cross-lingual Alignment and Completion of {Wikipedia} Templates},
  booktitle = {Third International Workshop on Cross Lingual Information Access:
               Addressing the Information Need of Multilingual Societies (CLIAWS3),
               Boulder, Colorado, USA, June 4},
  abstract  = {For many languages, the size of Wikipedia is an order of magnitude
               smaller than the English Wikipedia. We present a method for cross-lingual
               alignment of template and infobox attributes in Wikipedia. The
               alignment is used to add and complete templates and infoboxes
               in one language with information derived from Wikipedia in another
               language. We show that alignment between English and Dutch Wikipedia
               is accurate and that the result can be used to expand the number
               of template attribute-value pairs in Dutch Wikipedia by 50\%.
               Furthermore, the alignment provides valuable information for normalization
               of template and attribute names and can be used to detect potential
               inconsistencies.},
  owner     = {zahurul},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Cross-lingual_Alignment_and_Completion_of_Wikipedia_Templates.pdf},
  timestamp = {2011.08.02},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.148.1418},
  year      = {2009}
}

Ulli Waltinger. 2009. Polarity Reinforcement: Sentiment Polarity Identification By Means Of Social Semantics. Proceedings of the IEEE Africon 2009, September 23-25, Nairobi, Kenya.

BibTeX

@inproceedings{Waltinger:2009:a,
  author    = {Waltinger, Ulli},
  title     = {Polarity Reinforcement: Sentiment Polarity Identification By Means
               Of Social Semantics},
  booktitle = {Proceedings of the IEEE Africon 2009, September 23--25, Nairobi, Kenya},
  date_0    = {2009},
  pdf       = {http://www.ulliwaltinger.de/pdf/AfriconIEEE_2009_SentimentPolarity_Waltinger.pdf},
  website   = {http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=5308104},
  year      = {2009}
}

BibTeX

@inproceedings{Waltinger:Cramer:Wandmacher:2009:a,
  author    = {Waltinger, Ulli and Cramer, Irene and Wandmacher, Tonio},
  title     = {From Social Networks To Distributional Properties: A Comparative
               Study On Computing Semantic Relatedness},
  booktitle = {Proceedings of the 31st Annual Conference of the Cognitive Science Society},
  editor    = {Taatgen, N. A. and van Rijn, H.},
  pages     = {3016--3021},
  address   = {Austin, TX},
  publisher = {Cognitive Science Society},
  date_0    = {2009},
  pdf       = {http://csjarchive.cogsci.rpi.edu/proceedings/2009/papers/661/paper661.pdf},
  year      = {2009}
}

Ulli Waltinger. 2009. Polarity Reinforcement: Sentiment Polarity Identification By Means Of Social Semantics. Proceedings of the IEEE Africon 2009, September 23-25, Nairobi, Kenya.

BibTeX

@inproceedings{Waltinger:2009:b,
  author    = {Waltinger, Ulli},
  title     = {Polarity Reinforcement: Sentiment Polarity Identification By Means
               Of Social Semantics},
  booktitle = {Proceedings of the IEEE Africon 2009, September 23--25, Nairobi, Kenya},
  date_0    = {2009},
  pdf       = {http://www.ulliwaltinger.de/pdf/AfriconIEEE_2009_SentimentPolarity_Waltinger.pdf},
  website   = {http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=5308104},
  year      = {2009}
}

BibTeX

@inproceedings{Waltinger:Cramer:Wandmacher:2009:b,
  author    = {Waltinger, Ulli and Cramer, Irene and Wandmacher, Tonio},
  title     = {From Social Networks To Distributional Properties: A Comparative
               Study On Computing Semantic Relatedness},
  booktitle = {Proceedings of the 31st Annual Conference of the Cognitive Science Society},
  editor    = {Taatgen, N. A. and van Rijn, H.},
  pages     = {3016--3021},
  address   = {Austin, TX},
  publisher = {Cognitive Science Society},
  date_0    = {2009},
  pdf       = {http://csjarchive.cogsci.rpi.edu/proceedings/2009/papers/661/paper661.pdf},
  year      = {2009}
}

BibTeX

@inproceedings{Gleim:Waltinger:Ernst:Mehler:Esch:Feith:2009,
  author    = {Gleim, Rüdiger and Waltinger, Ulli and Ernst, Alexandra and Mehler, Alexander
               and Esch, Dietmar and Feith, Tobias},
  title     = {The {eHumanities Desktop} -- An Online System for Corpus Management
               and Analysis in Support of Computing in the Humanities},
  booktitle = {Proceedings of the Demonstrations Session of the 12th Conference
               of the European Chapter of the Association for Computational Linguistics
               EACL 2009, 30 March -- 3 April, Athens},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/gleim_waltinger_ernst_mehler_esch_feith_2009.pdf},
  year      = {2009}
}

BibTeX

@inproceedings{Waltinger:Mehler:2009:a,
  author    = {Waltinger, Ulli and Mehler, Alexander},
  title     = {The Feature Difference Coefficient: Classification by Means of
               Feature Distributions},
  booktitle = {Proceedings of the Conference on Text Mining Services (TMS 2009)},
  series    = {Leipziger Beitr{\"a}ge zur Informatik: Band XIV},
  pages     = {159--168},
  address   = {Leipzig},
  publisher = {Leipzig University},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/waltinger_mehler_2009_a.pdf},
  year      = {2009}
}

BibTeX

@inproceedings{Waltinger:Mehler:Gleim:2009:a,
  author    = {Waltinger, Ulli and Mehler, Alexander and Gleim, Rüdiger},
  title     = {Social Semantics And Its Evaluation By Means of Closed Topic Models:
               An {SVM}-Classification Approach Using Semantic Feature Replacement
               By Topic Generalization},
  booktitle = {Proceedings of the Biennial GSCL Conference 2009, September 30
               -- October 2, Universit{\"a}t Potsdam},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/GSCL_2009_WaltingerMehlerGleim_camera_ready.pdf},
  year      = {2009}
}

BibTeX

@inproceedings{Waltinger:Mehler:2009:c,
  author    = {Waltinger, Ulli and Mehler, Alexander},
  title     = {Social Semantics and Its Evaluation By Means Of Semantic Relatedness
               And Open Topic Models},
  booktitle = {IEEE/WIC/ACM International Conference on Web Intelligence, September
               15--18, Milano},
  abstract  = {This paper presents an approach using social semantics for the
               task of topic labelling by means of Open Topic Models. Our approach
               utilizes a social ontology to create an alignment of documents
               within a social network. Comprised category information is used
               to compute a topic generalization. We propose a feature-frequency-based
               method for measuring semantic relatedness which is needed in order
               to reduce the number of document features for the task of topic
               labelling. This method is evaluated against multiple human judgement
               experiments comprising two languages and three different resources.
               Overall the results show that social ontologies provide a rich
               source of terminological knowledge. The performance of the semantic
               relatedness measure with correlation values of up to .77 are quite
               promising. Results on the topic labelling experiment show, with
               an accuracy of up to .79, that our approach can be a valuable
               method for various NLP applications.},
  website   = {http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=5284920&abstractAccess=no&userType=inst},
  year      = {2009}
}

BibTeX

@inproceedings{vor:der:Brueck:2009,
  author    = {vor der Brück, Tim},
  title     = {Approximation of the Parameters of a Readability Formula by Robust Regression},
  booktitle = {Machine Learning and Data Mining in Pattern Recognition: Poster
               Proceedings of the International Conference on Machine Learning
               and Data Mining (MLDM)},
  pages     = {115--125},
  address   = {Leipzig, Germany},
  abstract  = {Most readability formulas calculate a global readability score
               by combining several indicator values by a linear combination.
               Typical indicators are Average sentence length, Average number
               of syllables per word, etc. Usually the parameters of the linear
               combination are determined by a linear OLS (ordinary least square
               estimation) minimizing the sum of the squared residuals in comparison
               with human ratings for a given set of texts. The usage of OLS
               leads to several drawbacks. First, the parameters are not constraint
               in any way and are therefore not intuitive and difficult to interpret.
               Second, if the number of parameters become large, the effect of
               overfitting easily occurs. Finally, OLS is quite sensitive to
               outliers. Therefore, an alternative method is presented which
               avoids these drawbacks and is based on robust regression.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mldm_2009_brueck_142.pdf},
  year      = {2009}
}

BibTeX

@inproceedings{Stuehrenberg:Beisswenger:Kuehnberger:Mehler:Luengen:Metzing:Moennich:2008,
  author    = {Stührenberg, Maik and Bei{\ss}wenger, Michael and Kühnberger, Kai-Uwe
               and Mehler, Alexander and Lüngen, Harald and Metzing, Dieter and Mönnich, Uwe},
  title     = {Sustainability of Text-Technological Resources},
  booktitle = {Proceedings of the Post LREC-2008 Workshop: Sustainability of
               Language Resources and Tools for Natural Language Processing,
               Marrakech, Morocco},
  abstract  = {We consider that there are obvious relationships between research
               on sustainability of language and linguistic resources on the
               one hand and work undertaken in the Research Unit 'Text-Technological
               Modelling of Information' on the other. Currently the main focus
               in sustainability research is concerned with archiving methods
               of textual resources, i.e. methods for sustainability of primary
               and secondary data; these aspects are addressed in our work as
               well. However, we believe that there are additional certain aspects
               of sustainability on which new light is shed on by procedures,
               algorithms and dynamic processes undertaken in our Research Unit.},
  pdf       = {http://www.michael-beisswenger.de/pub/lrec-sustainability.pdf},
  year      = {2008}
}

BibTeX

@inproceedings{Pustylnikov:Mehler:Gleim:2008,
  author    = {Abramov, Olga and Mehler, Alexander and Gleim, Rüdiger},
  title     = {A Unified Database of Dependency Treebanks. Integrating,
               Quantifying and Evaluating Dependency Data},
  year      = {2008},
  booktitle = {Proceedings of the 6th Language Resources and Evaluation
               Conference (LREC 2008), Marrakech (Morocco)},
  pdf       = {http://wwwhomes.uni-bielefeld.de/opustylnikov/pustylnikov/pdfs/LREC08_full.pdf},
  abstract  = {This paper describes a database of 11 dependency treebanks
               which were unified by means of a two-dimensional graph format.
               The format was evaluated with respect to storage-complexity
               on the one hand, and efficiency of data access on the other
               hand. An example of how the treebanks can be integrated within
               a unique interface is given by means of the DTDB interface.}
}

BibTeX

@inproceedings{vor:der:Brueck:Stenzhorn:2008,
  author    = {vor der Brück, Tim and Stenzhorn, Holger},
  title     = {A Dynamic Approach for Automatic Error Detection in Generation Grammars},
  booktitle = {Proceedings of the 18th European Conference on Artificial Intelligence (ECAI)},
  address   = {Patras, Greece},
  abstract  = {In any real world application scenario, natural language generation
               (NLG) systems have to employ grammars consisting of tremendous
               amounts of rules. Detecting and fixing errors in such grammars
               is therefore a highly tedious task. In this work we present a
               data mining algorithm which deduces incorrect grammar rules by
               abductive reasoning out of positive and negative training examples.
               More specifically, the constituency trees belonging to successful
               generation processes and the incomplete trees of failed ones are
               analyzed. From this a quality score is derived for each grammar
               rule by analyzing the occurrences of the rules in the trees and
               by spotting the exact error locations in the incomplete trees.
               In prior work on automatic error detection v.d.Brück et al. [5]
               proposed a static error detection algorithm for generation grammars.
               The approach of Cussens et al. creates missing grammar rules for
               parsing using abduction [1]. Zeller introduced a dynamic approach
               in the related area of detecting errors in computer programs [6].},
  isbn      = {978-1-58603-891-5},
  month     = {jul},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/xtragen_egram.pdf},
  year      = {2008}
}

BibTeX

@inproceedings{vor:der:Brueck:Hartrumpf:Helbig:2008:a,
  author    = {vor der Brück, Tim and Hartrumpf, Sven and Helbig, Hermann},
  title     = {A Readability Checker with Supervised Learning using Deep Syntactic
               and Semantic Indicators},
  booktitle = {Proceedings of the 11th International Multiconference: Information
               Society - IS 2008 - Language Technologies},
  editor    = {Erjavec, Tomaž and Žganec Gros, Jerneja},
  pages     = {92--97},
  address   = {Ljubljana, Slovenia},
  abstract  = {Checking for readability or simplicity of texts is important for
               many institutional and individual users. Formulas for approximately
               measuring text readability have a long tradition. Usually, they
               exploit surface-oriented indicators like sentence length, word
               length, word frequency, etc. However, in many cases, this information
               is not adequate to realistically approximate the cognitive difficulties
               a person can have to understand a text. Therefore we use deep
               syntactic and semantic indicators in addition. The syntactic information
               is represented by a dependency tree, the semantic information
               by a semantic network. Both representations are automatically
               generated by a deep syntactico-semantic analysis. A global readability
               score is determined by applying a nearest neighbor algorithm on
               3,000 ratings of 300 test persons. The evaluation showed that
               the deep syntactic and semantic indicators lead to promising results
               comparable to the best surface-based indicators. The combination
               of deep and shallow indicators leads to an improvement over shallow
               indicators alone. Finally, a graphical user interface was developed
               which highlights difficult passages, depending on the individual
               indicator values, and displays a global readability score. Povzetek:
               Strojno učenje z odvisnostnimi drevesi je uporabljeno za ugotavljanje
               berljivosti besedil.},
  isbn      = {978-961-264-006-4},
  month     = {oct},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/brueck_hartrumpf_helbig08.pdf},
  url       = {http://pi7.fernuni-hagen.de/brueck/papers/brueck_hartrumpf_helbig08.pdf},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.160.5878},
  year      = {2008}
}

BibTeX

@inproceedings{Pustylnikov:Mehler:2008:c,
  author    = {Pustylnikov, Olga and Mehler, Alexander},
  title     = {Text classification by means of structural features. What kind
               of information about texts is captured by their structure?},
  booktitle = {Proceedings of RUSSIR '08, September 1-5, Taganrog, Russia},
  pdf       = {http://www.texttechnologylab.org/data/pdf/mehler_geibel_pustylnikov_2007.pdf},
  year      = {2008}
}

BibTeX

@inproceedings{Waltinger:Mehler:Stuehrenberg:2008,
  author    = {Waltinger, Ulli and Mehler, Alexander and Stührenberg, Maik},
  title     = {An Integrated Model of Lexical Chaining: Applications, Resources
               and their Format},
  booktitle = {Proceedings of KONVENS 2008 – Erg{\"a}nzungsband Textressourcen
               und lexikalisches Wissen},
  editor    = {Storrer, Angelika and Geyken, Alexander and Siebert, Alexander
               and Würzner, Kay-Michael},
  pages     = {59--70},
  pdf       = {http://www.ulliwaltinger.de/pdf/Konvens_2008_Integrated_Model_of_Lexical_Chaining_WaltingerMehlerStuehrenberg.pdf},
  year      = {2008}
}

BibTeX

@inproceedings{Mehler:2008:c,
  author    = {Mehler, Alexander},
  title     = {A Model of the Distribution of the Distances of Alike Elements
               in Dialogical Communication},
  booktitle = {Proceedings of the International Conference on Information Theory
               and Statistical Learning (ITSL '08), July 14-15, 2008, Las Vegas},
  pages     = {45--50},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_2008_c.pdf},
  year      = {2008}
}

BibTeX

@inproceedings{Waltinger:Mehler:Heyer:2008,
  author    = {Waltinger, Ulli and Mehler, Alexander and Heyer, Gerhard},
  title     = {Towards Automatic Content Tagging: Enhanced Web Services in Digital
               Libraries Using Lexical Chaining},
  booktitle = {4th Int. Conf. on Web Information Systems and Technologies (WEBIST
               '08), 4-7 May, Funchal, Portugal},
  editor    = {Cordeiro, José and Filipe, Joaquim and Hammoudi, Slimane},
  pages     = {231--236},
  address   = {Barcelona},
  publisher = {INSTICC Press},
  pdf       = {http://www.ulliwaltinger.de/pdf/Webist_2008_Towards_Automatic_Content_Tagging_WaltingerMehlerHeyer.pdf},
  url       = {http://dblp.uni-trier.de/db/conf/webist/webist2008-2.html#WaltingerMH08},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.463.3097},
  year      = {2008}
}

BibTeX

@inproceedings{Mehler:2008:f,
  author    = {Mehler, Alexander},
  title     = {A Short Note on Social-Semiotic Networks
               from the Point of View of Quantitative Semantics},
  booktitle = {Proceedings of the Dagstuhl Seminar on Social Web
               Communities, September 21-26, Dagstuhl},
  editor    = {Alani, Harith
               and Staab, Steffen
               and Stumme, Gerd},
  pdf       = {http://drops.dagstuhl.de/opus/volltexte/2008/1788/pdf/08391.MehlerAlexander.ExtAbstract.1788.pdf},
  year      = {2008}
}

BibTeX

@inproceedings{Waltinger:Mehler:2008:a,
  author    = {Waltinger, Ulli and Mehler, Alexander},
  title     = {Who is it? Context sensitive named entity and instance recognition
               by means of Wikipedia},
  booktitle = {Proceedings of the 2008 IEEE/WIC/ACM International Conference
               on Web Intelligence (WI-2008)},
  pages     = {381--384},
  publisher = {IEEE Computer Society},
  pdf       = {http://www.ulliwaltinger.de/pdf/WI_2008_Context_Sensitive_Instance_Recognition_WaltingerMehler.pdf},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.324.5881},
  year      = {2008}
}

BibTeX

@inproceedings{Luecking:Mehler:Menke:2008,
  author    = {Lücking, Andy and Mehler, Alexander and Menke, Peter},
  title     = {Taking Fingerprints of Speech-and-Gesture Ensembles: Approaching
               Empirical Evidence of Intrapersonal Alignment in Multimodal Communication},
  booktitle = {LONDIAL 2008: Proceedings of the 12th Workshop on the Semantics
               and Pragmatics of Dialogue (SEMDIAL)},
  pages     = {157--164},
  address   = {King's College London},
  month     = {June 2–4},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/luecking_mehler_menke_2008.pdf},
  website   = {https://www.researchgate.net/publication/237305375_Taking_Fingerprints_of_Speech-and-Gesture_Ensembles_Approaching_Empirical_Evidence_of_Intrapersonal_Alignment_in_Multimodal_Communication},
  year      = {2008}
}

BibTeX

@inproceedings{Mehler:2008:e,
  author    = {Mehler, Alexander},
  title     = {On the Impact of Community Structure on Self-Organizing Lexical Networks},
  booktitle = {Proceedings of the 7th Evolution of Language Conference (Evolang
               2008), March 11-15, 2008, Barcelona},
  editor    = {Smith, Andrew D. M. and Smith, Kenny and Ferrer i Cancho, Ramon},
  pages     = {227--234},
  publisher = {World Scientific},
  abstract  = {This paper presents a simulation model of self-organizing lexical
               networks. Its starting point is the notion of an association game
               in which the impact of varying community models is studied on
               the emergence of lexical networks. The paper reports on experiments
               whose results are in accordance with findings in the framework
               of the naming game. This is done by means of a multilevel network
               model in which the correlation of social and of linguistic networks
               is studied.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_2008_b.pdf},
  website   = {http://stel.ub.edu/evolang2008/evo10.htm},
  year      = {2008}
}

BibTeX

@inproceedings{Pustylnikov:Mehler:2008:a,
  author    = {Abramov, Olga
               and Mehler, Alexander},
  title     = {Towards a Uniform Representation of Treebanks:
               Providing Interoperability for Dependency Tree Data},
  booktitle = {Proceedings of First International Conference on
               Global Interoperability for Language Resources
               (ICGL 2008), Hong Kong SAR, January 9-11},
  abstract  = {In this paper we present a corpus representation format
               which unifies the representation of a wide range of
               dependency treebanks within a single model. This approach
               provides interoperability and reusability of annotated
               syntactic data which in turn extends its applicability
               within various research contexts. We demonstrate our
               approach by means of dependency treebanks of 11 languages.
               Further, we perform a comparative quantitative analysis
               of these treebanks in order to demonstrate the
               interoperability of our approach.},
  pdf       = {http://wwwhomes.uni-bielefeld.de/opustylnikov/pustylnikov/pdfs/acl07.1.0.pdf},
  website   = {https://www.researchgate.net/publication/242681771_Towards_a_Uniform_Representation_of_Treebanks_Providing_Interoperability_for_Dependency_Tree_Data},
  year      = {2008}
}

BibTeX

@inproceedings{Rehm:Santini:Mehler:Braslavski:Gleim:Stubbe:Symonenko:Tavosanis:Vidulin:2008,
  author    = {Rehm, Georg and Santini, Marina and Mehler, Alexander
               and Braslavski, Pavel and Gleim, Rüdiger and Stubbe, Andrea
               and Symonenko, Svetlana and Tavosanis, Mirko and Vidulin, Vedrana},
  title     = {Towards a Reference Corpus of Web Genres
               for the Evaluation of Genre Identification Systems},
  booktitle = {Proceedings of the 6th Language Resources and Evaluation
               Conference (LREC 2008), Marrakech (Morocco)},
  abstract  = {We present initial results from an international and
               multi-disciplinary research collaboration that aims at the
               construction of a reference corpus of web genres. The primary
               application scenario for which we plan to build this resource
               is the automatic identification of web genres. Web genres are
               rather difficult to capture and to describe in their entirety,
               but we plan for the finished reference corpus to contain
               multi-level tags of the respective genre or genres a web
               document or a website instantiates. As the construction of
               such a corpus is by no means a trivial task, we discuss
               several alternatives that are, for the time being, mostly
               based on existing collections. Furthermore, we discuss a
               shared set of genre categories and a multi-purpose tool as
               two additional prerequisites for a reference corpus of web
               genres.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/rehm_santini_mehler_braslavski_gleim_stubbe_symonenko_tavosanis_vidulin_2008.pdf},
  website   = {http://www.lrec-conf.org/proceedings/lrec2008/summaries/94.html},
  year      = {2008}
}

BibTeX

@inproceedings{Gleim:Mehler:Dehmer:Abramov:2007,
  author    = {Gleim, Rüdiger and Mehler, Alexander and Dehmer, Matthias and Abramov, Olga},
  title     = {Aisles through the Category Forest – Utilising the Wikipedia Category
               System for Corpus Building in Machine Learning},
  booktitle = {3rd International Conference on Web Information Systems and Technologies
               (WEBIST '07), March 3-6, 2007, Barcelona},
  editor    = {Filipe, Joaquim and Cordeiro, José and Encarnação, Bruno and Pedrosa, Vitor},
  pages     = {142--149},
  address   = {Barcelona},
  abstract  = {The World Wide Web is a continuous challenge to machine learning.
               Established approaches have to be enhanced and new methods be
               developed in order to tackle the problem of finding and organising
               relevant information. It has often been motivated that semantic
               classifications of input documents help solving this task. But
               while approaches of supervised text categorisation perform quite
               well on genres found in written text, newly evolved genres on
               the web are much more demanding. In order to successfully develop
               approaches to web mining, respective corpora are needed. However,
               the composition of genre- or domain-specific web corpora is still
               an unsolved problem. It is time consuming to build large corpora
               of good quality because web pages typically lack reliable meta
               information. Wikipedia along with similar approaches of collaborative
               text production offers a way out of this dilemma. We examine how
               social tagging, as supported by the MediaWiki software, can be
               utilised as a source of corpus building. Further, we describe
               a representation format for social ontologies and present the
               Wikipedia Category Explorer, a tool which supports categorical
               views to browse through the Wikipedia and to construct domain
               specific corpora for machine learning.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2016/10/webist_2007-gleim_mehler_dehmer_pustylnikov.pdf},
  year      = {2007}
}

BibTeX

@inproceedings{Mehler:Gleim:Wegner:2007,
  author    = {Mehler, Alexander and Gleim, Rüdiger and Wegner, Armin},
  title     = {Structural Uncertainty of Hypertext Types. An Empirical Study},
  booktitle = {Proceedings of the Workshop "Towards Genre-Enabled Search Engines:
               The Impact of NLP", September 30, 2007, in conjunction with RANLP
               2007, Borovets, Bulgaria},
  editor    = {Rehm, Georg and Santini, Marina},
  pages     = {13--19},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/RANLP.pdf},
  year      = {2007}
}

BibTeX

@inproceedings{Mehler:2007:d,
  author    = {Mehler, Alexander},
  title     = {Evolving Lexical Networks. A Simulation Model of Terminological Alignment},
  booktitle = {Proceedings of the Workshop on Language, Games, and Evolution
               at the 9th European Summer School in Logic, Language and Information
               (ESSLLI 2007), Trinity College, Dublin, 6-17 August},
  editor    = {Benz, Anton and Ebert, Christian and van Rooij, Robert},
  pages     = {57--67},
  abstract  = {In this paper we describe a simulation model of terminological
               alignment in a multiagent community. It is based on the notion
               of an association game which is used instead of the classical
               notion of a naming game (Steels, 1996). The simulation model integrates
               a small world-like agent community which restricts agent communication.
               We hypothesize that this restriction is decisive when it comes
               to simulate terminological alignment based on lexical priming.
               The paper presents preliminary experimental results in support
               of this hypothesis.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_2007_d.pdf},
  year      = {2007}
}

BibTeX

@inproceedings{Mehler:Geibel:Gleim:Herold:Jain:Pustylnikov:2007,
  author    = {Mehler, Alexander and Geibel, Peter and Gleim, Rüdiger and Herold, Sebastian
               and Jain, Brijnesh-Johannes and Abramov, Olga},
  title     = {Much Ado About Text Content. Learning Text Types Solely by Structural
               Differentiae},
  booktitle = {Proceedings of OTT '06 – Ontologies in Text Technology: Approaches
               to Extract Semantic Knowledge from Structured Information},
  editor    = {Mönnich, Uwe and Kühnberger, Kai-Uwe},
  series    = {Publications of the Institute of Cognitive Science (PICS)},
  pages     = {63--71},
  address   = {Osnabrück},
  abstract  = {In this paper, we deal with classifying texts into classes which
               denote text types whose textual instances serve more or less homogeneous
               functions. Other than mainstream approaches to text classification,
               which rely on the vector space model [30] or some of its descendants
               [2] and, thus, on content-related lexical features, we solely
               refer to structural differentiae, that is, to patterns of text
               structure as determinants of class membership. Further, we suppose
               that text types span a type hierarchy based on the type-subtype
               relation [31]. Thus, although we admit that class membership is
               fuzzy so that overlapping classes are inevitable, we suppose a
               non-overlapping type system structured into a rooted tree – whether
               solely based on functional or additional on, e.g., content- or
               media-based criteria [1]. What regards criteria of goodness of
               classification, we perform a classical supervised categorization
               experiment [30] based on cross-validation as a method of model
               selection [11]. That is, we perform a categorization experiment
               in which for all training and test cases class membership is known
               ex ante. In summary, we perform a supervised experiment of text
               classification in order to learn functionally grounded text types
               where membership to these types is solely based on structural
               criteria.},
  pdf       = {http://ikw.uni-osnabrueck.de/~ott06/ott06-abstracts/Mehler_Geibel_abstract.pdf},
  year      = {2007}
}

BibTeX

@inproceedings{Dehmer:Mehler:Emmert-Streib:2007:a,
  author    = {Dehmer, Matthias and Mehler, Alexander and Emmert-Streib, Frank},
  title     = {Graph-theoretical Characterizations of Generalized Trees},
  booktitle = {Proceedings of the 2007 International Conference on Machine Learning:
               Models, Technologies \& Applications (MLMTA '07), June 25-28,
               2007, Las Vegas},
  pages     = {113--117},
  website   = {https://www.researchgate.net/publication/221188591_Graph-theoretical_Characterizations_of_Generalized_Trees},
  year      = {2007}
}

Rüdiger Gleim, Alexander Mehler and Hans-Jürgen Eikmeyer. 2007. Representing and Maintaining Large Corpora. Proceedings of the Corpus Linguistics 2007 Conference, Birmingham (UK).

BibTeX

@inproceedings{Gleim:Mehler:Eikmeyer:2007:a,
  author    = {Gleim, Rüdiger
               and Mehler, Alexander
               and Eikmeyer, Hans-Jürgen},
  title     = {Representing and Maintaining
               Large Corpora},
  booktitle = {Proceedings of the Corpus Linguistics 2007 Conference,
               Birmingham (UK)},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/gleim_mehler_eikmeyer_2007_a.pdf},
  year      = {2007}
}

BibTeX

@inproceedings{Geibel:Pustylnikov:Mehler:Gust:Kuehnberger:2007,
  author    = {Geibel, Peter and Abramov, Olga and Mehler, Alexander and Gust, Helmar
               and Kühnberger, Kai-Uwe},
  title     = {Classification of Documents Based on the Structure of Their DOM Trees},
  booktitle = {Proceedings of ICONIP 2007 (14th International Conference on Neural
               Information Processing)},
  volume    = {4985},
  series    = {Lecture Notes in Computer Science},
  pages     = {779--788},
  publisher = {Springer},
  abstract  = {In this paper, we discuss kernels that can be applied for the
               classification of XML documents based on their DOM trees. DOM
               trees are ordered trees in which every node might be labeled by
               a vector of attributes including its XML tag and the textual content.
               We describe five new kernels suitable for such structures: a kernel
               based on predefined structural features, a tree kernel derived
               from the well-known parse tree kernel, the set tree kernel that
               allows permutations of children, the string tree kernel being
               an extension of the so-called partial tree kernel, and the soft
               tree kernel as a more efficient alternative. We evaluate the kernels
               experimentally on a corpus containing the DOM trees of newspaper
               articles and on the well-known SUSANNE corpus.},
  website   = {http://www.springerlink.com/content/x414002113425742/},
  year      = {2007}
}

BibTeX

@inproceedings{Mehler:Waltinger:Wegner:2007:a,
  author    = {Mehler, Alexander and Waltinger, Ulli and Wegner, Armin},
  title     = {A Formal Text Representation Model Based on Lexical Chaining},
  booktitle = {Proceedings of the KI 2007 Workshop on Learning from Non-Vectorial
               Data (LNVD 2007) September 10, Osnabrück},
  editor    = {Geibel, Peter and Jain, Brijnesh J.},
  pages     = {17--26},
  address   = {Osnabrück},
  publisher = {Universit{\"a}t Osnabrück},
  abstract  = {This paper presents a formal text representation model as an alternative
               to the vector space model. It combines a tree-like model with
               graph-inducing lexical relations. The paper aims at formalizing
               two yet unrelated approaches, i.e. lexical chaining [3] and quantitative
               structure analysis [9], in order to combine content and structure
               modeling.},
  pdf       = {http://www.ulliwaltinger.de/pdf/LNVD07MehlerWaltingerWegner.pdf},
  year      = {2007}
}

BibTeX

@inproceedings{Asadullah:Zahurul:Khan:2007,
  author    = {Asadullah, Munshi and Islam, Md. Zahurul and Khan, Mumit},
  title     = {Error-tolerant Finite-state Recognizer and String Pattern Similarity
               Based Spell-Checker for Bengali},
  booktitle = {5th International Conference on Natural Language Processing (ICON)
               as a poster, Hyderabad, India, January 2007},
  abstract  = {A crucial figure of merit for a spelling checker is not just whether
               it can detect misspelled words, but also in how it ranks the
               suggestions for the word. Spelling checker algorithms using edit distance
               methods tend to produce a large number of possibilities for misspelled
               words. We propose an alternative approach to checking the spelling
               of Bangla text that uses a finite state automaton (FSA) to probabilistically
               create the suggestion list for a misspelled word. FSA has proven
               to be an effective method for problems requiring probabilistic
               solution and high error tolerance. We start by using a finite
               state representation for all the words in the Bangla dictionary;
               the algorithm then uses the state tables to test a string, and
               in case of an erroneous string, try to find all possible solutions
               by attempting singular and multi-step transitions to consume
               one or more characters and using the subsequent characters as
               look-ahead; and finally, we use backtracking to add each possible
               solution to the suggestion list. The use of finite state representation
               for the word implies that the algorithm is much more efficient
               in the case of non-inflected forms; in case of nouns, it is
               even more significant as Bangla nouns are heavily used in the
               non-inflected form. In terms of error detection and correction,
               the algorithm uses the statistics of Bangla error pattern and
               thus produces a small number of significant suggestions. One
               notable limitation is the inability to handle transposition errors
               as a single edit distance errors. This is not as significant as
               it may seem since the number of transposition errors are not as
               common as other errors in Bangla. This paper presents the structure
               and the algorithm to implement a Practical Bangla spell-checker,
               and discusses the results obtained from the prototype implementation.},
  owner     = {zahurul},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Error-tolerant_Finite-state_Recognizer_and_String_Pattern_Similarity_Based_Spell-Checker_for_Bengali.pdf},
  timestamp = {2011.08.02},
  year      = {2007}
}

BibTeX

@inproceedings{Zahurul:Uddin:Khan:2007,
  author    = {Islam, Md. Zahurul and Uddin, Md. Nizam and Khan, Mumit},
  title     = {A Light Weight Stemmer for Bengali and Its Use in Spelling Checker},
  booktitle = {1st International Conference on Digital Communications and Computer
               Applications (DCCA2007)},
  abstract  = {Stemming is an operation that splits a word into the constituent
               root part and affix without doing complete morphological analysis.
               It is used to improve the performance of spelling checkers and
               information retrieval applications, where morphological analysis
               would be too computationally expensive. For spelling checkers
               specifically, using stemming may drastically reduce the dictionary
               size, often a bottleneck for mobile and embedded devices. This
               paper presents a computationally inexpensive stemming algorithm
               for Bengali, which handles suffix removal in a domain independent
               way. The evaluation of the proposed algorithm in a Bengali spelling
               checker indicates that it can be effectively used in information
               retrieval applications in general.},
  owner     = {zahurul},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/A_Light_Weight_Stemmer_for_Bengali_and_Its_Use_in_Spelling_Checker.pdf},
  timestamp = {2011.08.02},
  year      = {2007}
}

BibTeX

@inproceedings{Zahurul:Khan:2007,
  author    = {Islam, Md. Zahurul and Khan, Mumit},
  title     = {Bangla Verb Morphology and a Multilingual Computational Morphology
               FrameWork for PC-KIMMO},
  booktitle = {The Proceedings of Workshop on Morpho-Syntactic Analysis by
               the School of Asian Applied Natural Language Processing for Language
               Diversity and Language Resource Development (ADD), Bangkok, Thailand},
  owner     = {zahurul},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Bangla_Verb_Morphology_and_a_Multilingual_Computational_Morphology_FrameWork_for_PC-KIMMO-talk.pdf},
  timestamp = {2011.08.02},
  year      = {2007}
}

BibTeX

@inproceedings{Abramov:Mehler:2007:b,
  author    = {Abramov, Olga and Mehler, Alexander},
  title     = {Structural Differentiae of Text Types. A Quantitative Model},
  booktitle = {Proceedings of the 31st Annual Conference of the German Classification
               Society on Data Analysis, Machine Learning, and Applications (GfKl)},
  pages     = {655--662},
  pdf       = {http://wwwhomes.uni-bielefeld.de/opustylnikov/pustylnikov/pdfs/gfkl.pdf},
  website   = {http://www.springerprofessional.de/077---structural-differentiae-of-text-types--a-quantitative-model/1957362.html},
  year      = {2007}
}

BibTeX

@inproceedings{Mehler:Storrer:2007,
  author    = {Mehler, Alexander and Storrer, Angelika},
  title     = {What are Ontologies Good For? Evaluating Terminological Ontologies
               in the Framework of Text Graph Classification},
  booktitle = {Proceedings of OTT '06 – Ontologies in Text Technology: Approaches
               to Extract Semantic Knowledge from Structured Information},
  editor    = {Mönnich, Uwe and Kühnberger, Kai-Uwe},
  series    = {Publications of the Institute of Cognitive Science (PICS)},
  pages     = {11--18},
  address   = {Osnabrück},
  pdf       = {http://cogsci.uni-osnabrueck.de/~ott06/ott06-abstracts/Mehler_Storrer_abstract.pdf},
  website   = {http://citeseer.uark.edu:8080/citeseerx/viewdoc/summary?doi=10.1.1.91.2979},
  year      = {2007}
}

BibTeX

@inproceedings{Stuehrenberg:Goecke:Diewald:Mehler:Cramer:2007:a,
  author    = {Stührenberg, Maik and Goecke, Daniela and Diewald, Nils and Mehler, Alexander
               and Cramer, Irene},
  title     = {Web-based Annotation of Anaphoric Relations and Lexical Chains},
  booktitle = {Proceedings of the Linguistic Annotation Workshop, ACL 2007},
  pages     = {140--147},
  pdf       = {http://www.aclweb.org/anthology/W07-1523},
  website   = {https://www.researchgate.net/publication/234800610_Web-based_annotation_of_anaphoric_relations_and_lexical_chains},
  year      = {2007}
}

BibTeX

@inproceedings{Ferrer:i:Cancho:Mehler:Pustylnikov:Diaz-Guilera:2007:a,
  author    = {Ferrer i Cancho, Ramon and Mehler, Alexander and Abramov, Olga
               and Díaz-Guilera, Albert},
  title     = {Correlations in the organization of large-scale syntactic dependency networks},
  booktitle = {Proceedings of Graph-based Methods for Natural Language Processing
               (TextGraphs-2) at the Annual Conference of the North American
               Chapter of the Association for Computational Linguistics (NAACL-HLT
               2007), Rochester, New York},
  pages     = {65--72},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/ferrer-i-cancho_mehler_pustylnikov_diaz-guilera_2007_a.pdf},
  year      = {2007}
}

BibTeX

@inproceedings{Gleim:Mehler:Eikmeyer:Rieser:2007,
  author    = {Gleim, Rüdiger and Mehler, Alexander and Eikmeyer, Hans-Jürgen
               and Rieser, Hannes},
  title     = {Ein Ansatz zur Repr{\"a}sentation und Verarbeitung gro{\ss}er
               Korpora multimodaler Daten},
  booktitle = {Data Structures for Linguistic Resources and Applications. Proceedings
               of the Biennial GLDV Conference 2007, 11.–13. April, Universit{\"a}t
               Tübingen},
  editor    = {Rehm, Georg and Witt, Andreas and Lemnitzer, Lothar},
  pages     = {275--284},
  address   = {Tübingen},
  publisher = {Narr},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/gleim_mehler_eikmeyer_rieser_2007.pdf},
  year      = {2007}
}

BibTeX

@inproceedings{Geibel:Krumnack:Pustylnikov:Mehler:Gust:Kuehnberger:2007,
  author    = {Geibel, Peter and Krumnack, Ulf and Abramov, Olga and Mehler, Alexander
               and Gust, Helmar and Kühnberger, Kai-Uwe},
  title     = {Structure-Sensitive Learning of Text Types},
  booktitle = {Proceedings of AI 2007: Advances in Artificial Intelligence, 20th
               Australian Joint Conference on Artificial Intelligence, Gold Coast,
               Australia, December 2-6, 2007},
  editor    = {Orgun, Mehmet A. and Thornton, John},
  volume    = {4830},
  series    = {Lecture Notes in Computer Science},
  pages     = {642--646},
  publisher = {Springer},
  abstract  = {In this paper, we discuss the structure based classification of
               documents based on their logical document structure, i.e., their
               DOM trees. We describe a method using predefined structural features
               and also four tree kernels suitable for such structures. We evaluate
               the methods experimentally on a corpus containing the DOM trees
               of newspaper articles, and on the well-known SUSANNE corpus. We
               will demonstrate that, for the two corpora, many text types can
               be learned based on structural features only.},
  website   = {http://www.springerlink.com/content/w574377ww1h6m212/},
  year      = {2007}
}

BibTeX

@inproceedings{Mehler:Gleim:Dehmer:2006,
  author    = {Mehler, Alexander and Gleim, Rüdiger and Dehmer, Matthias},
  title     = {Towards Structure-Sensitive Hypertext Categorization},
  booktitle = {Proceedings of the 29th Annual Conference of the German Classification
               Society, March 9-11, 2005, Universit{\"a}t Magdeburg},
  editor    = {Spiliopoulou, Myra and Kruse, Rudolf and Borgelt, Christian and Nürnberger, Andreas
               and Gaul, Wolfgang},
  pages     = {406--413},
  address   = {Berlin/New York},
  publisher = {Springer},
  abstract  = {Hypertext categorization is the task of automatically assigning
               category labels to hypertext units. Comparable to text categorization
               it stays in the area of function learning based on the bag-of-features
               approach. This scenario faces the problem of a many-to-many relation
               between websites and their hidden logical document structure.
               The paper argues that this relation is a prevalent characteristic
               which interferes any effort of applying the classical apparatus
               of categorization to web genres. This is confirmed by a threefold
               experiment in hypertext categorization. In order to outline a
               solution to this problem, the paper sketches an alternative method
               of unsupervised learning which aims at bridging the gap between
               statistical and structural pattern recognition (Bunke et al. 2001)
               in the area of web mining.},
  website   = {http://www.springerlink.com/content/l7665tm3u241317l/},
  year      = {2006}
}

BibTeX

@inproceedings{Gleim:Mehler:Dehmer:2006:a,
  author    = {Gleim, Rüdiger and Mehler, Alexander and Dehmer, Matthias},
  title     = {Web Corpus Mining by Instance of {Wikipedia}},
  booktitle = {Proceedings of the EACL 2006 Workshop on Web as Corpus, April
               3-7, 2006, Trento, Italy},
  editor    = {Kilgarriff, Adam and Baroni, Marco},
  pages     = {67--74},
  abstract  = {Workshop organizer: Adam Kilgarriff},
  pdf       = {http://www.aclweb.org/anthology/W06-1710},
  website   = {http://pub.uni-bielefeld.de/publication/1773538},
  year      = {2006}
}

Alexander Mehler. 2006. In Search of a Bridge Between Network Analysis in Computational Linguistics and Computational Biology – A Conceptual Note. BIOCOMP, 496–502.

BibTeX

% NOTE(review): this entry appears to describe the same paper as Mehler:2006:a
% (same title, BIOCOMP 2006) but lists a different page range -- confirm and merge.
@inproceedings{mehler:2006,
  author    = {Mehler, Alexander},
  title     = {In Search of a Bridge Between Network Analysis in Computational
               Linguistics and Computational Biology -- A Conceptual Note},
  booktitle = {BIOCOMP},
  pages     = {496--502},
  pdf       = {https://pdfs.semanticscholar.org/81aa/0b840ed413089d69908cff60628a92609ccd.pdf},
  year      = {2006}
}

Tim vor der Brück and Stephan Busemann. October, 2006. Automatic Error Correction for Tree-Mapping Grammars. Proceedings of KONVENS 2006, 1–8.

BibTeX

@inproceedings{vor:der:Brueck:Busemann:2006,
  author    = {vor der Brück, Tim and Busemann, Stephan},
  title     = {Automatic Error Correction for Tree-Mapping Grammars},
  booktitle = {Proceedings of KONVENS 2006},
  editor    = {Butt, Miriam},
  pages     = {1--8},
  address   = {Konstanz, Germany},
  abstract  = {Tree mapping grammars are used in natural language generation
               (NLG) to map non-linguistic input onto a derivation tree from
               which the target text can be trivially read off as the terminal
               yield. Such grammars may consist of a large number of rules. Finding
               errors is quite tedious and sometimes very time-consuming. Often
               the generation fails because the relevant input subtree is not
               specified correctly. This work describes a method to detect and
               correct wrong assignments of input subtrees to grammar categories
               by cross-validating grammar rules with the given input structures.
               The result is implemented in a grammar development workbench and
               helps accelerating the grammar writer's work considerably.},
  isbn      = {3-89318-050-8},
  month     = oct,
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/brueck-busemann-konvens06.pdf},
  url       = {http://pi7.fernuni-hagen.de/brueck/papers/brueck-busemann-konvens06.pdf},
  website   = {http://www.dfki.de/lt/publication_show.php?id=3602},
  year      = {2006}
}

BibTeX

@inproceedings{Kranstedt:et:al:2006:c,
  author    = {Kranstedt, Alfred and Lücking, Andy and Pfeiffer, Thies and Rieser, Hannes
               and Staudacher, Marc},
  title     = {Measuring and Reconstructing Pointing in Visual Contexts},
  booktitle = {brandial '06 -- Proceedings of the 10th Workshop on the Semantics
               and Pragmatics of Dialogue},
  editor    = {Schlangen, David and Fernández, Raquel},
  pages     = {82--89},
  address   = {Potsdam},
  publisher = {Universit{\"a}tsverlag Potsdam},
  abstract  = {We describe an experiment to gather original data on geometrical
               aspects of pointing. In particular, we are focusing upon the concept
               of the pointing cone, a geometrical model of a pointing’s extension.
               In our setting we employed methodological and technical procedures
               of a new type to integrate data from annotations as well as from
               tracker recordings. We combined exact information on position
               and orientation with rater’s classifications. Our first results
               seem to challenge classical linguistic and philosophical theories
               of demonstration in that they advise to separate pointings from
               reference.},
  keywords  = {own},
  month     = sep,
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/measure.pdf},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.144.8472},
  year      = {2006}
}

BibTeX

@inproceedings{Luecking:Rieser:Staudacher:2006:a,
  author    = {Lücking, Andy and Rieser, Hannes and Staudacher, Marc},
  title     = {Multi-modal Integration for Gesture and Speech},
  booktitle = {brandial '06 -- Proceedings of the 10th Workshop on the Semantics
               and Pragmatics of Dialogue},
  editor    = {Schlangen, David and Fernández, Raquel},
  pages     = {106--113},
  address   = {Potsdam},
  publisher = {Universit{\"a}tsverlag Potsdam},
  abstract  = {Demonstratives, in particular gestures that 'only' accompany speech,
               are not a big issue in current theories of grammar. If we deal
               with gestures, fixing their function is one big problem, the other
               one is how to integrate the representations originating from different
               channels and, ultimately, how to determine their composite meanings.
               The growing interest in multi-modal settings, computer simulations,
               human-machine interfaces and VR-applications increases the need
               for theories of multi-modal structures and events. In our workshop-contribution
               we focus on the integration of multi-modal contents and investigate
               different approaches dealing with this problem such as Johnston
               et al. (1997) and Johnston (1998), Johnston and Bangalore (2000),
               Chierchia (1995), Asher (2005), and Rieser (2005).},
  keywords  = {own},
  month     = sep,
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mm-int-brandial-final.pdf},
  year      = {2006}
}

BibTeX

@inproceedings{Pfeiffer:Kranstedt:Luecking:2006,
  author    = {Pfeiffer, Thies and Kranstedt, Alfred and Lücking, Andy},
  title     = {Sprach-Gestik Experimente mit {IADE}, dem {Interactive Augmented Data Explorer}},
  booktitle = {Proceedings: Dritter Workshop Virtuelle und Erweiterte Realit{\"a}t
               der GI-Fachgruppe VR/AR},
  address   = {Koblenz},
  abstract  = {Für die empirische Erforschung natürlicher menschlicher Kommunikation
               sind wir auf die Akquise und Auswertung umfangreicher Daten angewiesen.
               Die Modalit{\"a}ten, über die sich Menschen ausdrücken können,
               sind sehr unterschiedlich - und genauso verschieden sind die Repr{\"a}sentationen,
               mit denen sie für die Empirie verfügbar gemacht werden können.
               Für eine Untersuchung des Zeigeverhaltens bei der Referenzierung
               von Objekten haben wir mit IADE ein Framework für die Aufzeichnung,
               Analyse und Resimulation von Sprach-Gestik Daten entwickelt. Mit
               dessen Hilfe können wir für unsere Forschung entscheidende Fortschritte
               in der linguistischen Experimentalmethodik machen.},
  keywords  = {own},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Pfeiffer-Kranstedt-Luecking-IADE.pdf},
  website   = {http://pub.uni-bielefeld.de/publication/2426853},
  year      = {2006}
}

BibTeX

@inproceedings{Luecking:Rieser:Stauchdacher:2006:b,
  author    = {Lücking, Andy and Rieser, Hannes and Staudacher, Marc},
  title     = {{SDRT} and Multi-modal Situated Communication},
  booktitle = {brandial '06 -- Proceedings of the 10th Workshop on the Semantics
               and Pragmatics of Dialogue},
  editor    = {Schlangen, David and Fernández, Raquel},
  pages     = {72--79},
  address   = {Potsdam},
  publisher = {Universit{\"a}tsverlag Potsdam},
  keywords  = {own},
  month     = sep,
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/sdrt-sitcomm-brandial-final.pdf},
  year      = {2006}
}

BibTeX

@inproceedings{Zahurul:Khan:2006,
  author    = {Islam, Md. Zahurul and Khan, Mumit},
  title     = {{JKimmo}: A Multilingual Computational Morphology Framework for {PC-KIMMO}},
  booktitle = {9th International Conference on Computer and Information Technology
               (ICCIT 2006), Dhaka, Bangladesh},
  abstract  = {Morphological analysis is of fundamental interest in computational
               linguistics and language processing. While there are established
               morphological analyzers for mostly Western and a few other languages
               using localized interfaces, the same cannot be said for Indic
               and other less-studied languages for which language processing
               is just beginning. There are three primary obstacles to computational
               morphological analysis of these less-studied languages: the generative
               rules that define the language morphology, the morphological processor,
               and the computational interface that a linguist can use to experiment
               with the generative rules. In this paper, we present JKimmo, a
               multilingual morphological open-source framework that uses the
               PC-KIMMO two-level morphological processor and provides a localized
               interface for Bangla morphological analysis. We then apply Jkimmo
               to Bangla computational morphology, demonstrating both its recognition
               and generation capabilities. Jkimmo’s internationalization (i18n)
               framework allows easy localization in other languages as well,
               using a property file for the interface definitions and a transliteration
               scheme for the analysis.},
  owner     = {zahurul},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/JKimmo_-A_Multilingual_Computational_Morphology_Framework_for_PC-KIMMO.pdf},
  timestamp = {2011.08.02},
  website   = {https://www.researchgate.net/publication/237728403_JKimmo_A_Multilingual_Computational_Morphology_Framework_for_PC-KIMMO},
  year      = {2006}
}

BibTeX

@inproceedings{Rownok:Zahurul:Khan:2006,
  author    = {Rownok, Tofazzal and Islam, Md. Zahurul and Khan, Mumit},
  title     = {{Bangla} Text Input and Rendering Support for Short Message Service
               on Mobile Devices},
  booktitle = {9th International Conference on Computer and Information Technology
               (ICCIT 2006), Dhaka, Bangladesh},
  abstract  = {Technology is the most important thing that involve in our everyday
               life. It is involving in almost every aspect of life like communication,
               work, shopping, recreation etc. Communication through mobile devices
               is the most effective and easy way now a day. It is faster, easier
               and you can communicate whenever you want from anywhere. Mobile
               messaging or short message service is one of the popular ways
               to communicate using mobile devices. It is a big challenge to
               write and display Bangla characters on mobile devices. In this
               paper, we describe a Bangla text input method and rendering support
               on mobile devices for short message service.},
  owner     = {zahurul},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Bangla_Text_Input_and_Rendering_Support_for_Short_Message_Service_on_Mobile_Devices.pdf},
  timestamp = {2011.08.02},
  year      = {2006}
}

BibTeX

@inproceedings{Arafat:Zahurul:Khan:2006,
  author    = {Arafat, Yeasir and Islam, Md. Zahurul and Khan, Mumit},
  title     = {Analysis and Observations from a {Bangla} News Corpus},
  booktitle = {9th International Conference on Computer and Information Technology
               (ICCIT 2006), Dhaka, Bangladesh},
  owner     = {zahurul},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Analysis_and_Observations_From_a_Bangla_news_corpus.pdf},
  timestamp = {2011.08.02},
  year      = {2006}
}

BibTeX

@inproceedings{Gleim:2006,
  author    = {Gleim, Rüdiger},
  title     = {{HyGraph} -- Ein Framework zur Extraktion, Repr{\"a}sentation und
               Analyse webbasierter Hypertextstrukturen},
  booktitle = {Sprachtechnologie, mobile Kommunikation und linguistische Ressourcen.
               Beitr{\"a}ge zur GLDV-Tagung 2005, Universit{\"a}t Bonn},
  editor    = {Fisseni, Bernhard and Schmitz, Hans-Christian and Schröder, Bernhard
               and Wagner, Petra},
  pages     = {42--53},
  address   = {Frankfurt a. M.},
  publisher = {Lang},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2016/10/GLDV2005-HyGraph-Framework.pdf},
  website   = {https://www.researchgate.net/publication/268294000_HyGraph__Ein_Framework_zur_Extraktion_Reprsentation_und_Analyse_webbasierter_Hypertextstrukturen},
  year      = {2006}
}

BibTeX

@inproceedings{Mehler:2006:c,
  author    = {Mehler, Alexander},
  title     = {Text Linkage in the Wiki Medium -- A Comparative Study},
  booktitle = {Proceedings of the EACL Workshop on New Text -- Wikis and blogs
               and other dynamic text sources, April 3-7, 2006, Trento, Italy},
  editor    = {Karlgren, Jussi},
  pages     = {1--8},
  abstract  = {Workshop organizer: Jussi Karlgren},
  pdf       = {http://www.aclweb.org/anthology/W06-2801},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.6390},
  year      = {2006}
}

BibTeX

@inproceedings{Mehler:Sichelschmidt:2006,
  author    = {Mehler, Alexander and Sichelschmidt, Lorenz},
  title     = {Reconceptualizing Latent Semantic Analysis in Terms of Complex
               Network Theory. A Corpus-Linguistic Approach},
  booktitle = {2nd International Conference of the German Cognitive Linguistics
               Association -- Theme Session: Cognitive-Linguistic Approaches:
               What can we gain by computational treatment of data? 5.-7. Oktober
               2006, Ludwig-Maximilians-Universit{\"a}t München},
  editor    = {Alonge, Antonietta and Lönneker-Rodman, Birte},
  pages     = {23--26},
  pdf       = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.87.5069&rep=rep1&type=pdf},
  year      = {2006}
}

BibTeX

@inproceedings{Mehler:Dehmer:Gleim:2006,
  author    = {Mehler, Alexander and Dehmer, Matthias and Gleim, Rüdiger},
  title     = {Towards Logical Hypertext Structure -- A Graph-Theoretic Perspective},
  booktitle = {Proceedings of the Fourth International Workshop on Innovative
               Internet Computing Systems (I2CS '04)},
  editor    = {Böhme, Thomas and Heyer, Gerhard},
  volume    = {3473},
  series    = {Lecture Notes in Computer Science},
  pages     = {136--150},
  address   = {Berlin/New York},
  publisher = {Springer},
  abstract  = {Facing the retrieval problem according to the overwhelming set
               of documents online the adaptation of text categorization to web
               units has recently been pushed. The aim is to utilize categories
               of web sites and pages as an additional retrieval criterion. In
               this context, the bag-of-words model has been utilized just as
               HTML tags and link structures. In spite of promising results this
               adaptation stays in the framework of IR specific models since
               it neglects the content-based structuring inherent to hypertext
               units. This paper approaches hypertext modelling from the perspective
               of graph-theory. It presents an XML-based format for representing
               websites as hypergraphs. These hypergraphs are used to shed light
               on the relation of hypertext structure types and their web-based
               instances. We place emphasis on two characteristics of this relation:
               In terms of realizational ambiguity we speak of functional equivalents
               to the manifestation of the same structure type. In terms of polymorphism
               we speak of a single web unit which manifests different structure
               types. It is shown that polymorphism is a prevalent characteristic
               of web-based units. This is done by means of a categorization
               experiment which analyses a corpus of hypergraphs representing
               the structure and content of pages of conference websites. On
               this background we plead for a revision of text representation
               models by means of hypergraphs which are sensitive to the manifold
               structuring of web documents.},
  website   = {http://rd.springer.com/chapter/10.1007/11553762_14},
  year      = {2006}
}

BibTeX

@inproceedings{Mehler:2006:a,
  author    = {Mehler, Alexander},
  title     = {In Search of a Bridge between Network Analysis in Computational
               Linguistics and Computational Biology -- A Conceptual Note},
  booktitle = {Proceedings of the 2006 International Conference on Bioinformatics
               \& Computational Biology (BIOCOMP '06), June 26, 2006, Las Vegas,
               USA},
  editor    = {Arabnia, Hamid R. and Valafar, Homayoun},
  pages     = {496--500},
  pdf       = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.92.9842&rep=rep1&type=pdf},
  year      = {2006}
}

BibTeX

@inproceedings{Dehmer:Emmert:Streib:Mehler:Kilian:Muehlhaeuser:2005,
  author    = {Dehmer, Matthias and Emmert-Streib, Frank and Mehler, Alexander
               and Kilian, Jürgen and Mühlh{\"a}user, Max},
  title     = {Application of a Similarity Measure for Graphs to Web-based Document Structures},
  booktitle = {Proceedings of VI. International Conference on Enformatika, Systems
               Sciences and Engineering, Budapest, Hungary, October 2005, International
               Academy of Sciences: Enformatika 8 (2005)},
  pages     = {77--81},
  abstract  = {Due to the tremendous amount of information provided by the World
               Wide Web (WWW) developing methods for mining the structure of
               web-based documents is of considerable interest. In this paper
               we present a similarity measure for graphs representing web-based
               hypertext structures. Our similarity measure is mainly based on
               a novel representation of a graph as linear integer strings, whose
               components represent structural properties of the graph. The similarity
               of two graphs is then defined as the optimal alignment of the
               underlying property strings. In this paper we apply the well known
               technique of sequence alignments for solving a novel and challenging
               problem: Measuring the structural similarity of generalized trees.
               In other words: We first transform our graphs considered as high
               dimensional objects in linear structures. Then we derive similarity
               values from the alignments of the property strings in order to
               measure the structural similarity of generalized trees. Hence,
               we transform a graph similarity problem to a string similarity
               problem for developing a efficient graph similarity measure. We
               demonstrate that our similarity measure captures important structural
               information by applying it to two different test sets consisting
               of graphs representing web-based document structures.},
  pdf       = {http://waset.org/publications/15299/application-of-a-similarity-measure-for-graphs-to-web-based-document-structures},
  website   = {https://www.researchgate.net/publication/238687277_Application_of_a_Similarity_Measure_for_Graphs_to_Web-based_Document_Structures},
  year      = {2005}
}

BibTeX

@inproceedings{Mehler:2005:c,
  author    = {Mehler, Alexander},
  title     = {Preliminaries to an Algebraic Treatment of Lexical Associations},
  booktitle = {Learning and Extending Lexical Ontologies. Proceedings of the
               Workshop at the 22nd International Conference on Machine Learning
               (ICML '05), August 7-11, 2005, Universit{\"a}t Bonn, Germany},
  editor    = {Biemann, Chris and Paa{\ss}, Gerhard},
  pages     = {41--47},
  year      = {2005}
}

BibTeX

@inproceedings{Mehler:Gleim:2005:a,
  author    = {Mehler, Alexander and Gleim, Rüdiger},
  title     = {Polymorphism in Generic Web Units. A corpus linguistic study},
  booktitle = {Proceedings of Corpus Linguistics '05, July 14-17, 2005, University
               of Birmingham, Great Britain},
  series    = {Corpus Linguistics Conference Series},
  volume    = {1(1)},
  abstract  = {Corpus linguistics and related disciplines which focus on statistical
               analyses of textual units have substantial need for large corpora.
               More specifically, genre or register specific corpora are needed
               which allow studying variations in language use. Along with the
               incredible growth of the internet, the web became an important
               source of linguistic data. Of course, web corpora face the same
               problem of acquiring genre specific corpora. Amongst other things,
               web mining is a framework of methods for automatically assigning
               category labels to web units and thus may be seen as a solution
               to this corpus acquisition problem as far as genre categories
               are applied. The paper argues that this approach is faced with
               the problem of a many-to-many relation between expression units
               on the one hand and content or function units on the other hand.
               A quantitative study is performed which supports the argumentation
               that functions of web-based communication are very often concentrated
               on single web pages and thus interfere any effort of directly
               applying the classical apparatus of categorization on web page
               level. The paper outlines a two-level algorithm as an alternative
               approach to category assignment which is sensitive to genre specific
               structures and thus may be used to tackle the problem of acquiring
               genre specific corpora.},
  issn      = {1747-9398},
  pdf       = {http://www.birmingham.ac.uk/Documents/college-artslaw/corpus/conference-archives/2005-journal/Thewebasacorpus/AlexanderMehlerandRuedigerGleimCorpusLinguistics2005.pdf},
  year      = {2005}
}

BibTeX

@inproceedings{Mehler:Dehmer:Gleim:2005,
  author    = {Mehler, Alexander and Dehmer, Matthias and Gleim, Rüdiger},
  title     = {Zur Automatischen Klassifikation von Webgenres},
  booktitle = {Sprachtechnologie, mobile Kommunikation und linguistische Ressourcen.
               Beitr{\"a}ge zur GLDV-Frühjahrstagung '05, 10. M{\"a}rz -- 01.
               April 2005, Universit{\"a}t Bonn},
  editor    = {Fisseni, Bernhard and Schmitz, Hans-Christian and Schröder, Bernhard
               and Wagner, Petra},
  pages     = {158--174},
  address   = {Frankfurt a. M.},
  publisher = {Lang},
  year      = {2005}
}

BibTeX

@inproceedings{Zahurul:Khan:2005,
  author    = {Islam, Md. Zahurul and Khan, Mumit},
  title     = {Teaching Compiler Development to Undergraduates Using a Template Based Approach},
  booktitle = {8th International Conference on Computer and Information Technology
               (ICCIT 2005), Dhaka, Bangladesh},
  abstract  = {Compiler Design remains one of the most dreaded courses in any
               undergraduate Computer Science curriculum, due in part to the
               complexity and the breadth of the material covered in a typical
               14-15 week semester time frame. The situation is further complicated
               by the fact that most undergraduates have never implemented a
               large enough software package that is needed for a working compiler,
               and to do so in such a short time span is a challenge indeed.
               This necessitates changes in the way we teach compilers, and specifically
               in ways we set up the project for the Compiler Design course at
               the undergraduate level. We describe a template based method for
               teaching compiler design and implementation to the undergraduates,
               where the students fill in the blanks in a set of templates for
               each phase of the compiler, starting from the lexical scanner
               to the code generator. Compilers for new languages can be implemented
               by modifying only the parts necessary to implement the syntax
               and the semantics of the language, leaving much of the remaining
               environment as is. The students not only learn how to design the
               various phases of the compiler, but also learn the software design
               and engineering techniques for implementing large software systems.
               In this paper, we describe a compiler teaching methodology that
               implements a full working compiler for an imperative C-like programming
               language with backend code generators for MIPS, Java Virtual Machine
               (JVM) and Microsoft’s .NET Common Language Runtime (CLR).},
  owner     = {zahurul},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Teaching_Compiler_Development_to_Undergraduates_using_a_Template_Based_Approach.pdf},
  timestamp = {2011.08.02},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.173.1323},
  year      = {2005}
}

Alexander Mehler. 2005. Lexical Chaining as a Source of Text Chaining. Proceedings of the 1st Computational Systemic Functional Grammar Conference, University of Sydney, Australia, 12–21.

BibTeX

@inproceedings{Mehler:2005:d,
  author    = {Mehler, Alexander},
  title     = {Lexical Chaining as a Source of Text Chaining},
  booktitle = {Proceedings of the 1st Computational Systemic Functional Grammar
               Conference, University of Sydney, Australia},
  editor    = {Patrick, Jon and Matthiessen, Christian},
  pages     = {12--21},
  abstract  = {July 16, 2005,},
  pdf       = {http://www.texttechnologylab.org/media/pdf/CohesionTrees1.pdf},
  year      = {2005}
}

Andreas Eisele and Tim vor der Brück. October, 2004. Error-Tolerant Finite-State Lookup for Trademark Search. 27th German Conference on Artificial Intelligence (KI). Springer Best Paper Award.

BibTeX

@inproceedings{Eisele:vor:der:Brueck:2004,
  author    = {Eisele, Andreas and vor der Brück, Tim},
  title     = {Error-Tolerant Finite-State Lookup for Trademark Search},
  booktitle = {27th German Conference on Artificial Intelligence (KI)},
  editor    = {Biundo, Susanne},
  address   = {Ulm, Germany},
  publisher = {Springer},
  note      = {Springer Best Paper Award},
  abstract  = {Error-tolerant lookup of words in large vocabularies has many potential
               uses, both within and beyond natural language processing (NLP).
               This work describes a generic library for finite-state-based lexical
               lookup, originally designed for NLP-related applications, that
               can be adapted to application-specific error metrics. We show
               how this tool can be used for searching existing trademarks in
               a database, using orthographic and phonetic similarity. We sketch
               a prototypical implementation of a trademark search engine and
               show results of a preliminary evaluation of this system.},
  month     = oct,
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/eisele_brueck_2004.pdf},
  specialnote = {Best Paper Award},
  specialnotewebsite = {http://www.springerlink.com/content/e98tbd0jv9clnh2m/},
  website   = {http://www.springerlink.com/content/e98tbd0jv9clnh2m/},
  year      = {2004}
}

Michael Rohn, Wolfgang Raatz and Tim vor der Brück. October, 2004. Objektive Optimierung der lokalen Wettervorhersage. DACH Meteorologenkonferenz.

BibTeX

@inproceedings{Rohn:Raatz:vor:der:Brueck:2004,
  author    = {Rohn, Michael and Raatz, Wolfgang and vor der Brück, Tim},
  title     = {Objektive Optimierung der lokalen Wettervorhersage},
  booktitle = {DACH Meteorologenkonferenz},
  address   = {Karlsruhe, Germany},
  abstract  = {Die lokale Wettervorhersage umfa{\ss}t einen Zeitraum von 0 bis
               178 Stunden und mu{\ss} daher die unterschiedlichsten Punktinformationen
               aus den Ergebnissen der numerischen Modellierung, konventioneller
               Beobachtungen von Bodenwetterelementen sowie Nowcasting-Produkten
               integrieren. Dabei liefern die Verfahren oft unterschiedliche
               Punktprognosen. Um eine Endvorhersage oder Guidance abzuleiten,
               müssen alle verfügbaren Informationen bezüglich ihrer Qualit{\"a}t
               bewertet werden, sodann eine Auswahl getroffen, und abschlie{\ss}end
               zu einer einzigen Aussage kombiniert werden. Dieses Problem von
               Selektion und Kombination verschiedener Vorhersageinformationen
               wird anschaulich von Winkler 1989 aus der Perspektive der Entscheidungstheorie
               beschrieben. In der t{\"a}glichen Routinearbeit des Vorhersagemeteorologen
               wird diese Integration 'intuitiv' vollzogen, basierend auf seiner
               meteorologischen Erfahrung über die synoptische Situation sowie
               seiner Kenntnisse der lokalen Charakteristika des Prognoseortes.
               Der DWD plant, den Vorhersageproze{\ss} durch ein Verfahren 'Objektive Optimierung'
               zu unterstützen, welches eine sog. Objektiv Optimierte Guidance
               OOG erzeugt. Das Verfahren umfa{\ss}t objektive Ans{\"a}tze zur
               Kombination verschiedener Vorhersagedaten sowie die kontinuierliche
               Aktualisierung durch Beobachtungs- und Nowcastingdaten.},
  month     = oct,
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/021_RoRaBr.pdf},
  url       = {http://pi7.fernuni-hagen.de/brueck/papers/021_RoRaBr.pdf},
  year      = {2004}
}

BibTeX

@inproceedings{Luecking:Rieser:Stegmann:2004,
  author    = {Lücking, Andy and Rieser, Hannes and Stegmann, Jens},
  title     = {Statistical Support for the Study of Structures in Multi-Modal
               Dialogue: Inter-Rater Agreement and Synchronization},
  booktitle = {Catalog '04---Proceedings of the Eighth Workshop on the Semantics
               and Pragmatics of Dialogue},
  editor    = {Ginzburg, Jonathan and Vallduví, Enric},
  pages     = {56--63},
  address   = {Barcelona},
  organization = {Department of Translation and Philology, Universitat
                   Pompeu Fabra},
  abstract  = {We present a statistical approach to assess relations that hold
               among speech and pointing gestures in and between turns in task-oriented
               dialogue. The units quantified over are the time-stamps of the
               XML-based annotation of the digital video data. It was found that,
               on average, gesture strokes do not exceed, but are freely distributed
               over the time span of their linguistic affiliates. Further, the
               onset of the affiliate was observed to occur earlier than gesture
               initiation. Moreover, we found that gestures do obey certain appropriateness
               conditions and contribute semantic content ('gestures save words')
               as well. Gestures also seem to play a functional role wrt dialogue
               structure: There is evidence that gestures can contribute to the
               bundle of features making up a turn-taking signal. Some statistical
               results support a partitioning of the domain, which is also reflected
               in certain rating difficulties. However, our evaluation of the
               applied annotation scheme generally resulted in very good agreement.},
  keywords  = {own},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/08-lucking-etal.pdf},
  year      = {2004}
}

BibTeX

@inproceedings{Mehler:2004:c,
  author    = {Mehler, Alexander},
  title     = {A Data-Oriented Model of Context in Hypertext Authoring},
  booktitle = {Proceedings of the 7th International Workshop on Organisational
               Semiotics (OS '04), July 19-20, 2004, Setúbal, Portugal},
  editor    = {Filipe, Joaquim and Liu, Kecheng},
  pages     = {24--45},
  address   = {Setúbal},
  publisher = {INSTICC},
  pdf       = {http://www.orgsem.org/papers/02.pdf},
  website   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.121.7944},
  year      = {2004}
}

BibTeX

@inproceedings{Dehmer:Mehler:Gleim:2004,
  author    = {Dehmer, Matthias and Mehler, Alexander and Gleim, Rüdiger},
  title     = {Aspekte der Kategorisierung von Webseiten},
  booktitle = {INFORMATIK 2004 – Informatik verbindet, Band 2, Beitr{\"a}ge der
               34. Jahrestagung der Gesellschaft für Informatik e.V. (GI). Workshop
               Multimedia-Informationssysteme},
  editor    = {Dadam, Peter and Reichert, Manfred},
  volume    = {51},
  series    = {Lecture Notes in Informatics},
  pages     = {39--43},
  publisher = {GI},
  abstract  = {Im Zuge der Web-basierten Kommunikation tritt die Frage auf, inwiefern
               Webpages zum Zwecke ihrer inhaltsorientierten Filterung kategorisiert
               werden können. Diese Studie untersucht zwei Ph{\"a}nomene, welche
               die Bedingung der Möglichkeit einer solchen Kategorisierung betreffen
               (siehe [6]): Mit dem Begriff der funktionalen {\"A}quivalenz beziehen
               wir uns auf das Ph{\"a}nomen, dass dieselbe Funktions- oder Inhaltskategorie
               durch völlig verschiedene Bausteine Web-basierter Dokumente manifestiert
               werden kann. Mit dem Begriff der Polymorphie beziehen wir uns
               auf das Ph{\"a}nomen, dass dasselbe Dokument zugleich mehrere
               Funktions- oder Inhaltskategorien manifestieren kann. Die zentrale
               Hypothese lautet, dass beide Ph{\"a}nomene für Web-basierte Hypertextstrukturen
               charakteristisch sind. Ist dies der Fall, so kann die automatische
               Kategorisierung von Hypertexten [2, 10] nicht mehr als eindeutige
               Zuordnung verstanden werden, bei der einem Dokument genau eine
               Kategorie zugeordnet wird. In diesem Sinne thematisiert das Papier
               die Frage nach der ad{\"a}quaten Modellierung multimedialer Dokumente.},
  pdf       = {http://subs.emis.de/LNI/Proceedings/Proceedings51/GI-Proceedings.51-11.pdf},
  website   = {https://www.researchgate.net/publication/221385316_Aspekte_der_Kategorisierung_von_Webseiten},
  year      = {2004}
}

Alexander Mehler and Siegfried Reich. 2003. Guided Tours + Trails := Guided Trails. Poster at the 14th ACM Conference on Hypertext and Hypermedia (Hypertext '03), Nottingham, August 26-30, 1–2.

BibTeX

@inproceedings{Mehler:Reich:2003,
  author    = {Mehler, Alexander and Reich, Siegfried},
  title     = {Guided Tours + Trails := Guided Trails},
  booktitle = {Poster at the 14th ACM Conference on Hypertext and Hypermedia
               (Hypertext '03), Nottingham, August 26-30},
  pages     = {1--2},
  website   = {http://www.sigweb.org/Ht03posters},
  year      = {2003}
}

BibTeX

@inproceedings{Mehler:Clarke:2002,
  author    = {Mehler, Alexander and Clarke, Rodney},
  title     = {Systemic Functional Hypertexts. An Architecture for Socialsemiotic
               Hypertext Systems},
  booktitle = {New Directions in Humanities Computing. The 14th Joint International
               Conference of the Association for Literary and Linguistic Computing
               and the Association for Computers and the Humanities (ALLC/ACH
               '02), July 24-28, University of Tübingen},
  pages     = {68--69},
  year      = {2002}
}

BibTeX

@inproceedings{Mehler:2002:e,
  author    = {Mehler, Alexander},
  title     = {Text Mining with the Help of Cohesion Trees},
  booktitle = {Classification, Automation, and New Media. Proceedings of the
               24th Annual Conference of the Gesellschaft für Klassifikation,
               March 15-17, 2000, Universit{\"a}t Passau},
  editor    = {Gaul, Wolfgang and Ritter, Gunter},
  pages     = {199--206},
  address   = {Berlin/New York},
  publisher = {Springer},
  abstract  = {In the framework of automatic text processing, semantic spaces
               are used as a format for modeling similarities of natural language
               texts represented as vectors. They prove to be efficient in divergent
               areas, as information retrieval (Dumais 1995), computational psychology
               (Landauer, Dumais 1997), and computational linguistics (Rieger
               1995; Mehler 1998). In order to group semantically similar texts,
               cluster analysis is used. A central problem of this method relates
               to the difficulty to name clusters, whereas lists neglect the
               polyhierarchical structure of semantic spaces. This paper introduces
               the concept of cohesion tree as an alternative tool for exploring
               similarity relations of texts represented in high dimensional
               spaces. Cohesion trees allow the perspective evaluation of numerically
               represented text similarities. They depart from minimal spanning
               trees (MST) by context-sensitively optimizing path costs. This
               central property underlies the linguistic interpretation of cohesion
               trees: instead of manifesting context-free associations, they
               model context priming effects.},
  website   = {http://www.springerlink.com/content/x484814744877078/},
  year      = {2002}
}

BibTeX

@inproceedings{Mehler:2002:f,
  author    = {Mehler, Alexander},
  title     = {Cohesive Paths: Applying the Concept of Cohesion to Hypertext},
  booktitle = {Sprachwissenschaft auf dem Weg in das dritte Jahrtausend. Proceedings
               of the 34th Linguistics Colloquium, September 7-10, 1999, Universit{\"a}t
               Mainz},
  editor    = {Rapp, Reinhard},
  pages     = {725--733},
  address   = {Frankfurt a. M.},
  publisher = {Peter Lang},
  year      = {2002}
}

BibTeX

@inproceedings{Mehler:2002:k,
  author    = {Mehler, Alexander},
  title     = {Hierarchical Orderings of Textual Units},
  booktitle = {Proceedings of the 19th International Conference on Computational
               Linguistics (COLING '02), August 24 – September 1, 2002, Taipei,
               Taiwan},
  pages     = {646--652},
  address   = {San Francisco},
  publisher = {Morgan Kaufmann},
  abstract  = {Text representation is a central task for any approach to automatic
               learning from texts. It requires a format which allows to interrelate
               texts even if they do not share content words, but deal with similar
               topics. Furthermore, measuring text similarities raises the question
               of how to organize the resulting clusters. This paper presents
               cohesion trees (CT) as a data structure for the perspective, hierarchical
               organization of text corpora. CTs operate on alternative text
               representation models taking lexical organization, quantitative
               text characteristics, and text structure into account. It is shown
               that CTs realize text linkages which are lexically more homogeneous
               than those produced by minimal spanning trees.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_2002_k.pdf},
  year      = {2002}
}

BibTeX

@inproceedings{Clarke:Mehler:1999,
  author    = {Clarke, Rodney and Mehler, Alexander},
  year      = {1999},
  title     = {Theorising Print Media in Contexts: A Systemic Semiotic
               Contribution to Computational Semiotics},
  booktitle = {Proceedings of the 7th International Congress of the
               IASS-AIS: International Association for Semiotic Studies –
               Sign Processes in Complex Systems, Dresden, University of
               Technology, October 6-11}
}

BibTeX

@inproceedings{Mehler:1999,
  author    = {Mehler, Alexander},
  title     = {Aspects of Text Semantics in Hypertext},
  booktitle = {Returning to our Diverse Roots. Proceedings of the 10th ACM Conference
               on Hypertext and Hypermedia (Hypertext '99), February 21-25, 1999,
               Technische Universit{\"a}t Darmstadt},
  editor    = {Tochtermann, Klaus and Westbomke, Jörg and Wiil, Uffe K. and Leggett, John J.},
  pages     = {25--26},
  address   = {New York},
  publisher = {ACM Press},
  pdf       = {{http://dl.acm.org/ft_gateway.cfm?id=294477&ftid=30049&dwn=1&CFID=722943569&CFTOKEN=97409508}},
  website   = {http://dl.acm.org/citation.cfm?id=294477},
  year      = {1999}
}

BibTeX

@inproceedings{Mehler:1998,
  author    = {Mehler, Alexander},
  title     = {Toward Computational Aspects of Text Semiotics},
  booktitle = {Proceedings of the 1998 Joint Conference of IEEE ISIC, IEEE CIRA,
               and ISAS on the Science and Technology of Intelligent Systems,
               September 14-17, 1998, NIST, Gaithersburg, USA},
  editor    = {Albus, James and Meystel, Alex},
  pages     = {807--813},
  address   = {Gaithersburg},
  publisher = {IEEE},
  website   = {http://www.researchgate.net/publication/3766784_Toward_computational_aspects_of_text_semiotics},
  year      = {1998}
}

BibTeX

@inproceedings{Mehler:1996:a,
  author    = {Mehler, Alexander},
  title     = {A Multiresolutional Approach to Fuzzy Text Meaning -- a First Attempt},
  booktitle = {Proceedings of the 1996 International Multidisciplinary Conference
               on Intelligent Systems: A Semiotic Perspective, Gaithersburg,
               Maryland, October 20-23},
  editor    = {Albus, James and Meystel, Alex and Quintero, Richard},
  volume    = {I},
  pages     = {261--273},
  address   = {Gaithersburg},
  publisher = {National Institute of Standards and Technology (NIST)},
  year      = {1996}
}

Miscellaneous

Andy Lücking and Alexander Mehler. 2026–01–28/2026–01–30. Sprachbegleitende Gesten, KI und Virtuelle Realität. Invited talk.

BibTeX

@misc{Luecking:Mehler:2026,
  author    = {Lücking, Andy and Mehler, Alexander},
  title     = {{Sprachbegleitende Gesten, KI und Virtuelle Realität}},
  subtitle  = {{Multimodale Kommunikationsforschung im Schnittfeld von Linguistik und Computerwissenschaft}},
  howpublished = {Invited talk at DaFWEBKON26, Webkonferenz für
                  Deutschlehrende},
  date      = {2026-01-28/2026-01-30},
  url       = {https://dafwebkon.com/events/sprachbegleitende-gesten/},
  keywords  = {talk, cosgrin-vr},
  note      = {Invited talk},
  abstract  = {Alltagskommunikation ist üblicherweise multimodal (d.h., nutzt
               mehr als einen Informationskanal). Gesprochene Sprache wird beispielsweise
               von manuellen Gesten begleitet. Diese Gesten wiederum können über
               die linguistische Bedeutung hinausgehende Information beitragen.
               Sie sind also semantisch interessant.<br><br>Der Vortrag skizziert
               eine räumliche Gestensemantik und führt in KI-gestützte Gestenklassifikation
               ein. Um multimodale Verhaltensdaten zu erfassen und auszuwerten,
               werden zunehmend Methoden der Virtuellen Realität (VR) eingesetzt.
               Das Frankfurter Va.Si.Li-Lab kombiniert KI und VR für Multimodalitätsforschung.
               Auf diese Weise lassen sich z.B. multimodale, avatarbasierte VR-Interaktionen
               untersuchen und mit Face-to-face-Interaktionen vergleichen. Der
               Vortrag stellt erste Ergebnisse vor.}
}

Andy Lücking. 2025–12–01/2025–12–02. Formal and Computational Iconic Gesture Semantics. Invited talk.

BibTeX

@misc{Luecking:2025-zif,
  author    = {Lücking, Andy},
  title     = {Formal and Computational Iconic Gesture Semantics},
  howpublished = {Invited talk at the ZiF Workshop \textit{Multimodal Creativity},
                  Zentrum für interdisziplinäre Forschung, Universität
                  Bielefeld},
  date      = {2025-12-01/2025-12-02},
  note      = {Invited talk},
  keywords  = {cosgrin-vr}
}

Andy Lücking and Alexander Henlein. 2025–07–28/2025–08–08. Spatial Gesture Semantics. ESSLLI 2025 Advanced Course, Ruhr University Bochum.

BibTeX

@misc{Luecking:Henlein:2025-esslli,
  author    = {Lücking, Andy and Henlein, Alexander},
  title     = {Spatial Gesture Semantics},
  howpublished = {ESSLLI 2025 Advanced Course, Ruhr University Bochum},
  note      = {ESSLLI 2025 Advanced Course, Ruhr University Bochum},
  date      = {2025-07-28/2025-08-08},
  year      = {2025},
  keywords  = {gemdis},
  url       = {https://aluecking.github.io/ESSLLI2025/}
}

Andy Lücking. 2025–09–24. From Gesture Representation to Spatial Gesture Semantics. Invited talk.

BibTeX

@misc{Luecking:2025-mmsr,
  author    = {Lücking, Andy},
  title     = {From Gesture Representation to Spatial Gesture Semantics},
  howpublished = {Invited talk at the IWCS Workshop \textit{Beyond Language:
                  Multimodal Semantic Representations} (MMSR II), Heinrich
                  Heine University, Düsseldorf},
  note      = {Invited talk},
  date      = {2025-09-24},
  url       = {https://mmsr-workshop.github.io/},
  keywords  = {gemdis}
}

Andy Lücking. 2024–03–14. Gesture semantics: Deictic Reference, deferred reference, and iconic co-speech gestures. Invited talk.

BibTeX

@misc{Luecking:2024-quebec,
  author    = {Lücking, Andy},
  title     = {Gesture semantics: Deictic Reference, deferred reference,
               and iconic co-speech gestures},
  howpublished = {Invited talk at Stevan Harnad's interdisciplinary seminar
                  series in Cognitive Informatics at the Université du
                  Québec à Montréal},
  date      = {2024-03-14},
  note      = {Invited talk},
  keywords  = {gemdis}
}

Ali Abusaleh and Mehdi Rahim. 2024. A Multitask VAE for Time Series Preprocessing and Prediction of Blood Glucose Level.

BibTeX

@misc{Abusaleh:Rahim:2024,
  title     = {A Multitask {VAE} for Time Series Preprocessing and Prediction of
               Blood Glucose Level},
  author    = {Ali Abusaleh and Mehdi Rahim},
  year      = {2024},
  eprint    = {2410.00015},
  archiveprefix = {arXiv},
  primaryclass = {eess.SP},
  url       = {https://arxiv.org/abs/2410.00015}
}

BibTeX

@misc{Owoyele:et:al:2024,
  title     = {{MaskAnyone} Toolkit: Offering Strategies for Minimizing Privacy
               Risks and Maximizing Utility in Audio-Visual Data Archiving},
  author    = {Babajide Alamu Owoyele and Martin Schilling and Rohan Sawahn and Niklas Kaemer
               and Pavel Zherebenkov and Bhuvanesh Verma and Wim Pouw and Gerard de Melo},
  year      = {2024},
  eprint    = {2408.03185},
  archiveprefix = {arXiv},
  primaryclass = {cs.CR},
  url       = {https://arxiv.org/abs/2408.03185}
}

Bhuvanesh Verma and Lisa Raithel. 2024. DFKI-NLP at SemEval-2024 Task 2: Towards Robust LLMs Using Data Perturbations and MinMax Training.

BibTeX

@misc{Verma:Raithel:2024,
  title     = {{DFKI-NLP} at {SemEval-2024} Task 2: Towards Robust {LLMs} Using Data
               Perturbations and {MinMax} Training},
  author    = {Bhuvanesh Verma and Lisa Raithel},
  year      = {2024},
  eprint    = {2405.00321},
  archiveprefix = {arXiv},
  primaryclass = {cs.CL},
  url       = {https://arxiv.org/abs/2405.00321}
}

Andy Lücking. 2022–05–03. Pointing: From reference to attention and back. Invited talk.

BibTeX

@misc{Luecking:2022-bochum,
  author    = {Lücking, Andy},
  title     = {Pointing: From reference to attention and back},
  howpublished = {Invited talk at the Language Colloquium, Ruhr-Universit{\"a}t
                  Bochum},
  date      = {2022-05-03},
  note      = {Invited talk},
  keywords  = {gemdis}
}

Arne Binder, Bhuvanesh Verma and Leonhard Hennig. 2022. Full-Text Argumentation Mining on Scientific Publications.

BibTeX

@misc{Binder:et:al:2022,
  author    = {Arne Binder and Bhuvanesh Verma and Leonhard Hennig},
  title     = {Full-Text Argumentation Mining on Scientific Publications},
  eprint    = {2210.13084},
  archiveprefix = {arXiv},
  primaryclass = {cs.CL},
  url       = {https://arxiv.org/abs/2210.13084},
  year      = {2022}
}

BibTeX

@misc{Konca:et:al:2022,
  author    = {Konca, Maxim and L{\"u}cking, Andy and Mehler, Alexander
               and Nagel, Marie-Theres and Zlatkin-Troitschanskaia, Olga},
  title     = {Computational educational linguistics for `Critical Online
               Reasoning' among young professionals in medicine, law and
               teaching},
  howpublished = {Presentation given at the AERA annual meeting, 21.-26.04. 2022, WERA symposium},
  year      = {2022},
  month     = {04},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2022/10/BRIDGE_WERA_AERA-2022_reduce.pdf}
}

BibTeX

@misc{Mehler:et:al:2022,
  author    = {Mehler, Alexander and Konca, Maxim and Nagel, Marie-Theres and L{\"u}cking, Andy
               and Zlatkin-Troitschanskaia, Olga},
  year      = {2022},
  month     = {03},
  howpublished = {Presentation at BEBF 2022},
  title     = {On latent domain-specific textual preferences in solving Internet-based
               generic tasks among graduates/young professionals from three domains},
  abstract  = {Although Critical Online Reasoning (COR) is often viewed as a
               general competency (e.g. Alexander et al. 2016), studies have
               found evidence supporting their domain-specificity (Toplak et
               al. 2002). To investigate this assumption, we focus on commonalities
               and differences in textual preferences in solving COR-related
               tasks between graduates/young professionals from three domains.
               For this reason, we collected data by requiring participants to
               solve domain-specific (DOM-COR) and generic (GEN-COR) tasks in
               an authentic Internet-based COR performance assessment (CORA),
               allowing us to disentangle the assumed components of COR abilities.
               Here, we focus on GEN-COR to distinguish between different groups
               of graduates from the three disciplines in the context of generic
               COR tasks. We present a computational model for educationally
               relevant texts that combines features at multiple levels (lexical,
               syntactic, semantic). We use machine learning to predict domain-specific
               group membership based on documents consulted during task solving.
               A major contribution of our analyses is a multi-part text classification
               system that contrasts human annotation and rating of the documents
               used with a semi-automatic classification to predict the document
               type of web pages. That is, we work with competing classifications
               to support our findings. In this way, we develop a computational
               linguistic model that correlates GEN-COR abilities with properties
               of documents consulted for solving the GEN-COR tasks. Results
               show that participants from different domains indeed inquire different
               sets of online sources for the same task. Machine learning-based
               classifications show that the distributional differences can be
               reproduced by computational linguistic models.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2022/04/On_latent_domain-specific_textual_preferences_in_solving_Internet-based_generic_tasks_among_graduates__young_professionals_from_three_domains.pdf}
}

Giuseppe Abrami, Sajawel Ahmed, Rüdiger Gleim, Wahed Hemati, Alexander Mehler and Tolga Uslu. March, 2018. Natural Language Processing and Text Mining for BIOfid.

BibTeX

@misc{Abrami:et:al:2018b,
  author    = {Abrami, Giuseppe and Ahmed, Sajawel and Gleim, R{\"u}diger and Hemati, Wahed
               and Mehler, Alexander and Uslu, Tolga},
  title     = {{Natural Language Processing and Text Mining for BIOfid}},
  howpublished = {Presentation at the 1st Meeting of the Scientific Advisory Board of the BIOfid Project},
  address   = {Goethe-University, Frankfurt am Main, Germany},
  year      = {2018},
  month     = {March},
  day       = {08}
}

Alexander Mehler, Andy Lücking, Tim vor der Brück and Giuseppe Abrami. November, 2013. WikiNect - A Kinetic Artwork Wiki for Exhibition Visitors.

BibTeX

@misc{Mehler:Luecking:vor:der:Brueck:2013:a,
  author    = {Mehler, Alexander and Lücking, Andy and vor der Brück, Tim
               and Abrami, Giuseppe},
  title     = {WikiNect - A Kinetic Artwork Wiki for Exhibition Visitors},
  howpublished = {Poster Presentation at the Scientific Computing and Cultural
                   Heritage 2013 Conference, Heidelberg},
  year      = {2013},
  month     = {11},
  url       = {http://scch2013.wordpress.com/},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/SCCHPoster2013.pdf},
  keywords  = {wikinect}
}

Andy Lücking. May, 2013. Theoretische Bausteine für einen semiotischen Ansatz zum Einsatz von Gestik in der Aphasietherapie.

BibTeX

@misc{Luecking:2013:c,
  author    = {Lücking, Andy},
  title     = {Theoretische Bausteine für einen semiotischen Ansatz zum
               Einsatz von Gestik in der Aphasietherapie},
  howpublished = {Talk at the BKL workshop 2013, Bochum},
  year      = {2013},
  month     = {05},
  url       = {http://www.bkl-ev.de/bkl_workshop/archiv/workshop13_programm.php}
}

Andy Lücking. October, 2013. Eclectic Semantics for Non-Verbal Signs.

BibTeX

@misc{Luecking:2013:d,
  author    = {Lücking, Andy},
  title     = {Eclectic Semantics for Non-Verbal Signs},
  howpublished = {Talk at the Conference on Investigating semantics: Empirical
                   and philosophical approaches, Bochum},
  year      = {2013},
  month     = {10},
  url       = {http://www.ruhr-uni-bochum.de/phil-lang/investigating/index.html}
}

Andy Lücking and Kirsten Bergmann. July, 2010. Introducing the Bielefeld SaGA Corpus.

BibTeX

@misc{Luecking:Bergmann:2010,
  author    = {Andy L{\"u}cking and Kirsten Bergmann},
  title     = {Introducing the {B}ielefeld {SaGA} Corpus},
  howpublished = {Talk given at \textit{Gesture: Evolution, Brain, and
                   Linguistic Structures.} 4th Conference of the
                   International Society for Gesture Studies (ISGS).
                   Europa Universit{\"a}t Viadrina Frankfurt/Oder},
  abstract  = {People communicate multimodally. Most prominently, they co-produce
               speech and gesture. How do they do that? Studying the interplay
               of both modalities has to be informed by empirically observed
               communication behavior. We present a corpus built of speech and
               gesture data gained in a controlled study. We describe 1) the
               setting underlying the data; 2) annotation of the data; 3) reliability
               evaluation methods and results; and 4) applications of the corpus
               in the research domain of speech and gesture alignment.},
  address   = {Europa Universit{\"a}t Viadrina Frankfurt/Oder},
  day       = {28},
  month     = {07},
  year      = {2010}
}

Alfred Kranstedt, Andy Lücking, Thies Pfeiffer, Hannes Rieser and Marc Staudacher. June, 2007. Locating Objects by Pointing.

BibTeX

@misc{Kranstedt:et:al:2007,
  author    = {Kranstedt, Alfred and Lücking, Andy and Pfeiffer, Thies
               and Rieser, Hannes and Staudacher, Marc},
  title     = {Locating Objects by Pointing},
  howpublished = {3rd International Conference of the International Society
                   for Gesture Studies. Evanston, IL, USA},
  year      = {2007},
  month     = {6},
  keywords  = {own}
}

PhD Theses

Alexander Henlein. 2023. PhD Thesis: Toward context-based text-to-3D scene generation.

BibTeX

@phdthesis{Henlein:2023,
  author    = {Alexander Henlein},
  title     = {Toward context-based text-to-3D scene generation},
  school    = {Johann Wolfgang Goethe-Universität},
  type      = {doctoralthesis},
  year      = {2023},
  pages     = {199},
  doi       = {10.21248/gups.73448},
  pdf       = {https://publikationen.ub.uni-frankfurt.de/files/73448/main.pdf},
  keywords  = {gemdis}
}

Tolga Uslu. 2020. PhD Thesis: Multi-document analysis : semantic analysis of large text corpora beyond topic modeling.

BibTeX

@phdthesis{Uslu:2020,
  author    = {Tolga Uslu},
  title     = {Multi-document analysis : semantic analysis of large text corpora
               beyond topic modeling},
  school    = {Goethe University Frankfurt},
  pages     = {204},
  year      = {2020},
  url       = {http://publikationen.ub.uni-frankfurt.de/frontdoor/index/index/docId/56140},
  pdf       = {http://publikationen.ub.uni-frankfurt.de/files/56140/Dissertation_Tolga_Uslu.pdf}
}

Wahed Hemati. 2020. PhD Thesis: TextImager-VSD : large scale verb sense disambiguation and named entity recognition in the context of TextImager.

BibTeX

@phdthesis{Hemati:2020,
  author    = {Wahed Hemati},
  title     = {TextImager-VSD : large scale verb sense disambiguation and named
               entity recognition in the context of TextImager},
  school    = {Goethe University Frankfurt},
  pages     = {174},
  year      = {2020},
  url       = {http://publikationen.ub.uni-frankfurt.de/frontdoor/index/index/docId/56089},
  pdf       = {http://publikationen.ub.uni-frankfurt.de/files/56089/dissertation_Wahed_Hemati.pdf}
}

Armin Hoenen. 2018. PhD Thesis: Tools, evaluation and preprocessing for stemmatology.

BibTeX

@phdthesis{Hoenen2018,
  author    = {Armin Hoenen},
  title     = {Tools, evaluation and preprocessing for stemmatology},
  school    = {Goethe University Frankfurt},
  type      = {Dissertation},
  year      = {2018}
}

Mohammad Zahurul Islam. 2015. PhD Thesis: Multilingual text classification using information-theoretic features.

BibTeX

@phdthesis{Islam:2015,
  author    = {Mohammad Zahurul Islam},
  title     = {Multilingual text classification using information-theoretic features},
  school    = {Goethe University Frankfurt},
  pages     = {189},
  year      = {2015},
  pdf       = {http://publikationen.ub.uni-frankfurt.de/files/38157/thesis.pdf},
  abstract  = {The number of multilingual texts in the World Wide Web (WWW) is
               increasing dramatically and a multilingual economic zone like
               the European Union (EU) requires the availability of multilingual
               Natural Language Processing (NLP) tools. Due to a rapid development
               of NLP tools, many lexical, syntactic, semantic and other linguistic
               features have been used in different NLP applications. However,
               there are some situations where these features can not be used
               due to the application type or unavailability of NLP resources for
               some of the languages. That is why an application that is intended
               to handle multilingual texts must have features that are not dependent
               on a particular language and specific linguistic tools. In this
               thesis, we will focus on two such applications: text readability
               and source and translation classification. In this thesis, we
               provide 18 features that are not only suitable for both applications,
               but are also language and linguistic tools independent. In order
               to build a readability classifier, we use texts from three different
               languages: English, German and Bangla. Our proposed features achieve
               a classification accuracy that is comparable with a classifier
               using 40 linguistic features. The readability classifier achieves
               a classification F-score of 74.21\% on the English Wikipedia corpus,
               an F-score of 75.47\% on the English textbook corpus, an F-score
               of 86.46\% on the Bangla textbook corpus and an F-score of 86.26\%
               on the German GEO/GEOLino corpus. We used more than two million
               sentence pairs from 21 European languages in order to build the
               source and translation classifier. The classifier using the same
               eighteen features achieves a classification accuracy of 86.63\%.
               We also used the same features to build a classifier that classifies
               translated texts based on their origin. The classifier achieves
               classification accuracy of 75\% for texts from 10 European languages.
               In this thesis, we also provide four different corpora, three
               for text readability analysis and one for corpus based translation
               studies.}
}

Olga Abramov. 2012. PhD Thesis: Network theory applied to linguistics: new advances in language classification and typology.

BibTeX

@phdthesis{Abramov:2012,
  author    = {Abramov, Olga},
  title     = {Network theory applied to linguistics: new advances in language
               classification and typology},
  school    = {Bielefeld University, Germany},
  year      = {2012},
  abstract  = {This thesis bridges between two scientific fields -- linguistics
               and computer science -- in terms of Linguistic Networks. From
               the linguistic point of view we examine whether languages can
               be distinguished when looking at network topology of different
               linguistic networks. We deal with up to 17 languages and ask how
               far the methods of network theory reveal the peculiarities of
               single languages. We present and apply network models from different
               levels of linguistic representation: syntactic, phonological and
               morphological. The network models presented here allow to integrate
               various linguistic features at once, which enables a more abstract,
               holistic view at the particular language. From the point of view
               of computer science we elaborate the instrumentarium of network
               theory applying it to a new field. We study the expressiveness
               of different network features and their ability to characterize
               language structure. We evaluate the interplay of these features
               and their goodness in the task of classifying languages genealogically.
               Among others we compare network features related to: average degree,
               average geodesic distance, clustering, entropy-based indices,
               assortativity, centrality, compactness etc. We also propose some
               new indices that can serve as additional characteristics of networks.
               The results obtained show that network models succeed in classifying
               related languages, and allow to study language structure in general.
               The mathematical analysis of the particular network indices brings
               new insights into the nature of these indices and their potential
               when applied to different networks.},
  pdf       = {https://pub.uni-bielefeld.de/download/2538828/2542368},
  website   = {https://pub.uni-bielefeld.de/publication/2538828}
}

Proceedings

Andy Lücking, Chiara Mazzocconi and Darinka Verdonik. 2023. Proceedings of the 27th Workshop on the Semantics and Pragmatics of Dialogue. SemDial 2023 – MariLogue. University of Maribor.

BibTeX

@proceedings{SemDial:2023-marilogue,
  editor    = {Lücking, Andy and Mazzocconi, Chiara and Verdonik, Darinka},
  editor+an = {1=highlight},
  title     = {Proceedings of the 27th Workshop on the Semantics and Pragmatics of Dialogue},
  series    = {SemDial 2023 -- MariLogue},
  publisher = {University of Maribor},
  year      = {2023},
  url       = {https://www.semdial.org/anthology/events/semdial-2023/},
  keywords  = {own,editor}
}

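Techreports

Cordula Artelt, Anika Schenck-Fontaine, Corinna Kleinert, Stefan Liebig, Alexander Mehler and Reinhard Pollak. February, 2026. Infrastructure Priority Programme "New Data Spaces for the Social Sciences" (SPP 2431) – Programme Overview.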

BibTeX

@techreport{Artelt:et:al:2026,
  author    = {Artelt, Cordula and Schenck-Fontaine, Anika and Kleinert, Corinna
               and Liebig, Stefan and Mehler, Alexander and Pollak, Reinhard},
  title     = {Infrastructure Priority Programme {``New Data Spaces for the Social
               Sciences''} ({SPP} 2431) -- Programme Overview},
  institution = {New Data Spaces for the Social Sciences (SPP 2431)},
  series    = {New Data Spaces | Reports},
  number    = {1},
  year      = {2026},
  month     = feb,
  keywords  = {spp},
  doi       = {10.5157/SPP2431:WP1:1.0},
  pdf       = {https://www.new-data-spaces.de/Portals/11/adam/Publication/ZhRUzWdz0kyeOzHlsyzWOw/PDFfile/WP-1%20Programme%20Description.pdf}
}

Nasimeh Bahmanian, Mercedes Martinez Bruera, Andy Lücking, Leon Hammerla, Giuseppe Abrami, Manfred Sailer, Alexander Mehler and Sol Lago. 2025. Data management protocol for CRC 1629.

BibTeX

@techreport{Bahmanian:et:al:2025,
  author    = {Bahmanian, Nasimeh
               and Martinez Bruera, Mercedes
               and Lücking, Andy
               and Hammerla, Leon
               and Abrami, Giuseppe
               and Sailer, Manfred
               and Mehler, Alexander
               and Lago, Sol},
  title     = {Data management protocol for CRC 1629},
  institution = {CRC 1629 NegLaB - INF},
  number    = {1},
  year      = {2025},
  url       = {https://next.hessenbox.de/index.php/s/zQYBAfeXTJSDaib},
  keywords  = {neglab}
}

Andy Lücking and Jens Stegmann. 2005. Assessing Reliability on Annotations (2): Statistical Results for the DeiKon Scheme.

BibTeX

@techreport{Luecking:Stegmann:2005,
  author    = {L{\"u}cking, Andy and Stegmann, Jens},
  title     = {Assessing Reliability on Annotations (2): Statistical Results
               for the \textsc{DeiKon} Scheme},
  institution = {SFB 360},
  address   = {Universit{\"a}t Bielefeld},
  number    = {3},
  year      = {2005},
  url       = {http://www.sfb360.uni-bielefeld.de/reports/2005/2005-03.html}
}

Jens Stegmann and Andy Lücking. 2005. Assessing Reliability on Annotations (1): Theoretical Considerations.

BibTeX

@techreport{Stegmann:Luecking:2005,
  author    = {Stegmann, Jens and L{\"u}cking, Andy},
  title     = {Assessing Reliability on Annotations (1): Theoretical Considerations},
  institution = {SFB 360},
  address   = {Universit{\"a}t Bielefeld},
  number    = {2},
  year      = {2005},
  url       = {http://www.sfb360.uni-bielefeld.de/reports/2005/2005-02.html}
}