# Publications

## Total: 394

### 2021 (8)

• A. Mehler, D. Baumartz, and T. Uslu, “SemioGraphs: Visualizing Topic Networks as Multi-Codal Graphs,” in International Quantitative Linguistics Conference (QUALICO 2021), 2021.
[Poster] [BibTeX]

@InProceedings{Mehler:Uslu:Baumartz:2021,
Author         = {Mehler, Alexander and Baumartz, Daniel and Uslu, Tolga},
Title          = {{SemioGraphs:} Visualizing Topic Networks as Multi-Codal Graphs},
BookTitle      = {International Quantitative Linguistics Conference (QUALICO 2021)},
Series         = {QUALICO 2021},
location       = {Tokyo, Japan},
year           = {2021},
poster   = {https://www.texttechnologylab.org/files/Qualico_2021_Semiograph_Poster.pdf}
}
• J. Ginzburg and A. Lücking, “Requesting clarifications with speech and gestures,” in Beyond Language: Multimodal Semantic Representations, 2021.
[BibTeX]

@InProceedings{Ginzburg:Luecking:2021:a,
title =     {Requesting clarifications with speech and gestures},
author =     {Ginzburg, Jonathan and L{\"u}cking, Andy},
booktitle =     {Beyond Language: Multimodal Semantic
Representations},
series =     {MMSR I},
year =     2021,
location =     {Virtually at the University of Groningen, held in conjunction with IWCS 2021},
url =          {https://iwcs2021.github.io/proceedings/mmsr/pdf/2021.mmsr-1.3.pdf}
}
• A. Henlein, G. Abrami, A. Kett, C. Spiekermann, and A. Mehler, “Digital Learning, Teaching and Collaboration in an Era of ubiquitous Quarantine,” in Remote Learning in Times of Pandemic – Issues, Implications and Best Practice, L. Daniela and A. Visvizi, Eds., Abingdon-on-Thames, Oxfordshire, England, UK: Routledge, 2021.
[BibTeX]

@incollection{Henlein:et:al:2021,
author    = {Alexander Henlein and Giuseppe Abrami and Attila Kett and Christian Spiekermann and Alexander Mehler},
title     = {Digital Learning, Teaching and Collaboration in an Era of ubiquitous Quarantine},
editor      = "Linda Daniela and Anna Visvizin",
booktitle   = "Remote Learning in Times of Pandemic - Issues, Implications and Best Practice",
publisher   = "Routledge",
address     = "Thames, Oxfordshire, England, UK",
year        = 2021,
chapter     = 3
}
• A. Lücking, C. Driller, M. Stoeckel, G. Abrami, A. Pachzelt, and A. Mehler, “Multiple Annotation for Biodiversity: Developing an annotation framework among biology, linguistics and text technology,” Language Resources and Evaluation, 2021.
[BibTeX]

@Article{Luecking:et:al:2021,
author =     {Andy Lücking and Christine Driller and Manuel
Stoeckel and Giuseppe Abrami and Adrian Pachzelt and
Alexander Mehler},
year =     {2021},
journal = {Language Resources and Evaluation},
title =     {Multiple Annotation for Biodiversity: Developing an annotation framework among biology,
linguistics and text technology},
editor =     {Nancy Ide and Nicoletta Calzolari},
doi = {10.1007/s10579-021-09553-5},
}
• P. Fischer, A. Smajic, G. Abrami, and A. Mehler, “Multi-Type-TD-TSR – Extracting Tables from Document Images using a Multi-stage Pipeline for Table Detection and Table Structure Recognition: from OCR to Structured Table Representations,” in Proceedings of the 44th German Conference on Artificial Intelligence, 2021.
[BibTeX]

@InProceedings{Fischer:et:al:2021,
Author         = {Fischer, Pascal and Smajic, Alen and Abrami, Giuseppe and Mehler, Alexander},
Title          = {Multi-Type-TD-TSR - Extracting Tables from Document Images using a Multi-stage Pipeline for Table Detection and Table Structure Recognition: from OCR to Structured Table Representations},
BookTitle      = {Proceedings of the 44th German Conference on Artificial Intelligence},
Series         = {KI2021},
location       = {Berlin, Germany},
year           = {2021},
url     = {https://www.springerprofessional.de/multi-type-td-tsr-extracting-tables-from-document-images-using-a/19711570},
pdf     = {https://arxiv.org/pdf/2105.11021.pdf}
}
• M. Klement, A. Henlein, and A. Mehler, “VoxML Annotation Tool Review and Suggestions for Improvement,” in Proceedings of the Seventeenth Joint ACL – ISO Workshop on Interoperable Semantic Annotation (ISA-17, Note for special track on visual information annotation), 2021.
[BibTeX]

@InProceedings{Klement:et:al:2021,
Author         = {Klement, Mark and Henlein, Alexander and Mehler, Alexander},
Title          = {VoxML Annotation Tool Review and Suggestions for Improvement},
BookTitle      = {Proceedings of the Seventeenth Joint ACL - ISO Workshop on Interoperable Semantic Annotation (ISA-17, Note for special track on visual information annotation)},
Series         = {ISA-17},
location       = {Groningen, Netherlands},
month     = {June},
year           = {2021},
pdf      = {https://sigsem.uvt.nl/isa17/32_Klement-Paper.pdf}
}
• G. Abrami, A. Henlein, A. Lücking, A. Kett, P. Adeberg, and A. Mehler, “Unleashing annotations with TextAnnotator: Multimedia, multi-perspective document views for ubiquitous annotation,” in Proceedings of the Seventeenth Joint ACL – ISO Workshop on Interoperable Semantic Annotation (ISA-17), 2021.
[BibTeX]

@InProceedings{Abrami:et:al:2021,
Author         = {Abrami, Giuseppe and Henlein, Alexander and Lücking, Andy and Kett, Attila and Adeberg, Pascal and Mehler, Alexander},
Title          = {Unleashing annotations with {TextAnnotator}: Multimedia, multi-perspective document views for ubiquitous annotation},
BookTitle      = {Proceedings of the Seventeenth Joint ACL - ISO Workshop on Interoperable Semantic Annotation (ISA-17)},
Series         = {ISA-17},
location       = {Groningen, Netherlands},
month     = {June},
year           = {2021},
pdf     = {https://iwcs2021.github.io/proceedings/isa/pdf/2021.isa-1.7.pdf}
}
• A. Lücking, S. Brückner, G. Abrami, T. Uslu, and A. Mehler, “Computational linguistic assessment of textbooks and online texts by means of threshold concepts in economics,” Frontiers in Education, 2021.
[BibTeX]

@article{Luecking:Brueckner:Abrami:Uslu:Mehler:2021,
journal =     {Frontiers in Education},
doi =         {10.3389/feduc.2020.578475},
title =     {Computational linguistic assessment of textbooks and
online texts by means of threshold concepts in
economics},
author =     {L{\"u}cking, Andy and Br{\"u}ckner, Sebastian and
Abrami, Giuseppe and Uslu, Tolga and Mehler,
Alexander},
eid =         {578475},
url =         {https://www.frontiersin.org/articles/10.3389/feduc.2020.578475/},
year = {2021}
}

### 2020 (21)

• J. Ginzburg and A. Lücking, “I thought pointing is rude: A dialogue-semantic analysis of pointing at the addressee,” in Proceedings of Sinn und Bedeutung 25, 2020. Special session: Gestures and Natural Language Semantics.
[BibTeX]

@InProceedings{Ginzburg:Luecking:2020:b,
author =     {Ginzburg, Jonathan and L{\"u}cking, Andy},
title =     {I thought pointing is rude: {A} dialogue-semantic
analysis of pointing at the addressee},
booktitle =     {Proceedings of \textit{Sinn und Bedeutung 25}},
series =     {SuB 25},
year =     2020,
note =     {Special session: Gestures and Natural Language
Semantics},
location =     {Virtually at University College London}
}
• T. Uslu, “Multi-document analysis: semantic analysis of large text corpora beyond topic modeling,” PhD Thesis, 2020.
[BibTeX]

@phdthesis{Uslu:2020,
author      = {Tolga Uslu},
title       = {Multi-document analysis: semantic analysis of large text corpora beyond topic modeling},
pages       = {204},
year        = {2020},
url         = {http://publikationen.ub.uni-frankfurt.de/frontdoor/index/index/docId/56140},
pdf        = {http://publikationen.ub.uni-frankfurt.de/files/56140/Dissertation_Tolga_Uslu.pdf}
}
• W. Hemati, “TextImager-VSD: large scale verb sense disambiguation and named entity recognition in the context of TextImager,” PhD Thesis, 2020.
[BibTeX]

@phdthesis{Hemati:2020,
author      = {Wahed Hemati},
title       = {TextImager-VSD: large scale verb sense disambiguation and named entity recognition in the context of TextImager},
pages       = {174},
year        = {2020},
url        = {http://publikationen.ub.uni-frankfurt.de/frontdoor/index/index/docId/56089},
pdf        = {http://publikationen.ub.uni-frankfurt.de/files/56089/dissertation_Wahed_Hemati.pdf}
}
• A. Mehler, W. Hemati, P. Welke, M. Konca, and T. Uslu, “Multiple Texts as a Limiting Factor in Online Learning: Quantifying (Dis-)similarities of Knowledge Networks,” Frontiers in Education, vol. 5, p. 206, 2020.
[Abstract] [BibTeX]

We test the hypothesis that the extent to which one obtains information on a given topic through Wikipedia depends on the language in which it is consulted. Controlling the size factor, we investigate this hypothesis for a number of 25 subject areas. Since Wikipedia is a central part of the web-based information landscape, this indicates a language-related, linguistic bias. The article therefore deals with the question of whether Wikipedia exhibits this kind of linguistic relativity or not. From the perspective of educational science, the article develops a computational model of the information landscape from which multiple texts are drawn as typical input of web-based reading. For this purpose, it develops a hybrid model of intra- and intertextual similarity of different parts of the information landscape and tests this model on the example of 35 languages and corresponding Wikipedias. In the way it measures the similarities of hypertexts, the article goes beyond existing approaches by examining their structural and semantic aspects intra- and intertextually. In this way it builds a bridge between reading research, educational science, Wikipedia research and computational linguistics.
@article{Mehler:Hemati:Welke:Konca:Uslu:2020,
abstract = {We test the hypothesis that the extent to which one obtains information on a given topic through Wikipedia depends on the language in which it is consulted. Controlling the size factor, we investigate this hypothesis for a number of 25 subject areas. Since Wikipedia is a central part of the web-based information landscape, this indicates a language-related, linguistic bias. The article therefore deals with the question of whether Wikipedia exhibits this kind of linguistic relativity or not. From the perspective of educational science, the article develops a computational model of the information landscape from which multiple texts are drawn as typical input of web-based reading. For this purpose, it develops a hybrid model of intra- and intertextual similarity of different parts of the information landscape and tests this model on the example of 35 languages and corresponding Wikipedias. In the way it measures the similarities of hypertexts, the article goes beyond existing approaches by examining their structural and semantic aspects intra- and intertextually. In this way it builds a bridge between reading research, educational science, Wikipedia research and computational linguistics.},
author = {Mehler, Alexander and Hemati, Wahed and Welke, Pascal and Konca, Maxim and Uslu, Tolga},
doi = {10.3389/feduc.2020.562670},
issn = {2504-284X},
journal = {Frontiers in Education},
pages = {206},
title = {Multiple Texts as a Limiting Factor in Online Learning: Quantifying (Dis-)similarities of Knowledge Networks},
url = {https://www.frontiersin.org/article/10.3389/feduc.2020.562670},
pdf = {https://www.frontiersin.org/articles/10.3389/feduc.2020.562670/pdf},
volume = {5},
year = {2020}
}
• A. Lücking, S. Brückner, G. Abrami, T. Uslu, and A. Mehler, “Computational linguistic assessment of textbook and online learning media by means of threshold concepts in business education,” CoRR, vol. abs/2008.02096, 2020.
[BibTeX]

@article{Luecking:et:al:2020,
author    = {Andy L{\"{u}}cking and
Sebastian Br{\"{u}}ckner and
Giuseppe Abrami and
Tolga Uslu and
Alexander Mehler},
title     = {Computational linguistic assessment of textbook and online learning
media by means of threshold concepts in business education},
journal   = {CoRR},
volume    = {abs/2008.02096},
year      = {2020},
url       = {https://arxiv.org/abs/2008.02096},
archivePrefix = {arXiv},
eprint    = {2008.02096},
timestamp = {Fri, 07 Aug 2020 15:07:21 +0200},
biburl    = {https://dblp.org/rec/journals/corr/abs-2008-02096.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
• D. Baumartz, “Automatic Topic Modeling in the Context of Digital Libraries: Mehrsprachige Korpus-basierte Erweiterung von text2ddc – eine experimentelle Studie,” Bachelor Thesis, 2020.
[BibTeX]

@thesis{Baumartz:2020,
author = {Baumartz, Daniel},
title = {{Automatic Topic Modeling in the Context of Digital Libraries:
Mehrsprachige Korpus-basierte Erweiterung von text2ddc - eine experimentelle
Studie}},
year = 2020,
month = 6,
type = {bathesis},
school = {Johann Wolfgang Goethe-Universität, Institute of Computer
Science and Mathematics, Text Technology Lab},
url = {https://publikationen.ub.uni-frankfurt.de/frontdoor/index/index/docId/56381},
pdf = {https://publikationen.ub.uni-frankfurt.de/files/56381/baumartz_bachelorarbeit_2020_pub.pdf}
}
• C. Driller, M. Koch, G. Abrami, W. Hemati, A. Lücking, A. Mehler, A. Pachzelt, and G. Kasperek, “Fast and Easy Access to Central European Biodiversity Data with BIOfid,” Biodiversity Information Science and Standards, vol. 4, p. e59157, 2020.
[Abstract] [BibTeX]

The storage of data in public repositories such as the Global Biodiversity Information Facility (GBIF) or the National Center for Biotechnology Information (NCBI) is nowadays stipulated in the policies of many publishers in order to facilitate data replication or proliferation. Species occurrence records contained in legacy printed literature are no exception to this. The extent of their digital and machine-readable availability, however, is still far from matching the existing data volume (Thessen and Parr 2014). But precisely these data are becoming more and more relevant to the investigation of ongoing loss of biodiversity. In order to extract species occurrence records at a larger scale from available publications, one has to apply specialised text mining tools. However, such tools are in short supply especially for scientific literature in the German language. The Specialised Information Service Biodiversity Research BIOfid (Koch et al. 2017) aims at reducing this desideratum, inter alia, by preparing a searchable text corpus semantically enriched by a new kind of multi-label annotation. For this purpose, we feed manual annotations into automatic, machine-learning annotators. This mixture of automatic and manual methods is needed, because BIOfid approaches a new application area with respect to language (mainly German of the 19th century), text type (biological reports), and linguistic focus (technical and everyday language). We will present current results of the performance of BIOfid’s semantic search engine and the application of independent natural language processing (NLP) tools. Most of these are freely available online, such as TextImager (Hemati et al. 2016). We will show how TextImager is tied into the BIOfid pipeline and how it is made scalable (e.g. extendible by further modules) and usable on different systems (docker containers). Further, we will provide a short introduction to generating machine-learning training data using TextAnnotator (Abrami et al. 2019) for multi-label annotation. Annotation reproducibility can be assessed by the implementation of inter-annotator agreement methods (Abrami et al. 2020). Beyond taxon recognition and entity linking, we place particular emphasis on location and time information. For this purpose, our annotation tag-set combines general categories and biology-specific categories (including taxonomic names) with location and time ontologies. The application of the annotation categories is regimented by annotation guidelines (Lücking et al. 2020). Within the next years, our work deliverable will be a semantically accessible and data-extractable text corpus of around two million pages. In this way, BIOfid is creating a new valuable resource that expands our knowledge of biodiversity and its determinants.
@article{Driller:et:al:2020,
author = {Christine Driller and Markus Koch and Giuseppe Abrami and Wahed Hemati and Andy Lücking and Alexander Mehler and Adrian Pachzelt and Gerwin Kasperek},
title = {Fast and Easy Access to Central European Biodiversity Data with BIOfid},
volume = {4},
year = {2020},
doi = {10.3897/biss.4.59157},
publisher = {Pensoft Publishers},
abstract = {The storage of data in public repositories such as the Global Biodiversity Information Facility (GBIF) or the National Center for Biotechnology Information (NCBI) is nowadays stipulated in the policies of many publishers in order to facilitate data replication or proliferation. Species occurrence records contained in legacy printed literature are no exception to this. The extent of their digital and machine-readable availability, however, is still far from matching the existing data volume (Thessen and Parr 2014). But precisely these data are becoming more and more relevant to the investigation of ongoing loss of biodiversity. In order to extract species occurrence records at a larger scale from available publications, one has to apply specialised text mining tools. However, such tools are in short supply especially for scientific literature in the German language. The Specialised Information Service Biodiversity Research BIOfid (Koch et al. 2017) aims at reducing this desideratum, inter alia, by preparing a searchable text corpus semantically enriched by a new kind of multi-label annotation. For this purpose, we feed manual annotations into automatic, machine-learning annotators. This mixture of automatic and manual methods is needed, because BIOfid approaches a new application area with respect to language (mainly German of the 19th century), text type (biological reports), and linguistic focus (technical and everyday language). We will present current results of the performance of BIOfid’s semantic search engine and the application of independent natural language processing (NLP) tools. Most of these are freely available online, such as TextImager (Hemati et al. 2016). We will show how TextImager is tied into the BIOfid pipeline and how it is made scalable (e.g. extendible by further modules) and usable on different systems (docker containers). Further, we will provide a short introduction to generating machine-learning training data using TextAnnotator (Abrami et al. 2019) for multi-label annotation. Annotation reproducibility can be assessed by the implementation of inter-annotator agreement methods (Abrami et al. 2020). Beyond taxon recognition and entity linking, we place particular emphasis on location and time information. For this purpose, our annotation tag-set combines general categories and biology-specific categories (including taxonomic names) with location and time ontologies. The application of the annotation categories is regimented by annotation guidelines (Lücking et al. 2020). Within the next years, our work deliverable will be a semantically accessible and data-extractable text corpus of around two million pages. In this way, BIOfid is creating a new valuable resource that expands our knowledge of biodiversity and its determinants.},
pages = {e59157},
URL = {https://doi.org/10.3897/biss.4.59157},
eprint = {https://doi.org/10.3897/biss.4.59157},
journal = {Biodiversity Information Science and Standards}
}
• J. Ginzburg and A. Lücking, “On Laughter and Forgetting and Reconversing: A neurologically-inspired model of conversational context,” in Proceedings of The 24th Workshop on the Semantics and Pragmatics of Dialogue, 2020.
[BibTeX]

@InProceedings{Ginzburg:Luecking:2020:a,
author =     {Ginzburg, Jonathan and L{\"u}cking, Andy},
title =     {On Laughter and Forgetting and Reconversing: A neurologically-inspired model of conversational context},
booktitle =     {Proceedings of The 24th Workshop on the Semantics
and Pragmatics of Dialogue},
series =     {SemDial/WatchDial},
year =     2020,
location =     {Brandeis University, Waltham (Watch City), MA},
}
• A. Lücking and J. Ginzburg, “Towards the score of communication,” in Proceedings of The 24th Workshop on the Semantics and Pragmatics of Dialogue, 2020.
[BibTeX]

@InProceedings{Luecking:Ginzburg:2020,
author =     {L{\"u}cking, Andy and Ginzburg, Jonathan},
title =     {Towards the score of communication},
booktitle =     {Proceedings of The 24th Workshop on the Semantics
and Pragmatics of Dialogue},
series =     {SemDial/WatchDial},
year =     2020,
location =     {Brandeis University, Waltham (Watch City), MA},
}
• G. Abrami, A. Mehler, and M. Stoeckel, “TextAnnotator: A web-based annotation suite for texts,” in Proceedings of the Digital Humanities 2020, 2020.
[Abstract] [Poster] [BibTeX]

The TextAnnotator is a tool for simultaneous and collaborative annotation of texts with visual annotation support, integration of knowledge bases and, by pipelining the TextImager, a rich variety of pre-processing and automatic annotation tools. It includes a variety of modules for the annotation of texts, which contains the annotation of argumentative, rhetorical, propositional and temporal structures as well as a module for named entity linking and rapid annotation of named entities. Especially the modules for annotation of temporal, argumentative and propositional structures are currently unique in web-based annotation tools. The TextAnnotator, which allows the annotation of texts as a platform, is divided into a front- and a backend component. The backend is a web service based on WebSockets, which integrates the UIMA Database Interface to manage and use texts. Texts are made accessible by using the ResourceManager and the AuthorityManager, based on user and group access permissions. Different views of a document can be created and used depending on the scenario. Once a document has been opened, access is gained to the annotations stored within annotation views in which these are organized. Any annotation view can be assigned with access permissions and by default, each user obtains his or her own user view for every annotated document. In addition, with sufficient access permissions, all annotation views can also be used and curated. This allows the possibility to calculate an Inter-Annotator-Agreement for a document, which shows an agreement between the annotators. Annotators without sufficient rights cannot display this value so that the annotators do not influence each other. This contribution is intended to reflect the current state of development of TextAnnotator, demonstrate the possibilities of an instantaneous Inter-Annotator-Agreement and trigger a discussion about further functions for the community.
@InProceedings{Abrami:Mehler:Stoeckel:2020,
author         = {Abrami, Giuseppe and Mehler, Alexander and Stoeckel, Manuel},
title          = {{TextAnnotator}: A web-based annotation suite for texts},
booktitle      = {Proceedings of the Digital Humanities 2020},
series         = {DH 2020},
year           = {2020},
doi     = {http://dx.doi.org/10.17613/tenm-4907},
abstract    = {The TextAnnotator is a tool for simultaneous and collaborative annotation of texts with visual annotation support, integration of knowledge bases and, by pipelining the TextImager, a rich variety of pre-processing and automatic annotation tools. It includes a variety of modules for the annotation of texts, which contains the annotation of argumentative, rhetorical, propositional and temporal structures as well as a module for named entity linking and rapid annotation of named entities. Especially the modules for annotation of temporal, argumentative and propositional structures are currently unique in web-based annotation tools. The TextAnnotator, which allows the annotation of texts as a platform, is divided into a front- and a backend component. The backend is a web service based on WebSockets, which integrates the UIMA Database Interface to manage and use texts. Texts are made accessible by using the ResourceManager and the AuthorityManager, based on user and group access permissions. Different views of a document can be created and used depending on the scenario. Once a document has been opened, access is gained to the annotations stored within annotation views in which these are organized. Any annotation view can be assigned with access permissions and by default, each user obtains his or her own user view for every annotated document. In addition, with sufficient access permissions, all annotation views can also be used and curated. This allows the possibility to calculate an Inter-Annotator-Agreement for a document, which shows an agreement between the annotators. Annotators without sufficient rights cannot display this value so that the annotators do not influence each other. This contribution is intended to reflect the current state of development of TextAnnotator, demonstrate the possibilities of an instantaneous Inter-Annotator-Agreement and trigger a discussion about further functions for the community.},
}
• G. Abrami, M. Stoeckel, and A. Mehler, “TextAnnotator: A UIMA Based Tool for the Simultaneous and Collaborative Annotation of Texts,” in Proceedings of The 12th Language Resources and Evaluation Conference, Marseille, France, 2020, pp. 891-900.
[Abstract] [BibTeX]

The annotation of texts and other material in the field of digital humanities and Natural Language Processing (NLP) is a common task of research projects. At the same time, the annotation of corpora is certainly the most time- and cost-intensive component in research projects and often requires a high level of expertise according to the research interest. However, for the annotation of texts, a wide range of tools is available, both for automatic and manual annotation. Since the automatic pre-processing methods are not error-free and there is an increasing demand for the generation of training data, also with regard to machine learning, suitable annotation tools are required. This paper defines criteria of flexibility and efficiency of complex annotations for the assessment of existing annotation tools. To extend this list of tools, the paper describes TextAnnotator, a browser-based, multi-annotation system, which has been developed to perform platform-independent multimodal annotations and annotate complex textual structures. The paper illustrates the current state of development of TextAnnotator and demonstrates its ability to evaluate annotation quality (inter-annotator agreement) at runtime. In addition, it will be shown how annotations of different users can be performed simultaneously and collaboratively on the same document from different platforms using UIMA as the basis for annotation.
@InProceedings{Abrami:Stoeckel:Mehler:2020,
author    = {Abrami, Giuseppe  and  Stoeckel, Manuel  and  Mehler, Alexander},
title     = {TextAnnotator: A UIMA Based Tool for the Simultaneous and Collaborative Annotation of Texts},
booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference},
month     = {May},
year      = {2020},
publisher = {European Language Resources Association},
pages     = {891--900},
ISBN = "979-10-95546-34-4",
abstract  = {The annotation of texts and other material in the field of digital humanities and Natural Language Processing (NLP) is a common task of research projects. At the same time, the annotation of corpora is certainly the most time- and cost-intensive component in research projects and often requires a high level of expertise according to the research interest. However, for the annotation of texts, a wide range of tools is available, both for automatic and manual annotation. Since the automatic pre-processing methods are not error-free and there is an increasing demand for the generation of training data, also with regard to machine learning, suitable annotation tools are required. This paper defines criteria of flexibility and efficiency of complex annotations for the assessment of existing annotation tools. To extend this list of tools, the paper describes TextAnnotator, a browser-based, multi-annotation system, which has been developed to perform platform-independent multimodal annotations and annotate complex textual structures. The paper illustrates the current state of development of TextAnnotator and demonstrates its ability to evaluate annotation quality (inter-annotator agreement) at runtime. In addition, it will be shown how annotations of different users can be performed simultaneously and collaboratively on the same document from different platforms using UIMA as the basis for annotation.},
url       = {https://www.aclweb.org/anthology/2020.lrec-1.112},
pdf       = {http://www.lrec-conf.org/proceedings/lrec2020/pdf/2020.lrec-1.112.pdf}
}
• G. Abrami, A. Henlein, A. Kett, and A. Mehler, “Text2SceneVR: Generating Hypertexts with VAnnotatoR as a Pre-processing Step for Text2Scene Systems,” in Proceedings of the 31st ACM Conference on Hypertext and Social Media, New York, NY, USA, 2020, pp. 177–186.
[BibTeX]

@InProceedings{Abrami:Henlein:Kett:Mehler:2020,
author = {Abrami, Giuseppe and Henlein, Alexander and Kett, Attila and Mehler, Alexander},
title = {{Text2SceneVR}: Generating Hypertexts with VAnnotatoR as a Pre-processing Step for Text2Scene Systems},
booktitle = {Proceedings of the 31st ACM Conference on Hypertext and Social Media},
series = {HT ’20},
year = {2020},
location = {Virtual Event, USA},
isbn = {9781450370981},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3372923.3404791},
doi = {10.1145/3372923.3404791},
pages = {177–186},
numpages = {10},
pdf={https://dl.acm.org/doi/pdf/10.1145/3372923.3404791}
}
• M. Stoeckel, A. Henlein, W. Hemati, and A. Mehler, “Voting for POS tagging of Latin texts: Using the flair of FLAIR to better Ensemble Classifiers by Example of Latin,” in Proceedings of LT4HALA 2020 – 1st Workshop on Language Technologies for Historical and Ancient Languages, Marseille, France, 2020, pp. 130-135.
[Abstract] [BibTeX]

Despite the great importance of the Latin language in the past, there are relatively few resources available today to develop modern NLP tools for this language. Therefore, the EvaLatin Shared Task for Lemmatization and Part-of-Speech (POS) tagging was published in the LT4HALA workshop. In our work, we dealt with the second EvaLatin task, that is, POS tagging. Since most of the available Latin word embeddings were trained on either few or inaccurate data, we trained several embeddings on better data in the first step. Based on these embeddings, we trained several state-of-the-art taggers and used them as input for an ensemble classifier called LSTMVoter. We were able to achieve the best results for both the cross-genre and the cross-time task (90.64% and 87.00%) without using additional annotated data (closed modality). In the meantime, we further improved the system and achieved even better results (96.91% on classical, 90.87% on cross-genre and 87.35% on cross-time).
@InProceedings{Stoeckel:et:al:2020,
author    = {Stoeckel, Manuel and Henlein, Alexander and Hemati, Wahed and Mehler, Alexander},
title     = {{Voting for POS tagging of Latin texts: Using the flair of FLAIR to better Ensemble Classifiers by Example of Latin}},
booktitle      = {Proceedings of LT4HALA 2020 - 1st Workshop on Language Technologies for Historical and Ancient Languages},
month          = {May},
year           = {2020},
publisher      = {European Language Resources Association (ELRA)},
pages     = {130--135},
abstract  = {Despite the great importance of the Latin language in the past, there are relatively few resources available today to develop modern NLP tools for this language. Therefore, the EvaLatin Shared Task for Lemmatization and Part-of-Speech (POS) tagging was published in the LT4HALA workshop. In our work, we dealt with the second EvaLatin task, that is, POS tagging. Since most of the available Latin word embeddings were trained on either few or inaccurate data, we trained several embeddings on better data in the first step. Based on these embeddings, we trained several state-of-the-art taggers and used them as input for an ensemble classifier called LSTMVoter. We were able to achieve the best results for both the cross-genre and the cross-time task (90.64\% and 87.00\%) without using additional annotated data (closed modality). In the meantime, we further improved the system and achieved even better results (96.91\% on classical, 90.87\% on cross-genre and 87.35\% on cross-time).},
url       = {https://www.aclweb.org/anthology/2020.lt4hala-1.21},
pdf       = {http://www.lrec-conf.org/proceedings/lrec2020/workshops/LT4HALA/pdf/2020.lt4hala-1.21.pdf}

}
• A. Mehler, B. Jussen, T. Geelhaar, A. Henlein, G. Abrami, D. Baumartz, T. Uslu, and W. Hemati, “The Frankfurt Latin Lexicon. From Morphological Expansion and Word Embeddings to SemioGraphs,” Studi e Saggi Linguistici, vol. 58, iss. 1, pp. 121-155, 2020.
[Abstract] [BibTeX]

In this article we present the Frankfurt Latin Lexicon (FLL), a lexical resource for Medieval Latin that is used both for the lemmatization of Latin texts and for the post-editing of lemmatizations. We describe recent advances in the development of lemmatizers and test them against the Capitularies corpus (comprising Frankish royal edicts, mid-6th to mid-9th century), a corpus created as a reference for processing Medieval Latin. We also consider the post-correction of lemmatizations using a limited crowdsourcing process aimed at continuous review and updating of the FLL. Starting from the texts resulting from this lemmatization process, we describe the extension of the FLL by means of word embeddings, whose interactive traversing by means of SemioGraphs completes the digital enhanced hermeneutic circle. In this way, the article argues for a more comprehensive understanding of lemmatization, encompassing classical machine learning as well as intellectual post-corrections and, in particular, human computation in the form of interpretation processes based on graph representations of the underlying lexical resources.
@article{Mehler:et:al:2020b,
author={Mehler, Alexander and Jussen, Bernhard and Geelhaar, Tim and Henlein, Alexander and Abrami, Giuseppe and Baumartz, Daniel and Uslu, Tolga and Hemati, Wahed},
title={{The Frankfurt Latin Lexicon. From Morphological Expansion and Word Embeddings to SemioGraphs}},
journal={Studi e Saggi Linguistici},
doi={10.4454/ssl.v58i1.276},
year={2020},
volume={58},
number={1},
pages={121--155},
abstract={In this article we present the Frankfurt Latin Lexicon (FLL), a lexical resource for Medieval Latin that is used both for the lemmatization of Latin texts and for the post-editing of lemmatizations. We describe recent advances in the development of lemmatizers and test them against the Capitularies corpus (comprising Frankish royal edicts, mid-6th to mid-9th century), a corpus created as a reference for processing Medieval Latin. We also consider the post-correction of lemmatizations using a limited crowdsourcing process aimed at continuous review and updating of the FLL. Starting from the texts resulting from this lemmatization process, we describe the extension of the FLL by means of word embeddings, whose interactive traversing by means of SemioGraphs completes the digital enhanced hermeneutic circle. In this way, the article argues for a more comprehensive understanding of lemmatization, encompassing classical machine learning as well as intellectual post-corrections and, in particular, human computation in the form of interpretation processes based on graph representations of the underlying lexical resources.},
url={https://www.studiesaggilinguistici.it/index.php/ssl/article/view/276},
}
• A. Henlein, G. Abrami, A. Kett, and A. Mehler, “Transfer of ISOSpace into a 3D Environment for Annotations and Applications,” in Proceedings of the 16th Joint ACL – ISO Workshop on Interoperable Semantic Annotation, Marseille, 2020, pp. 32-35.
[Abstract] [BibTeX]

People's visual perception is very pronounced and therefore it is usually no problem for them to describe the space around them in words. Conversely, people also have no problems imagining a concept of a described space. In recent years many efforts have been made to develop a linguistic concept for spatial and spatial-temporal relations. However, the systems have not really caught on so far, which in our opinion is due to the complex models on which they are based and the lack of available training data and automated taggers. In this paper we describe a project to support spatial annotation, which could facilitate annotation by its many functions, but also enrich it with many more information. This is to be achieved by an extension by means of a VR environment, with which spatial relations can be better visualized and connected with real objects. And we want to use the available data to develop a new state-of-the-art tagger and thus lay the foundation for future systems such as improved text understanding for Text2Scene.
@InProceedings{Henlein:et:al:2020,
Author         = {Henlein, Alexander and Abrami, Giuseppe and Kett, Attila and Mehler, Alexander},
Title          = {Transfer of ISOSpace into a 3D Environment for Annotations and Applications},
booktitle      = {Proceedings of the 16th Joint ACL - ISO Workshop on Interoperable Semantic Annotation},
month          = {May},
year           = {2020},
publisher      = {European Language Resources Association},
pages     = {32--35},
abstract  = {People's visual perception is very pronounced and therefore it is usually no problem for them to describe the space around them in words. Conversely, people also have no problems imagining a concept of a described space. In recent years many efforts have been made to develop a linguistic concept for spatial and spatial-temporal relations. However, the systems have not really caught on so far, which in our opinion is due to the complex models on which they are based and the lack of available training data and automated taggers. In this paper we describe a project to support spatial annotation, which could facilitate annotation by its many functions, but also enrich it with many more information. This is to be achieved by an extension by means of a VR environment, with which spatial relations can be better visualized and connected with real objects. And we want to use the available data to develop a new state-of-the-art tagger and thus lay the foundation for future systems such as improved text understanding for Text2Scene.},
url       = {https://www.aclweb.org/anthology/2020.isa-1.4},
pdf      = {http://www.lrec-conf.org/proceedings/lrec2020/workshops/ISA16/pdf/2020.isa-1.4.pdf}
}
• J. Hildebrand, W. Hemati, and A. Mehler, “Recognizing Sentence-level Logical Document Structures with the Help of Context-free Grammars,” in Proceedings of The 12th Language Resources and Evaluation Conference, Marseille, France, 2020, pp. 5282-5290.
[Abstract] [BibTeX]

Current sentence boundary detectors split documents into sequentially ordered sentences by detecting their beginnings and ends. Sentences, however, are more deeply structured even on this side of constituent and dependency structure: they can consist of a main sentence and several subordinate clauses as well as further segments (e.g. inserts in parentheses); they can even recursively embed whole sentences and then contain multiple sentence beginnings and ends. In this paper, we introduce a tool that segments sentences into tree structures to detect this type of recursive structure. To this end, we retrain different constituency parsers with the help of modified training data to transform them into sentence segmenters. With these segmenters, documents are mapped to sequences of sentence-related “logical document structures”. The resulting segmenters aim to improve downstream tasks by providing additional structural information. In this context, we experiment with German dependency parsing. We show that for certain sentence categories, which can be determined automatically, improvements in German dependency parsing can be achieved using our segmenter for preprocessing. The assumption suggests that improvements in other languages and tasks can be achieved.
@InProceedings{Hildebrand:Hemati:Mehler:2020,
Author         = {Hildebrand, Jonathan and Hemati, Wahed and Mehler, Alexander},
Title          = {Recognizing Sentence-level Logical Document Structures with the Help of Context-free Grammars},
booktitle      = {Proceedings of The 12th Language Resources and Evaluation Conference},
month          = {May},
year           = {2020},
publisher      = {European Language Resources Association},
pages     = {5282--5290},
abstract  = {Current sentence boundary detectors split documents into sequentially ordered sentences by detecting their beginnings and ends. Sentences, however, are more deeply structured even on this side of constituent and dependency structure: they can consist of a main sentence and several subordinate clauses as well as further segments (e.g. inserts in parentheses); they can even recursively embed whole sentences and then contain multiple sentence beginnings and ends. In this paper, we introduce a tool that segments sentences into tree structures to detect this type of recursive structure. To this end, we retrain different constituency parsers with the help of modified training data to transform them into sentence segmenters. With these segmenters, documents are mapped to sequences of sentence-related “logical document structures”. The resulting segmenters aim to improve downstream tasks by providing additional structural information. In this context, we experiment with German dependency parsing. We show that for certain sentence categories, which can be determined automatically, improvements in German dependency parsing can be achieved using our segmenter for preprocessing. The assumption suggests that improvements in other languages and tasks can be achieved.},
url       = {https://www.aclweb.org/anthology/2020.lrec-1.650},
pdf      = {http://www.lrec-conf.org/proceedings/lrec2020/pdf/2020.lrec-1.650.pdf}
}
• A. Henlein and A. Mehler, “On the Influence of Coreference Resolution on Word Embeddings in Lexical-semantic Evaluation Tasks,” in Proceedings of The 12th Language Resources and Evaluation Conference, Marseille, France, 2020, pp. 27-33.
[Abstract] [BibTeX]

Coreference resolution (CR) aims to find all spans of a text that refer to the same entity. The F1-Scores on these task have been greatly improved by new developed End2End-approaches and transformer networks. The inclusion of CR as a pre-processing step is expected to lead to improvements in downstream tasks. The paper examines this effect with respect to word embeddings. That is, we analyze the effects of CR on six different embedding methods and evaluate them in the context of seven lexical-semantic evaluation tasks and instantiation/hypernymy detection. Especially in the last tasks we hoped for a significant increase in performance. We show that all word embedding approaches do not benefit significantly from pronoun substitution. The measurable improvements are only marginal (around 0.5% in most test cases). We explain this result with the loss of contextual information, reduction of the relative occurrence of rare words and the lack of pronouns to be replaced.
@InProceedings{Henlein:Mehler:2020,
Author         = {Henlein, Alexander and Mehler, Alexander},
Title          = {{On the Influence of Coreference Resolution on Word Embeddings in Lexical-semantic Evaluation Tasks}},
booktitle      = {Proceedings of The 12th Language Resources and Evaluation Conference},
month          = {May},
year           = {2020},
publisher      = {European Language Resources Association},
pages     = {27--33},
abstract  = {Coreference resolution (CR) aims to find all spans of a text that refer to the same entity. The F1-Scores on these task have been greatly improved by new developed End2End-approaches and transformer networks. The inclusion of CR as a pre-processing step is expected to lead to improvements in downstream tasks. The paper examines this effect with respect to word embeddings. That is, we analyze the effects of CR on six different embedding methods and evaluate them in the context of seven lexical-semantic evaluation tasks and instantiation/hypernymy detection. Especially in the last tasks we hoped for a significant increase in performance. We show that all word embedding approaches do not benefit significantly from pronoun substitution. The measurable improvements are only marginal (around 0.5\% in most test cases). We explain this result with the loss of contextual information, reduction of the relative occurrence of rare words and the lack of pronouns to be replaced.},
url       = {https://www.aclweb.org/anthology/2020.lrec-1.4},
pdf      = {http://www.lrec-conf.org/proceedings/lrec2020/pdf/2020.lrec-1.4.pdf}
}
• A. Mehler, R. Gleim, R. Gaitsch, T. Uslu, and W. Hemati, “From Topic Networks to Distributed Cognitive Maps: Zipfian Topic Universes in the Area of Volunteered Geographic Information,” Complexity, vol. 4, pp. 1-47, 2020.
[BibTeX]

@Article{Mehler:Gleim:Gaitsch:Uslu:Hemati:2020,
author  = {Alexander Mehler and R{\"{u}}diger Gleim and Regina Gaitsch and Tolga Uslu and Wahed Hemati},
title   = {From Topic Networks to Distributed Cognitive Maps: {Zipfian} Topic Universes in the Area of Volunteered Geographic Information},
journal = {Complexity},
volume = {4},
doi={10.1155/2020/4607025},
pages = {1-47},
issuetitle = {Cognitive Network Science: A New Frontier},
year    = {2020},
}
• V. Kühn, G. Abrami, and A. Mehler, “WikNectVR: A Gesture-Based Approach for Interacting in Virtual Reality Based on WikNect and Gestural Writing,” in Virtual, Augmented and Mixed Reality. Design and Interaction – 12th International Conference, VAMR 2020, Held as Part of the 22nd HCI International Conference, HCII 2020, Copenhagen, Denmark, July 19-24, 2020, Proceedings, Part I, 2020, pp. 299-312.
[BibTeX]

@inproceedings{Kuehn:Abrami:Mehler:2020,
author    = {Vincent K{\"{u}}hn and Giuseppe Abrami and Alexander Mehler},
editor    = {Jessie Y. C. Chen and Gino Fragomeni},
title     = {WikNectVR: {A} Gesture-Based Approach for Interacting in Virtual Reality Based on WikNect and Gestural Writing},
booktitle = {Virtual, Augmented and Mixed Reality. Design and Interaction - 12th International Conference, {VAMR} 2020, Held as Part of the 22nd {HCI} International Conference, {HCII} 2020, Copenhagen, Denmark, July 19-24, 2020, Proceedings, Part {I}},
series    = {Lecture Notes in Computer Science},
volume    = {12190},
pages     = {299--312},
publisher = {Springer},
year      = {2020},
url       = {https://doi.org/10.1007/978-3-030-49695-1_20},
doi       = {10.1007/978-3-030-49695-1_20},
timestamp = {Tue, 14 Jul 2020 10:55:57 +0200},
biburl    = {https://dblp.org/rec/conf/hci/KuhnAM20.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
• G. Abrami, A. Mehler, C. Spiekermann, A. Kett, S. Lööck, and L. Schwarz, “Educational Technologies in the area of ubiquitous historical computing in virtual reality,” in New Perspectives on Virtual and Augmented Reality: Finding New Ways to Teach in a Transformed Learning Environment, L. Daniela, Ed., Taylor & Francis, 2020.
[Abstract] [BibTeX]

At ever shorter intervals, new technologies are being developed that are opening up more and more areas of application. This regards, for example, Virtual Reality (VR) and Augmented Reality (AR) devices. In addition to the private sector, the public and education sectors, which already make intensive use of these devices, benefit from these technologies. However, especially in the field of historical education, there are not many frameworks for generating immersive virtual environments that can be used flexibly enough. This chapter addresses this gap by means of VAnnotatoR. VAnnotatoR is a versatile framework for the creation and use of virtual environments that serve to model historical processes in historical education. The paper describes the building blocks of VAnnotatoR and describes applications in historical education.
@InBook{Abrami:et:al:2020,
author="Abrami, Giuseppe and Mehler, Alexander and Spiekermann, Christian and Kett, Attila and L{\"o}{\"o}ck, Simon and Schwarz, Lukas",
editor="Daniela, Linda",
title="Educational Technologies in the area of ubiquitous historical computing in virtual reality",
bookTitle="New Perspectives on Virtual and Augmented Reality: Finding New Ways to Teach in a Transformed Learning Environment",
year="2020",
publisher="Taylor \& Francis",
abstract="At ever shorter intervals, new technologies are being developed that are opening up more and more areas of application. This regards, for example, Virtual Reality (VR) and Augmented Reality (AR) devices. In addition to the private sector, the public and education sectors, which already make intensive use of these devices, benefit from these technologies. However, especially in the field of historical education, there are not many frameworks for generating immersive virtual environments that can be used flexibly enough. This chapter addresses this gap by means of VAnnotatoR. VAnnotatoR is a versatile framework for the creation and use of virtual environments that serve to model historical processes in historical education. The paper describes the building blocks of VAnnotatoR and describes applications in historical education.",
isbn={978-0-367-43211-9},
url={https://www.routledge.com/New-Perspectives-on-Virtual-and-Augmented-Reality-Finding-New-Ways-to-Teach/Daniela/p/book/9780367432119}
}
• C. Stegbauer and A. Mehler, “Ursachen der Entstehung von ubiquitären Zentrum-Peripheriestrukturen und ihre Folgen,” Soziale Welt — Zeitschrift für sozialwissenschaftliche Forschung und Praxis (SozW), vol. Sonderband 23, pp. 265-284, 2020.
[BibTeX]

@Article{Stegbauer:Mehler:2020,
author       = {Christian Stegbauer and Alexander Mehler},
title        = {Ursachen der Entstehung von ubiquit{\"{a}}ren Zentrum-Peripheriestrukturen und ihre Folgen},
journal = {Soziale Welt -- Zeitschrift f\"{u}r sozialwissenschaftliche Forschung und Praxis (SozW)},
volume = {Sonderband 23},
year         = {2020},
pages = {265--284}
}

### 2019 (25)

• O. Zlatkin-Troitschanskaia, W. Bisang, A. Mehler, M. Banerjee, and J. Roeper, “Positive Learning in the Internet Age: Developments and Perspectives in the PLATO Program,” in Frontiers and Advances in Positive Learning in the Age of InformaTiOn (PLATO), O. Zlatkin-Troitschanskaia, Ed., Cham: Springer International Publishing, 2019, pp. 1-5.
[Abstract] [BibTeX]

The Internet has become the main informational entity, i.e., a public source of information. The Internet offers many new benefits and opportunities for human learning, teaching, and research. However, by providing a vast amount of information from innumerable sources, it also enables the manipulation of information; there are countless examples of disseminated misinformation and false data in mass and social media. Much of the information presented online is conflicting, preselected, or algorithmically obscure, often colliding with fundamental humanistic values and posing moral or ethical problems.
@Inbook{Zlatkin-Troitschanskaia:et:al:2019,
author="Zlatkin-Troitschanskaia, Olga and Bisang, Walter and Mehler, Alexander and Banerjee, Mita and Roeper, Jochen",
editor="Zlatkin-Troitschanskaia, Olga",
title="Positive Learning in the Internet Age: Developments and Perspectives in the PLATO Program",
bookTitle="Frontiers and Advances in Positive Learning in the Age of InformaTiOn (PLATO)",
year="2019",
publisher="Springer International Publishing",
pages="1--5",
abstract="The Internet has become the main informational entity, i.e., a public source of information. The Internet offers many new benefits and opportunities for human learning, teaching, and research. However, by providing a vast amount of information from innumerable sources, it also enables the manipulation of information; there are countless examples of disseminated misinformation and false data in mass and social media. Much of the information presented online is conflicting, preselected, or algorithmically obscure, often colliding with fundamental humanistic values and posing moral or ethical problems.",
isbn="978-3-030-26578-6",
doi="10.1007/978-3-030-26578-6_1",
url="https://doi.org/10.1007/978-3-030-26578-6_1"
}
• A. Mehler and V. Ramesh, “TextInContext: On the Way to a Framework for Measuring the Context-Sensitive Complexity of Educationally Relevant Texts—A Combined Cognitive and Computational Linguistic Approach,” in Frontiers and Advances in Positive Learning in the Age of InformaTiOn (PLATO), O. Zlatkin-Troitschanskaia, Ed., Cham: Springer International Publishing, 2019, pp. 167-195.
[Abstract] [BibTeX]

We develop a framework for modeling the context sensitivity of text interpretation. As a point of reference, we focus on the complexity of educational texts. To open up a broader basis for representing phenomena of context sensitivity, we integrate a learning theory (i.e., the Cognitive Load Theory) with a theory of discourse comprehension (i.e., the Construction Integration Model) and a theory of cognitive semantics (i.e., the theory of Conceptual Spaces). The aim is to construct measures that view text complexity as a relational attribute by analogy to the relational concept of meaning in situation semantics. To this end, we reconstruct the situation semantic notion of relational meaning from the perspective of a computationally informed cognitive semantics. The aim is to prepare the development of measurements for predicting learning outcomes in the form of positive or negative learning. This prediction ideally depends on the underlying learning material, the learner's situational context, and knowledge retrieved from his or her long-term memory, which he or she uses to arrive at coherent mental representations of the underlying texts. Finally, our model refers to machine learning as a tool for modeling such memory content. In this way, the chapter integrates approaches from different disciplines (linguistic semantics, computational linguistics, cognitive science, and data science).
@Inbook{Mehler:Ramesh:2019,
author="Mehler, Alexander and Ramesh, Visvanathan",
editor="Zlatkin-Troitschanskaia, Olga",
title="{TextInContext}: On the Way to a Framework for Measuring the Context-Sensitive Complexity of Educationally Relevant Texts---A Combined Cognitive and Computational Linguistic Approach",
bookTitle="Frontiers and Advances in Positive Learning in the Age of InformaTiOn (PLATO)",
year="2019",
publisher="Springer International Publishing",
pages="167--195",
abstract="We develop a framework for modeling the context sensitivity of text interpretation. As a point of reference, we focus on the complexity of educational texts. To open up a broader basis for representing phenomena of context sensitivity, we integrate a learning theory (i.e., the Cognitive Load Theory) with a theory of discourse comprehension (i.e., the Construction Integration Model) and a theory of cognitive semantics (i.e., the theory of Conceptual Spaces). The aim is to construct measures that view text complexity as a relational attribute by analogy to the relational concept of meaning in situation semantics. To this end, we reconstruct the situation semantic notion of relational meaning from the perspective of a computationally informed cognitive semantics. The aim is to prepare the development of measurements for predicting learning outcomes in the form of positive or negative learning. This prediction ideally depends on the underlying learning material, the learner's situational context, and knowledge retrieved from his or her long-term memory, which he or she uses to arrive at coherent mental representations of the underlying texts. Finally, our model refers to machine learning as a tool for modeling such memory content. In this way, the chapter integrates approaches from different disciplines (linguistic semantics, computational linguistics, cognitive science, and data science).",
isbn="978-3-030-26578-6",
doi="10.1007/978-3-030-26578-6_14",
url="https://doi.org/10.1007/978-3-030-26578-6_14"
}
• A. Lücking, “Dialogue semantics: From cognitive structures to positive and negative learning,” in Frontiers and Advances in Positive Learning in the Age of InformaTiOn (PLATO), O. Zlatkin-Troitschanskaia, Ed., Cham, Switzerland: Springer Nature Switzerland AG, 2019, pp. 197-205.
[BibTeX]

@InCollection{Luecking:2019:a,
author =       {L\"{u}cking, Andy},
title =        {Dialogue semantics: {From} cognitive structures to
positive and negative learning},
year =         2019,
pages =        {197-205},
publisher =    {Springer Nature Switzerland AG},
editor =       {Zlatkin-Troitschanskaia, Olga},
booktitle =    {Frontiers and Advances in Positive Learning in the
Age of InformaTiOn (PLATO)},
doi =          {10.1007/978-3-030-26578-6},
}
• A. Lücking and J. Ginzburg, “Not few but all quantifiers can be negated: towards a referentially transparent semantics of quantified noun phrases,” in Proceedings of the Amsterdam Colloquium 2019, 2019, pp. 269-278.
[BibTeX]

@InProceedings{Luecking:Ginzburg:2019,
author =       {L{\"u}cking, Andy and Ginzburg, Jonathan},
title =        {Not few but all quantifiers can be negated: towards
a referentially transparent semantics of quantified
noun phrases},
booktitle =    {Proceedings of the Amsterdam Colloquium 2019},
series =       {AC'19},
location =     {University of Amsterdam},
year =         2019,
pages =        {269-278},
url =          {http://events.illc.uva.nl/AC/AC2019/},
}
• A. Lücking, “Gesture,” in Head-Driven Phrase Structure Grammar: The handbook, S. Müller, A. Abeillé, R. D. Borsley, and J. Koenig, Eds., Berlin: Language Science Press, 2019.
[BibTeX]

@InCollection{Luecking:2019:b,
keywords =     {own,bookchapter},
author+an =    {1=highlight},
author =       {L\"{u}cking, Andy},
year =         2019,
title =        {Gesture},
editor =       {M\"{u}ller, Stefan and Abeill\'{e}, Anne and
Borsley, Robert D. and Koenig, Jean-Pierre},
booktitle =    {{Head-Driven Phrase Structure Grammar}: {The}
handbook},
publisher =    {Language Science Press},
pdf =
{https://hpsg.hu-berlin.de/Projects/HPSG-handbook/PDFs/gesture.pdf},
url =          {https://langsci-press.org/catalog/book/259}
}
• A. Lücking, J. Ginzburg, and R. Cooper, “Grammar in dialogue,” in Head-Driven Phrase Structure Grammar: The handbook, S. Müller, A. Abeillé, R. D. Borsley, and J. Koenig, Eds., Berlin: Language Science Press, 2019.
[BibTeX]

@InCollection{Luecking:Ginzburg:Cooper:2019,
keywords =     {own,bookchapter},
author+an =    {1=highlight},
author =       {L\"{u}cking, Andy and Ginzburg, Jonathan and Cooper,
Robin},
year =         2019,
title =        {Grammar in dialogue},
editor =       {M\"{u}ller, Stefan and Abeill\'{e}, Anne and
Borsley, Robert D. and Koenig, Jean-Pierre},
booktitle =    {{Head-Driven Phrase Structure Grammar}: {The}
handbook},
publisher =    {Language Science Press},
pdf =
{https://hpsg.hu-berlin.de/Projects/HPSG-handbook/PDFs/dialogue.pdf},
url =          {https://langsci-press.org/catalog/book/259}
}
• S. Schweter and S. Ahmed, “Deep-EOS: General-Purpose Neural Networks for Sentence Boundary Detection,” in Proceedings of the 15th Conference on Natural Language Processing (KONVENS), 2019.
[BibTeX]

@InProceedings{Schweter:Ahmed:2019,
author = {Stefan Schweter and Sajawel Ahmed},
title = {{Deep-EOS: General-Purpose Neural Networks for Sentence Boundary Detection}},
booktitle = {Proceedings of the 15th Conference on Natural Language Processing (KONVENS)},
location = {Erlangen, Germany},
year = 2019
}
• M. Stoeckel, W. Hemati, and A. Mehler, “When Specialization Helps: Using Pooled Contextualized Embeddings to Detect Chemical and Biomedical Entities in Spanish,” in Proceedings of The 5th Workshop on BioNLP Open Shared Tasks, Hong Kong, China, 2019, pp. 11-15.
[Abstract] [BibTeX]

The recognition of pharmacological substances, compounds and proteins is an essential preliminary work for the recognition of relations between chemicals and other biomedically relevant units. In this paper, we describe an approach to Task 1 of the PharmaCoNER Challenge, which involves the recognition of mentions of chemicals and drugs in Spanish medical texts. We train a state-of-the-art BiLSTM-CRF sequence tagger with stacked Pooled Contextualized Embeddings, word and sub-word embeddings using the open-source framework FLAIR. We present a new corpus composed of articles and papers from Spanish health science journals, termed the Spanish Health Corpus, and use it to train domain-specific embeddings which we incorporate in our model training. We achieve a result of 89.76% F1-score using pre-trained embeddings and are able to improve these results to 90.52% F1-score using specialized embeddings.
@inproceedings{Stoeckel:Hemati:Mehler:2019,
title = "When Specialization Helps: Using Pooled Contextualized Embeddings to Detect Chemical and Biomedical Entities in {S}panish",
author = "Stoeckel, Manuel and Hemati, Wahed and Mehler, Alexander",
booktitle = "Proceedings of The 5th Workshop on BioNLP Open Shared Tasks",
month = nov,
year = "2019",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/D19-5702",
doi = "10.18653/v1/D19-5702",
pages = "11--15",
abstract = "The recognition of pharmacological substances, compounds and proteins is an essential preliminary work for the recognition of relations between chemicals and other biomedically relevant units. In this paper, we describe an approach to Task 1 of the PharmaCoNER Challenge, which involves the recognition of mentions of chemicals and drugs in Spanish medical texts. We train a state-of-the-art BiLSTM-CRF sequence tagger with stacked Pooled Contextualized Embeddings, word and sub-word embeddings using the open-source framework FLAIR. We present a new corpus composed of articles and papers from Spanish health science journals, termed the Spanish Health Corpus, and use it to train domain-specific embeddings which we incorporate in our model training. We achieve a result of 89.76{\%} F1-score using pre-trained embeddings and are able to improve these results to 90.52{\%} F1-score using specialized embeddings.",
}
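
For readers who want to reproduce the general setup, the stacked-embedding tagger described in this abstract maps roughly onto FLAIR's public API as follows. This is a minimal sketch: the data path, column layout and hyperparameters are placeholders, not the configuration used in the paper.

```python
# Minimal sketch of a BiLSTM-CRF tagger with stacked (pooled) contextualized
# embeddings in FLAIR. Data path, column layout and hyperparameters are
# illustrative placeholders, not the paper's actual configuration.
from flair.datasets import ColumnCorpus
from flair.embeddings import PooledFlairEmbeddings, StackedEmbeddings, WordEmbeddings
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

columns = {0: "text", 1: "ner"}                      # CoNLL-style token/label columns
corpus = ColumnCorpus("data/pharmaconer", columns)   # hypothetical data directory

# Older FLAIR releases use corpus.make_tag_dictionary(tag_type="ner") instead.
tag_dictionary = corpus.make_label_dictionary(label_type="ner")

embeddings = StackedEmbeddings([
    WordEmbeddings("es"),                 # Spanish fastText word embeddings
    PooledFlairEmbeddings("es-forward"),  # pooled contextualized embeddings
    PooledFlairEmbeddings("es-backward"),
])

tagger = SequenceTagger(
    hidden_size=256,
    embeddings=embeddings,
    tag_dictionary=tag_dictionary,
    tag_type="ner",
    use_crf=True,                         # CRF decoding on top of the BiLSTM
)

ModelTrainer(tagger, corpus).train("models/pharmaconer", max_epochs=100)
```
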
• S. Ahmed, M. Stoeckel, C. Driller, A. Pachzelt, and A. Mehler, “BIOfid Dataset: Publishing a German Gold Standard for Named Entity Recognition in Historical Biodiversity Literature,” in Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL), Hong Kong, China, 2019, pp. 871-880.
[Abstract] [BibTeX]

The Specialized Information Service Biodiversity Research (BIOfid) has been launched to mobilize valuable biological data from printed literature hidden in German libraries over the past 250 years. In this project, we annotate German texts converted by OCR from historical scientific literature on the biodiversity of plants, birds, moths and butterflies. Our work enables the automatic extraction of biological information previously buried in the mass of papers and volumes. For this purpose, we generated training data for the tasks of Named Entity Recognition (NER) and Taxa Recognition (TR) in biological documents. We use this data to train a number of leading machine learning tools and create a gold standard for TR in biodiversity literature. More specifically, we perform a practical analysis of our newly generated BIOfid dataset through various downstream-task evaluations and establish a new state of the art for TR with 80.23% F-score. In this sense, our paper lays the foundations for future work in the field of information extraction in biology texts.
@InProceedings{Ahmed:Stoeckel:Driller:Pachzelt:Mehler:2019,
author = {Sajawel Ahmed and Manuel Stoeckel and Christine Driller and Adrian Pachzelt and Alexander
Mehler},
title = {{BIOfid Dataset: Publishing a German Gold Standard for Named Entity Recognition in Historical
Biodiversity Literature}},
booktitle = {Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL)},
publisher = {Association for Computational Linguistics},
year = 2019,
booktitle = "Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL)",
url = "https://www.aclweb.org/anthology/K19-1081",
doi = "10.18653/v1/K19-1081",
pages = "871--880",
abstract = "The Specialized Information Service Biodiversity Research (BIOfid) has been launched to mobilize valuable biological data from printed literature hidden in German libraries for over the past 250 years. In this project, we annotate German texts converted by OCR from historical scientific literature on the biodiversity of plants, birds, moths and butterflies. Our work enables the automatic extraction of biological information previously buried in the mass of papers and volumes. For this purpose, we generated training data for the tasks of Named Entity Recognition (NER) and Taxa Recognition (TR) in biological documents. We use this data to train a number of leading machine learning tools and create a gold standard for TR in biodiversity literature. More specifically, we perform a practical analysis of our newly generated BIOfid dataset through various downstream-task evaluations and establish a new state of the art for TR with 80.23{\%} F-score. In this sense, our paper lays the foundations for future work in the field of information extraction in biology texts.",
}
• A. Mehler and G. Abrami, “VAnnotatoR: A framework for the multimodal reconstruction of historical situations and spaces,” in Proceedings of the Time Machine Conference, Dresden, Germany, October 10-11, 2019.
[Poster][BibTeX]

@inproceedings{Mehler:Abrami:2019,
author = {Mehler, Alexander and Abrami, Giuseppe},
title = {{VAnnotatoR}: A framework for the multimodal reconstruction of historical situations and spaces},
booktitle = {Proceedings of the Time Machine Conference},
year = {2019},
date = {October 10-11},
}
• A. Hunziker, H. Mammadov, W. Hemati, and A. Mehler, “Corpus2Wiki: A MediaWiki-based Tool for Automatically Generating Wikiditions in Digital Humanities,” in INF-DH-2019, Bonn, 2019.
[BibTeX]

@inproceedings{Hunziker:et:al:2019,
author = {Hunziker, Alex and Mammadov, Hasanagha and Hemati, Wahed and Mehler, Alexander},
title = {{Corpus2Wiki}: A MediaWiki-based Tool for Automatically Generating Wikiditions in Digital Humanities},
booktitle = {INF-DH-2019},
year = {2019},
editor = {Burghardt, Manuel AND Müller-Birn, Claudia},
publisher = {Gesellschaft für Informatik e.V.},
}
• A. Hoenen, “Rooting through Direction — New and Old Approaches,” in DHd 2019, 2019.
[BibTeX]

@InProceedings{Hoenen:2019dhd,
Author         = {Hoenen, Armin},
Title          = {{Rooting through Direction -- New and Old Approaches}},
BookTitle      = {DHd 2019},
url            = {https://zenodo.org/record/2596095#.XKtQb3Wg-vo},
year           = 2019,
month     = {jun}
}
• A. Hoenen, “Interpreting and Post-Correcting the Minimum Spanning Tree,” in DGfS 2019, 2019.
[BibTeX]

@InProceedings{Hoenen:2019dgfs,
Author         = {Hoenen, Armin},
Title          = {{Interpreting and Post-Correcting the Minimum Spanning Tree}},
BookTitle      = {DGfS 2019},
url            = {http://www.dgfs2019.uni-bremen.de/abstracts/poster/Hoenen.pdf},
year           = 2019
}
• A. Hoenen, “eLearning the URLCoFi – Digital Didactics for Humanists,” in AIUCD 2019, 2019.
[BibTeX]

@InProceedings{Hoenen:2019aiucd,
Author         = {Hoenen, Armin},
Title          = {{eLearning the URLCoFi – Digital Didactics for Humanists}},
BookTitle      = {AIUCD 2019},
year           = 2019
}
• A. Lücking, R. Cooper, S. Larsson, and J. Ginzburg, “Distribution is not enough — Going Firther,” in Proceedings of Natural Language and Computer Science, 2019.
[BibTeX]

@InProceedings{Luecking:Cooper:Larsson:Ginzburg:2019,
author =     {Lücking, Andy and Cooper, Robin and Larsson, Staffan and Ginzburg, Jonathan},
title =     {Distribution is not enough -- Going {Firther}},
booktitle =     {Proceedings of Natural Language and Computer Science},
maintitle =     {The 13th International Conference on Computational
Semantics (IWCS 2019)},
series =     {NLCS 6},
location =     {Gothenburg, Sweden},
month =     {May},
year =     2019,
}
• W. Hemati and A. Mehler, “CRFVoter: gene and protein related object recognition using a conglomerate of CRF-based tools,” Journal of Cheminformatics, vol. 11, iss. 1, p. 11, 2019.
[Abstract] [BibTeX]

Gene and protein related objects are an important class of entities in biomedical research, whose identification and extraction from scientific articles is attracting increasing interest. In this work, we describe an approach to the BioCreative V.5 challenge regarding the recognition and classification of gene and protein related objects. For this purpose, we transform the task as posed by BioCreative V.5 into a sequence labeling problem. We present a series of sequence labeling systems that we used and adapted in our experiments for solving this task. Our experiments show how to optimize the hyperparameters of the classifiers involved. To this end, we utilize various algorithms for hyperparameter optimization. Finally, we present CRFVoter, a two-stage application of Conditional Random Field (CRF) that integrates the optimized sequence labelers from our study into one ensemble classifier.
@article{Hemati:Mehler:2019b,
author="Hemati, Wahed and Mehler, Alexander",
title="{{CRFVoter}: gene and protein related object recognition using a conglomerate of CRF-based tools}",
journal={Journal of Cheminformatics},
year="2019",
month="Mar",
day="14",
volume="11",
number="1",
pages="11",
abstract="Gene and protein related objects are an important class of entities in biomedical research, whose identification and extraction from scientific articles is attracting increasing interest. In this work, we describe an approach to the BioCreative V.5 challenge regarding the recognition and classification of gene and protein related objects. For this purpose, we transform the task as posed by BioCreative V.5 into a sequence labeling problem. We present a series of sequence labeling systems that we used and adapted in our experiments for solving this task. Our experiments show how to optimize the hyperparameters of the classifiers involved. To this end, we utilize various algorithms for hyperparameter optimization. Finally, we present CRFVoter, a two-stage application of Conditional Random Field (CRF) that integrates the optimized sequence labelers from our study into one ensemble classifier.",
issn="1758-2946",
doi="10.1186/s13321-019-0343-x",
url="https://doi.org/10.1186/s13321-019-0343-x"
}
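
The two-stage architecture can be pictured as follows: the label sequences predicted by several base taggers become the per-token features of a second-level CRF. The sketch below uses sklearn-crfsuite and invented base predictions; it illustrates only the ensemble principle, not the optimized tool chain evaluated in the paper.

```python
# Schematic two-stage ensemble in the spirit of CRFVoter: per-token predictions
# of several base sequence labelers serve as features of a meta-level CRF.
# Base predictions and labels below are invented for illustration.
import sklearn_crfsuite

def meta_features(sentence):
    """One feature dict per token, built from the base taggers' votes."""
    return [{f"tagger_{name}": label for name, label in token.items()}
            for token in sentence]

# Each token is a mapping from base tagger name to its predicted label.
X_train = [[{"crf1": "B-GENE", "crf2": "B-GENE", "crf3": "O"},
            {"crf1": "I-GENE", "crf2": "O",      "crf3": "O"}]]
y_train = [["B-GENE", "I-GENE"]]  # gold labels for the same tokens

crf = sklearn_crfsuite.CRF(algorithm="lbfgs", c1=0.1, c2=0.1, max_iterations=100)
crf.fit([meta_features(s) for s in X_train], y_train)
print(crf.predict([meta_features(X_train[0])]))
```
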
• G. Abrami, A. Mehler, A. Lücking, E. Rieb, and P. Helfrich, “TextAnnotator: A flexible framework for semantic annotations,” in Proceedings of the Fifteenth Joint ACL – ISO Workshop on Interoperable Semantic Annotation, (ISA-15), 2019.
[Abstract] [BibTeX]

Modern annotation tools should meet at least the following general requirements: they can handle diverse data and annotation levels within one tool, and they support the annotation process with automatic (pre-)processing outcomes as much as possible. We developed a framework that meets these general requirements and that enables versatile and browser-based annotations of texts, the TextAnnotator. It combines NLP methods of pre-processing with methods of flexible post-processing. In fact, machine learning (ML) requires a lot of training and test data, but is usually far from achieving perfect results. Producing high-level annotations for ML and post-correcting its results are therefore necessary. This is the purpose of TextAnnotator, which is entirely implemented in ExtJS and provides a range of interactive visualizations of annotations. In addition, it allows for flexibly integrating knowledge resources, e.g. in the course of post-processing named entity recognition. The paper describes TextAnnotator’s architecture together with three use cases: annotating temporal structures, argument structures and named entity linking.
@InProceedings{Abrami:et:al:2019,
Author         = {Abrami, Giuseppe and Mehler, Alexander and Lücking, Andy and Rieb, Elias and Helfrich, Philipp},
Title          = {{TextAnnotator}: A flexible framework for semantic annotations},
BookTitle      = {Proceedings of the Fifteenth Joint ACL - ISO Workshop on Interoperable Semantic Annotation, (ISA-15)},
Series         = {ISA-15},
location       = {Gothenburg, Sweden},
month     = {May},
year           = 2019,
abstract   ="Modern annotation tools should meet at least the following general requirements: they can handle diverse data and annotation levels within one tool, and they support the annotation process with automatic (pre-)processing outcomes as much as possible. We developed a framework that meets these general requirements and that enables versatile and browser-based annotations of texts, the TextAnnotator. It combines NLP methods of pre-processing with methods of flexible post-processing. Infact, machine learning (ML) requires a lot of training and test data, but is usually far from achieving perfect results. Producing high-level annotations for ML and post-correcting its results are therefore necessary. This is the purpose of TextAnnotator, which is entirely implemented in ExtJS and provides a range of interactive visualizations of annotations. In addition, it allows for flexibly integrating knowledge resources, e.g. in the course of post-processing named entity recognition. The paper describes TextAnnotator’s architecture together with three use cases: annotating temporal structures, argument structures and named entity linking."
}
• T. Uslu, A. Mehler, and D. Baumartz, “Computing Classifier-based Embeddings with the Help of text2ddc,” in Proceedings of the 20th International Conference on Computational Linguistics and Intelligent Text Processing, (CICLing 2019), 2019.
[BibTeX]

@inproceedings{Uslu:Mehler:Baumartz:2019,
author = "Uslu, Tolga and Mehler, Alexander and Baumartz, Daniel",
booktitle = "{Proceedings of the 20th International Conference on Computational Linguistics and Intelligent Text Processing, (CICLing 2019)}",
location = "La Rochelle, France",
series = "{CICLing 2019}",
title = "{Computing Classifier-based Embeddings with the Help of text2ddc}",
year = 2019
}
• T. Uslu, A. Mehler, C. Schulz, and D. Baumartz, “BigSense: a Word Sense Disambiguator for Big Data,” in Proceedings of the Digital Humanities 2019, (DH2019), 2019.
[BibTeX]

@inproceedings{Uslu:Mehler:Schulz:Baumartz:2019,
author = "Uslu, Tolga and Mehler, Alexander and Schulz, Clemens and Baumartz, Daniel",
booktitle = "{Proceedings of the Digital Humanities 2019, (DH2019)}",
location = "Utrecht, Netherlands",
series = "{DH2019}",
title = "{{BigSense}: a Word Sense Disambiguator for Big Data}",
year = 2019,
url={https://dev.clariah.nl/files/dh2019/boa/0199.html}
}
• W. Hemati and A. Mehler, “LSTMVoter: chemical named entity recognition using a conglomerate of sequence labeling tools,” Journal of Cheminformatics, vol. 11, iss. 1, p. 7, 2019.
[Abstract] [BibTeX]

Chemical and biomedical named entity recognition (NER) is an essential preprocessing task in natural language processing. The identification and extraction of named entities from scientific articles is also attracting increasing interest in many scientific disciplines. Locating chemical named entities in the literature is an essential step in chemical text mining pipelines for identifying chemical mentions, their properties, and relations as discussed in the literature. In this work, we describe an approach to the BioCreative V.5 challenge regarding the recognition and classification of chemical named entities. For this purpose, we transform the task of NER into a sequence labeling problem. We present a series of sequence labeling systems that we used, adapted and optimized in our experiments for solving this task. To this end, we experiment with hyperparameter optimization. Finally, we present LSTMVoter, a two-stage application of recurrent neural networks that integrates the optimized sequence labelers from our study into a single ensemble classifier.
@article{Hemati:Mehler:2019a,
abstract = "Chemical and biomedical named entity recognition (NER) is an essential preprocessing task in natural language processing. The identification and extraction of named entities from scientific articles is also attracting increasing interest in many scientific disciplines. Locating chemical named entities in the literature is an essential step in chemical text mining pipelines for identifying chemical mentions, their properties, and relations as discussed in the literature. In this work, we describe an approach to the BioCreative V.5 challenge regarding the recognition and classification of chemical named entities. For this purpose, we transform the task of NER into a sequence labeling problem. We present a series of sequence labeling systems that we used, adapted and optimized in our experiments for solving this task. To this end, we experiment with hyperparameter optimization. Finally, we present LSTMVoter, a two-stage application of recurrent neural networks that integrates the optimized sequence labelers from our study into a single ensemble classifier.",
author = "Hemati, Wahed and Mehler, Alexander",
day = "10",
doi = "10.1186/s13321-018-0327-2",
issn = "1758-2946",
journal = "Journal of Cheminformatics",
month = "Jan",
number = "1",
pages = "7",
title = "{{LSTMVoter}: chemical named entity recognition using a conglomerate of sequence labeling tools}",
url = "https://doi.org/10.1186/s13321-018-0327-2",
volume = "11",
year = "2019"
}
• G. Abrami, A. Mehler, and C. Spiekermann, “Graph-based Format for Modeling Multimodal Annotations in Virtual Reality by Means of VAnnotatoR,” in Proceedings of the 21st International Conference on Human-Computer Interaction, HCII 2019, Cham, 2019, pp. 351-358.
[Abstract] [BibTeX]

Projects in the field of Natural Language Processing (NLP), the Digital Humanities (DH) and related disciplines dealing with machine learning of complex relationships between data objects need annotations to obtain sufficiently rich training and test sets. The visualization of such data sets and their underlying Human Computer Interaction (HCI) are perennial problems of computer science. However, despite some success stories, the clarity of information presentation and the flexibility of the annotation process may decrease with the complexity of the underlying data objects and their relationships. In order to face this problem, the so-called VAnnotatoR was developed, as a flexible annotation tool using 3D glasses and augmented reality devices, which enables annotation and visualization in three-dimensional virtual environments. In addition, multimodal objects are annotated and visualized within a graph-based approach.
@InProceedings{Abrami:Mehler:Spiekermann:2019,
Author         = {Abrami, Giuseppe and Mehler, Alexander and Spiekermann, Christian},
Title          = {{Graph-based Format for Modeling Multimodal Annotations in Virtual Reality by Means of VAnnotatoR}},
BookTitle      = {Proceedings of the 21st International Conference on Human-Computer Interaction, HCII 2019},
Series         = {HCII 2019},
location       = {Orlando, Florida, USA},
editor   = {Stephanidis, Constantine and Antona, Margherita},
month     = {July},
publisher="Springer International Publishing",
pages="351--358",
abstract="Projects in the field of Natural Language Processing (NLP), the Digital Humanities (DH) and related disciplines dealing with machine learning of complex relationships between data objects need annotations to obtain sufficiently rich training and test sets. The visualization of such data sets and their underlying Human Computer Interaction (HCI) are perennial problems of computer science. However, despite some success stories, the clarity of information presentation and the flexibility of the annotation process may decrease with the complexity of the underlying data objects and their relationships. In order to face this problem, the so-called VAnnotatoR was developed, as a flexible annotation tool using 3D glasses and augmented reality devices, which enables annotation and visualization in three-dimensional virtual environments. In addition, multimodal objects are annotated and visualized within a graph-based approach.",
isbn="978-3-030-30712-7",
year           = 2019
}
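
The graph-based format described in the abstract can be pictured as typed nodes for multimodal objects and typed edges for their relations. The toy data model below is purely illustrative; its field names are invented and do not reproduce VAnnotatoR's actual serialization.

```python
# Toy illustration of a graph-based annotation format: multimodal objects as
# typed nodes, their relations as typed edges. All names are invented for
# illustration; VAnnotatoR's actual format is not reproduced here.
from dataclasses import dataclass, field

@dataclass
class Node:
    node_id: str
    modality: str   # e.g. "text", "image", "3d-object"
    payload: str    # URI or content reference

@dataclass
class Edge:
    source: str
    target: str
    relation: str   # e.g. "depicts", "refers_to"

@dataclass
class AnnotationGraph:
    nodes: dict = field(default_factory=dict)
    edges: list = field(default_factory=list)

    def add_node(self, node: Node):
        self.nodes[node.node_id] = node

    def link(self, source: str, target: str, relation: str):
        self.edges.append(Edge(source, target, relation))

g = AnnotationGraph()
g.add_node(Node("n1", "text", "doc:1#s5"))
g.add_node(Node("n2", "3d-object", "scene:room/table"))
g.link("n1", "n2", "refers_to")
```
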
• A. Mehler, T. Uslu, R. Gleim, and D. Baumartz, “text2ddc meets Literature – Ein Verfahren für die Analyse und Visualisierung thematischer Makrostrukturen,” in Proceedings of the 6th Digital Humanities Conference in the German-speaking Countries, DHd 2019, 2019.
[Poster][BibTeX]

@InProceedings{Mehler:Uslu:Gleim:Baumartz:2019,
Author         = {Mehler, Alexander and Uslu, Tolga and Gleim, Rüdiger and Baumartz, Daniel},
Title          = {{text2ddc meets Literature - Ein Verfahren für die Analyse und Visualisierung thematischer Makrostrukturen}},
BookTitle      = {Proceedings of the 6th Digital Humanities Conference in the German-speaking Countries, DHd 2019},
Series         = {DHd 2019},
location       = {Frankfurt, Germany},
year           = 2019
}
• G. Abrami, C. Spiekermann, and A. Mehler, “VAnnotatoR: Ein Werkzeug zur Annotation multimodaler Netzwerke in dreidimensionalen virtuellen Umgebungen,” in Proceedings of the 6th Digital Humanities Conference in the German-speaking Countries, DHd 2019, 2019.
[Poster][BibTeX]

@InProceedings{Abrami:Spiekermann:Mehler:2019,
Author         = {Abrami, Giuseppe and Spiekermann, Christian and Mehler, Alexander},
Title          = {{VAnnotatoR: Ein Werkzeug zur Annotation multimodaler Netzwerke in dreidimensionalen virtuellen Umgebungen}},
BookTitle      = {Proceedings of the 6th Digital Humanities Conference in the German-speaking Countries, DHd 2019},
Series   = {DHd 2019},
location       = {Frankfurt, Germany},
year           = 2019
}
• W. Hemati, A. Mehler, T. Uslu, and G. Abrami, “Der TextImager als Front- und Backend für das verteilte NLP von Big Digital Humanities Data,” in Proceedings of the 6th Digital Humanities Conference in the German-speaking Countries, DHd 2019, 2019.
[Poster][BibTeX]

@InProceedings{Hemati:Mehler:Uslu:Abrami:2019,
Author         = {Hemati, Wahed and Mehler, Alexander and Uslu, Tolga and Abrami, Giuseppe},
Title          = {{Der TextImager als Front- und Backend für das verteilte NLP von Big Digital Humanities Data}},
BookTitle      = {Proceedings of the 6th Digital Humanities Conference in the German-speaking Countries, DHd 2019},
Series         = {DHd 2019},
location       = {Frankfurt, Germany},
year           = 2019
}
• R. Gleim, S. Eger, A. Mehler, T. Uslu, W. Hemati, A. Lücking, A. Henlein, S. Kahlsdorf, and A. Hoenen, “A practitioner’s view: a survey and comparison of lemmatization and morphological tagging in German and Latin,” Journal of Language Modeling, 2019.
[BibTeX]

@article{Gleim:Eger:Mehler:2019,
author    = {Gleim, R\"{u}diger and Eger, Steffen and Mehler, Alexander and Uslu, Tolga and Hemati, Wahed and L\"{u}cking, Andy and Henlein, Alexander and Kahlsdorf, Sven and Hoenen, Armin},
title     = {A practitioner's view: a survey and comparison of lemmatization and morphological tagging in German and Latin},
journal   = {Journal of Language Modeling},
year      = {2019},
doi = {10.15398/jlm.v7i1.205},
url = {http://jlm.ipipan.waw.pl/index.php/JLM/article/view/205}
}

### 2018 (37)

• A. Hoenen, “Multi Modal Distance – An Approach to Stemma Generation With Weighting,” in Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018), Miyazaki, Japan, 2018.
[BibTeX]

@InProceedings{HOENEN18.285,
author = {Armin Hoenen},
title = "{Multi Modal Distance - An Approach to Stemma Generation With Weighting}",
booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)},
year = {2018},
month = {May 7-12, 2018},
editor = {Nicoletta Calzolari (Conference chair) and Khalid Choukri and Christopher Cieri and Thierry Declerck and Sara Goggi and Koiti Hasida and Hitoshi Isahara and Bente Maegaard and Joseph Mariani and Hélène Mazo and Asuncion Moreno and Jan Odijk and Stelios Piperidis and Takenobu Tokunaga},
publisher = {European Language Resources Association (ELRA)},
isbn = {979-10-95546-00-9},
url={http://www.lrec-conf.org/proceedings/lrec2018/pdf/285.pdf},
language = {english}
}
• A. Hoenen, “From Manuscripts to Archetypes through Iterative Clustering,” in Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018), Miyazaki, Japan, 2018.
[BibTeX]

@InProceedings{HOENEN18.314,
author = {Armin Hoenen},
title = "{From Manuscripts to Archetypes through Iterative Clustering}",
booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)},
year = {2018},
month = {May 7-12, 2018},
editor = {Nicoletta Calzolari (Conference chair) and Khalid Choukri and Christopher Cieri and Thierry Declerck and Sara Goggi and Koiti Hasida and Hitoshi Isahara and Bente Maegaard and Joseph Mariani and Hélène Mazo and Asuncion Moreno and Jan Odijk and Stelios Piperidis and Takenobu Tokunaga},
publisher = {European Language Resources Association (ELRA)},
isbn = {979-10-95546-00-9},
url={http://www.lrec-conf.org/proceedings/lrec2018/pdf/314.pdf},
language = {english}
}
• A. Hoenen and N. Schenk, “Knowing the Author by the Company His Words Keep,” in Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018), Miyazaki, Japan, 2018.
[BibTeX]

@InProceedings{HOENEN18.349,
author = {Armin Hoenen and Niko Schenk},
title = "{Knowing the Author by the Company His Words Keep}",
booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)},
year = {2018},
month = {May 7-12, 2018},
editor = {Nicoletta Calzolari (Conference chair) and Khalid Choukri and Christopher Cieri and Thierry Declerck and Sara Goggi and Koiti Hasida and Hitoshi Isahara and Bente Maegaard and Joseph Mariani and Hélène Mazo and Asuncion Moreno and Jan Odijk and Stelios Piperidis and Takenobu Tokunaga},
publisher = {European Language Resources Association (ELRA)},
isbn = {979-10-95546-00-9},
url={http://www.lrec-conf.org/proceedings/lrec2018/pdf/349.pdf},
language = {english}
}
• A. Hoenen, “Attempts at Visualization of Etymological Information,” in Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018), Miyazaki, Japan, 2018.
[BibTeX]

@InProceedings{HOENEN18.9,
author = {Armin Hoenen},
title = {Attempts at Visualization of Etymological Information},
booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)},
year = {2018},
month = {may},
date = {7-12},
location = {Miyazaki, Japan},
editor = {Kernerman, Ilan and Krek, Simon},
publisher = {European Language Resources Association (ELRA)},
isbn = {979-10-95546-28-3},
language = {english},
url={http://lrec-conf.org/workshops/lrec2018/W33/pdf/book_of_proceedings.pdf}
}
• A. Hoenen, “Tools, evaluation and preprocessing for stemmatology,” PhD thesis, Goethe University Frankfurt, 2018.
[BibTeX]

@phdthesis{Hoenen2018,
type        = {Dissertation},
author      = {Armin Hoenen},
title       = {Tools, evaluation and preprocessing for stemmatology},
school      = {Goethe University Frankfurt},
year        = {2018}
}
• A. Hoenen and L. Samushia, “Principles Aiding in Reading Abbreviations in Old Georgian and Latin,” in DHd 2018, 2018.
[BibTeX]

@InProceedings{Hoenen:Samushia:2018dhd,
Author         = {Hoenen, Armin and Samushia, Lela},
Title          = {{Principles Aiding in Reading Abbreviations in Old Georgian and Latin}},
BookTitle      = {DHd 2018},
year           = 2018
}
• A. Hoenen, “Wikipedia Mention Graphs by Example,” in EADH 2018, 2018.
[BibTeX]

@InProceedings{Hoenen:2018eadh,
Author         = {Hoenen, Armin},
Title          = {{Wikipedia Mention Graphs by Example}},
BookTitle      = {EADH 2018},
year           = 2018
}
• A. Hoenen, “Annotated Timelines and Stacked Area Plots for Visualization in Lexicography,” in Elexis workshop at EADH 2018, 2018.
[BibTeX]

@InProceedings{Hoenen:2018elexis,
Author         = {Hoenen, Armin},
Title          = {{Annotated Timelines and Stacked Area Plots for Visualization in Lexicography}},
BookTitle      = {Elexis workshop at EADH 2018},
url            = {https://lexdhai.insight-centre.org/Lex_DH__AI_2018_paper_2.pdf},
year           = 2018
}
• A. Hoenen, “Recurrence Analysis Function, a Dynamic Heatmap for the Visualization of Verse Text and Beyond,” in Visualisierung sprachlicher Daten: Visual Linguistics – Praxis – Tools, Heidelberg University Press, 2018.
[Abstract] [BibTeX]

The Recurrence Analysis Function (ReAF) is a cross-linguistic visualization tool for (historical) verse text, especially handwritten epics. It can also provide a general visualization of various aspects of prose text. It aims to enable intuitive understanding through explorative data analysis of historical, especially bardic-oral texts. The assumption behind this is that bardic/born-oral and non-bardic/born-written texts differ drastically in the way they employ repetition. The ReAF in its first implementation, as presented here, is a language-independent tool that permits the visual exploration of such structures. Firstly, general aspects and formal characteristics of oral verse text are characterized, before the main technical details and some additional applications of the ReAF are explained and illustrated.
@InBook{Hoenen:2018,
Author         = {Hoenen, Armin},
Title          = {Recurrence Analysis Function, a Dynamic Heatmap for the Visualization of Verse Text and Beyond},
BookTitle      = {Visualisierung sprachlicher Daten: Visual Linguistics – Praxis – Tools},
Publisher      = {Heidelberg University Press},
abstract       = {The Recurrence Analysis Function (ReAF) is a cross-linguistic visualization tool for (historical) verse text, especially handwritten epics. It can also provide a general visualization of various aspects of prose text. It aims to enable intuitive understanding through explorative data analysis of historical, especially bardic-oral texts. The assumption behind this is that bardic/born-oral and non-bardic/born-written texts differ drastically in the way they employ repetition. The ReAF in its first implementation, as presented here, is a language-independent tool that permits the visual exploration of such structures. Firstly, general aspects and formal characteristics of oral verse text are characterized, before the main technical details and some additional applications of the ReAF are explained and illustrated.},
year           = 2018,
editor         = {Bubenhofer, Noah and Kupietz, Marc},
address        = {Heidelberg},
}
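
The visualization principle underlying such a tool, a recurrence plot over a token sequence, can be reproduced generically in a few lines; the following is a sketch of the general idea only, not the ReAF implementation.

```python
# Generic token recurrence plot: cell (i, j) is 1 iff token i equals token j.
# Repetition-heavy (e.g. bardic-oral) text shows up as dense off-diagonal
# structure. Illustrates the idea only; this is not the ReAF tool.
import numpy as np
import matplotlib.pyplot as plt

tokens = "sing o goddess the anger sing o goddess the wrath".split()
R = np.array([[int(a == b) for b in tokens] for a in tokens])

plt.imshow(R, cmap="Greys", interpolation="nearest")
plt.title("Token recurrence plot")
plt.xlabel("token index")
plt.ylabel("token index")
plt.show()
```
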
• T. Lokot, A. Mehler, and O. Abramov, “On the limit value of compactness of some graph classes,” PLOS ONE, vol. 13, iss. 11, pp. 1-8, 2018.
[Abstract] [BibTeX]

In this paper, we study the limit of compactness which is a graph index originally introduced for measuring structural characteristics of hypermedia. Applying compactness to large-scale small-world graphs, Mehler (2008) observed its limit behaviour to be equal to 1. The striking question concerning this finding was whether this limit behaviour resulted from the specifics of small-world graphs or was simply an artefact. In this paper, we determine the necessary and sufficient conditions for any sequence of connected graphs resulting in a limit value of CB = 1 which can be generalized with some consideration for the case of disconnected graph classes (Theorem 3). This result can be applied to many well-known classes of connected graphs. Here, we illustrate it by considering four examples. In fact, our proof-theoretical approach allows for quickly obtaining the limit value of compactness for many graph classes sparing computational costs.
@article{Lokot:Mehler:Abramov:2018,
author = {Lokot, Tatiana and Mehler, Alexander and Abramov, Olga},
journal = {PLOS ONE},
publisher = {Public Library of Science},
title = {On the limit value of compactness of some graph classes},
year = {2018},
month = {11},
volume = {13},
url = {https://doi.org/10.1371/journal.pone.0207536},
pages = {1-8},
abstract = {In this paper, we study the limit of compactness which is a graph index originally introduced for measuring structural characteristics of hypermedia. Applying compactness to large-scale small-world graphs, Mehler (2008) observed its limit behaviour to be equal to 1. The striking question concerning this finding was whether this limit behaviour resulted from the specifics of small-world graphs or was simply an artefact. In this paper, we determine the necessary and sufficient conditions for any sequence of connected graphs resulting in a limit value of CB = 1 which can be generalized with some consideration for the case of disconnected graph classes (Theorem 3). This result can be applied to many well-known classes of connected graphs. Here, we illustrate it by considering four examples. In fact, our proof-theoretical approach allows for quickly obtaining the limit value of compactness for many graph classes sparing computational costs.},
number = {11},
doi = {10.1371/journal.pone.0207536}
}
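
For orientation, the compactness index studied here goes back to Botafogo et al.'s hypertext metrics; restated from memory (consult the paper for the exact notation), it reads:

```latex
% Compactness of a graph with n nodes; d_{ij} is the converted distance
% between nodes i and j (disconnected pairs receive a penalty constant K),
% Max = (n^2 - n) K and Min = n^2 - n. C_B ranges over [0, 1]; the paper
% characterizes the graph sequences for which C_B tends to 1.
C_B = \frac{\mathrm{Max} - \sum_{i} \sum_{j} d_{ij}}{\mathrm{Max} - \mathrm{Min}}
```
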
• E. Rutherford, W. Hemati, and A. Mehler, “Corpus2Wiki: A MediaWiki based Annotation & Visualisation Tool for the Digital Humanities,” in INF-DH-2018, Bonn, 2018.
[BibTeX]

@inproceedings{Rutherford:et:al:2018,
author = {Rutherford, Eleanor AND Hemati, Wahed AND Mehler, Alexander},
title = {{Corpus2Wiki}: A MediaWiki based Annotation \& Visualisation Tool for the Digital Humanities},
booktitle = {INF-DH-2018},
year = {2018},
editor = {Burghardt, Manuel AND Müller-Birn, Claudia},
publisher = {Gesellschaft für Informatik e.V.},
}
• G. Abrami, A. Mehler, P. Helfrich, and E. Rieb, “TextAnnotator: A Browser-based Framework for Annotating Textual Data in Digital Humanities,” in Proceedings of the Digital Humanities Austria 2018, 2018.
[BibTeX]

@InProceedings{Abrami:et:al:2018,
Author = {Giuseppe Abrami and Alexander Mehler and Philipp Helfrich and Elias Rieb},
Title = {{TextAnnotator}: A Browser-based Framework for Annotating Textual Data in Digital Humanities},
BookTitle = {Proceedings of the Digital Humanities Austria 2018},
location = {Salzburg, Austria},
year = 2018
}
• S. Ahmed and A. Mehler, “Resource-Size matters: Improving Neural Named Entity Recognition with Optimized Large Corpora,” in Proceedings of the 17th IEEE International Conference on Machine Learning and Applications (ICMLA), 2018.
[Abstract] [BibTeX]

This study improves the performance of neural named entity recognition by a margin of up to 11% in terms of F-score on the example of a low-resource language like German, thereby outperforming existing baselines and establishing a new state-of-the-art on each single open-source dataset (CoNLL 2003, GermEval 2014 and Tübingen Treebank 2018). Rather than designing deeper and wider hybrid neural architectures, we gather all available resources and perform a detailed optimization and grammar-dependent morphological processing consisting of lemmatization and part-of-speech tagging prior to exposing the raw data to any training process. We test our approach in a threefold monolingual experimental setup of a) single, b) joint, and c) optimized training and shed light on the dependency of downstream-tasks on the size of corpora used to compute word embeddings.
@InProceedings{Ahmed:Mehler:2018,
author = {Sajawel Ahmed and Alexander Mehler},
title = {{Resource-Size matters: Improving Neural Named Entity Recognition with Optimized Large Corpora}},
abstract = {This study improves the performance of neural named entity recognition by a margin of up to 11% in terms of F-score on the example of a low-resource language like German, thereby outperforming existing baselines and establishing a new state-of-the-art on each single open-source dataset (CoNLL 2003, GermEval 2014 and Tübingen Treebank 2018). Rather than designing deeper and wider hybrid neural architectures, we gather all available resources and perform a detailed optimization and grammar-dependent morphological processing consisting of lemmatization and part-of-speech tagging prior to exposing the raw data to any training process. We test our approach in a threefold monolingual experimental setup of a) single, b) joint, and c) optimized training and shed light on the dependency of downstream-tasks on the size of corpora used to compute word embeddings.},
booktitle = {Proceedings of the 17th IEEE International Conference on Machine Learning and Applications (ICMLA)},
location = {Orlando, Florida, USA},
pdf = {https://arxiv.org/pdf/1807.10675.pdf},
year = 2018
}
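
The preprocessing regime described in the abstract, lemmatization and POS tagging before any embedding training, can be sketched as follows. spaCy and gensim are stand-ins chosen for illustration, not the tool chain used in the study.

```python
# Sketch of grammar-dependent preprocessing prior to embedding training:
# lemmatize and POS-tag raw German text, then train word vectors on the
# normalized corpus. spaCy/gensim are illustrative stand-ins only.
import spacy
from gensim.models import Word2Vec

nlp = spacy.load("de_core_news_sm")  # small German pipeline (assumed installed)

raw_sentences = [
    "Die Häuser standen am Fluss.",
    "Das Haus steht an einem Fluss.",
]

# Replace each token by "lemma_POS" so that inflected forms collapse.
normalized = [
    [f"{tok.lemma_}_{tok.pos_}" for tok in nlp(s) if not tok.is_punct]
    for s in raw_sentences
]

model = Word2Vec(sentences=normalized, vector_size=100, window=5, min_count=1)
print(sorted(model.wv.key_to_index)[:10])  # inspect the normalized vocabulary
```
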
• C. Weiland, C. Driller, M. Koch, M. Schmidt, G. Abrami, S. Ahmed, A. Mehler, A. Pachzelt, G. Kasperek, A. Hausinger, and T. Hörnschemeyer, “BioFID, a platform to enhance accessibility of biodiversity data,” in Proceedings of the 10th International Conference on Ecological Informatics, 2018.
[BibTeX]

@inproceedings{Weiland:et:al:2018,
author = {Claus Weiland and Christine Driller and Markus Koch and Marco Schmidt and Giuseppe Abrami and Sajawel Ahmed and Alexander Mehler and Adrian Pachzelt and Gerwin Kasperek and Angela Hausinger and Thomas Hörnschemeyer},
title = {{BioFID}, a platform to enhance accessibility of biodiversity data},
BookTitle = {Proceedings of the 10th International Conference on Ecological Informatics},
year = {2018},
location = {Jena, Germany}
}
• A. Kett, G. Abrami, A. Mehler, and C. Spiekermann, “Resources2City Explorer: A System for Generating Interactive Walkable Virtual Cities out of File Systems,” in Proceedings of the 31st ACM User Interface Software and Technology Symposium, 2018.
[Abstract] [Poster][BibTeX]

We present Resources2City Explorer (R2CE), a tool for representing file systems as interactive, walkable virtual cities. R2CE visualizes file systems based on concepts of spatial, 3D information processing. For this purpose, it extends the range of functions of conventional file browsers considerably. Visual elements in a city generated by R2CE represent (relations of) objects of the underlying file system. The paper describes the functional spectrum of R2CE and illustrates it by visualizing a sample of 940 files.
@InProceedings{Kett:et:al:2018,
Author = {Attila Kett and Giuseppe Abrami and Alexander Mehler and Christian Spiekermann},
Title = {{Resources2City Explorer}: A System for Generating Interactive Walkable Virtual Cities out of File Systems},
BookTitle = {Proceedings of the 31st ACM User Interface Software and Technology Symposium},
location = {Berlin, Germany},
abstract = {We present Resources2City Explorer (R2CE), a tool for representing file systems as interactive, walkable virtual cities. R2CE visualizes file systems based on concepts of spatial, 3D information processing. For this purpose, it extends the range of functions of conventional file browsers considerably. Visual elements in a city generated by R2CE represent (relations of) objects of the underlying file system. The paper describes the functional spectrum of R2CE and illustrates it by visualizing a sample of 940 files.},
year = 2018
}
• A. Lücking, “Witness-loaded and Witness-free Demonstratives,” in Atypical Demonstratives, M. Coniglio, A. Murphy, E. Schlachter, and T. Veenstra, Eds., De Gruyter, 2018.
[BibTeX]

@InCollection{Luecking:2018:a,
author =     {Andy L\"{u}cking},
title =     {Witness-loaded and Witness-free Demonstratives},
booktitle =     {Atypical Demonstratives},
publisher =     {De Gruyter},
year =     2018,
editor =     {Marco Coniglio and Andrew Murphy and Eva Schlachter
and Tonjes Veenstra},
isbn =     {978-3-11-056029-9},
url={https://www.degruyter.com/view/product/495228},

}
• A. Lücking and J. Ginzburg, “‘Most people but not Bill’: integrating sets, individuals and negation into a cognitively plausible account of noun phrase interpretation,” in Proceedings of Cognitive Structures: Linguistic, Philosophical and Psychological Perspectives, 2018.
[BibTeX]

@InProceedings{Luecking:Ginzburg:2018,
title =        {`Most people but not {Bill}': integrating sets,
individuals and negation into a cognitively
plausible account of noun phrase interpretation},
booktitle =    {Proceedings of Cognitive Structures: Linguistic,
Philosophical and Psychological Perspectives},
series =       {CoSt'18},
author =       {L\"{u}cking, Andy and Ginzburg, Jonathan},
year =         2018
}
• T. Uslu and A. Mehler, “PolyViz: a Visualization System for a Special Kind of Multipartite Graphs,” in Proceedings of the IEEE VIS 2018, 2018.
[BibTeX]

@InProceedings{Uslu:Mehler:2018,
Author = {Tolga Uslu and Alexander Mehler},
Title = {{PolyViz}: a Visualization System for a Special Kind of Multipartite Graphs},
BookTitle = {Proceedings of the IEEE VIS 2018},
Series = {IEEE VIS 2018},
location = {Berlin, Germany},
year = 2018
}
• D. Baumartz, T. Uslu, and A. Mehler, “LTV: Labeled Topic Vector,” in Proceedings of COLING 2018, the 27th International Conference on Computational Linguistics: System Demonstrations, August 20-26, Santa Fe, New Mexico, USA, 2018.
[Abstract] [BibTeX]

In this paper, we present LTV, a website and an API that generate labeled topic classifications based on the Dewey Decimal Classification (DDC), an international standard for topic classification in libraries. We introduce nnDDC, a largely language-independent neural network-based classifier for DDC-related topic classification, which we optimized using a wide range of linguistic features to achieve an F-score of 87.4%. To show that our approach is language-independent, we evaluate nnDDC using up to 40 different languages. We derive a topic model based on nnDDC, which generates probability distributions over semantic units for any input on sense-, word- and text-level. Unlike related approaches, however, these probabilities are estimated by means of nnDDC so that each dimension of the resulting vector representation is uniquely labeled by a DDC class. In this way, we introduce a neural network-based Classifier-Induced Semantic Space (nnCISS).
@InProceedings{Baumartz:Uslu:Mehler:2018,
author    = {Daniel Baumartz and Tolga Uslu and Alexander Mehler},
title     = {{LTV}: Labeled Topic Vector},
booktitle = {Proceedings of {COLING 2018}, the 27th International Conference on Computational Linguistics: System Demonstrations, August 20-26},
year      = {2018},
address   = {Santa Fe, New Mexico, USA},
publisher = {The COLING 2018 Organizing Committee},
abstract  = {In this paper, we present LTV, a website and an API that generate labeled topic classifications based on the Dewey Decimal Classification (DDC), an international standard for topic classification in libraries. We introduce nnDDC, a largely language-independent neural network-based classifier for DDC-related topic classification, which we optimized using a wide range of linguistic features to achieve an F-score of 87.4%. To show that our approach is language-independent, we evaluate nnDDC using up to 40 different languages. We derive a topic model based on nnDDC, which generates probability distributions over semantic units for any input on sense-, word- and text-level. Unlike related approaches, however, these probabilities are estimated by means of nnDDC so that each dimension of the resulting vector representation is uniquely labeled by a DDC class. In this way, we introduce a neural network-based Classifier-Induced Semantic Space (nnCISS).},
}
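
The classifier-induced semantic space described here amounts to using a topic classifier's probability distribution as a vector whose dimensions carry class labels. The schematic sketch below uses an invented keyword scorer and invented DDC-style labels in place of nnDDC.

```python
# Schematic classifier-induced semantic space: a classifier's softmax output
# becomes a vector whose dimensions are labeled by topic classes. The scorer
# and the DDC-style labels are invented stand-ins for nnDDC.
import numpy as np

DDC_LABELS = ["004 Computer science", "570 Biology", "800 Literature"]
KEYWORDS = [("computer", "software"), ("species", "cell"), ("novel", "poem")]

def softmax(z):
    e = np.exp(z - z.max())
    return e / e.sum()

def toy_scores(text):
    """Count keyword hits per class (illustration only)."""
    t = text.lower()
    return np.array([sum(w in t for w in ws) for ws in KEYWORDS], dtype=float)

def embed(text):
    """Map a text to a labeled probability vector via the classifier."""
    return dict(zip(DDC_LABELS, softmax(toy_scores(text))))

print(embed("A novel about software"))
```
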
• C. Driller, M. Koch, M. Schmidt, C. Weiland, T. Hörnschemeyer, T. Hickler, G. Abrami, S. Ahmed, R. Gleim, W. Hemati, T. Uslu, A. Mehler, A. Pachzelt, J. Rexhepi, T. Risse, J. Schuster, G. Kasperek, and A. Hausinger, “Workflow and Current Achievements of BIOfid, an Information Service Mobilizing Biodiversity Data from Literature Sources,” Biodiversity Information Science and Standards, vol. 2, p. e25876, 2018.
[Abstract] [BibTeX]

BIOfid is a specialized information service currently being developed to mobilize biodiversity data dormant in printed historical and modern literature and to offer a platform for open access journals on the science of biodiversity. Our team of librarians, computer scientists and biologists produce high-quality text digitizations, develop new text-mining tools and generate detailed ontologies enabling semantic text analysis and semantic search by means of user-specific queries. In a pilot project we focus on German publications on the distribution and ecology of vascular plants, birds, moths and butterflies extending back to the Linnaeus period about 250 years ago. The three organism groups have been selected according to current demands of the relevant research community in Germany. The text corpus defined for this purpose comprises over 400 volumes with more than 100,000 pages to be digitized and will be complemented by journals from other digitization projects, copyright-free and project-related literature. With TextImager (Natural Language Processing & Text Visualization) and TextAnnotator (Discourse Semantic Annotation) we have already extended and launched tools that focus on the text-analytical section of our project. Furthermore, taxonomic and anatomical ontologies elaborated by us for the taxa prioritized by the project’s target group - German institutions and scientists active in biodiversity research - are constantly improved and expanded to maximize scientific data output. Our poster describes the general workflow of our project ranging from literature acquisition via software development, to data availability on the BIOfid web portal (http://biofid.de/), and the implementation into existing platforms which serve to promote global accessibility of biodiversity data.
@article{Driller:et:al:2018,
author = {Christine Driller and Markus Koch and Marco Schmidt and Claus Weiland and Thomas Hörnschemeyer and Thomas Hickler and Giuseppe Abrami and Sajawel Ahmed and Rüdiger Gleim and Wahed Hemati and Tolga Uslu and Alexander Mehler and Adrian Pachzelt and Jashar Rexhepi and Thomas Risse and Janina Schuster and Gerwin Kasperek and Angela Hausinger},
title = {Workflow and Current Achievements of BIOfid, an Information Service Mobilizing Biodiversity Data from Literature Sources},
volume = {2},
year = {2018},
doi = {10.3897/biss.2.25876},
publisher = {Pensoft Publishers},
abstract = {BIOfid is a specialized information service currently being developed to mobilize biodiversity data dormant in printed historical and modern literature and to offer a platform for open access journals on the science of biodiversity. Our team of librarians, computer scientists and biologists produce high-quality text digitizations, develop new text-mining tools and generate detailed ontologies enabling semantic text analysis and semantic search by means of user-specific queries. In a pilot project we focus on German publications on the distribution and ecology of vascular plants, birds, moths and butterflies extending back to the Linnaeus period about 250 years ago. The three organism groups have been selected according to current demands of the relevant research community in Germany. The text corpus defined for this purpose comprises over 400 volumes with more than 100,000 pages to be digitized and will be complemented by journals from other digitization projects, copyright-free and project-related literature. With TextImager (Natural Language Processing & Text Visualization) and TextAnnotator (Discourse Semantic Annotation) we have already extended and launched tools that focus on the text-analytical section of our project. Furthermore, taxonomic and anatomical ontologies elaborated by us for the taxa prioritized by the project’s target group - German institutions and scientists active in biodiversity research - are constantly improved and expanded to maximize scientific data output. Our poster describes the general workflow of our project ranging from literature acquisition via software development, to data availability on the BIOfid web portal (http://biofid.de/), and the implementation into existing platforms which serve to promote global accessibility of biodiversity data.},
pages = {e25876},
URL = {https://doi.org/10.3897/biss.2.25876},
eprint = {https://doi.org/10.3897/biss.2.25876},
journal = {Biodiversity Information Science and Standards}
}
• A. Mehler, G. Abrami, C. Spiekermann, and M. Jostock, “VAnnotatoR: A Framework for Generating Multimodal Hypertexts,” in Proceedings of the 29th ACM Conference on Hypertext and Social Media, New York, NY, USA, 2018.
[BibTeX]

@InProceedings{Mehler:Abrami:Spiekermann:Jostock:2018,
author = {Mehler, Alexander and Abrami, Giuseppe and Spiekermann, Christian and Jostock, Matthias},
title = {{VAnnotatoR}: {A} Framework for Generating Multimodal Hypertexts},
booktitle = {Proceedings of the 29th ACM Conference on Hypertext and Social Media},
series = {Proceedings of the 29th ACM Conference on Hypertext and Social Media (HT '18)},
year = {2018},
location = {Baltimore, Maryland},
publisher = {ACM},
address = {New York, NY, USA},
pdf = {http://delivery.acm.org/10.1145/3210000/3209572/p150-mehler.pdf}
}
• W. Hemati, A. Mehler, T. Uslu, D. Baumartz, and G. Abrami, “Evaluating and Integrating Databases in the Area of NLP,” in International Quantitative Linguistics Conference (QUALICO 2018), 2018.
[Poster][BibTeX]

@inproceedings{Hemati:Mehler:Uslu:Baumartz:Abrami:2018,
author={Wahed Hemati and Alexander Mehler and Tolga Uslu and Daniel Baumartz and Giuseppe Abrami},
title={Evaluating and Integrating Databases in the Area of {NLP}},
booktitle={International Quantitative Linguistics Conference (QUALICO 2018)},
year={2018},
location={Wroclaw, Poland}
}
• G. Abrami, G. Boden, and L. Gleiß, “World of the Khwe Bushmen: Accessing Khwe Cultural Heritage data by means of a digital ontology based on OWLnotator,” in Proceedings of the Digital Humanities 2018, 2018.
[BibTeX]

@InProceedings{Abrami:Boden:Gleiss:2018,
Author         = {Abrami, Giuseppe and Boden, Gertrud and Glei\ss{},
Lisa},
Title          = {{World of the Khwe Bushmen: Accessing Khwe Cultural
Heritage data by means of a digital ontology based on
OWLnotator}},
BookTitle      = {Proceedings of the Digital Humanities 2018},
Series         = {DH2018},
location       = {Mexico City, Mexico},
year           = 2018
}
• A. Mehler, W. Hemati, R. Gleim, and D. Baumartz, “VienNA: Auf dem Weg zu einer Infrastruktur für die verteilte interaktive evolutionäre Verarbeitung natürlicher Sprache,” in Forschungsinfrastrukturen und digitale Informationssysteme in der germanistischen Sprachwissenschaft, H. Lobin, R. Schneider, and A. Witt, Eds., Berlin: De Gruyter, 2018, vol. 6.
[BibTeX]

@InCollection{Mehler:Hemati:Gleim:Baumartz:2018,
Author         = {Alexander Mehler and Wahed Hemati and Rüdiger Gleim
and Daniel Baumartz},
Title          = {{VienNA: }{Auf dem Weg zu einer Infrastruktur für die verteilte
interaktive evolutionäre Verarbeitung natürlicher
Sprache}},
BookTitle      = {Forschungsinfrastrukturen und digitale
Informationssysteme in der germanistischen
Sprachwissenschaft },
Publisher      = {De Gruyter},
Editor         = {Henning Lobin and Roman Schneider and Andreas Witt},
Volume         = {6},
year           = 2018
}
• A. Mehler, W. Hemati, T. Uslu, and A. Lücking, “A Multidimensional Model of Syntactic Dependency Trees for Authorship Attribution,” in Quantitative analysis of dependency structures, J. Jiang and H. Liu, Eds., Berlin/New York: De Gruyter, 2018.
[Abstract] [BibTeX]

In this chapter we introduce a multidimensional model of syntactic dependency trees. Our ultimate goal is to generate fingerprints of such trees to predict the author of the underlying sentences. The chapter makes a first attempt to create such fingerprints for sentence categorization via the detour of text categorization. We show that at text level, aggregated dependency structures actually provide information about authorship. At the same time, we show that this does not hold for topic detection. We evaluate our model using a quarter of a million sentences collected in two corpora: the first is sampled from literary texts, the second from Wikipedia articles. As a second finding of our approach, we show that quantitative models of dependency structure do not yet allow for detecting syntactic alignment in written communication. We conclude that this is mainly due to effects of lexical alignment on syntactic alignment.
@InCollection{Mehler:Hemati:Uslu:Luecking:2018,
Author         = {Alexander Mehler and Wahed Hemati and Tolga Uslu and
Andy Lücking},
Title          = {A Multidimensional Model of Syntactic Dependency Trees
for Authorship Attribution},
BookTitle      = {Quantitative analysis of dependency structures},
Publisher      = {De Gruyter},
Editor         = {Jingyang Jiang and Haitao Liu},
abstract       = {Abstract: In this chapter we introduce a
multidimensional model of syntactic dependency trees.
Our ultimate goal is to generate fingerprints of such
trees to predict the author of the underlying
sentences. The chapter makes a first attempt to create
such fingerprints for sentence categorization via the
detour of text categorization. We show that at text
level, aggregated dependency structures actually
provide information about authorship. At the same time,
we show that this does not hold for topic detection. We
evaluate our model using a quarter of a million
sentences collected in two corpora: the first is
sampled from literary texts, the second from Wikipedia
articles. As a second finding of our approach, we show
that quantitative models of dependency structure do not
yet allow for detecting syntactic alignment in written
communication. We conclude that this is mainly due to
effects of lexical alignment on syntactic alignment.},
keywords       = {Dependency structure, Authorship attribution, Text
categorization, Syntactic Alignment},
year           = 2018
}
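
The fingerprinting idea in the abstract above can be made concrete with a small sketch: turn each dependency tree into a handful of numeric features that can later be aggregated per text. This is only an illustration of the general approach, not the chapter's multidimensional model; the feature names and the toy input format are invented here.

```python
from collections import Counter

def tree_features(heads):
    """Toy structural features of one dependency tree.

    `heads` maps a 1-based token index to the index of its head
    (0 denotes the root). This is NOT the chapter's model, only a
    minimal illustration of turning a tree into a numeric fingerprint.
    """
    distances = [abs(i - h) for i, h in heads.items() if h != 0]

    def depth(i):
        # number of steps from token i up to the root
        d = 0
        while heads[i] != 0:
            i = heads[i]
            d += 1
        return d

    depths = [depth(i) for i in heads]
    out_degree = Counter(h for h in heads.values() if h != 0)  # dependents per head
    return {
        "tokens": len(heads),
        "mean_dep_distance": sum(distances) / max(len(distances), 1),
        "max_depth": max(depths),
        "mean_out_degree": sum(out_degree.values()) / max(len(out_degree), 1),
    }

# Toy sentence: "She saw him" with 'saw' (token 2) as root.
print(tree_features({1: 2, 2: 0, 3: 2}))
```

In a real setting such per-sentence vectors would be aggregated over a whole text and fed to a classifier.
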
• T. Uslu, A. Mehler, and D. Meyer, “LitViz: Visualizing Literary Data by Means of text2voronoi,” in Proceedings of the Digital Humanities 2018, 2018.
[BibTeX]

@InProceedings{Uslu:Mehler:Meyer:2018,
Author         = {Tolga Uslu and Alexander Mehler and Dirk Meyer},
Title          = {{{LitViz}: Visualizing Literary Data by Means of
text2voronoi}},
BookTitle      = {Proceedings of the Digital Humanities 2018},
Series         = {DH2018},
location       = {Mexico City, Mexico},
year           = 2018
}
• C. Spiekermann, G. Abrami, and A. Mehler, “VAnnotatoR: a Gesture-driven Annotation Framework for Linguistic and Multimodal Annotation,” in Proceedings of the Annotation, Recognition and Evaluation of Actions (AREA 2018) Workshop, 2018.
[BibTeX]

@InProceedings{Spiekerman:Abrami:Mehler:2018,
Author         = {Christian Spiekermann and Giuseppe Abrami and
Alexander Mehler},
Title          = {{VAnnotatoR}: a Gesture-driven Annotation Framework
for Linguistic and Multimodal Annotation},
BookTitle      = {Proceedings of the Annotation, Recognition and
Evaluation of Actions (AREA 2018) Workshop},
Series         = {AREA},
location       = {Miyazaki, Japan},
year           = 2018
}
• T. Uslu, L. Miebach, S. Wolfsgruber, M. Wagner, K. Fließbach, R. Gleim, W. Hemati, A. Henlein, and A. Mehler, “Automatic Classification in Memory Clinic Patients and in Depressive Patients,” in Proceedings of Resources and ProcessIng of linguistic, para-linguistic and extra-linguistic Data from people with various forms of cognitive/psychiatric impairments (RaPID-2), 2018.
[BibTeX]

@InProceedings{Uslu:et:al:2018:a,
Author         = {Tolga Uslu and Lisa Miebach and Steffen Wolfsgruber
and Michael Wagner and Klaus Fließbach and Rüdiger
Gleim and Wahed Hemati and Alexander Henlein and
Alexander Mehler},
Title          = {{Automatic Classification in Memory Clinic Patients
and in Depressive Patients}},
BookTitle      = {Proceedings of Resources and ProcessIng of linguistic,
para-linguistic and extra-linguistic Data from people
with various forms of cognitive/psychiatric impairments
(RaPID-2)},
Series         = {RaPID},
location       = {Miyazaki, Japan},
year           = 2018
}
• A. Mehler, R. Gleim, A. Lücking, T. Uslu, and C. Stegbauer, “On the Self-similarity of Wikipedia Talks: a Combined Discourse-analytical and Quantitative Approach,” Glottometrics, vol. 40, pp. 1-44, 2018.
[BibTeX]

@Article{Mehler:Gleim:Luecking:Uslu:Stegbauer:2018,
Author         = {Alexander Mehler and Rüdiger Gleim and Andy Lücking
and Tolga Uslu and Christian Stegbauer},
Title          = {On the Self-similarity of {Wikipedia} Talks: a
Combined Discourse-analytical and Quantitative Approach},
Journal        = {Glottometrics},
Volume         = {40},
Pages          = {1-44},
year           = 2018
}
• T. Uslu, A. Mehler, A. Niekler, and D. Baumartz, “Towards a DDC-based Topic Network Model of Wikipedia,” in Proceedings of 2nd International Workshop on Modeling, Analysis, and Management of Social Networks and their Applications (SOCNET 2018), February 28, 2018, 2018.
[BibTeX]

@InProceedings{Uslu:Mehler:Niekler:Baumartz:2018,
Author         = {Tolga Uslu and Alexander Mehler and Andreas Niekler
and Daniel Baumartz},
Title          = {Towards a {DDC}-based Topic Network Model of Wikipedia},
BookTitle      = {Proceedings of 2nd International Workshop on Modeling,
Analysis, and Management of Social Networks and their
Applications (SOCNET 2018), February 28, 2018},
year           = 2018
}
• T. Uslu, A. Mehler, D. Baumartz, A. Henlein, and W. Hemati, “fastSense: An Efficient Word Sense Disambiguation Classifier,” in Proceedings of the 11th edition of the Language Resources and Evaluation Conference, May 7 – 12, Miyazaki, Japan, 2018.
[BibTeX]

@InProceedings{Uslu:et:al:2018,
Author         = {Tolga Uslu and Alexander Mehler and Daniel Baumartz
and Alexander Henlein and Wahed Hemati},
Title          = {fastSense: An Efficient Word Sense Disambiguation
Classifier},
BookTitle      = {Proceedings of the 11th edition of the Language
Resources and Evaluation Conference, May 7 - 12},
Series         = {LREC 2018},
year           = 2018
}
• R. Gleim, A. Mehler, and S. Y. Song, “WikiDragon: A Java Framework For Diachronic Content And Network Analysis Of MediaWikis,” in Proceedings of the 11th edition of the Language Resources and Evaluation Conference, May 7 – 12, Miyazaki, Japan, 2018.
[BibTeX]

@InProceedings{Gleim:Mehler:Song:2018,
Author         = {R{\"u}diger Gleim and Alexander Mehler and Sung Y.
Song},
Title          = {WikiDragon: A Java Framework For Diachronic Content
And Network Analysis Of MediaWikis},
BookTitle      = {Proceedings of the 11th edition of the Language
Resources and Evaluation Conference, May 7 - 12},
Series         = {LREC 2018},
year           = 2018
}
• P. Helfrich, E. Rieb, G. Abrami, A. Lücking, and A. Mehler, “TreeAnnotator: Versatile Visual Annotation of Hierarchical Text Relations,” in Proceedings of the 11th edition of the Language Resources and Evaluation Conference, May 7 – 12, Miyazaki, Japan, 2018.
[BibTeX]

@InProceedings{Helfrich:et:al:2018,
Author         = {Philipp Helfrich and Elias Rieb and Giuseppe Abrami
and Andy L{\"u}cking and Alexander Mehler},
Title          = {TreeAnnotator: Versatile Visual Annotation of
Hierarchical Text Relations},
BookTitle      = {Proceedings of the 11th edition of the Language
Resources and Evaluation Conference, May 7 - 12},
Series         = {LREC 2018},
year           = 2018
}
• G. Abrami and A. Mehler, “A UIMA Database Interface for Managing NLP-related Text Annotations,” in Proceedings of the 11th edition of the Language Resources and Evaluation Conference, May 7 – 12, Miyazaki, Japan, 2018.
[BibTeX]

@InProceedings{Abrami:Mehler:2018,
Author         = {Giuseppe Abrami and Alexander Mehler},
Title          = {A UIMA Database Interface for Managing NLP-related
Text Annotations},
BookTitle      = {Proceedings of the 11th edition of the Language
Resources and Evaluation Conference, May 7 - 12},
Series         = {LREC 2018},
year           = 2018
}
• A. Mehler, C. Stegbauer, and B. Frank-Job, “Ferdinand de Saussure. 1916. Cours de linguistique générale. Payot, Lausanne/Paris,” in Schlüsselwerke der Netzwerkforschung, C. Stegbauer and B. Holzer, Eds., Wiesbaden: Springer VS, 2018.
[BibTeX]

@InBook{Mehler:Stegbauer:Frank-Job:2018,
Author         = {Alexander Mehler and Christian Stegbauer and Barbara
Frank-Job},
Editor         = {Christian Stegbauer and Boris Holzer},
Title          = {{Ferdinand de Saussure. 1916. Cours de linguistique
générale. Payot, Lausanne/Paris}},
Publisher      = {Springer VS},
booktitle      = {Schlüsselwerke der Netzwerkforschung},
year           = 2018
}
• A. Mehler, O. Zlatkin-Troitschanskaia, W. Hemati, D. Molerov, A. Lücking, and S. Schmidt, “Integrating Computational Linguistic Analysis of Multilingual Learning Data and Educational Measurement Approaches to Explore Learning in Higher Education,” in Positive Learning in the Age of Information: A Blessing or a Curse?, O. Zlatkin-Troitschanskaia, G. Wittum, and A. Dengel, Eds., Wiesbaden: Springer Fachmedien Wiesbaden, 2018, pp. 145-193.
[Abstract] [BibTeX]

This chapter develops a computational linguistic model for analyzing and comparing multilingual data as well as its application to a large body of standardized assessment data from higher education. The approach employs both an automatic and a manual annotation of the data on several linguistic layers (including parts of speech, text structure and content). Quantitative features of the textual data are explored that are related to both the students' (domain-specific knowledge) test results and their level of academic experience. The respective analysis involves statistics of distance correlation, text categorization with respect to text types (questions and response options) as well as languages (English and German), and network analysis to assess dependencies between features. The correlation between correct test results of students and linguistic features of the verbal presentations of tests indicates to what extent language influences higher education test performance. It has also been found that this influence relates to specialized language. Thus, this integrative modeling approach contributes a test basis for a large-scale analysis of learning data and points to a number of subsequent, more detailed research questions.
@inbook{Mehler:et:al:2018,
abstract = "This chapter develops a computational linguistic model for analyzing and comparing multilingual data as well as its application to a large body of standardized assessment data from higher education. The approach employs both an automatic and a manual annotation of the data on several linguistic layers (including parts of speech, text structure and content). Quantitative features of the textual data are explored that are related to both the students' (domain-specific knowledge) test results and their level of academic experience. The respective analysis involves statistics of distance correlation, text categorization with respect to text types (questions and response options) as well as languages (English and German), and network analysis to assess dependencies between features. The correlation between correct test results of students and linguistic features of the verbal presentations of tests indicate to what extent language influences higher education test performance. It has also been found that this influence relates to specialized language. Thus, this integrative modeling approach contributes a test basis for a large-scale analysis of learning data and points to a number of subsequent, more detailed research questions.",
author = "Mehler, Alexander and Zlatkin-Troitschanskaia, Olga and Hemati, Wahed and Molerov, Dimitri and L{\"u}cking, Andy and Schmidt, Susanne",
booktitle = "Positive Learning in the Age of Information: A Blessing or a Curse?",
doi = "10.1007/978-3-658-19567-0_10",
editor = "Zlatkin-Troitschanskaia, Olga and Wittum, Gabriel and Dengel, Andreas",
isbn = "978-3-658-19567-0",
pages = "145--193",
title = "Integrating Computational Linguistic Analysis of Multilingual Learning Data and Educational Measurement Approaches to Explore Learning in Higher Education",
url = "https://doi.org/10.1007/978-3-658-19567-0_10",
year = "2018"
}
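
The abstract above mentions statistics of distance correlation. For readers unfamiliar with the measure, the following sketch re-implements the standard sample distance correlation (Székely-style double centering) with NumPy; it is an illustrative stand-in, not the code used in the chapter.

```python
import numpy as np

def distance_correlation(x, y):
    """Sample distance correlation between two 1-D samples."""
    x = np.asarray(x, dtype=float)[:, None]
    y = np.asarray(y, dtype=float)[:, None]
    a = np.abs(x - x.T)                        # pairwise distance matrices
    b = np.abs(y - y.T)
    # double-center each distance matrix
    A = a - a.mean(0) - a.mean(1)[:, None] + a.mean()
    B = b - b.mean(0) - b.mean(1)[:, None] + b.mean()
    dcov2 = (A * B).mean()                     # squared distance covariance
    dvar_x, dvar_y = (A * A).mean(), (B * B).mean()
    return np.sqrt(dcov2 / np.sqrt(dvar_x * dvar_y))

rng = np.random.default_rng(0)
u = rng.normal(size=200)
# Unlike Pearson correlation, distance correlation detects the
# purely nonlinear dependence between u and u**2.
print(distance_correlation(u, u ** 2))
```
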
• G. Abrami, S. Ahmed, R. Gleim, W. Hemati, A. Mehler, and T. Uslu, Natural Language Processing and Text Mining for BIOfid, 2018.
[BibTeX]

@misc{Abrami:et:al:2018b,
author = {Abrami, Giuseppe and Ahmed, Sajawel and Gleim, R{\"u}diger and Hemati, Wahed and Mehler, Alexander and Uslu, Tolga},
title = {{Natural Language Processing and Text Mining for BIOfid}},
howpublished = {Presentation at the 1st Meeting of the Scientific Advisory Board of the BIOfid Project},
address = {Goethe-University, Frankfurt am Main, Germany},
year = {2018},
month = {March},
day = {08}
}

### 2017 (10)

• A. Mehler and A. Lücking, “Modelle sozialer Netzwerke und Natural Language Processing: eine methodologische Randnotiz,” Soziologie, vol. 46, iss. 1, pp. 43-47, 2017.
[BibTeX]

@Article{Mehler:Luecking:2017,
Author         = {Alexander Mehler and Andy Lücking},
Title          = {Modelle sozialer Netzwerke und Natural Language
Processing: eine methodologische Randnotiz},
Journal        = {Soziologie},
Volume         = {46},
Number         = {1},
Pages          = {43-47},
year           = 2017
}
• W. Hemati, A. Mehler, and T. Uslu, “CRFVoter: Chemical Entity Mention, Gene and Protein Related Object recognition using a conglomerate of CRF based tools,” in BioCreative V.5. Proceedings, 2017.
[BibTeX]

@InProceedings{Hemati:Mehler:Uslu:2017,
Author         = {Wahed Hemati and Alexander Mehler and Tolga Uslu},
Title          = {{CRFVoter}: Chemical Entity Mention, Gene and Protein
Related Object recognition using a conglomerate of CRF
based tools},
BookTitle      = {BioCreative V.5. Proceedings},
year           = 2017
}
• W. Hemati, T. Uslu, and A. Mehler, “TextImager as an interface to BeCalm,” in BioCreative V.5. Proceedings, 2017.
[BibTeX]

@InProceedings{Hemati:Uslu:Mehler:2017,
Author         = {Wahed Hemati and Tolga Uslu and Alexander Mehler},
Title          = {{TextImager} as an interface to {BeCalm}},
BookTitle      = {BioCreative V.5. Proceedings},
year           = 2017
}
• A. Mehler, G. Abrami, S. Bruendel, L. Felder, T. Ostertag, and C. Spiekermann, “Stolperwege: An App for a Digital Public History of the Holocaust,” in Proceedings of the 28th ACM Conference on Hypertext and Social Media, New York, NY, USA, 2017, pp. 319-320.
[Abstract] [Poster][BibTeX]

We present the Stolperwege app, a web-based framework for ubiquitous modeling of historical processes. Starting from the art project Stolpersteine of Gunter Demnig, it allows for virtually connecting these stumbling blocks with information about the biographies of victims of Nazism. According to the practice of public history, the aim of Stolperwege is to deepen public knowledge of the Holocaust in the context of our everyday environment. Stolperwege uses an information model that allows for modeling social networks of agents starting from information about portions of their life. The paper exemplifies how Stolperwege is informationally enriched by means of historical maps and 3D animations of (historical) buildings.
@InProceedings{Mehler:et:al:2017:a,
Author         = {Alexander Mehler and Giuseppe Abrami and Steffen
Bruendel and Lisa Felder and Thomas Ostertag and
Christian Spiekermann},
Title          = {{Stolperwege:} An App for a Digital Public History of
the {Holocaust}},
BookTitle      = {Proceedings of the 28th ACM Conference on Hypertext
and Social Media},
Series         = {HT '17},
Pages          = {319--320},
Address        = {New York, NY, USA},
Publisher      = {ACM},
abstract       = {We present the Stolperwege app, a web-based framework
for ubiquitous modeling of historical processes.
Starting from the art project Stolpersteine of
Gunter Demnig, it allows for virtually connecting these
stumbling blocks with information about the biographies
of victims of Nazism. According to the practice of
public history, the aim of Stolperwege is to
deepen public knowledge of the Holocaust in the context
of our everyday environment. Stolperwege uses an
information model that allows for modeling social
networks of agents starting from information about
portions of their life. The paper exemplifies how
Stolperwege is informationally enriched by means of
historical maps and 3D animations of (historical)
buildings.},
acmid          = {3078748},
doi            = {10.1145/3078714.3078748},
isbn           = {978-1-4503-4708-2},
keywords       = {3d, geocaching, geotagging, historical maps,
historical processes, public history of the holocaust,
ubiquitous computing},
location       = {Prague, Czech Republic},
numpages       = {2},
url            = {http://doi.acm.org/10.1145/3078714.3078748},
year           = 2017
}
• A. Mehler, R. Gleim, W. Hemati, and T. Uslu, “Skalenfreie online soziale Lexika am Beispiel von Wiktionary,” in Proceedings of 53rd Annual Conference of the Institut für Deutsche Sprache (IDS), March 14-16, Mannheim, Germany, Berlin, 2017. In German; the title translates as: Scale-free online social lexica by example of Wiktionary
[Abstract] [BibTeX]

In English: The paper deals with characteristics of the structural, thematic and participatory dynamics of collaboratively generated lexical networks. This is done by example of Wiktionary. Starting from a network-theoretical model in terms of so-called multi-layer networks, we describe Wiktionary as a scale-free lexicon. Systems of this sort are characterized by the fact that their content-related dynamics is determined by the underlying dynamics of collaborating authors. This happens in a way that social structure imprints on content structure. According to this conception, the unequal distribution of the activities of authors results in a correspondingly unequal distribution of the information units documented within the lexicon. The paper focuses on foundations for describing such systems starting from a parameter space which requires dealing with Wiktionary as an issue in big data analysis.  In German: Der Beitrag thematisiert Eigenschaften der strukturellen, thematischen und partizipativen Dynamik kollaborativ erzeugter lexikalischer Netzwerke am Beispiel von Wiktionary. Ausgehend von einem netzwerktheoretischen Modell in Form so genannter Mehrebenennetzwerke wird Wiktionary als ein skalenfreies Lexikon beschrieben. Systeme dieser Art zeichnen sich dadurch aus, dass ihre inhaltliche Dynamik durch die zugrundeliegende Kollaborationsdynamik bestimmt wird, und zwar so, dass sich die soziale Struktur der entsprechenden inhaltlichen Struktur aufprägt. Dieser Auffassung gemäß führt die Ungleichverteilung der Aktivitäten von Lexikonproduzenten zu einer analogen Ungleichverteilung der im Lexikon dokumentierten Informationseinheiten. Der Beitrag thematisiert Grundlagen zur Beschreibung solcher Systeme ausgehend von einem Parameterraum, welcher die netzwerkanalytische Betrachtung von Wiktionary als Big-Data-Problem darstellt.
@InProceedings{Mehler:Gleim:Hemati:Uslu:2017,
Author         = {Alexander Mehler and Rüdiger Gleim and Wahed Hemati
and Tolga Uslu},
Title          = {{Skalenfreie online soziale Lexika am Beispiel von
Wiktionary}},
BookTitle      = {Proceedings of 53rd Annual Conference of the Institut
für Deutsche Sprache (IDS), March 14-16, Mannheim,
Germany},
Editor         = {Stefan Engelberg and Henning Lobin and Kathrin Steyer
and Sascha Wolfer},
Publisher      = {De Gruyter},
Note           = {In German. The title translates as: Scale-free
online social lexica by example of Wiktionary},
abstract       = {In English: The paper deals with characteristics of
the structural, thematic and participatory dynamics of
collaboratively generated lexical networks. This is
done by example of Wiktionary. Starting from a
network-theoretical model in terms of so-called
multi-layer networks, we describe Wiktionary as a
scale-free lexicon. Systems of this sort are
characterized by the fact that their content-related
dynamics is determined by the underlying dynamics of
collaborating authors. This happens in a way that
social structure imprints on content structure.
According to this conception, the unequal distribution
of the activities of authors results in a
correspondingly unequal distribution of the information
units documented within the lexicon. The paper focuses
on foundations for describing such systems starting
from a parameter space which requires dealing with
Wiktionary as an issue in big data analysis.
In German:
Der Beitrag thematisiert Eigenschaften der
strukturellen, thematischen und partizipativen Dynamik
kollaborativ erzeugter lexikalischer Netzwerke am
Beispiel von Wiktionary. Ausgehend von einem
netzwerktheoretischen Modell in Form so genannter
Mehrebenennetzwerke wird Wiktionary als ein
skalenfreies Lexikon beschrieben. Systeme dieser Art
zeichnen sich dadurch aus, dass ihre inhaltliche
Dynamik durch die zugrundeliegende
Kollaborationsdynamik bestimmt wird, und zwar so, dass
sich die soziale Struktur der entsprechenden
inhaltlichen Struktur aufprägt. Dieser Auffassung
gemäß führt die Ungleichverteilung der Aktivitäten
von Lexikonproduzenten zu einer analogen
Ungleichverteilung der im Lexikon dokumentierten
Informationseinheiten. Der Beitrag thematisiert
Grundlagen zur Beschreibung solcher Systeme ausgehend
von einem Parameterraum, welcher die
netzwerkanalytische Betrachtung von Wiktionary als
Big-Data-Problem darstellt.},
year           = 2017
}
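
The abstract's central claim, that unequal author activity leads to an unequal distribution of documented information units, can be illustrated with a toy rich-get-richer simulation. All parameters below are invented for illustration; the paper's multi-layer network model is far richer.

```python
import random

def lexicon_inequality(edits=50_000, new_entry_p=0.1, seed=0):
    """Toy rich-get-richer process: every edit either creates a new
    lexicon entry or extends an existing one chosen proportionally
    to how often it was edited before, yielding the kind of skewed
    distribution of information units the abstract describes.
    """
    rng = random.Random(seed)
    edit_log = []      # one slot per edit, holding the edited entry's id
    counts = []        # edits per entry
    for _ in range(edits):
        if not counts or rng.random() < new_entry_p:
            counts.append(1)                  # a brand-new entry
            edit_log.append(len(counts) - 1)
        else:
            e = rng.choice(edit_log)          # proportional to past edits
            counts[e] += 1
            edit_log.append(e)
    counts.sort(reverse=True)
    top = counts[: max(len(counts) // 100, 1)]
    return sum(top) / edits

print(f"share of all edits on the top 1% of entries: {lexicon_inequality():.2f}")
```
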
• A. Hoenen, S. Eger, and R. Gehrke, “How Many Stemmata with Root Degree k?,” in Proceedings of the 15th Meeting on the Mathematics of Language, 2017, pp. 11-21.
[BibTeX]

@InProceedings{Hoenen:Eger:Gehrke:2017,
Author         = {Hoenen, Armin and Eger, Steffen and Gehrke, Ralf},
Title          = {{How Many Stemmata with Root Degree k?}},
BookTitle      = {Proceedings of the 15th Meeting on the Mathematics of
Language},
Pages          = {11--21},
Publisher      = {Association for Computational Linguistics},
location       = {London, UK},
url            = {http://aclweb.org/anthology/W17-3402},
year           = 2017
}
• A. Hoenen, “Using Word Embeddings for Computing Distances Between Texts and for Authorship Attribution,” in International Conference on Applications of Natural Language to Information Systems, 2017, pp. 274-277.
[BibTeX]

@InProceedings{Hoenen:2017:b,
Author         = {Hoenen, Armin},
Title          = {{Using Word Embeddings for Computing Distances Between
Texts and for Authorship Attribution}},
BookTitle      = {International Conference on Applications of Natural
Language to Information Systems},
Pages          = {274--277},
Organization   = {Springer},
year           = 2017
}
• T. Uslu, W. Hemati, A. Mehler, and D. Baumartz, “TextImager as a Generic Interface to R,” in Software Demonstrations of the 15th Conference of the European Chapter of the Association for Computational Linguistics (EACL 2017), 2017.
[BibTeX]

@InProceedings{Uslu:Hemati:Mehler:Baumartz:2017,
Author         = {Tolga Uslu and Wahed Hemati and Alexander Mehler and
Daniel Baumartz},
Title          = {{TextImager} as a Generic Interface to {R}},
BookTitle      = {Software Demonstrations of the 15th Conference of the
European Chapter of the Association for Computational
Linguistics (EACL 2017)},
location       = {Valencia, Spain},
year           = 2017
}
• A. Hoenen, “Beyond the tree – a theoretical model of contamination and a software to generate multilingual stemmata,” in Book of Abstracts of the annual conference of the AIUCD 2017, Sapienza, Rome, AIUCD, 2017.
[BibTeX]

@InCollection{Hoenen:2017,
Author         = {Hoenen, Armin},
Title          = {{Beyond the tree – a theoretical model of
contamination and a software to generate multilingual
stemmata}},
BookTitle      = {{Book of Abstracts of the annual conference of the
AIUCD 2017, Sapienza, Rome}},
Publisher      = {AIUCD},
year           = 2017
}
• A. Lücking, “Indexicals as Weak Descriptors,” in Proceedings of the 12th International Conference on Computational Semantics, Montpellier (France), 2017.
[BibTeX]

@InProceedings{Luecking:2017:c,
Author         = {L\"{u}cking, Andy},
Title          = {Indexicals as Weak Descriptors},
BookTitle      = {Proceedings of the 12th International Conference on
Computational Semantics},
Series         = {IWCS 2017},
year           = 2017
}

### 2016 (19)

• S. Eger, A. Hoenen, and A. Mehler, “Language classification from bilingual word embedding graphs,” in Proceedings of COLING 2016, 2016.
[BibTeX]

@InProceedings{Eger:Hoenen:Mehler:2016,
Author         = {Steffen Eger and Armin Hoenen and Alexander Mehler},
Title          = {Language classification from bilingual word embedding
graphs},
BookTitle      = {Proceedings of COLING 2016},
Publisher      = {ACL},
location       = {Osaka},
year           = 2016
}
• W. Hemati, T. Uslu, and A. Mehler, “TextImager: a Distributed UIMA-based System for NLP,” in Proceedings of the COLING 2016 System Demonstrations, 2016.
[BibTeX]

@InProceedings{Hemati:Uslu:Mehler:2016,
Author         = {Wahed Hemati and Tolga Uslu and Alexander Mehler},
Title          = {TextImager: a Distributed UIMA-based System for NLP},
BookTitle      = {Proceedings of the COLING 2016 System Demonstrations},
Organization   = {Federated Conference on Computer Science and
Information Systems},
location       = {Osaka, Japan},
year           = 2016
}
• A. Lücking, “Modeling Co-Verbal Gesture Perception in Type Theory with Records,” in Proceedings of the 2016 Federated Conference on Computer Science and Information Systems, Gdansk, Poland, 2016, pp. 383-392. Best Paper Award
[BibTeX]

@InProceedings{Luecking:2016:b,
Author         = {L\"{u}cking, Andy},
Title          = {Modeling Co-Verbal Gesture Perception in Type Theory
with Records},
BookTitle      = {Proceedings of the 2016 Federated Conference on
Computer Science and Information Systems},
Editor         = {M. Ganzha and L. Maciaszek and M. Paprzycki},
Volume         = {8},
Series         = {Annals of Computer Science and Information Systems},
Pages          = {383-392},
Publisher      = {IEEE},
Note           = {Best Paper Award},
doi            = {10.15439/2016F83},
pdf            = {http://annals-csis.org/Volume_8/pliks/83.pdf},
url            = {http://annals-csis.org/Volume_8/drp/83.html},
year           = 2016
}
• A. Mehler, T. Uslu, and W. Hemati, “Text2voronoi: An Image-driven Approach to Differential Diagnosis,” in Proceedings of the 5th Workshop on Vision and Language (VL’16) hosted by the 54th Annual Meeting of the Association for Computational Linguistics (ACL), Berlin, 2016.
[BibTeX]

@InProceedings{Mehler:Uslu:Hemati:2016,
Author         = {Alexander Mehler and Tolga Uslu and Wahed Hemati},
Title          = {Text2voronoi: An Image-driven Approach to Differential
Diagnosis},
BookTitle      = {Proceedings of the 5th Workshop on Vision and Language
(VL'16) hosted by the 54th Annual Meeting of the
Association for Computational Linguistics (ACL), Berlin},
pdf            = {https://aclweb.org/anthology/W/W16/W16-3212.pdf},
year           = 2016
}
• S. Eger and A. Mehler, “On the linearity of semantic change: Investigating meaning variation via dynamic graph models,” in Proceedings of ACL 2016, 2016.
[BibTeX]

@InProceedings{Eger:Mehler:2016,
Author         = {Steffen Eger and Alexander Mehler},
Title          = {On the linearity of semantic change: {I}nvestigating
meaning variation via dynamic graph models},
BookTitle      = {Proceedings of ACL 2016},
location       = {Berlin},
pdf            = {https://www.aclweb.org/anthology/P/P16/P16-2009.pdf},
year           = 2016
}
• S. Eger, T. vor der Brück, and A. Mehler, “A Comparison of Four Character-Level String-to-String Translation Models for (OCR) Spelling Error Correction,” The Prague Bulletin of Mathematical Linguistics, vol. 105, pp. 77-99, 2016.
[BibTeX]

@Article{Eger:vorDerBrueck:Mehler:2016,
Author         = {Eger, Steffen and vor der Brück, Tim and Mehler,
Alexander},
Title          = {A Comparison of Four Character-Level String-to-String
Translation Models for (OCR) Spelling Error Correction},
Journal        = {The Prague Bulletin of Mathematical Linguistics},
Volume         = {105},
Pages          = {77-99},
doi            = {10.1515/pralin-2016-0004},
pdf            = {https://ufal.mff.cuni.cz/pbml/105/art-eger-vor-der-brueck.pdf},
year           = 2016
}
• A. Hoenen, “Silva Portentosissima – Computer-Assisted Reflections on Bifurcativity in Stemmas,” in Digital Humanities 2016: Conference Abstracts. Jagiellonian University & Pedagogical University, 2016, pp. 557-560.
[Abstract] [BibTeX]

In 1928, the philologist Joseph Bédier explored contemporary stemmas and found them to contain a suspiciously large amount of bifurcations. In this paper, a computer simulation is used to assess the argument that, with a large amount of lost manuscripts, the amount of bifurcations in the true stemmas would naturally be high because the probability for siblings to survive becomes very low.
@InProceedings{Hoenen:2016DH,
Author         = {Hoenen, Armin},
Title          = {{Silva Portentosissima – Computer-Assisted
Reflections on Bifurcativity in Stemmas}},
BookTitle      = {Digital Humanities 2016: Conference Abstracts.
Jagiellonian University \& Pedagogical University},
Series         = {DH 2016},
Pages          = {557-560},
abstract       = {In 1928, the philologist Joseph Bédier explored
contemporary stemmas and found them to contain a
suspiciously large amount of bifurcations. In this
paper, a computer simulation is used to assess the
argument that, with a large amount of lost manuscripts,
the amount of bifurcations in the true stemmas would
naturally be high because the probability for siblings
to survive becomes very low.},
location       = {Kraków},
year           = 2016
}
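
The simulation idea can be sketched in a few lines: grow a random copying tree of manuscripts, lose most of them, contract the lost inner nodes, and count how many inner nodes of the surviving stemma are bifurcations. This is a deliberately crude stand-in for the paper's simulation; the tree model, loss rate and trial count are invented here.

```python
import random

def reduced_stemma(children, alive, v=0):
    """Contract lost manuscripts that retain fewer than two surviving branches."""
    kept = [t for c in children[v] for t in reduced_stemma(children, alive, c)]
    if v in alive or len(kept) >= 2:
        return [(v, kept)]        # v stays a node of the reduced stemma
    return kept                   # lost pass-through node

def bifurcation_share(n=200, loss=0.8, trials=500, seed=1):
    rng = random.Random(seed)
    bi = inner = 0
    for _ in range(trials):
        children = {v: [] for v in range(n)}
        for v in range(1, n):                 # each manuscript copies a random earlier one
            children[rng.randrange(v)].append(v)
        alive = {v for v in range(n) if rng.random() > loss}
        stack = reduced_stemma(children, alive)
        while stack:                          # walk the reduced stemma
            v, kids = stack.pop()
            if len(kids) >= 2:
                inner += 1
                bi += len(kids) == 2
            stack.extend(kids)
    return bi / max(inner, 1)

# Even for non-bifurcating "true" trees, heavy manuscript loss drives
# the share of bifurcating inner nodes in the surviving stemma up.
print(f"share of bifurcations among inner nodes: {bifurcation_share():.2f}")
```
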
• A. Mehler, B. Wagner, and R. Gleim, “Wikidition: Towards A Multi-layer Network Model of Intertextuality,” in Proceedings of DH 2016, 12-16 July, 2016.
[Abstract] [BibTeX]

The paper presents Wikidition, a novel text mining tool for generating online editions of text corpora. It explores lexical, sentential and textual relations to span multi-layer networks (linkification) that allow for browsing syntagmatic and paradigmatic relations among the constituents of its input texts. In this way, relations of text reuse can be explored together with lexical relations within the same literary memory information system. Beyond that, Wikidition contains a module for automatic lexiconisation to extract author specific vocabularies. Based on linkification and lexiconisation, Wikidition does not only allow for traversing input corpora on different (lexical, sentential and textual) levels. Rather, its readers can also study the vocabulary of authors on several levels of resolution including superlemmas, lemmas, syntactic words and wordforms. We exemplify Wikidition by a range of literary texts and evaluate it by means of the apparatus of quantitative network analysis.
@InProceedings{Mehler:Wagner:Gleim:2016,
Author         = {Mehler, Alexander and Wagner, Benno and Gleim,
R\"{u}diger},
Title          = {Wikidition: Towards A Multi-layer Network Model of
Intertextuality},
BookTitle      = {Proceedings of DH 2016, 12-16 July},
Series         = {DH 2016},
abstract       = {The paper presents Wikidition, a novel text mining
tool for generating online editions of text corpora. It
explores lexical, sentential and textual relations to
span multi-layer networks (linkification) that allow
for browsing syntagmatic and paradigmatic relations
among the constituents of its input texts. In this way,
relations of text reuse can be explored together with
lexical relations within the same literary memory
information system. Beyond that, Wikidition contains a
module for automatic lexiconisation to extract author
specific vocabularies. Based on linkification and
lexiconisation, Wikidition does not only allow for
traversing input corpora on different (lexical,
sentential and textual) levels. Rather, its readers can
also study the vocabulary of authors on several levels
of resolution including superlemmas, lemmas, syntactic
words and wordforms. We exemplify Wikidition by a range
of literary texts and evaluate it by means of the
apparatus of quantitative network analysis.},
location       = {Kraków},
year           = 2016
}
• T. vor der Brück and A. Mehler, “TLT-CRF: A Lexicon-supported Morphological Tagger for Latin Based on Conditional Random Fields,” in Proceedings of the 10th International Conference on Language Resources and Evaluation, 2016.
[BibTeX]

@InProceedings{vorderBrueck:Mehler:2016,
Author         = {vor der Br\"{u}ck, Tim and Mehler, Alexander},
Title          = {{TLT-CRF}: A Lexicon-supported Morphological Tagger
for {Latin} Based on Conditional Random Fields},
BookTitle      = {Proceedings of the 10th International Conference on
Language Resources and Evaluation},
Series         = {LREC 2016},
location       = {{Portoro\v{z} (Slovenia)}},
year           = 2016
}
• S. Eger, R. Gleim, and A. Mehler, “Lemmatization and Morphological Tagging in German and Latin: A comparison and a survey of the state-of-the-art,” in Proceedings of the 10th International Conference on Language Resources and Evaluation, 2016.
[BibTeX]

@InProceedings{Eger:Mehler:Gleim:2016,
Author         = {Eger, Steffen and Gleim, R\"{u}diger and Mehler,
Alexander},
Title          = {Lemmatization and Morphological Tagging in {German}
and {Latin}: A comparison and a survey of the
state-of-the-art},
BookTitle      = {Proceedings of the 10th International Conference on
Language Resources and Evaluation},
Series         = {LREC 2016},
location       = {Portoro\v{z} (Slovenia)},
year           = 2016
}
• A. Lücking, A. Mehler, D. Walther, M. Mauri, and D. Kurfürst, “Finding Recurrent Features of Image Schema Gestures: the FIGURE corpus,” in Proceedings of the 10th International Conference on Language Resources and Evaluation, 2016.
[BibTeX]

@InProceedings{Luecking:Mehler:Walther:Mauri:Kurfuerst:2016,
Author         = {L\"{u}cking, Andy and Mehler, Alexander and Walther,
D\'{e}sir\'{e}e and Mauri, Marcel and Kurf\"{u}rst,
Dennis},
Title          = {Finding Recurrent Features of Image Schema Gestures:
the {FIGURE} corpus},
BookTitle      = {Proceedings of the 10th International Conference on
Language Resources and Evaluation},
Series         = {LREC 2016},
location       = {Portoro\v{z} (Slovenia)},
year           = 2016
}
• A. Lücking, A. Hoenen, and A. Mehler, “TGermaCorp — A (Digital) Humanities Resource for (Computational) Linguistics,” in Proceedings of the 10th International Conference on Language Resources and Evaluation, 2016.
[BibTeX]

@InProceedings{Luecking:Hoenen:Mehler:2016,
Author         = {L\"{u}cking, Andy and Hoenen, Armin and Mehler,
Alexander},
Title          = {{TGermaCorp} -- A (Digital) Humanities Resource for
(Computational) Linguistics},
BookTitle      = {Proceedings of the 10th International Conference on
Language Resources and Evaluation},
Series         = {LREC 2016},
islrn          = {536-382-801-278-5},
location       = {Portoro\v{z} (Slovenia)},
year           = 2016
}
• B. Wagner, A. Mehler, and H. Biber, “Transbiblionome Daten in der Literaturwissenschaft. Texttechnologische Erschließung und digitale Visualisierung intertextueller Beziehungen digitaler Korpora,” in DHd 2016, 2016.
[BibTeX]

@InProceedings{Wagner:Mehler:Biber:2016,
Author         = {Wagner, Benno and Mehler, Alexander and Biber, Hanno},
Title          = {{Transbiblionome Daten in der Literaturwissenschaft.
Texttechnologische Erschließung und digitale
Visualisierung intertextueller Beziehungen digitaler
Korpora}},
BookTitle      = {DHd 2016},
url            = {http://www.dhd2016.de/abstracts/sektionen-005.html#index.xml-body.1_div.4},
year           = 2016
}
• A. Mehler, R. Gleim, T. vor der Brück, W. Hemati, T. Uslu, and S. Eger, “Wikidition: Automatic Lexiconization and Linkification of Text Corpora,” Information Technology, vol. 58, pp. 70-79, 2016.
[Abstract] [BibTeX]

We introduce a new text technology, called Wikidition, which automatically generates large scale editions of corpora of natural language texts. Wikidition combines a wide range of text mining tools for automatically linking lexical, sentential and textual units. This includes the extraction of corpus-specific lexica down to the level of syntactic words and their grammatical categories. To this end, we introduce a novel measure of text reuse and exemplify Wikidition by means of the capitularies, that is, a corpus of Medieval Latin texts.
@Article{Mehler:et:al:2016,
Author         = {Alexander Mehler and Rüdiger Gleim and Tim vor der
Brück and Wahed Hemati and Tolga Uslu and Steffen Eger},
Title          = {Wikidition: Automatic Lexiconization and
Linkification of Text Corpora},
Journal        = {Information Technology},
Volume         = {58},
Pages          = {70-79},
abstract       = {We introduce a new text technology, called Wikidition,
which automatically generates large scale editions of
corpora of natural language texts. Wikidition combines
a wide range of text mining tools for automatically
linking lexical, sentential and textual units. This
includes the extraction of corpus-specific lexica down
to the level of syntactic words and their grammatical
categories. To this end, we introduce a novel measure
of text reuse and exemplify Wikidition by means of the
capitularies, that is, a corpus of Medieval Latin
texts.},
doi            = {10.1515/itit-2015-0035},
year           = 2016
}
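
The article introduces its own, more refined measure of text reuse; as a point of reference, the following sketch scores reuse between two passages with plain word-trigram Jaccard overlap. Both the measure and the sample strings are illustrative stand-ins.

```python
def ngram_set(tokens, n=3):
    """All word n-grams of a token list, as a set."""
    return {tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1)}

def reuse_score(text_a, text_b, n=3):
    """Jaccard overlap of word trigrams as a naive text-reuse score."""
    a = ngram_set(text_a.lower().split(), n)
    b = ngram_set(text_b.lower().split(), n)
    return len(a & b) / max(len(a | b), 1)

print(reuse_score("the king issued this capitulary in the year",
                  "this capitulary in the year of our lord"))   # ~0.33
```
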
• A. Hoenen, “Wikipedia Titles As Noun Tag Predictors,” in Proceedings of the 10th International Conference on Language Resources and Evaluation, 2016.
[BibTeX]

@InProceedings{Hoenen:2016x,
Author         = {Hoenen, Armin},
Title          = {{Wikipedia Titles As Noun Tag Predictors}},
BookTitle      = {Proceedings of the 10th International Conference on
Language Resources and Evaluation},
Series         = {LREC 2016},
location       = {Portoro\v{z} (Slovenia)},
pdf            = {http://www.lrec-conf.org/proceedings/lrec2016/pdf/18_Paper.pdf},
year           = 2016
}
• A. Hoenen, “Das erste dynamische Stemma, Pionier des digitalen Zeitalters?,” in Accepted in the Proceedings of the Jahrestagung der Digital Humanities im deutschsprachigen Raum, 2016.
[BibTeX]

@InProceedings{Hoenen:2016y,
Author         = {Hoenen, Armin},
Title          = {Das erste dynamische Stemma, Pionier des digitalen
Zeitalters?},
BookTitle      = {Accepted in the Proceedings of the Jahrestagung der
Digital Humanities im deutschsprachigen Raum},
url            = {http://www.dhd2016.de/abstracts/posters-060.html},
year           = 2016
}
• “Corpora and Resources for (Historical) Low Resource Languages,” A. Hoenen, A. Mehler, and J. Gippert, Eds., JLCL, vol. 31, iss. 2, 2016.
[BibTeX]

@collection{GSCL:JLCL:2016:2,
bibsource      = {GSCL, http://www.gscl.info/},
editor         = {Armin Hoenen and Alexander Mehler and Jost Gippert},
issn           = {2190-6858},
number         = {2},
pdf            = {http://www.jlcl.org/2016_Heft2/Heft2-2016.pdf},
publisher      = {JLCL},
title          = {{Corpora and Resources for (Historical) Low Resource
Languages}},
volume         = {31},
year           = 2016
}
• A. Hoenen, A. Mehler, and J. Gippert, “Editorial,” JLCL, vol. 31, iss. 2, pp. iii–iv, 2016.
[BibTeX]

@Article{Hoenen:Mehler:Gippert:2016,
Author         = {Armin Hoenen and Alexander Mehler and Jost Gippert},
Title          = {{Editorial}},
Journal        = {JLCL},
Volume         = {31},
Number         = {2},
Pages          = {iii--iv},
pdf            = {http://www.jlcl.org/2016_Heft2/Heft2-2016.pdf},
year           = 2016
}
• A. Hoenen and L. Samushia, “Gepi: An Epigraphic Corpus for Old Georgian and a Tool Sketch for Aiding Reconstruction,” JLCL, vol. 31, iss. 2, pp. 25-38, 2016.
[BibTeX]

@Article{Hoenen:Samushia:2016,
Author         = {Armin Hoenen and Lela Samushia},
Title          = {{Gepi: An Epigraphic Corpus for Old Georgian and a
Tool Sketch for Aiding Reconstruction}},
Journal        = {JLCL},
Volume         = {31},
Number         = {2},
Pages          = {25--38},
year           = 2016
}

### 2015 (25)

• A. Hoenen and F. Mader, “A New LMF Schema Application by Example of an Austrian Lexicon Applied to the Historical Corpus of the Writer Hugo von Hofmannsthal,” in Historical Corpora, 2015.
[BibTeX]

@InProceedings{Hoenen:Mader:2015,
Author         = {Hoenen, Armin and Mader, Franziska},
Title          = {A New LMF Schema Application by Example of an Austrian
Lexicon Applied to the Historical Corpus of the Writer
Hugo von Hofmannsthal},
BookTitle      = {Historical Corpora},
website        = {http://www.narr-shop.de/historical-corpora.html},
year           = 2015
}
• Text Mining: From Ontology Learning to Automated Text Processing Applications. Festschrift in Honor of Gerhard Heyer, C. Biemann and A. Mehler, Eds., Heidelberg: Springer, 2015.
[BibTeX]

@Book{Biemann:Mehler:2015,
Editor         = {Biemann, Chris and Mehler, Alexander},
Title          = {{Text Mining: From Ontology Learning to Automated Text
Processing Applications. Festschrift in Honor of
Gerhard Heyer}},
Publisher      = {Springer},
Series         = {Theory and Applications of Natural Language Processing},
year           = 2015
}
• M. Z. Islam, “Multilingual text classification using information-theoretic features,” PhD Thesis, 2015.
[Abstract] [BibTeX]

The number of multilingual texts in the World Wide Web (WWW) is increasing dramatically and a multilingual economic zone like the European Union (EU) requires the availability of multilingual Natural Language Processing (NLP) tools. Due to a rapid development of NLP tools, many lexical, syntactic, semantic and other linguistic features have been used in different NLP applications. However, there are some situations where these features cannot be used due to the application type or unavailability of NLP resources for some of the languages. That is why an application that is intended to handle multilingual texts must have features that are not dependent on a particular language and specific linguistic tools. In this thesis, we will focus on two such applications: text readability and source and translation classification. In this thesis, we provide 18 features that are not only suitable for both applications, but are also language and linguistic tools independent. In order to build a readability classifier, we use texts from three different languages: English, German and Bangla. Our proposed features achieve a classification accuracy that is comparable with a classifier using 40 linguistic features. The readability classifier achieves a classification F-score of 74.21% on the English Wikipedia corpus, an F-score of 75.47% on the English textbook corpus, an F-score of 86.46% on the Bangla textbook corpus and an F-score of 86.26% on the German GEO/GEOLino corpus. We used more than two million sentence pairs from 21 European languages in order to build the source and translation classifier. The classifier using the same eighteen features achieves a classification accuracy of 86.63%. We also used the same features to build a classifier that classifies translated texts based on their origin. The classifier achieves classification accuracy of 75% for texts from 10 European languages. In this thesis, we also provide four different corpora, three for text readability analysis and one for corpus based translation studies.
@phdthesis{Islam:2015,
title       = {Multilingual text classification using information-theoretic features},
pages       = {189},
year        = {2015},
pdf         = {http://publikationen.ub.uni-frankfurt.de/files/38157/thesis.pdf},
abstract    = {The number of multilingual texts in the World Wide Web (WWW) is increasing dramatically and a multilingual economic zone like the European Union (EU) requires the availability of multilingual Natural Language Processing (NLP) tools. Due to a rapid development of NLP tools, many lexical, syntactic, semantic and other linguistic features have been used in different NLP applications. However, there are some situations where these features cannot be used due to the application type or unavailability of NLP resources for some of the languages. That is why an application that is intended to handle multilingual texts must have features that are not dependent on a particular language and specific linguistic tools. In this thesis, we will focus on two such applications: text readability and source and translation classification.

In this thesis, we provide 18 features that are not only suitable for both applications, but are also language and linguistic tools independent. In order to build a readability classifier, we use texts from three different languages: English, German and Bangla. Our proposed features achieve a classification accuracy that is comparable with a classifier using 40 linguistic features. The readability classifier achieves a classification F-score of 74.21% on the English Wikipedia corpus, an F-score of 75.47% on the English textbook corpus, an F-score of 86.46% on the Bangla textbook corpus and an F-score of 86.26% on the German GEO/GEOLino corpus.

We used more than two million sentence pairs from 21 European languages in order to build the source and translation classifier. The classifier using the same eighteen features achieves a classification accuracy of 86.63%. We also used the same features to build a classifier that classifies translated texts based on their origin. The classifier achieves classification accuracy of 75% for texts from 10 European languages. In this thesis, we also provide four different corpora, three for text readability analysis and one for corpus based translation studies.}
}
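
To make the notion of language- and tool-independent features concrete, here is a minimal sketch of two such features, character entropy and type-token ratio. They are illustrative stand-ins chosen by us; the thesis defines its own set of 18 information-theoretic features.

```python
import math
from collections import Counter

def info_features(text):
    """Two features that need no language-specific NLP resources."""
    chars = Counter(text)                     # character distribution
    total = sum(chars.values())
    entropy = -sum((c / total) * math.log2(c / total) for c in chars.values())
    tokens = text.split()                     # whitespace tokenization only
    ttr = len(set(tokens)) / max(len(tokens), 1)
    return {"char_entropy": round(entropy, 3), "type_token_ratio": round(ttr, 3)}

print(info_features("the cat sat on the mat"))
```
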
• N. Dundua, A. Hoenen, and L. Samushia, “A Parallel Corpus of the Old Georgian Gospel Manuscripts and their Stemmatology,” The Georgian Journal for Language Logic Computation, vol. IV, pp. 176-185, 2015.
[BibTeX]

@Article{Dundua:Hoenen:Samushia:2015,
Author         = {Dundua, Natia and Hoenen, Armin and Samushia, Lela},
Title          = {{A Parallel Corpus of the Old Georgian Gospel
Manuscripts and their Stemmatology}},
Journal        = {The Georgian Journal for Language Logic Computation},
Volume         = {IV},
Pages          = {176-185},
publisher      = {CLLS, Tbilisi State University and Kurt G{\"o}del
Society},
year           = 2015
}
• T. vor der Brück, S. Eger, and A. Mehler, “Complex Decomposition of the Negative Distance Kernel,” in IEEE International Conference on Machine Learning and Applications, 2015.
[BibTeX]

@InProceedings{vor:der:Bruck:Eger:Mehler:2015,
Author         = {vor der Br{\"u}ck, Tim and Eger, Steffen and Mehler,
Alexander},
Title          = {Complex Decomposition of the Negative Distance Kernel},
BookTitle      = {IEEE International Conference on Machine Learning and
Applications},
location       = {Miami, Florida, USA},
year           = 2015
}
• S. Eger, “Do we need bigram alignment models? On the effect of alignment quality on transduction accuracy in G2P,” in Proceedings of EMNLP, 2015.
[BibTeX]

@InProceedings{Eger:2015_EMNLP,
Author         = {Eger, Steffen},
Title          = {Do we need bigram alignment models? On the effect of
alignment quality on transduction accuracy in G2P},
BookTitle      = {Proceedings of EMNLP},
year           = 2015,
pdf     = {https://www.aclweb.org/anthology/D15-1139}
}
• T. vor der Brück and S. Eger, “Deriving a primal form for the quadratic power kernel,” in Proceedings of the 38th German Conference on Artificial Intelligence (KI), 2015.
[BibTeX]

@InProceedings{vorDerBrueck:Eger:2015,
Author         = {vor der Brück, Tim and Eger, Steffen},
Title          = {Deriving a primal form for the quadratic power kernel},
BookTitle      = {Proceedings of the 38th German Conference on
Artificial Intelligence ({KI})},
year           = 2015
}
• S. Eger, “Improving G2P from Wiktionary and other (web) resources,” in Proceedings of Interspeech, 2015.
[BibTeX]

@InProceedings{Eger:2015_Interspeech,
Author         = {Eger, Steffen},
Title          = {Improving G2P from Wiktionary and other (web)
resources},
BookTitle      = {Proceedings of Interspeech},
pdf     = {https://pdfs.semanticscholar.org/bba8/30015d9cbfc40b975c25d0ec186280da6ab0.pdf},
year           = 2015
}
• S. Eger, T. vor der Brück, and A. Mehler, “Lexicon-assisted tagging and lemmatization in Latin: A comparison of six taggers and two lemmatization methods,” in Proceedings of the 9th Workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities (LaTeCH 2015), Beijing, China, 2015.
[BibTeX]

@InProceedings{Eger:vor:der:Brueck:Mehler:2015,
Author         = {Eger, Steffen and vor der Brück, Tim and Mehler,
Alexander},
Title          = {Lexicon-assisted tagging and lemmatization in {Latin}:
A comparison of six taggers and two lemmatization
methods},
BookTitle      = {Proceedings of the 9th Workshop on Language Technology
for Cultural Heritage, Social Sciences, and Humanities
({LaTeCH 2015})},
year           = 2015
}
• Towards a Theoretical Framework for Analyzing Complex Linguistic Networks, A. Mehler, A. Lücking, S. Banisch, P. Blanchard, and B. Frank-Job, Eds., Springer, 2015.
[BibTeX]

@Book{Mehler:Luecking:Banisch:Blanchard:Frank-Job:2015,
Editor         = {Mehler, Alexander and Lücking, Andy and Banisch, Sven
and Blanchard, Philippe and Frank-Job, Barbara},
Title          = {Towards a Theoretical Framework for Analyzing Complex
Linguistic Networks},
Publisher      = {Springer},
Series         = {Understanding Complex Systems},
address        = {Berlin and New York},
isbn           = {978-3-662-47237-8},
year           = 2015
}
• A. Mehler and R. Gleim, “Linguistic Networks — An Online Platform for Deriving Collocation Networks from Natural Language Texts,” in Towards a Theoretical Framework for Analyzing Complex Linguistic Networks, A. Mehler, A. Lücking, S. Banisch, P. Blanchard, and B. Frank-Job, Eds., Springer, 2015.
[BibTeX]

@InCollection{Mehler:Gleim:2015:a,
Author         = {Mehler, Alexander and Gleim, Rüdiger},
Title          = {Linguistic Networks -- An Online Platform for Deriving
Collocation Networks from Natural Language Texts},
BookTitle      = {Towards a Theoretical Framework for Analyzing Complex
Linguistic Networks},
Publisher      = {Springer},
Editor         = {Mehler, Alexander and Lücking, Andy and Banisch, Sven
and Blanchard, Philippe and Frank-Job, Barbara},
Series         = {Understanding Complex Systems},
year           = 2015
}
• S. Eger, “Multiple Many-To-Many Sequence Alignment For Combining String-Valued Variables: A G2P Experiment,” in ACL, 2015.
[BibTeX]

@InProceedings{Eger:2015_ACL,
Author         = {Eger, Steffen},
Title          = {Multiple Many-To-Many Sequence Alignment For Combining
String-Valued Variables: A G2P Experiment},
BookTitle      = {ACL},
Publisher      = {Association for Computational Linguistics},
year           = 2015
}
• S. Eger, “Designing and comparing G2P-type lemmatizers for a morphology-rich language,” in Fourth International Workshop on Systems and Frameworks for Computational Morphology, 2015.
[BibTeX]

@InProceedings{Eger:2015_SFCM,
Author         = {Eger, Steffen},
Title          = {Designing and comparing G2P-type lemmatizers for a
morphology-rich language},
Publisher      = {Fourth International Workshop on Systems and
Frameworks for Computational Morphology},
year           = 2015
}
• S. Eger, N. Schenk, and A. Mehler, “Towards Semantic Language Classification: Inducing and Clustering Semantic Association Networks from Europarl,” in Proceedings of the Fourth Joint Conference on Lexical and Computational Semantics, 2015, pp. 127-136.
[BibTeX]

@InProceedings{Eger:Schenk:Mehler:2015,
Author         = {Eger, Steffen and Schenk, Niko and Mehler, Alexander},
Title          = {Towards Semantic Language Classification: Inducing and
Clustering Semantic Association Networks from Europarl},
BookTitle      = {Proceedings of the Fourth Joint Conference on Lexical
and Computational Semantics},
Pages          = {127--136},
Publisher      = {Association for Computational Linguistics},
month          = {June},
url            = {http://www.aclweb.org/anthology/S15-1014},
year           = 2015
}
• S. Eger, “Identities for Partial Bell Polynomials Derived from Identities for Weighted Integer Compositions,” Aequationes Mathematicae, 2015.
[BibTeX]

@Article{Eger:2015b,
Author         = {Eger, Steffen},
Title          = {Identities for Partial Bell Polynomials Derived from
Identities for Weighted Integer Compositions.},
Journal        = {Aequationes Mathematicae},
doi            = {10.1007/s00010-015-0338-2},
year           = 2015
}
• S. Eger, “Some Elementary Congruences for the Number of Weighted Integer Compositions,” Journal of Integer Sequences (electronic only), vol. 18, iss. 4, 2015.
[BibTeX]

@Article{Eger:2015a,
Author         = {Eger, Steffen},
Title          = {Some Elementary Congruences for the Number of Weighted
Integer Compositions.},
Journal        = {Journal of Integer Sequences (electronic only)},
Volume         = {18},
Number         = {4},
pdf            = {https://cs.uwaterloo.ca/journals/JIS/VOL18/Eger/eger11.pdf},
publisher      = {School of Computer Science, University of Waterloo,
Waterloo, ON},
year           = 2015
}
• A. Lücking, T. Pfeiffer, and H. Rieser, “Pointing and Reference Reconsidered,” Journal of Pragmatics, vol. 77, pp. 56-79, 2015.
[Abstract] [BibTeX]

Current semantic theory on indexical expressions claims that demonstratively used indexicals such as this lack a referent-determining meaning but instead rely on an accompanying demonstration act like a pointing gesture. While this view allows to set up a sound logic of demonstratives, the direct-referential role assigned to pointing gestures has never been scrutinized thoroughly in semantics or pragmatics. We investigate the semantics and pragmatics of co-verbal pointing from a foundational perspective combining experiments, statistical investigation, computer simulation and theoretical modeling techniques in a novel manner. We evaluate various referential hypotheses with a corpus of object identification games set up in experiments in which body movement tracking techniques have been extensively used to generate precise pointing measurements. Statistical investigation and computer simulations show that especially distal areas in the pointing domain falsify the semantic direct-referential hypotheses concerning pointing gestures. As an alternative, we propose that reference involving pointing rests on a default inference which we specify using the empirical data. These results raise numerous problems for classical semantics–pragmatics interfaces: we argue for pre-semantic pragmatics in order to account for inferential reference in addition to classical post-semantic Gricean pragmatics.
@Article{Luecking:Pfeiffer:Rieser:2015,
Author         = {Lücking, Andy and Pfeiffer, Thies and Rieser, Hannes},
Title          = {Pointing and Reference Reconsidered},
Journal        = {Journal of Pragmatics},
Volume         = {77},
Pages          = {56-79},
abstract       = {Current semantic theory on indexical expressions
claims that demonstratively used indexicals such as
this lack a referent-determining meaning but instead
rely on an accompanying demonstration act like a
pointing gesture. While this view allows to set up a
sound logic of demonstratives, the direct-referential
role assigned to pointing gestures has never been
scrutinized thoroughly in semantics or pragmatics. We
investigate the semantics and pragmatics of co-verbal
pointing from a foundational perspective combining
experiments, statistical investigation, computer
simulation and theoretical modeling techniques in a
novel manner. We evaluate various referential
hypotheses with a corpus of object identification games
set up in experiments in which body movement tracking
techniques have been extensively used to generate
precise pointing measurements. Statistical
investigation and computer simulations show that
especially distal areas in the pointing domain falsify
the semantic direct-referential hypotheses concerning
pointing gestures. As an alternative, we propose that
reference involving pointing rests on a default
inference which we specify using the empirical data.
These results raise numerous problems for classical
semantics–pragmatics interfaces: we argue for
pre-semantic pragmatics in order to account for
inferential reference in addition to classical
post-semantic Gricean pragmatics.},
doi            = {10.1016/j.pragma.2014.12.013},
website        = {http://www.sciencedirect.com/science/article/pii/S037821661500003X},
year           = 2015
}
• A. Mehler, T. vor der Brück, R. Gleim, and T. Geelhaar, “Towards a Network Model of the Coreness of Texts: An Experiment in Classifying Latin Texts using the TTLab Latin Tagger,” in Text Mining: From Ontology Learning to Automated text Processing Applications, C. Biemann and A. Mehler, Eds., Berlin/New York: Springer, 2015, pp. 87-112.
[Abstract] [BibTeX]

The analysis of longitudinal corpora of historical texts requires the integrated development of tools for automatically preprocessing these texts and for building representation models of their genre- and register-related dynamics. In this chapter we present such a joint endeavor that ranges from resource formation via preprocessing to network-based text representation and classification. We start with presenting the so-called TTLab Latin Tagger (TLT) that preprocesses texts of classical and medieval Latin. Its lexical resource in the form of the Frankfurt Latin Lexicon (FLL) is also briefly introduced. As a first test case for showing the expressiveness of these resources, we perform a tripartite classification task of authorship attribution, genre detection and a combination thereof. To this end, we introduce a novel text representation model that explores the core structure (the so-called coreness) of lexical network representations of texts. Our experiment shows the expressiveness of this representation format and mediately of our Latin preprocessor.
@InCollection{Mehler:Brueck:Gleim:Geelhaar:2015,
Author         = {Mehler, Alexander and vor der Brück, Tim and Gleim,
Rüdiger and Geelhaar, Tim},
Title          = {Towards a Network Model of the Coreness of Texts: An
Experiment in Classifying Latin Texts using the TTLab
Latin Tagger},
BookTitle      = {Text Mining: From Ontology Learning to Automated text
Processing Applications},
Publisher      = {Springer},
Editor         = {Chris Biemann and Alexander Mehler},
Series         = {Theory and Applications of Natural Language Processing},
Pages          = {87-112},
abstract       = {The analysis of longitudinal corpora of historical
texts requires the integrated development of tools for
automatically preprocessing these texts and for
building representation models of their genre- and
register-related dynamics. In this chapter we present
such a joint endeavor that ranges from resource
formation via preprocessing to network-based text
representation and classification. We start with
presenting the so-called TTLab Latin Tagger (TLT) that
preprocesses texts of classical and medieval Latin. Its
lexical resource in the form of the Frankfurt Latin
Lexicon (FLL) is also briefly introduced. As a first
test case for showing the expressiveness of these
resources, we perform a tripartite classification task
of authorship attribution, genre detection and a
combination thereof. To this end, we introduce a novel
text representation model that explores the core
structure (the so-called coreness) of lexical network
representations of texts. Our experiment shows the
expressiveness of this representation format and
mediately of our Latin preprocessor.},
year           = 2015
}
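
The coreness-based representation above lends itself to a compact illustration. The following minimal sketch is not the authors' TLT/FLL pipeline: it merely builds a lexical co-occurrence network with networkx and reads off the distribution of core numbers as a feature vector; the window size and the feature layout are illustrative assumptions.

from collections import Counter
import networkx as nx

def lexical_network(tokens, window=2):
    # Undirected co-occurrence graph over token types within a sliding window.
    g = nx.Graph()
    for i, w in enumerate(tokens):
        for v in tokens[i + 1:i + 1 + window]:
            if w != v:
                g.add_edge(w, v)
    return g

def coreness_profile(tokens, max_core=5):
    # Histogram of core numbers (capped at max_core) as a fixed-length vector.
    g = lexical_network(tokens)
    cores = nx.core_number(g)  # node -> largest k such that node is in the k-core
    hist = Counter(min(c, max_core) for c in cores.values())
    total = max(1, len(cores))
    return [hist.get(k, 0) / total for k in range(max_core + 1)]

print(coreness_profile("in vino veritas in aqua sanitas".split()))
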
• A. Hoenen, “Das artifizielle Manuskriptkorpus TASCFE,” in Accepted in the Proceedings of the Jahrestagung der Digital Humanities im deutschsprachigen Raum, 2015.
[BibTeX]

@InProceedings{Hoenen:2015,
Author         = {Hoenen, Armin},
Title          = {Das artifizielle Manuskriptkorpus TASCFE},
BookTitle      = {Accepted in the Proceedings of the Jahrestagung der
Digital Humanities im deutschsprachigen Raum},
year           = 2015
}
• R. Gleim and A. Mehler, “TTLab Preprocessor – Eine generische Web-Anwendung für die Vorverarbeitung von Texten und deren Evaluation,” in Accepted in the Proceedings of the Jahrestagung der Digital Humanities im deutschsprachigen Raum, 2015.
[BibTeX]

@InProceedings{Gleim:Mehler:2015,
Author         = {Gleim, Rüdiger and Mehler, Alexander},
Title          = {TTLab Preprocessor – Eine generische Web-Anwendung
für die Vorverarbeitung von Texten und deren
Evaluation},
BookTitle      = {Accepted in the Proceedings of the Jahrestagung der
Digital Humanities im deutschsprachigen Raum},
year           = 2015
}
• G. Abrami, A. Mehler, and S. Zeunert, “Ontologiegestützte geisteswissenschaftliche Annotationen mit dem OWLnotator,” in Proceedings of the Jahrestagung der Digital Humanities im deutschsprachigen Raum, 2015.
[BibTeX]

@InProceedings{Abrami:Mehler:Zeunert:2015:a,
Author         = {Abrami, Giuseppe and Mehler, Alexander and Zeunert,
Susanne},
Title          = {Ontologiegestützte geisteswissenschaftliche
Annotationen mit dem OWLnotator},
BookTitle      = {Proceedings of the Jahrestagung der Digital Humanities
im deutschsprachigen Raum},
year           = 2015
}
• G. Abrami, A. Mehler, and D. Pravida, “Fusing Text and Image Data with the Help of the OWLnotator,” in Human Interface and the Management of Information. Information and Knowledge Design, S. Yamamoto, Ed., Springer International Publishing, 2015, vol. 9172, pp. 261-272.
[BibTeX]

@InCollection{Abrami:Mehler:Pravida:2015:b,
Author         = {Abrami, Giuseppe and Mehler, Alexander and Pravida,
Dietmar},
Title          = {Fusing Text and Image Data with the Help of the
OWLnotator},
BookTitle      = {Human Interface and the Management of Information.
Information and Knowledge Design},
Publisher      = {Springer International Publishing},
Editor         = {Yamamoto, Sakae},
Volume         = {9172},
Series         = {Lecture Notes in Computer Science},
Pages          = {261-272},
doi            = {10.1007/978-3-319-20612-7_25},
isbn           = {978-3-319-20611-0},
language       = {English},
website        = {http://dx.doi.org/10.1007/978-3-319-20612-7_25},
year           = 2015
}
• A. Hoenen, “Lachmannian Archetype Reconstruction for Ancient Manuscript Corpora,” in Proceedings of the 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL HLT), 2015. Citation: Trovato is published in 2014 not in 2009.
[Abstract] [BibTeX]

Two goals are targeted by computer philology for ancient manuscript corpora: firstly, making an edition, that is roughly speaking one text version representing the whole corpus, which contains variety induced through copy errors and other processes and secondly, producing a stemma. A stemma is a graph-based visualization of the copy history with manuscripts as nodes and copy events as edges. Its root, the so-called archetype is the supposed original text or urtext from which all subsequent copies are made. Our main contribution is to present one of the first computational approaches to automatic archetype reconstruction and to introduce the first text-based evaluation for automatically produced archetypes. We compare a philologically generated archetype with one generated by bio-informatic software.
@InProceedings{Hoenen:2015a,
Author         = {Hoenen, Armin},
Title          = {Lachmannian Archetype Reconstruction for Ancient
Manuscript Corpora},
BookTitle      = {Proceedings of the 2015 Conference of the North
American Chapter of the Association for Computational
Linguistics: Human Language Technologies (NAACL HLT)},
Note           = {Citation: Trovato is published in 2014 not in 2009.},
abstract       = {Two goals are targeted by computer philology for
ancient manuscript corpora: firstly, making an edition,
that is roughly speaking one text version representing
the whole corpus, which contains variety induced
through copy errors and other processes and secondly,
producing a stemma. A stemma is a graph-based
visualization of the copy history with manuscripts as
nodes and copy events as edges. Its root, the so-called
archetype is the supposed original text or urtext from
which all subsequent copies are made. Our main
contribution is to present one of the first
computational approaches to automatic archetype
reconstruction and to introduce the first text-based
evaluation for automatically produced archetypes. We
compare a philologically generated archetype with one
generated by bio-informatic software.},
website        = {http://www.aclweb.org/anthology/N15-1127},
year           = 2015
}
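
As a toy illustration of the stemma notion defined in this abstract (manuscripts as nodes, copy events as directed edges), the following sketch with invented copy events recovers the archetype as the only node without an incoming edge; it is not the paper's reconstruction method.

# Hypothetical copy events, not data from the paper.
copy_events = [("archetype", "A"), ("archetype", "B"), ("A", "C"), ("B", "D")]
nodes = {m for edge in copy_events for m in edge}
copied_to = {dst for _, dst in copy_events}
print(nodes - copied_to)  # {'archetype'}
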
• A. Hoenen, “Simulating Misreading,” in Proceedings of the 20TH INTERNATIONAL CONFERENCE ON APPLICATIONS OF NATURAL LANGUAGE TO INFORMATION SYSTEMS (NLDB), 2015.
[Abstract] [BibTeX]

Physical misreading (as opposed to interpretational misreading) is an unnoticed substitution in silent reading. Especially for legally important documents or instruction manuals, this can lead to serious consequences. We present a prototype of an automatic highlighter targeting words which can most easily be misread in a given text using a dynamic orthographic neighbour concept. We propose measures of fit of a misread token based on Natural Language Processing and detect a list of short most easily misread tokens in the English language. We design a highlighting scheme for avoidance of misreading.
@InProceedings{Hoenen:2015b,
Author         = {Hoenen, Armin},
Title          = {Simulating Misreading},
BookTitle      = {Proceedings of the 20TH INTERNATIONAL CONFERENCE ON
APPLICATIONS OF NATURAL LANGUAGE TO INFORMATION SYSTEMS
(NLDB)},
abstract       = {Physical misreading (as opposed to interpretational
misreading) is an unnoticed substitution in silent
reading. Especially for legally important documents or
instruction manuals, this can lead to serious
consequences. We present a prototype of an automatic
highlighter targeting words which can most easily be
misread in a given text using a dynamic orthographic
neighbour concept. We propose measures of fit of a
misread token based on Natural Language Processing and
detect a list of short most easily misread tokens in
the English language. We design a highlighting scheme
for avoidance of misreading.},
year           = 2015
}
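
The dynamic orthographic neighbour concept can be made concrete with a small sketch. This is not the paper's prototype: it merely flags tokens that have same-length, one-substitution neighbours within the text's own vocabulary, and the threshold is an illustrative assumption.

def substitution_neighbours(word, vocab):
    # Words of equal length differing from `word` in exactly one character.
    return [v for v in vocab
            if len(v) == len(word) and v != word
            and sum(a != b for a, b in zip(word, v)) == 1]

def highlight_misreadable(tokens, threshold=1):
    # Map each vocabulary item to its neighbours if it has at least `threshold` of them.
    vocab = set(tokens)
    result = {}
    for w in sorted(vocab):
        neighbours = substitution_neighbours(w, vocab)
        if len(neighbours) >= threshold:
            result[w] = neighbours
    return result

print(highlight_misreadable("the cat sat on the mat and the rat ran".split()))
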
• G. Abrami, M. Freiberg, and P. Warner, “Managing and Annotating Historical Multimodal Corpora with the eHumanities Desktop – An outline of the current state of the LOEWE project Illustrations of Goethe’s Faust,” in Historical Corpora, 2015, pp. 353-363.
[Abstract] [BibTeX]

Text corpora are structured sets of text segments that can be annotated or interrelated. Expanding on this, we can define a database of images as an iconographic multimodal corpus with annotated images and the relations between images as well as between images and texts. The Goethe-Museum in Frankfurt holds a significant collection of art work and texts relating to Goethe’s Faust from the early 19th century until the present. In this project we create a database containing digitized items from this collection, and extend a tool, the ImageDB in the eHumanities Desktop, to annotate and provide relations between resources. This article gives an overview of the project and provides some technical details. Furthermore we show newly implemented features, explain the challenge of creating an ontology on multimodal corpora and give a forecast for future work.
@InProceedings{Abrami:Freiberg:Warner:2015,
Author         = {Abrami, Giuseppe and Freiberg, Michael and Warner,
Paul},
Title          = {Managing and Annotating Historical Multimodal Corpora
with the eHumanities Desktop - An outline of the
current state of the LOEWE project Illustrations of
Goethe’s Faust},
BookTitle      = {Historical Corpora},
Pages          = {353 - 363},
abstract       = {Text corpora are structured sets of text segments that
can be annotated or interrelated. Expanding on this, we
can define a database of images as an iconographic
multimodal corpus with annotated images and the
relations between images as well as between images and
texts. The Goethe-Museum in Frankfurt holds a
significant collection of art work and texts relating
to Goethe’s Faust from the early 19th century until
the present. In this project we create a database
containing digitized items from this collection, and
extend a tool, the ImageDB in the eHumanities Desktop,
to annotate and provide relations between resources.
This article gives an overview of the project and
provides some technical details. Furthermore we show
newly implemented features, explain the challenge of
creating an ontology on multimodal corpora and give a
forecast for future work.},
website        = {http://www.narr-shop.de/historical-corpora.html},
year           = 2015
}

### 2014 (13)

• A. Hoenen, “Stemmatology, an interdisciplinary endeavour,” in Book of Abstracts zum DHd Workshop Informatik und die Digital Humanities, DHd, 2014.
[BibTeX]

@InCollection{Hoenen:2014plz,
Author         = {Hoenen, Armin},
Title          = {{Stemmatology, an interdisciplinary endeavour}},
BookTitle      = {{Book of Abstracts zum DHd Workshop Informatik und die
Digital Humanities}},
Publisher      = {DHd},
url            = {http://dhd-wp.hab.de/files/book_of_abstracts.pdf},
year           = 2014
}
• X. Chen, “Language as a whole — A new framework for linguistic knowledge integration: Comment on “Approaching human language with complex networks” by Cong and Liu,” Physics of Life Reviews, vol. 11, iss. 4, pp. 628-629, 2014.
[BibTeX]

@Article{Chen:2014:a,
Author         = {Chen, Xinying},
Title          = {Language as a whole -- A new framework for linguistic
knowledge integration: Comment on "Approaching human
language with complex networks" by {Cong} and {Liu}},
Journal        = {Physics of Life Reviews},
Volume         = {11},
Number         = {4},
Pages          = {628-629},
doi            = {10.1016/j.plrev.2014.07.011},
url            = {http://www.sciencedirect.com/science/article/pii/S1571064514001249},
year           = 2014
}
• T. Gong, Y. W. Lam, X. Chen, and M. Zhang, “Review: Evolutionary Linguistics in the Past Two Decades — EVOLANG10: the 10th International Conference on Language Evolution,” Journal of Chinese Linguistics, vol. 42, iss. 2, pp. 499-530, 2014.
[BibTeX]

@Article{Gong:Lam:Chen:Zhang:2014,
Author         = {Gong, Tao and Lam, Yau Wai and Chen, Xinying and
Zhang, Menghan},
Title          = {Review: Evolutionary Linguistics in the Past Two
Decades -- EVOLANG10: the 10th International Conference
on Language Evolution},
Journal        = {Journal of Chinese Linguistics},
Volume         = {42},
Number         = {2},
Pages          = {499-530},
year           = 2014
}
• G. Abrami, A. Mehler, D. Pravida, and S. Zeunert, “Rubrik: Neues aus dem Netz,” Kunstchronik, vol. 12, p. 623, 2014.
[BibTeX]

@Article{Abrami:Mehler:Pravida:Zeunert:2014,
Author         = {Abrami, Giuseppe and Mehler, Alexander and Pravida,
Dietmar and Zeunert, Susanne},
Title          = {Rubrik: Neues aus dem Netz},
Journal        = {Kunstchronik},
Volume         = {12},
Pages          = {623},
month          = {12},
publisher      = {Zentralinstitut für Kunstgeschichte},
website        = {http://www.zikg.eu/publikationen/laufende-publikationen/kunstchronik},
year           = 2014
}
• S. Eger, “A proof of the Mann-Shanks primality criterion conjecture for extended binomial coefficients,” Integers: The Electronic Journal of Combinatorial Number Theory, vol. 14, 2014.
[Abstract] [BibTeX]

We show that the Mann-Shanks primality criterion holds for weighted extended binomial coefficients (which count the number of weighted integer compositions), not only for the ordinary binomial coefficients.
@Article{Eger:2014:a,
Author         = {Eger, Steffen},
Title          = {A proof of the Mann-Shanks primality criterion
conjecture for extended binomial coefficients},
Journal        = {Integers: The Electronic Journal of Combinatorial
Number Theory},
Volume         = {14},
abstract       = {We show that the Mann-Shanks primality criterion holds
for weighted extended binomial coefficients (which
count the number of weighted integer compositions), not
only for the ordinary binomial coefficients.},
pdf            = {http://www.emis.de/journals/INTEGERS/papers/o60/o60.pdf},
website        = {http://www.emis.de/journals/INTEGERS/vol14.html},
year           = 2014
}
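
For readers unfamiliar with the criterion: in its ordinary (unweighted) form, the Mann-Shanks criterion states that n >= 2 is prime exactly when k divides C(k, n-2k) for every integer k with n/3 <= k <= n/2; the paper proves the analogous statement for weighted extended binomial coefficients. A brute-force check of the ordinary case:

from math import comb

def mann_shanks_prime(n):
    # n >= 2 is prime iff k | C(k, n-2k) for all k with ceil(n/3) <= k <= floor(n/2).
    return n >= 2 and all(comb(k, n - 2 * k) % k == 0
                          for k in range((n + 2) // 3, n // 2 + 1))

print([n for n in range(2, 40) if mann_shanks_prime(n)])
# -> [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37]
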
• S. Eger, “Stirling’s approximation for central extended binomial coefficients.,” The American Mathematical Monthly, vol. 121, iss. 4, pp. 344-349, 2014.
[Abstract] [BibTeX]

We derive asymptotic formulas for central extended binomial coefficients, which are generalizations of binomial coefficients, using the distribution of the sum of independent discrete uniform random variables with the Central Limit Theorem and a local limit variant.
@Article{Eger:2014:b,
Author         = {Eger, Steffen},
Title          = {Stirling's approximation for central extended binomial
coefficients.},
Journal        = {The American Mathematical Monthly},
Volume         = {121},
Number         = {4},
Pages          = {344-349},
abstract       = {We derive asymptotic formulas for central extended
binomial coefficients, which are generalizations of
binomial coefficients, using the distribution of the
sum of independent discrete uniform random variables
with the Central Limit Theorem and a local limit
variant.},
website        = {http://www.jstor.org/stable/10.4169/amer.math.monthly.121.04.344},
year           = 2014
}
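
To convey the flavour of such asymptotics in the ordinary (unweighted) case: the central coefficient of (1 + x + ... + x^m)^n counts the central value of a sum of n iid uniform variables on {0, ..., m}, whose variance is m(m+2)/12, so a local limit argument of the kind the abstract mentions yields (a sketch only, not the paper's precise statement)

\[
[x^{\lfloor nm/2 \rfloor}]\,(1 + x + \dots + x^m)^n \;\sim\; (m+1)^n \sqrt{\frac{6}{\pi\, n\, m\,(m+2)}},
\]

which for m = 1 recovers the familiar Stirling-type estimate \(\binom{n}{\lfloor n/2 \rfloor} \sim 2^n \sqrt{2/(\pi n)}\).
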
• A. Mehler, “On the Expressiveness, Validity and Reproducibility of Models of Language Evolution. Comment on ‘Modelling language evolution: Examples and predictions’ by Tao Gong, Shuai Lan, and Menghan Zhang,” Physics of Life Reviews, 2014.
[BibTeX]

@Article{Mehler:2014,
Author         = {Mehler, Alexander},
Title          = {On the Expressiveness, Validity and Reproducibility of
Models of Language Evolution. Comment on 'Modelling
language evolution: Examples and predictions' by Tao
Gong, Shuai Lan, and Menghan Zhang},
Journal        = {Physics of Life Reviews},
website        = {https://www.researchgate.net/publication/261290946_On_the_expressiveness_validity_and_reproducibility_of_models_of_language_evolution_Comment_on_Modelling_language_evolution_Examples_and_predictions_by_Tao_Gong_Shuai_Lan_and_Menghan_Zhang},
year           = 2014
}
• C. Biemann, G. R. Crane, C. D. Fellbaum, and A. Mehler, “Computational Humanities – bridging the gap between Computer Science and Digital Humanities (Dagstuhl Seminar 14301),” Dagstuhl Reports, vol. 4, iss. 7, pp. 80-111, 2014.
[Abstract] [BibTeX]

Research in the field of Digital Humanities, also known as Humanities Computing, has seen a steady increase over the past years. Situated at the intersection of computing science and the humanities, present efforts focus on making resources such as texts, images, musical pieces and other semiotic artifacts digitally available, searchable and analysable. To this end, computational tools enabling textual search, visual analytics, data mining, statistics and natural language processing are harnessed to support the humanities researcher. The processing of large data sets with appropriate software opens up novel and fruitful approaches to questions in the traditional humanities. This report summarizes the Dagstuhl seminar 14301 on “Computational Humanities – bridging the gap between Computer Science and Digital Humanities”
@Article{Biemann:Crane:Fellbaum:Mehler:2014,
Author         = {Chris Biemann and Gregory R. Crane and Christiane D. Fellbaum and Alexander Mehler},
Title          = {Computational Humanities - bridging the gap between Computer Science and Digital Humanities (Dagstuhl Seminar 14301)},
Journal        = {Dagstuhl Reports},
Volume         = {4},
Number         = {7},
Pages          = {80-111},
abstract       = {Research in the field of Digital Humanities, also known as Humanities Computing, has seen a steady increase over the past years. Situated at the intersection of computing science and the humanities, present efforts focus on making resources such as texts, images, musical pieces and other semiotic artifacts digitally available, searchable and analysable. To this end, computational tools enabling textual search, visual analytics, data mining, statistics and natural language processing are harnessed to support the humanities researcher. The processing of large data sets with appropriate software opens up novel and fruitful approaches to questions in the traditional humanities. This report summarizes the Dagstuhl seminar 14301 on “Computational Humanities – bridging the gap between Computer Science and Digital Humanities”},
issn           = {2192-5283},
publisher      = {Schloss Dagstuhl--Leibniz-Zentrum für Informatik},
year           = 2014
}
• M. Z. Islam, M. R. Rahman, and A. Mehler, “Readability Classification of Bangla Texts,” in 15th International Conference on Intelligent Text Processing and Computational Linguistics (cicLing), Kathmandu, Nepal, 2014.
[Abstract] [BibTeX]

Readability classification is an important application of Natural Language Processing. It aims at judging the quality of documents and at assisting writers to identify possible problems. This paper presents a readability classifier for Bangla textbooks using information-theoretic and lexical features. Altogether 18 features are explored to achieve an F-score of 86.46
@InProceedings{Islam:Rahman:Mehler:2014,
Author         = {Islam, Md. Zahurul and Rahman, Md. Rashedur and
Mehler, Alexander},
Title          = {Readability Classification of Bangla Texts},
BookTitle      = {15th International Conference on Intelligent Text
Processing and Computational Linguistics (cicLing),
Kathmandu, Nepal},
abstract       = {Readability classification is an important application
of Natural Language Processing. It aims at judging the
quality of documents and to assist writers to identify
possible problems. This paper presents a readability
classifier for Bangla textbooks using
information-theoretic and lexical features. All
together 18 features are explored to achieve an F-score
of 86.46},
year           = 2014
}
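
To give a flavour of the information-theoretic feature family mentioned above (a minimal, hypothetical example, not one of the paper's 18 features): the character-level Shannon entropy of a text.

from collections import Counter
from math import log2

def char_entropy(text):
    # Shannon entropy (in bits) of the character distribution of `text`.
    counts = Counter(text)
    n = sum(counts.values())
    return -sum((c / n) * log2(c / n) for c in counts.values())

print(round(char_entropy("the cat sat on the mat"), 3))
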
• A. Mehler, T. vor der Brück, and A. Lücking, “Comparing Hand Gesture Vocabularies for HCI,” in Proceedings of HCI International 2014, 22 – 27 June 2014, Heraklion, Greece, Berlin/New York: Springer, 2014.
[Abstract] [BibTeX]

HCI systems are often equipped with gestural interfaces drawing on a predefined set of admitted gestures. We provide an assessment of the fitness of such gesture vocabularies in terms of their learnability and naturalness. This is done by example of rivaling gesture vocabularies of the museum information system WikiNect. In this way, we do not only provide a procedure for evaluating gesture vocabularies, but additionally contribute to design criteria to be followed by the gestures.
@InCollection{Mehler:vor:der:Brueck:Luecking:2014,
Author         = {Mehler, Alexander and vor der Brück, Tim and
Lücking, Andy},
Title          = {Comparing Hand Gesture Vocabularies for HCI},
BookTitle      = {Proceedings of HCI International 2014, 22 - 27 June
2014, Heraklion, Greece},
Publisher      = {Springer},
abstract       = {HCI systems are often equipped with gestural
interfaces drawing on a predefined set of admitted
gestures. We provide an assessment of the fitness of
such gesture vocabularies in terms of their
learnability and naturalness. This is done by example
of rivaling gesture vocabularies of the museum
information system WikiNect. In this way, we do not
only provide a procedure for evaluating gesture
vocabularies, but additionally contribute to design
criteria to be followed by the gestures.},
keywords       = {wikinect},
year           = 2014
}
• A. Mehler, A. Lücking, and G. Abrami, “WikiNect: Image Schemata as a Basis of Gestural Writing for Kinetic Museum Wikis,” Universal Access in the Information Society, pp. 1-17, 2014.
[Abstract] [BibTeX]

This paper provides a theoretical assessment of gestures in the context of authoring image-related hypertexts by example of the museum information system WikiNect. To this end, a first implementation of gestural writing based on image schemata is provided (Lakoff in Women, fire, and dangerous things: what categories reveal about the mind. University of Chicago Press, Chicago, 1987). Gestural writing is defined as a sort of coding in which propositions are only expressed by means of gestures. In this respect, it is shown that image schemata allow for bridging between natural language predicates and gestural manifestations. Further, it is demonstrated that gestural writing primarily focuses on the perceptual level of image descriptions (Hollink et al. in Int J Hum Comput Stud 61(5):601–626, 2004). By exploring the metaphorical potential of image schemata, it is finally illustrated how to extend the expressiveness of gestural writing in order to reach the conceptual level of image descriptions. In this context, the paper paves the way for implementing museum information systems like WikiNect as systems of kinetic hypertext authoring based on full-fledged gestural writing.
@Article{Mehler:Luecking:Abrami:2014,
Author         = {Mehler, Alexander and Lücking, Andy and Abrami,
Giuseppe},
Title          = {{WikiNect}: Image Schemata as a Basis of Gestural
Writing for Kinetic Museum Wikis},
Journal        = {Universal Access in the Information Society},
Pages          = {1-17},
abstract       = {This paper provides a theoretical assessment of
gestures in the context of authoring image-related
hypertexts by example of the museum information system
WikiNect. To this end, a first implementation of
gestural writing based on image schemata is provided
(Lakoff in Women, fire, and dangerous things: what
categories reveal about the mind. University of Chicago
Press, Chicago, 1987). Gestural writing is defined as a
sort of coding in which propositions are only expressed
by means of gestures. In this respect, it is shown that
image schemata allow for bridging between natural
language predicates and gestural manifestations.
Further, it is demonstrated that gestural writing
primarily focuses on the perceptual level of image
descriptions (Hollink et al. in Int J Hum Comput Stud
61(5):601–626, 2004). By exploring the metaphorical
potential of image schemata, it is finally illustrated
how to extend the expressiveness of gestural writing in
order to reach the conceptual level of image
descriptions. In this context, the paper paves the way
for implementing museum information systems like
WikiNect as systems of kinetic hypertext authoring
based on full-fledged gestural writing.},
doi            = {10.1007/s10209-014-0386-8},
issn           = {1615-5289},
keywords       = {wikinect},
website        = {http://dx.doi.org/10.1007/s10209-014-0386-8},
year           = 2014
}
• T. vor der Brück, A. Mehler, and M. Z. Islam, “ColLex.EN: Automatically Generating and Evaluating a Full-form Lexicon for English,” in Proceedings of LREC 2014, Reykjavik, Iceland, 2014.
[Abstract] [BibTeX]

Currently, a large number of different lexica are available for English. However, substantial and freely available fullform lexica with a high number of named entities are rather rare even in the case of this lingua franca. Existing lexica are often limited in several respects as explained in Section 2. What is missing so far is a freely available substantial machine-readable lexical resource of English that contains a high number of word forms and a large collection of named entities. In this paper, we describe a procedure to generate such a resource by example of English. This lexicon, henceforth called ColLex.EN (for Collecting Lexica for English), will be made freely available to the public. In this paper, we describe how ColLex.EN was collected from existing lexical resources and specify the statistical procedures that we developed to extend and adjust it. No manual modifications were done on the generated word forms and lemmas. Our fully automatic procedure has the advantage that whenever new versions of the source lexica are available, a new version of ColLex.EN can be automatically generated with low effort.
@InProceedings{vor:der:Brueck:Mehler:Islam:2014,
Author         = {vor der Brück, Tim and Mehler, Alexander and Islam,
Md. Zahurul},
Title          = {ColLex.EN: Automatically Generating and Evaluating a
Full-form Lexicon for English},
BookTitle      = {Proceedings of LREC 2014},
abstract       = {Currently, a large number of different lexica is
available for English. However, substantial and freely
available fullform lexica with a high number of named
entities are rather rare even in the case of this
lingua franca. Existing lexica are often limited in
several respects as explained in Section 2. What is
missing so far is a freely available substantial
machine-readable lexical resource of English that
contains a high number of word forms and a large
collection of named entities. In this paper, we
describe a procedure to generate such a resource by
example of English. This lexicon, henceforth called
ColLex.EN (for Collecting Lexica for English ), will be
made freely available to the public 1. In this paper,
we describe how ColLex.EN was collected from existing
lexical resources and specify the statistical
procedures that we developed to extend and adjust it.
No manual modifications were done on the generated word
forms and lemmas. Our fully automatic procedure has the
advantage that whenever new versions of the source
lexica are available, a new version of ColLex.EN can be
automatically generated with low effort.},
website        = {http://aclanthology.info/papers/collex-en-automatically-generating-and-evaluating-a-full-form-lexicon-for-english},
year           = 2014
}
• A. Hoenen, “Simulation of Scribal Letter Substitution,” in Analysis of Ancient and Medieval Texts and Manuscripts: Digital Approaches, 2014.
[BibTeX]

@InProceedings{Hoenen:2014,
Author         = {Hoenen, Armin},
Title          = {Simulation of Scribal Letter Substitution},
BookTitle      = {Analysis of Ancient and Medieval Texts and
Manuscripts: Digital Approaches},
Editor         = {T. L. Andrews and C. Macé},
owner          = {hoenen},
website        = {http://www.brepols.net/Pages/ShowProduct.aspx?prod_id=IS-9782503552682-1},
year           = 2014
}

### 2013 (20)

• I. Sejane and S. Eger, “Semantic typologies by means of network analysis of bilingual dictionaries,” in Approaches to Measuring Linguistic Differences, L. Borin and A. Saxena, Eds., De Gruyter, 2013, pp. 447-474.
[BibTeX]

@InCollection{Sejane:Eger:2013,
Author         = {Sejane, Ineta and Eger, Steffen},
Title          = {Semantic typologies by means of network analysis of
bilingual dictionaries},
BookTitle      = {Approaches to Measuring Linguistic Differences},
Publisher      = {De Gruyter},
Editor         = {Borin, Lars and Saxena, Anju},
Pages          = {447-474},
bibtexkey      = {eger-sejane_network-typologies2013},
doi            = {10.1515/9783110305258.447},
inlg           = {English [eng]},
src            = {degruyter},
srctrickle     = {degruyter#/books/9783110305258/9783110305258.447/9783110305258.447.xml},
url            = {http://www.degruyter.com/view/books/9783110305258/9783110305258.447/9783110305258.447.xml},
year           = 2013
}
• S. Eger, “Sequence Segmentation by Enumeration: An Exploration.,” Prague Bull. Math. Linguistics, vol. 100, pp. 113-131, 2013.
[Abstract] [BibTeX]

We investigate exhaustive enumeration and subsequent language model evaluation (E&E approach) as an alternative to solving the sequence segmentation problem. We show that, under certain conditions (on string lengths and regarding a possibility to accurately estimate the number of segments), which are satisfied for important NLP applications, such as phonological segmentation, syllabification, and morphological segmentation, the E&E approach is feasible and promises results superior to the standard sequence labeling approach to sequence segmentation.
@Article{Eger:2013:a,
Author         = {Eger, Steffen},
Title          = {Sequence Segmentation by Enumeration: An Exploration.},
Journal        = {Prague Bull. Math. Linguistics},
Volume         = {100},
Pages          = {113-131},
abstract       = {We investigate exhaustive enumeration and subsequent
language model evaluation (E\&E approach) as an
alternative to solving the sequence segmentation
problem. We show that, under certain conditions (on
string lengths and regarding a possibility to
accurately estimate the number of segments), which are
satisfied for important NLP applications, such as
phonological segmentation, syllabification, and
morphological segmentation, the E\&E approach is
feasible and promises superior results than the
standard sequence labeling approach to sequence
segmentation.},
pdf            = {http://ufal.mff.cuni.cz/pbml/100/art-eger.pdf},
year           = 2013
}
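
The E&E idea is easy to demonstrate on a toy scale. In the sketch below, every segmentation of a short string is enumerated and scored by a stand-in "language model"; the lexicon and its log-probabilities are invented for illustration.

from itertools import combinations

def segmentations(s):
    # Every way of cutting s into contiguous, non-empty segments.
    n = len(s)
    for r in range(n):
        for cuts in combinations(range(1, n), r):
            bounds = [0, *cuts, n]
            yield [s[a:b] for a, b in zip(bounds, bounds[1:])]

# Toy log-probabilities standing in for a real language model.
lexicon = {"syl": -2.0, "la": -2.5, "ble": -2.3, "b": -6.0, "le": -4.0}
logprob = lambda seg: lexicon.get(seg, -10.0)

best = max(segmentations("syllable"), key=lambda segs: sum(map(logprob, segs)))
print(best)  # ['syl', 'la', 'ble'] under this toy lexicon
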
• S. Eger, “A Contribution to the Theory of Word Length Distribution Based on a Stochastic Word Length Distribution Model.,” Journal of Quantitative Linguistics, vol. 20, iss. 3, pp. 252-265, 2013.
[Abstract] [BibTeX]

We derive a stochastic word length distribution model based on the concept of compound distributions and show its relationships with and implications for Wimmer et al.’s (1994) synergetic word length distribution model.
@Article{Eger:2013:b,
Author         = {Eger, Steffen},
Title          = {A Contribution to the Theory of Word Length
Distribution Based on a Stochastic Word Length
Distribution Model.},
Journal        = {Journal of Quantitative Linguistics},
Volume         = {20},
Number         = {3},
Pages          = {252-265},
abstract       = {We derive a stochastic word length distribution model
based on the concept of compound distributions and show
its relationships with and implications for Wimmer et
al. ’s (1994) synergetic word length distribution
model.},
year           = 2013
}
• S. Eger, “Sequence alignment with arbitrary steps and further generalizations, with applications to alignments in linguistics.,” Information Sciences, vol. 237, pp. 287-304, 2013.
[Abstract] [BibTeX]

We provide simple generalizations of the classical Needleman–Wunsch algorithm for aligning two sequences. First, we let both sequences be defined over arbitrary, potentially different alphabets. Secondly, we consider similarity functions between elements of both sequences with ranges in a semiring. Thirdly, instead of considering only ‘match’, ‘mismatch’ and ‘skip’ operations, we allow arbitrary non-negative alignment ‘steps’ S. Next, we present novel combinatorial formulas for the number of monotone alignments between two sequences for selected steps S. Finally, we illustrate sample applications in natural language processing that require larger steps than available in the original Needleman–Wunsch sequence alignment procedure such that our generalizations can be fruitfully adopted.
@Article{Eger:2013:c,
Author         = {Eger, Steffen},
Title          = {Sequence alignment with arbitrary steps and further
generalizations, with applications to alignments in
linguistics.},
Journal        = {Information Sciences},
Volume         = {237},
Pages          = {287-304},
abstract       = {We provide simple generalizations of the classical
Needleman–Wunsch algorithm for aligning two
sequences. First, we let both sequences be defined over
arbitrary, potentially different alphabets. Secondly,
we consider similarity functions between elements of
both sequences with ranges in a semiring. Thirdly,
instead of considering only ‘match’, ‘mismatch’
and ‘skip’ operations, we allow arbitrary
non-negative alignment ‘steps’ S. Next, we present
novel combinatorial formulas for the number of monotone
alignments between two sequences for selected steps S.
Finally, we illustrate sample applications in natural
language processing that require larger steps than
available in the original Needleman–Wunsch sequence
alignment procedure such that our generalizations can
be fruitfully adopted.},
website        = {http://www.sciencedirect.com/science/article/pii/S0020025513001485},
year           = 2013
}
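
The generalization is essentially a Needleman-Wunsch dynamic program in which the admissible alignment steps are a parameter, so many-to-one steps such as (2, 1) become available. The step set and similarity function below are illustrative assumptions, not the paper's exact formulation.

def align_score(x, y, steps, sim):
    # Best total similarity of a monotone alignment of x and y under the given steps.
    NEG = float("-inf")
    d = [[NEG] * (len(y) + 1) for _ in range(len(x) + 1)]
    d[0][0] = 0.0
    for i in range(len(x) + 1):
        for j in range(len(y) + 1):
            if d[i][j] == NEG:
                continue
            for a, b in steps:
                if i + a <= len(x) and j + b <= len(y):
                    s = d[i][j] + sim(x[i:i + a], y[j:j + b])
                    d[i + a][j + b] = max(d[i + a][j + b], s)
    return d[len(x)][len(y)]

# Grapheme-to-phoneme-like toy example where the (2, 1) step pays off:
sim = lambda u, v: 1.0 if (u, v) in {("ph", "f"), ("o", "o"), ("n", "n")} else -1.0
print(align_score("phon", "fon", steps=[(1, 1), (2, 1), (1, 2)], sim=sim))  # 3.0
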
• S. Eger, “Restricted weighted integer compositions and extended binomial coefficients.,” Journal of Integer Sequences (electronic only), vol. 16, iss. 1, 2013.
[Abstract] [BibTeX]

We prove a simple relationship between extended binomial coefficients — natural extensions of the well-known binomial coefficients — and weighted restricted integer compositions. Moreover, we give a very useful interpretation of extended binomial coefficients as representing distributions of sums of independent discrete random variables. We apply our results, e.g., to determine the distribution of the sum of k logarithmically distributed random variables, and to determining the distribution, specifying all moments, of the random variable whose values are part-products of random restricted integer compositions. Based on our findings and using the central limit theorem, we also give generalized Stirling formulae for central extended binomial coefficients. We enlarge the list of known properties of extended binomial coefficients.
@Article{Eger:2013:d,
Author         = {Eger, Steffen},
Title          = {Restricted weighted integer compositions and extended
binomial coefficients.},
Journal        = {Journal of Integer Sequences (electronic only)},
Volume         = {16},
Number         = {1},
abstract       = {We prove a simple relationship between extended
binomial coefficients — natural extensions of the
well-known binomial coefficients — and weighted
restricted integer compositions. Moreover,
we give a very useful interpretation of extended binomial
coefficients as representing distributions of sums of
independent discrete random variables. We apply our
results, e.g., to determine the distribution of the sum
of k logarithmically distributed random variables, and
to determining the distribution, specifying all
moments, of the random variable whose values are
part-products of random restricted integer
compositions. Based on our findings and using the
central limit theorem, we also give generalized
Stirling formulae for central extended binomial
coefficients. We enlarge the list of known properties
of extended binomial coefficients.},
issn           = {1530-7638},
pdf            = {https://cs.uwaterloo.ca/journals/JIS/VOL16/Eger/eger6.pdf},
publisher      = {School of Computer Science, University of Waterloo,
Waterloo, ON},
website        = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.397.3745},
year           = 2013
}
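
The central object of this abstract can be computed directly: the extended binomial coefficient is the coefficient of x^k in (sum_j f(j) x^j)^n, i.e. the number of f-weighted integer compositions of k with n parts. A small sketch under simple assumptions (non-negative parts, integer weights), not the paper's general setting:

def extended_binomial(n, k, weights):
    # weights: dict mapping an allowed part j to its weight f(j).
    # Iterated convolution: row holds the coefficients of (sum_j f(j) x^j)^i.
    row = {0: 1}
    for _ in range(n):
        nxt = {}
        for total, c in row.items():
            for part, w in weights.items():
                if total + part <= k:
                    nxt[total + part] = nxt.get(total + part, 0) + c * w
        row = nxt
    return row.get(k, 0)

# Parts {0, 1, 2} with unit weights reproduce the trinomial coefficients:
print(extended_binomial(3, 3, {0: 1, 1: 1, 2: 1}))  # 7
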
• A. Mehler, R. Schneider, and A. Storrer, Webkorpora in Computerlinguistik und Sprachforschung, R. Schneider, A. Storrer, and A. Mehler, Eds., JLCL, 2013, vol. 28.
[BibTeX]

@Book{Schneider:Storrer:Mehler:2013,
Author         = {Mehler, Alexander and Schneider, Roman and Storrer,
Angelika},
Editor         = {Roman Schneider and Angelika Storrer and Alexander
Mehler},
Title          = {Webkorpora in Computerlinguistik und Sprachforschung},
Publisher      = {JLCL},
Volume         = {28},
Number         = {2},
Series         = {Journal for Language Technology and Computational
Linguistics (JLCL)},
issn           = {2190-6858},
pagetotal      = {107},
pdf            = {http://www.jlcl.org/2013_Heft2/H2013-2.pdf},
year           = 2013
}
• A. Mehler, A. Lücking, T. vor der Brück, and G. Abrami, WikiNect – A Kinetic Artwork Wiki for Exhibition Visitors, 2013.
[Poster][BibTeX]

@Misc{Mehler:Luecking:vor:der:Brueck:2013:a,
Author         = {Mehler, Alexander and Lücking, Andy and vor der
Brück, Tim and Abrami, Giuseppe},
Title          = {WikiNect - A Kinetic Artwork Wiki for Exhibition
Visitors},
HowPublished   = {Poster Presentation at the Scientific Computing and
Cultural Heritage 2013 Conference, Heidelberg},
keywords       = {wikinect},
month          = {11},
url            = {http://scch2013.wordpress.com/},
year           = 2013
}
• A. Lücking, Theoretische Bausteine für einen semiotischen Ansatz zum Einsatz von Gestik in der Aphasietherapie, 2013.
[BibTeX]

@Misc{Luecking:2013:c,
Author         = {Lücking, Andy},
Title          = {Theoretische Bausteine für einen semiotischen Ansatz
zum Einsatz von Gestik in der Aphasietherapie},
HowPublished   = {Talk at the BKL workshop 2013, Bochum},
month          = {05},
url            = {http://www.bkl-ev.de/bkl_workshop/archiv/workshop13_programm.php},
year           = 2013
}
• A. Lücking, Eclectic Semantics for Non-Verbal Signs, 2013.
[BibTeX]

@Misc{Luecking:2013:d,
Author         = {Lücking, Andy},
Title          = {Eclectic Semantics for Non-Verbal Signs},
HowPublished   = {Talk at the Conference on Investigating semantics:
Empirical and philosophical approaches, Bochum},
month          = {10},
url            = {http://www.ruhr-uni-bochum.de/phil-lang/investigating/index.html},
year           = 2013
}
• A. Lücking, “Multimodal Propositions? From Semiotic to Semantic Considerations in the Case of Gestural Deictics,” in Poster Abstracts of the Proceedings of the 17th Workshop on the Semantics and Pragmatics of Dialogue, Amsterdam, 2013, pp. 221-223.
[Poster][BibTeX]

@InProceedings{Luecking:2013:e,
Author         = {Lücking, Andy},
Title          = {Multimodal Propositions? From Semiotic to Semantic
Considerations in the Case of Gestural Deictics},
BookTitle      = {Poster Abstracts of the Proceedings of the 17th
Workshop on the Semantics and Pragmatics of Dialogue},
Editor         = {Fernandez, Raquel and Isard, Amy},
Series         = {SemDial 2013},
Pages          = {221-223},
month          = {12},
year           = 2013
}
• M. Z. Islam and A. Hoenen, “Source and Translation Classification using Most Frequent Words,” in Proceedings of the 6th International Joint Conference on Natural Language Processing (IJCNLP), 2013.
[Abstract] [BibTeX]

Recently, translation scholars have made some general claims about translation properties. Some of these are source language independent while others are not. Koppel and Ordan (2011) performed empirical studies to validate both types of properties using English source texts and other texts translated into English. Obviously, corpora of this sort, which focus on a single language, are not adequate for claiming universality of translation properties. In this paper, we are validating both types of translation properties using original and translated texts from six European languages.
@InProceedings{Islam:Hoenen:2013,
Author         = {Islam, Md. Zahurul and Hoenen, Armin},
Title          = {Source and Translation Classification using Most
Frequent Words},
BookTitle      = {Proceedings of the 6th International Joint Conference
on Natural Language Processing (IJCNLP)},
abstract       = {Recently, translation scholars have made some general
claims about translation properties. Some of these are
source language independent while others are not.
Koppel and Ordan (2011) performed empirical studies to
validate both types of properties using English source
texts and other texts translated into English.
Obviously, corpora of this sort, which focus on a
single language, are not adequate for claiming
universality of translation properties. In this
paper, we are validating both types of translation
properties using original and translated texts from six
European languages.},
pdf            = {http://www.aclweb.org/anthology/I/I13/I13-1185.pdf},
website        = {http://aclanthology.info/papers/source-and-translation-classification-using-most-frequent-words},
year           = 2013
}
• A. Lücking and A. Mehler, “On Three Notions of Grounding of Artificial Dialog Companions,” Science, Technology & Innovation Studies, vol. 10, iss. 1, pp. 31-36, 2013.
[Abstract] [BibTeX]

We provide a new, theoretically motivated evaluation grid for assessing the conversational achievements of Artificial Dialog Companions (ADCs). The grid is spanned along three grounding problems. Firstly, it is argued that symbol grounding in general has to be intrinsic. Current approaches in this context, however, are limited to a certain kind of expression that can be grounded in this way. Secondly, we identify three requirements for conversational grounding, the process leading to mutual understanding. Finally, we sketch a test case for symbol grounding in the form of the philosophical grounding problem that involves the use of modal language. Together, the three grounding problems provide a grid that allows us to assess ADCs’ dialogical performances and to pinpoint future developments on these grounds.
@Article{Luecking:Mehler:2013:a,
Author         = {Lücking, Andy and Mehler, Alexander},
Title          = {On Three Notions of Grounding of Artificial Dialog
Companions},
Journal        = {Science, Technology \& Innovation Studies},
Volume         = {10},
Number         = {1},
Pages          = {31-36},
abstract       = {We provide a new, theoretically motivated evaluation
grid for assessing the conversational achievements of
Artificial Dialog Companions (ADCs). The grid is
spanned along three grounding problems. Firstly, it is
argued that symbol grounding in general has to be
intrinsic. Current approaches in this context,
however, are limited to a certain kind of expression
that can be grounded in this way. Secondly, we identify
three requirements for conversational grounding, the
process leading to mutual understanding. Finally, we
sketch a test case for symbol grounding in the form of
the philosophical grounding problem that involves the
use of modal language. Together, the three grounding
problems provide a grid that allows us to assess
ADCs’ dialogical performances and to pinpoint future
developments on these grounds.},
website        = {http://www.sti-studies.de/ojs/index.php/sti/article/view/143},
year           = 2013
}
• Die Dynamik sozialer und sprachlicher Netzwerke: Konzepte, Methoden und empirische Untersuchungen an Beispielen des WWW, B. Frank-Job, A. Mehler, and T. Sutter, Eds., Wiesbaden: Springer VS, 2013.
[Abstract] [BibTeX]

In this volume, media and information scientists, network researchers from computer science, text technology, and physics, sociologists, and linguists present interdisciplinary aspects of the study of complex multi-level networks. At the centre of their interest are investigations into the relationship between social and linguistic networks and their dynamics, demonstrated by empirical examples from the Web 2.0, but also by historical document corpora and by reception networks from art history and literary studies.
@Book{FrankJob:Mehler:Sutter:2013,
Editor         = {Barbara Frank-Job and Alexander Mehler and Tilmann
Sutter},
Title          = {Die Dynamik sozialer und sprachlicher Netzwerke:
Konzepte, Methoden und empirische Untersuchungen an
Beispielen des WWW},
Publisher      = {Springer VS},
abstract       = {In diesem Band pr{\"a}sentieren Medien- und
Informationswissenschaftler, Netzwerkforscher aus
Informatik, Texttechnologie und Physik, Soziologen und
Linguisten interdisziplin{\"a}r Aspekte der Erforschung
komplexer Mehrebenen-Netzwerke. Im Zentrum ihres
Interesses stehen Untersuchungen zum Zusammenhang
zwischen sozialen und sprachlichen Netzwerken und ihrer
Dynamiken, aufgezeigt an empirischen Beispielen aus dem
Bereich des Web 2.0, aber auch an historischen
Dokumentenkorpora sowie an Rezeptions-Netzwerken aus
Kunst- und Literaturwissenschaft.},
pagetotal      = {240},
year           = 2013
}
• A. Lücking, “Interfacing Speech and Co-Verbal Gesture: Exemplification,” in Proceedings of the 35th Annual Conference of the German Linguistic Society, Potsdam, Germany, 2013, pp. 284-286.
[BibTeX]

@InProceedings{Luecking:2013:b,
Author         = {Lücking, Andy},
Title          = {Interfacing Speech and Co-Verbal Gesture:
Exemplification},
BookTitle      = {Proceedings of the 35th Annual Conference of the
German Linguistic Society},
Series         = {DGfS 2013},
Pages          = {284-286},
year           = 2013
}
• A. Lücking, Ikonische Gesten. Grundzüge einer linguistischen Theorie, Berlin and Boston: De Gruyter, 2013. Zugl. Diss. Univ. Bielefeld (2011)
[Abstract] [BibTeX]

Non-verbal signs, especially speech-accompanying gestures, play a prominent role in human communication. In order to enable an analysis of gesture within the disciplines concerned with the study and modelling of dialogue, a corresponding linguistic framework theory is needed. „Ikonische Gesten“ offers a first framework, motivated by semiotics and perception theory, in which a grammatical analysis of the integration of speech and gesture becomes possible. Starting from an outline of semiotic approaches to iconic signs, the prevailing similarity account is rejected, with recourse to theories of perception, in favour of an exemplification account. Exemplification is implemented within a unification-based grammar, where, among other things, multimodal well-formedness, synchrony, and multimodal subcategorization are introduced as new objects of linguistic research and are modelled within an integrative analysis of speech and gesture.
@Book{Luecking:2013,
Author         = {Lücking, Andy},
Title          = {Ikonische Gesten. Grundzüge einer linguistischen
Theorie},
Publisher      = {De Gruyter},
Note           = {Zugl. Diss. Univ. Bielefeld (2011)},
abstract       = {Nicht-verbale Zeichen, insbesondere sprachbegleitende
Gesten, spielen eine herausragende Rolle in der
menschlichen Kommunikation. Um eine Analyse von Gestik
innerhalb derjenigen Disziplinen, die sich mit der
Erforschung und Modellierung von Dialogen
besch{\"a}ftigen, zu ermöglichen, bedarf es einer
entsprechenden linguistischen Rahmentheorie.
„Ikonische Gesten“ bietet einen ersten zeichen- und
wahrnehmungstheoretisch motivierten Rahmen an, in dem
eine grammatische Analyse der Integration von Sprache
und Gestik möglich ist. Ausgehend von einem Abriss
semiotischer Zug{\"a}nge zu ikonischen Zeichen wird der
vorherrschende {\"A}hnlichkeitsansatz unter Rückgriff
auf Wahrnehmungstheorien zugunsten eines
Exemplifikationsansatzes verworfen. Exemplifikation
wird im Rahmen einer unifikationsbasierten Grammatik
umgesetzt. Dort werden u.a. multimodale
Wohlgeformtheit, Synchronie und multimodale
Subkategorisierung als neue Gegenst{\"a}nde
linguistischer Forschung eingeführt und im Rahmen
einer integrativen Analyse von Sprache und Gestik
modelliert.},
year           = 2013
}
• M. Z. Islam and A. Mehler, “Automatic Readability Classification of Crowd-Sourced Data based on Linguistic and Information-Theoretic Features,” in 14th International Conference on Intelligent Text Processing and Computational Linguistics, 2013.
[Abstract] [BibTeX]

This paper presents a classifier of text readability based on information-theoretic features. The classifier was developed based on a linguistic approach to readability that explores lexical, syntactic and semantic features. For this evaluation we extracted a corpus of 645 articles from Wikipedia together with their quality judgments. We show that information-theoretic features perform as well as their linguistic counterparts even if we explore several linguistic levels at once.
@InProceedings{Islam:Mehler:2013:a,
Author         = {Islam, Md. Zahurul and Mehler, Alexander},
Title          = {Automatic Readability Classification of Crowd-Sourced
Data based on Linguistic and Information-Theoretic
Features},
BookTitle      = {14th International Conference on Intelligent Text
Processing and Computational Linguistics},
abstract       = {This paper presents a classifier of text readability
based on information-theoretic features. The classifier
was developed based on a linguistic approach to
readability that explores lexical, syntactic and
semantic features. For this evaluation we extracted a
corpus of 645 articles from Wikipedia together with
their quality judgments. We show that
information-theoretic features perform as well as their
linguistic counterparts even if we explore several
linguistic levels at once.},
owner          = {zahurul},
timestamp      = {2013.01.22},
website        = {http://www.redalyc.org/articulo.oa?id=61527437002},
year           = 2013
}
• M. Z. Islam and R. Rahman, “English to Bangla Name Transliteration System (Abstract),” in The 23rd Meeting of Computational Linguistics in the Netherlands (CLIN 2013), 2013.
[Abstract] [BibTeX]

Machine translation systems always struggle transliterating names and unknown words during the translation process. It becomes more problematic when the source and the target language use different scripts for writing. To handle this problem, transliteration systems are becoming popular as additional modules of the MT systems. In this abstract, we are presenting an English to Bangla name transliteration system that outperforms Google’s transliteration system. The transliteration system is the same as the phrase based statistical machine translation system, but it works on character level rather than on phrase level. The performance of a statistical system is directly correlated with the size of the training corpus. In this work, 2200 names are extracted from the Wikipedia cross lingual links and from Geonames. Also 3694 names are manually transliterated and added to the data. 4716 names are used for training, 590 for tuning and 588 names are used for testing. If we consider only the candidate transliterations, the system gives 64.28% accuracy. The performance increases to more than 90% if we consider only the top 5 transliterations. To compare with Google’s English to Bangla transliteration system, a list of 100 names is randomly selected from the test data and translated by both systems. Our system gives 63% accuracy where Google’s transliteration system does not transliterate a single name correctly. We have found significant improvement in terms of BLEU and TER score when we add the transliteration module to an English to Bangla machine translation system.
@InProceedings{Islam:Rahman:2013,
Author         = {Islam, Md. Zahurul and Rahman, Rashedur},
Title          = {English to Bangla Name Transliteration System
(Abstract)},
BookTitle      = {The 23rd Meeting of Computational Linguistics in the
Netherlands (CLIN 2013)},
abstract       = {Machine translation systems always struggle
transliterating names and unknown words during the
translation process. It becomes more problematic when
the source and the target language use different
scripts for writing. To handle this problem,
transliteration systems are becoming popular as
additional modules of the MT systems. In this abstract,
we are presenting an English to Bangla name
transliteration system. The transliteration system is
the same as the phrase based statistical machine
translation system, but it works on character level
rather than on phrase level. The performance of a
statistical system is directly correlated with the size
of the training corpus. In this work, 2200 names are
extracted from the Wikipedia cross lingual links and
from Geonames . Also 3694 names are manually
transliterated and added to the data. 4716 names are
used for training, 590 for tuning and 588 names are
used for testing. If we consider only the candidate
transliterations, the system gives 64.28% accuracy. The
performance increases to more than 90%, if we consider
only the top 5 transliterations. To compare with the
Google’s English to Bangla transliteration system, a
list of 100 names are randomly selected from the test
data and translated by both systems. Our system gives
63% accuracy where the Google’s transliteration
system does not transliterate a single name correctly.
We have found significant improvement in terms of BLEU
and TER score when we add the transliteration module
to an English to Bangla machine translation
system.},
owner          = {zahurul},
timestamp      = {2013.01.22},
year           = 2013
}
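
The decoding idea described in the abstract above — phrase-based SMT where the "phrases" are character n-grams — fits in a short sketch. The phrase table and probabilities below are invented toy data, not the authors' trained model, and the beam search is a minimal monotone decoder, not a full SMT system.

```python
# Toy character-level "phrase-based" transliteration with beam search.
# The phrase table is hypothetical; a real system learns it from aligned
# name pairs (here: Latin-script input, Bangla output).
from heapq import nlargest

PHRASE_TABLE = {  # source character n-gram -> [(target string, probability)]
    "r": [("\u09b0", 1.0)],
    "a": [("\u09be", 0.6), ("\u0985", 0.4)],
    "m": [("\u09ae", 1.0)],
    "sh": [("\u09b6", 0.9)],
}

def transliterate(name, beam=5, max_ngram=2):
    """Monotone character-level decoding; returns up to `beam` candidates."""
    hyps = [("", 0, 1.0)]  # (output so far, source chars consumed, probability)
    while any(pos < len(name) for _, pos, _ in hyps):
        expanded = []
        for out, pos, p in hyps:
            if pos == len(name):          # already complete, carry over
                expanded.append((out, pos, p))
                continue
            for n in range(1, max_ngram + 1):
                for tgt, q in PHRASE_TABLE.get(name[pos:pos + n], []):
                    expanded.append((out + tgt, pos + n, p * q))
        hyps = nlargest(beam, expanded, key=lambda h: h[2])
    return [(out, p) for out, _, p in hyps]

print(transliterate("rama"))  # ranked candidate transliterations
```

The top-1 versus top-5 accuracy gap reported in the abstract (64.28% versus over 90%) corresponds to the difference between taking the first candidate of such a beam and checking the whole candidate list.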
• A. Mehler, C. Stegbauer, and R. Gleim, “Zur Struktur und Dynamik der kollaborativen Plagiatsdokumentation am Beispiel des GuttenPlag Wiki: eine Vorstudie” [On the structure and dynamics of collaborative plagiarism documentation, exemplified by the GuttenPlag Wiki: a pilot study], in Die Dynamik sozialer und sprachlicher Netzwerke. Konzepte, Methoden und empirische Untersuchungen am Beispiel des WWW, B. Frank-Job, A. Mehler, and T. Sutter, Eds., Wiesbaden: VS Verlag, 2013.
[BibTeX]

@InCollection{Mehler:Stegbauer:Gleim:2013,
Author         = {Mehler, Alexander and Stegbauer, Christian and Gleim,
Rüdiger},
Title          = {Zur Struktur und Dynamik der kollaborativen
Plagiatsdokumentation am Beispiel des GuttenPlag Wiki:
eine Vorstudie},
BookTitle      = {Die Dynamik sozialer und sprachlicher Netzwerke.
Konzepte, Methoden und empirische Untersuchungen am
Beispiel des WWW},
Publisher      = {VS Verlag},
Editor         = {Frank-Job, Barbara and Mehler, Alexander and Sutter,
Tilman},
year           = 2013
}
• A. Lücking, K. Bergmann, F. Hahn, S. Kopp, and H. Rieser, “Data-based Analysis of Speech and Gesture: The Bielefeld Speech and Gesture Alignment Corpus (SaGA) and its Applications,” Journal of Multimodal User Interfaces, vol. 7, iss. 1-2, pp. 5-18, 2013.
[Abstract] [BibTeX]

Communicating face-to-face, interlocutors frequently produce multimodal meaning packages consisting of speech and accompanying gestures. We discuss a systematically annotated speech and gesture corpus consisting of 25 route-and-landmark-description dialogues, the Bielefeld Speech and Gesture Alignment corpus (SaGA), collected in experimental face-to-face settings. We first describe the primary and secondary data of the corpus and its reliability assessment. Then we go into some of the projects carried out using SaGA demonstrating the wide range of its usability: on the empirical side, there is work on gesture typology, individual and contextual parameters influencing gesture production and gestures’ functions for dialogue structure. Speech-gesture interfaces have been established extending unification-based grammars. In addition, the development of a computational model of speech-gesture alignment and its implementation constitutes a research line we focus on.
@Article{Luecking:Bergmann:Hahn:Kopp:Rieser:2012,
Author         = {Lücking, Andy and Bergmann, Kirsten and Hahn, Florian
and Kopp, Stefan and Rieser, Hannes},
Title          = {Data-based Analysis of Speech and Gesture: The
Bielefeld Speech and Gesture Alignment Corpus (SaGA)
and its Applications},
Journal        = {Journal of Multimodal User Interfaces},
Volume         = {7},
Number         = {1-2},
Pages          = {5-18},
abstract       = {Communicating face-to-face, interlocutors frequently
produce multimodal meaning packages consisting of
speech and accompanying gestures. We discuss a
systematically annotated speech and gesture corpus
consisting of 25 route-and-landmark-description
dialogues, the Bielefeld Speech and Gesture Alignment
corpus (SaGA), collected in experimental face-to-face
settings. We first describe the primary and secondary
data of the corpus and its reliability assessment. Then
we go into some of the projects carried out using SaGA
demonstrating the wide range of its usability: on the
empirical side, there is work on gesture typology,
individual and contextual parameters influencing
gesture production and gestures’ functions for
dialogue structure. Speech-gesture interfaces have been
established extending unification-based grammars. In
addition, the development of a computational model of
speech-gesture alignment and its implementation
constitutes a research line we focus on.},
doi            = {10.1007/s12193-012-0106-8},
year           = 2013
}
• N. Beckage, M. S. Vitevitch, A. Mehler, and E. Colunga, “Using Complex Network Analysis in the Cognitive Sciences,” in Proceedings of the 35th Annual Meeting of the Cognitive Science Society, CogSci 2013, Berlin, Germany, July 31 – August 3, 2013, 2013.
[BibTeX]

@InProceedings{Beckage:et:al:2013,
Author         = {Nicole Beckage and Michael S. Vitevitch and Alexander
Mehler and Eliana Colunga},
Title          = {Using Complex Network Analysis in the Cognitive
Sciences},
BookTitle      = {Proceedings of the 35th Annual Meeting of the
Cognitive Science Society, CogSci 2013, Berlin,
Germany, July 31 - August 3, 2013},
Editor         = {Markus Knauff and Michael Pauen and Natalie Sebanz and
Ipke Wachsmuth},
Publisher      = {cognitivesciencesociety.org},
year           = 2013
}

### 2012 (26)

• A. Mehler and L. Romary, Handbook of Technical Communication, Berlin: De Gruyter Mouton, 2012.
[BibTeX]

@Book{Mehler:Romary:2012,
Author         = {Mehler, Alexander and Romary, Laurent},
Title          = {Handbook of Technical Communication},
Publisher      = {De Gruyter Mouton},
pagetotal      = {839},
year           = 2012
}
• O. Abramov, “Network theory applied to linguistics: new advances in language classification and typology,” PhD Thesis, 2012.
[Abstract] [BibTeX]

This thesis bridges between two scientific fields -- linguistics and computer science -- in terms of Linguistic Networks. From the linguistic point of view we examine whether languages can be distinguished when looking at the network topology of different linguistic networks. We deal with up to 17 languages and ask how far the methods of network theory reveal the peculiarities of single languages. We present and apply network models from different levels of linguistic representation: syntactic, phonological and morphological. The network models presented here allow various linguistic features to be integrated at once, which enables a more abstract, holistic view of the particular language. From the point of view of computer science we elaborate the instrumentarium of network theory by applying it to a new field. We study the expressiveness of different network features and their ability to characterize language structure. We evaluate the interplay of these features and their goodness in the task of classifying languages genealogically. Among others we compare network features related to: average degree, average geodesic distance, clustering, entropy-based indices, assortativity, centrality, compactness etc. We also propose some new indices that can serve as additional characteristics of networks. The results obtained show that network models succeed in classifying related languages, and allow the study of language structure in general. The mathematical analysis of the particular network indices brings new insights into the nature of these indices and their potential when applied to different networks.
@PhdThesis{Abramov:2012,
Author         = {Abramov, Olga},
Title          = {Network theory applied to linguistics: new advances in
language classification and typology},
School         = {Bielefeld University, Germany},
abstract       = {This thesis bridges between two scientific fields --
linguistics and computer science -- in terms of
Linguistic Networks. From the linguistic point of view
we examine whether languages can be distinguished when
looking at network topology of different linguistic
networks. We deal with up to 17 languages and ask how
far the methods of network theory reveal the
peculiarities of single languages. We present and apply
network models from different levels of linguistic
representation: syntactic, phonological and
morphological. The network models presented here allow
to integrate various linguistic features at once, which
enables a more abstract, holistic view at the
particular language. From the point of view of computer
science we elaborate the instrumentarium of network
theory applying it to a new field. We study the
expressiveness of different network features and their
ability to characterize language structure. We evaluate
the interplay of these features and their goodness in
the task of classifying languages genealogically. Among
others we compare network features related to: average
degree, average geodesic distance, clustering,
entropy-based indices, assortativity, centrality,
compactness etc. We also propose some new indices that
can serve as additional characteristics of networks.
The results obtained show that network models succeed
in classifying related languages, and allow to study
language structure in general. The mathematical
analysis of the particular network indices brings new
insights into the nature of these indices and their
potential when applied to different networks.},
website        = {http://pub.uni-bielefeld.de/publication/2538828},
year           = 2012
}
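
Several of the indices compared in the thesis (average degree, average geodesic distance, clustering, entropy-based indices) are standard and easy to reproduce. Below is a minimal sketch using networkx on a toy co-occurrence graph; the graph and the resulting numbers are purely illustrative.

```python
# Computing a few of the network indices named in the abstract
# on a small toy graph (hypothetical data).
import math
import networkx as nx

G = nx.Graph()
G.add_edges_from([("the", "dog"), ("dog", "barks"), ("the", "cat"),
                  ("cat", "sleeps"), ("dog", "sleeps")])

avg_degree = sum(d for _, d in G.degree()) / G.number_of_nodes()
avg_geodesic = nx.average_shortest_path_length(G)  # requires a connected graph
clustering = nx.average_clustering(G)

# Entropy of the degree distribution, one kind of entropy-based index.
degrees = [d for _, d in G.degree()]
probs = {d: degrees.count(d) / len(degrees) for d in set(degrees)}
degree_entropy = -sum(p * math.log2(p) for p in probs.values())

print(avg_degree, avg_geodesic, clustering, degree_entropy)
```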
• A. Hoenen, “Measuring Repetitiveness in Texts, a Preliminary Investigation,” Sprache und Datenverarbeitung. International Journal for Language Data Processing, vol. 36, iss. 2, pp. 93-104, 2012.
[Abstract] [BibTeX]

In this paper, a model is presented for the automatic measurement and systematic description of the usage and function of repetition in written text. The motivating hypothesis for this study is that the more repetitive a text is, the easier it is to memorize. Therefore, an automated measurement index can provide feedback to writers and to those who design texts that are often memorized, including songs, holy texts, theatrical plays, and advertising slogans. The potential benefits of this kind of systematic feedback are numerous, the main one being that content creators would be able to employ a standard threshold of memorizability. This study explores multiple ways of implementing and calculating repetitiveness across levels of analysis (such as paragraph level or sub-word level), genres (such as songs, holy texts, and other genres) and languages, integrating these into a model for the automatic measurement of repetitiveness. The Avestan language and some of its idiosyncratic features are explored in order to illuminate how the proposed index is applied in the ranking of texts according to their repetitiveness.
@Article{Hoenen:2012:a,
Author         = {Hoenen, Armin},
Title          = {Measuring Repetitiveness in Texts, a Preliminary
Investigation},
Journal        = {Sprache und Datenverarbeitung. International Journal
for Language Data Processing},
Volume         = {36},
Number         = {2},
Pages          = {93-104},
abstract       = {In this paper, a model is presented for the automatic
measurement that can systematically describe the usage
and function of the phenomenon of repetition in written
text. The motivating hypothesis for this study is that
the more repetitive a text is, the easier it is to
memorize. Therefore, an automated measurement index can
provide feedback to writers and for those who design
texts that are often memorized including songs, holy
texts, theatrical plays, and advertising slogans. The
potential benefits of this kind of systematic feedback
are numerous, the main one being that content creators
would be able to employ a standard threshold of
memorizability. This study explores multiple ways of
implementing and calculating repetitiveness across
levels of analysis (such as paragraph-level or sub-word
level) genres (such as songs, holy texts, and other
genres) and languages, integrating these into a
model for the automatic measurement of repetitiveness.
The Avestan language and some of its idiosyncratic
features are explored in order to illuminate how the
proposed index is applied in the ranking of texts
according to their repetitiveness.},
website        = {http://www.linse.uni-due.de/jahrgang-36-2012/articles/measuring-repetitiveness-in-texts-a-preliminary-investigation.html},
year           = 2012
}
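
The paper's own index is not reproduced in the abstract; as a hedged stand-in, the sketch below scores repetitiveness as the share of repeated word n-grams, which captures the underlying intuition that texts with recurring sequences are easier to memorize.

```python
# A simple repetitiveness proxy: fraction of word n-grams that occur
# more than once. This is an illustrative substitute, not the paper's index.
from collections import Counter

def ngram_repetition_rate(text, n=3):
    tokens = text.lower().split()
    ngrams = [tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1)]
    if not ngrams:
        return 0.0
    counts = Counter(ngrams)
    repeated = sum(c for c in counts.values() if c > 1)
    return repeated / len(ngrams)

song = "row row row your boat " * 4
prose = "this sentence tries hard not to repeat any of its word sequences"
print(ngram_repetition_rate(song), ngram_repetition_rate(prose))
```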
• S. Eger, “The Combinatorics of String Alignments: Reconsidering the Problem.,” Journal of Quantitative Linguistics, vol. 19, iss. 1, pp. 32-53, 2012.
[Abstract] [BibTeX]

In recent work, Covington discusses the number of alignments of two strings. There, Covington defines an alignment as “a way of pairing up elements of two strings, optionally skipping some but preserving the order”. This definition has drawbacks as it excludes many relevant situations. In this work, we specify the notion of an alignment so that many linguistically interesting situations are covered. To this end, we define an alignment in an abstract manner as a set of pairs and then define three properties on such sets. Secondly, we specify the numbers of possibilities of aligning two strings in each case.
@Article{Eger:2012:a,
Author         = {Eger, Steffen},
Title          = {The Combinatorics of String Alignments: Reconsidering
the Problem.},
Journal        = {Journal of Quantitative Linguistics},
Volume         = {19},
Number         = {1},
Pages          = {32-53},
abstract       = {In recent work, Covington discusses the number of
alignments of two strings. Thereby, Covington defines
an alignment as “a way of pairing up elements of two
strings, optionally skipping some but preserving the
order”. This definition has drawbacks as it excludes
many relevant situations. In this work, we specify the
notion of an alignment so that many linguistically
interesting situations are covered. To this end, we
define an alignment in an abstract manner as a set of
pairs and then define three properties on such sets.
Secondly, we specify the numbers of possibilities of
aligning two strings in each case.},
website        = {http://www.tandfonline.com/doi/full/10.1080/09296174.2011.638792#tabModule},
year           = 2012
}
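
For the basic Covington-style notion cited in the abstract — pair up elements, optionally skip some, preserve order — the number of alignment step sequences (match, skip left, skip right) satisfies the Delannoy recurrence; Eger's paper derives counts for further, more abstract notions. A small sketch of the basic count:

```python
# Counting Covington-style alignments of strings of lengths m and n:
# each step matches one pair or skips a single element, giving
# a(m, n) = a(m-1, n) + a(m, n-1) + a(m-1, n-1) (Delannoy recurrence).
from functools import lru_cache

@lru_cache(maxsize=None)
def alignments(m, n):
    if m == 0 or n == 0:
        return 1  # only skips remain
    return alignments(m - 1, n) + alignments(m, n - 1) + alignments(m - 1, n - 1)

print([alignments(k, k) for k in range(6)])  # 1, 3, 13, 63, 321, 1683
```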
• S. Eger, “S-Restricted Monotone Alignments: Algorithm, Search Space, and Applications,” in Proceedings of COLING 2012, Mumbai, India, 2012, pp. 781-798.
[Abstract] [BibTeX]

We present a simple and straightforward alignment algorithm for monotone many-to-many alignments in grapheme-to-phoneme conversion and related fields such as morphology, and discuss a few noteworthy extensions. Moreover, we specify combinatorial formulas for monotone many-to-many alignments and decoding in G2P which indicate that exhaustive enumeration is generally possible, so that some limitations of our approach can easily be overcome. Finally, we present a decoding scheme, within the monotone many-to-many alignment paradigm, that relates the decoding problem to restricted integer compositions and that is, putatively, superior to alternatives suggested in the literature.
@InProceedings{Eger:2012:b,
Author         = {Eger, Steffen},
Title          = {S-Restricted Monotone Alignments: Algorithm, Search
Space, and Applications},
BookTitle      = {Proceedings of COLING 2012},
Pages          = {781-798},
Publisher      = {The COLING 2012 Organizing Committee},
abstract       = {We present a simple and straightforward alignment
algorithm for monotone many-to-many alignments in
grapheme-to-phoneme conversion and related fields such
as morphology, and discuss a few noteworthy extensions.
Moreover, we specify combinatorial formulas for
monotone many-to-many alignments and decoding in G2P
which indicate that exhaustive enumeration is generally
possible, so that some limitations of our approach can
easily be overcome. Finally, we present a decoding
scheme, within the monotone many-to-many alignment
paradigm, that relates the decoding problem to
restricted integer compositions and that is,
putatively, superior to alternatives suggested in the
literature},
pdf            = {http://aclweb.org/anthology/C/C12/C12-1048.pdf},
website        = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.370.5941},
year           = 2012
}
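
The abstract relates decoding to restricted integer compositions, i.e. the number of ways to write n as an ordered sum whose parts come from a restricted set S. A sketch of that count (the part set below is an arbitrary example, not one from the paper):

```python
# Counting compositions of n with parts restricted to a set S,
# via the recurrence c(n) = sum over p in S, p <= n, of c(n - p).
from functools import lru_cache

@lru_cache(maxsize=None)
def compositions(n, parts=(1, 2, 3)):
    if n == 0:
        return 1
    return sum(compositions(n - p, parts) for p in parts if p <= n)

print([compositions(n) for n in range(8)])  # 1, 1, 2, 4, 7, 13, 24, 44
```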
• S. Eger, “Lexical semantic typologies from bilingual corpora – A framework,” in SEM 2012: The First Joint Conference on Lexical and Computational Semantics — Volume 1: Proceedings of the main conference and the shared task, and Volume 2: Proceedings of the Sixth International Workshop on Semantic Evaluation (SemEval 2012), Montreal, Canada, 2012, pp. 90-94.
[Abstract] [BibTeX]

We present a framework, based on Sejane and Eger (2012), for inducing lexical semantic typologies for groups of languages. Our framework rests on lexical semantic association networks derived from encoding, via bilingual corpora, each language in a common reference language, the tertium comparationis, so that distances between languages can easily be determined.
@InProceedings{Eger:2012:c,
Author         = {Eger, Steffen},
Title          = {Lexical semantic typologies from bilingual corpora - A
framework},
BookTitle      = {SEM 2012: The First Joint Conference on Lexical and
Computational Semantics -- Volume 1: Proceedings of the
main conference and the shared task, and Volume 2:
Proceedings of the Sixth International Workshop on
Semantic Evaluation (SemEval 2012)},
Pages          = {90-94},
Publisher      = {Association for Computational Linguistics},
abstract       = {We present a framework, based on Sejane and Eger
(2012), for inducing lexical semantic typologies for
groups of languages. Our framework rests on lexical
semantic association networks derived from encoding,
via bilingual corpora, each language in a common
reference language, the tertium comparationis, so that
distances between languages can easily be determined.},
pdf            = {http://www.aclweb.org/anthology/S12-1015},
website        = {http://dl.acm.org/citation.cfm?id=2387653},
year           = 2012
}
• A. Mehler, C. Stegbauer, and R. Gleim, “Latent Barriers in Wiki-based Collaborative Writing,” in Proceedings of the Wikipedia Academy: Research and Free Knowledge. June 29 – July 1 2012, Berlin, 2012.
[BibTeX]

@InProceedings{Mehler:Stegbauer:Gleim:2012:b,
Author         = {Mehler, Alexander and Stegbauer, Christian and Gleim,
Rüdiger},
Title          = {Latent Barriers in Wiki-based Collaborative Writing},
BookTitle      = {Proceedings of the Wikipedia Academy: Research and
Free Knowledge. June 29 - July 1 2012},
month          = {July},
year           = 2012
}
• A. Hoenen and T. Jügel, Altüberlieferte Sprachen als Gegenstand der Texttechnologie — Ancient Languages as the Object of Text Technology, A. Hoenen and T. Jügel, Eds., JLCL, 2012, vol. 27.
[Abstract] [BibTeX]

‘Avestan’ is the name of the ritual language of Zoroastrianism, which was the state religion of the Iranian empire in Achaemenid, Arsacid and Sasanid times, covering a time span of more than 1200 years. [1] It is named after the ‘Avesta’, i.e., the collection of holy scriptures that form the basis of the religion which was allegedly founded by Zarathushtra, also known as Zoroaster, by about the beginning of the first millennium B.C. Together with Vedic Sanskrit, Avestan represents one of the most archaic witnesses of the Indo-Iranian branch of the Indo-European languages, which makes it especially interesting for historical-comparative linguistics. This is why the texts of the Avesta were among the first objects of electronic corpus building that were undertaken in the framework of Indo-European studies, leading to the establishment of the TITUS database (‘Thesaurus indogermanischer Text- und Sprachmaterialien’). [2] Today, the complete Avestan corpus is available, together with elaborate search functions [3] and an extended version of the subcorpus of the so-called ‘Yasna’, which covers a great deal of the attestation of variant readings. [4] Right from the beginning of their computational work concerning the Avesta, the compilers [5] had to cope with the fact that the texts contained in it have been transmitted in a special script written from right to left, which was also used for printing them in the scholarly editions used until today. [6] It goes without saying that there was no way in the middle of the 1980s to encode the Avestan scriptures exactly as they are found in the manuscripts. Instead, we had to rely upon transcriptional devices that were dictated by the restrictions of character encoding as provided by the computer systems used. As the problems we had to face in this respect and the solutions we could apply are typical for the development of computational work on ancient languages, it seems worthwhile to sketch them out here.
@Book{Hoenen:Jügel:2012,
Author         = {Hoenen, Armin and Jügel, Thomas},
Editor         = {Armin Hoenen and Thomas Jügel},
Title          = {Altüberlieferte Sprachen als Gegenstand der
Texttechnologie -- Ancient Languages as the Object of
Text Technology},
Publisher      = {JLCL},
Volume         = {27},
Number         = {2},
abstract       = {‘Avestan’ is the name of the ritual language of
Zoroastrianism, which was the state religion of the
Iranian empire in Achaemenid, Arsacid and Sasanid
times, covering a time span of more than 1200 years.
[1] It is named after the ‘Avesta’, i.e., the
collection of holy scriptures that form the basis of
the religion which was allegedly founded by
Zarathushtra, also known as Zoroaster, by about the
beginning of the first millennium B.C. Together with
Vedic Sanskrit, Avestan represents one of the most
archaic witnesses of the Indo-Iranian branch of the
Indo-European languages, which makes it especially
interesting for historical-comparative linguistics.
This is why the texts of the Avesta were among the
first objects of electronic corpus building that were
undertaken in the framework of Indo-European studies,
leading to the establishment of the TITUS database
(‘Thesaurus indogermanischer Text- und
Sprachmaterialien’). [2] Today, the complete Avestan
corpus is available, together with elaborate search
functions [3] and an extended version of the subcorpus
of the so-called ‘Yasna’, which covers a great deal
of the attestation of variant readings. [4] Right from
the beginning of their computational work concerning
the Avesta, the compilers [5] had to cope with the fact
that the texts contained in it have been transmitted in
a special script written from right to left, which was
also used for printing them in the scholarly editions
used until today. [6] It goes without saying that there
was no way in the middle of the 1980s to encode the
Avestan scriptures exactly as they are found in the
manuscripts. Instead, we had to rely upon
transcriptional devices that were dictated by the
restrictions of character encoding as provided by the
computer systems used. As the problems we had to face
in this respect and the solutions we could apply are
typical for the development of computational work on
ancient languages, it seems worthwhile to sketch them
out here.},
issn           = {2190-6858},
pdf            = {http://www.jlcl.org/2012_Heft2/H2012-2.pdf},
year           = 2012
}
• T. vor der Brück, Wissensakquisition mithilfe maschineller Lernverfahren auf tiefen semantischen Repräsentationen, Heidelberg, Germany: Springer, 2012.
[Abstract] [BibTeX]

A large knowledge base is a prerequisite for a variety of applications in the field of automatic language processing, such as question answering or information retrieval systems. A human being has acquired the knowledge needed to search for information or answer questions over the course of a lifetime; a computer must be given this knowledge explicitly. Tim vor der Brück describes an approach by which a computer can acquire this knowledge, much like a human, by reading texts. Methods from logic and machine learning are employed.
@Book{vor:der:Brueck:2012:a,
Author         = {vor der Brück, Tim},
Title          = {Wissensakquisition mithilfe maschineller Lernverfahren
auf tiefen semantischen Repr{\"a}sentationen},
Publisher      = {Springer},
abstract       = {Eine gro{\ss}e Wissensbasis ist eine Voraussetzung
für eine Vielzahl von Anwendungen im Bereich der
automatischen Sprachverarbeitung, wie Frage-Antwort-
oder Information-Retrieval-Systeme. Ein Mensch hat sich
das erforderliche Wissen, um Informationen zu suchen
oder Fragen zu beantworten, im Laufe seines Lebens
angeeignet. Einem Computer muss dieses Wissen explizit
mitgeteilt werden. Tim vor der Brück beschreibt einen
Ansatz, wie ein Computer dieses Wissen {\"a}hnlich wie
ein Mensch durch die Lektüre von Texten erwerben kann.
Dabei kommen Methoden der Logik und des maschinellen
Lernens zum Einsatz.},
school         = {FernUniversit{\"a}t in Hagen},
year           = 2012
}
• T. vor der Brück and Y. Wang, “Synonymy Extraction from Semantic Networks Using String and Graph Kernel Methods,” in Proceedings of the 20th European Conference on Artificial Intelligence (ECAI), Montpellier, France, 2012, pp. 822-827.
[Abstract] [BibTeX]

Synonyms are a highly relevant information source for natural language processing. Automatic synonym extraction methods have in common that they are either applied on the surface representation of the text or on a syntactical structure derived from it. In this paper, however, we present a semantic synonym extraction approach that operates directly on semantic networks (SNs), which were derived from text by a deep syntactico-semantic analysis. Synonymy hypotheses are extracted from the SNs by graph matching. These hypotheses are then validated by a support vector machine (SVM) employing a combined graph and string kernel. Our method was compared to several other approaches and the evaluation has shown that our results are considerably superior.
@InProceedings{vor:der:Brueck:Wang:2012,
Author         = {vor der Brück, Tim and Wang, Yu-Fang},
Title          = {Synonymy Extraction from Semantic Networks Using
String and Graph Kernel Methods},
BookTitle      = {Proceedings of the 20th European Conference on
Artificial Intelligence (ECAI)},
Pages          = {822--827},
abstract       = {Synonyms are a highly relevant information source for
natural language processing. Automatic synonym
extraction methods have in common that they are either
applied on the surface representation of the text or on
a syntactical structure derived from it. In this paper,
however, we present a semantic synonym extraction
approach that operates directly on semantic networks
(SNs), which were derived from text by a deep
syntactico-semantic analysis. Synonymy hypotheses are
extracted from the SNs by graph matching. These
hypotheses are then validated by a support vector
machine (SVM) employing a combined graph and string
kernel. Our method was compared to several other
approaches and the evaluation has shown that our
results are considerably superior.},
pdf            = {http://www.vdb1.de/papers/ECAI_535.pdf},
website        = {http://ebooks.iospress.nl/publication/7076},
year           = 2012
}
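
The validation step — an SVM over a combined graph and string kernel — can be sketched with scikit-learn's precomputed-kernel interface. The two gram matrices below are random positive-semidefinite stand-ins for real graph- and string-kernel computations on synonymy hypotheses.

```python
# SVM classification with a combined precomputed kernel.
# K_graph and K_string are toy PSD matrices, not real kernel values.
import numpy as np
from sklearn.svm import SVC

rng = np.random.default_rng(0)
B = rng.normal(size=(8, 4))
K_graph = B @ B.T            # stand-in for a graph kernel gram matrix
C = rng.normal(size=(8, 4))
K_string = C @ C.T           # stand-in for a string kernel gram matrix

K = 0.5 * K_graph + 0.5 * K_string       # combined kernel
y = np.array([1, 1, 1, 1, 0, 0, 0, 0])   # hypothesis valid / invalid

clf = SVC(kernel="precomputed").fit(K, y)
print(clf.predict(K))  # predictions on the training gram matrix
```

A convex combination of two valid kernels is again a valid kernel, which is what makes such a combination well-defined.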
• T. vor der Brück, “Hyponym Extraction Employing a Weighted Graph Kernel,” in Statistical and Machine Learning Approaches for Network Analysis, M. Dehmer and S. C. Basak, Eds., Hoboken, New Jersey: Wiley, 2012.
[BibTeX]

@InCollection{vor:der:Brueck:2012:b,
Author         = {vor der Brück, Tim},
Title          = {Hyponym Extraction Employing a Weighted Graph Kernel},
BookTitle      = {Statistical and Machine Learning Approaches for
Network Analysis},
Publisher      = {Wiley},
Editor         = {Matthias Dehmer and Subhash C. Basak},
year           = 2012
}
• M. Z. Islam, A. Mehler, and R. Rahman, “Text Readability Classification of Textbooks of a Low-Resource Language,” in Proceedings of the 26th Pacific Asia Conference on Language, Information, and Computation (PACLIC 26), 2012.
[Abstract] [BibTeX]

There are many languages considered to be low-density languages, either because the population speaking the language is not very large, or because insufficient digitized text material is available in the language even though millions of people speak the language. Bangla is one of the latter ones. Readability classification is an important Natural Language Processing (NLP) application that can be used to judge the quality of documents and assist writers to locate possible problems. This paper presents a readability classifier of Bangla textbook documents based on information-theoretic and lexical features. The features proposed in this paper result in an F-score that is 50% higher than that for traditional readability formulas.
@InProceedings{Islam:Mehler:Rahman:2012,
Author         = {Islam, Md. Zahurul and Mehler, Alexander and Rahman,
Rashedur},
Title          = {Text Readability Classification of Textbooks of a
Low-Resource Language},
BookTitle      = {Proceedings of the 26th Pacific Asia Conference on
Language, Information, and Computation (PACLIC 26)},
abstract       = {There are many languages considered to be low-density
languages, either because the population speaking the
language is not very large, or because insufficient
digitized text material is available in the language
even though millions of people speak the language.
Bangla is one of the latter ones. Readability
classification is an important Natural Language
Processing (NLP) application that can be used to judge
the quality of documents and assist writers to locate
possible problems. This paper presents a readability
classifier of Bangla textbook documents based on
information-theoretic and lexical features. The
features proposed in this paper result in an F-score
that is 50% higher than that for traditional
readability formulas.},
owner          = {zahurul},
pdf            = {http://www.aclweb.org/anthology/Y12-1059},
timestamp      = {2012.08.14},
year           = 2012
}
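
The two feature families named in the abstract — information-theoretic and lexical — can be illustrated minimally; the paper's actual feature set is richer than these two toy functions.

```python
# Two illustrative readability features: word-distribution entropy
# (information-theoretic) and average word length (lexical).
import math
from collections import Counter

def word_entropy(tokens):
    """Entropy of the word distribution in bits."""
    counts = Counter(tokens)
    n = len(tokens)
    return -sum((c / n) * math.log2(c / n) for c in counts.values())

def avg_word_length(tokens):
    """A simple lexical feature."""
    return sum(len(t) for t in tokens) / len(tokens)

doc = "the cat sat on the mat and the dog sat on the rug".split()
print(word_entropy(doc), avg_word_length(doc))
```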
• A. Mehler, L. Romary, and D. Gibbon, “Introduction: Framing Technical Communication,” in Handbook of Technical Communication, A. Mehler, L. Romary, and D. Gibbon, Eds., Berlin and Boston: De Gruyter Mouton, 2012, vol. 8, pp. 1-26.
[BibTeX]

@InCollection{Mehler:Romary:Gibbon:2012,
Author         = {Mehler, Alexander and Romary, Laurent and Gibbon,
Dafydd},
Title          = {Introduction: Framing Technical Communication},
BookTitle      = {Handbook of Technical Communication},
Publisher      = {De Gruyter Mouton},
Editor         = {Alexander Mehler and Laurent Romary and Dafydd Gibbon},
Volume         = {8},
Series         = {Handbooks of Applied Linguistics},
Pages          = {1-26},
year           = 2012
}
• A. Mehler and A. Lücking, “Pathways of Alignment between Gesture and Speech: Assessing Information Transmission in Multimodal Ensembles,” in Proceedings of the International Workshop on Formal and Computational Approaches to Multimodal Communication under the auspices of ESSLLI 2012, Opole, Poland, 6-10 August, 2012.
[Abstract] [BibTeX]

We present an empirical account of multimodal ensembles based on Hjelmslev’s notion of selection. This is done to get measurable evidence for the existence of speech-and-gesture ensembles. Utilizing information theory, we show that there is an information transmission that makes a gesture’s representation technique predictable when merely knowing its lexical affiliate – in line with the notion of the primacy of language. Thus, there is evidence for a one-way coupling – going from words to gestures – that leads to speech-and-gesture alignment and underlies the constitution of multimodal ensembles.
@InProceedings{Mehler:Luecking:2012:d,
Author         = {Mehler, Alexander and Lücking, Andy},
Title          = {Pathways of Alignment between Gesture and Speech:
Assessing Information Transmission in Multimodal
Ensembles},
BookTitle      = {Proceedings of the International Workshop on Formal
and Computational Approaches to Multimodal
Communication under the auspices of ESSLLI 2012, Opole,
Poland, 6-10 August},
Editor         = {Gianluca Giorgolo and Katya Alahverdzhieva},
abstract       = {We present an empirical account of multimodal
ensembles based on Hjelmslev’s notion of selection.
This is done to get measurable evidence for the
existence of speech-and-gesture ensembles. Utilizing
information theory, we show that there is an
information transmission that makes a gesture’s
representation technique predictable when merely
knowing its lexical affiliate – in line with the
notion of the primacy of language. Thus, there is
evidence for a one-way coupling – going from words to
gestures – that leads to speech-and-gesture alignment
and underlies the constitution of multimodal ensembles.},
keywords       = {wikinect},
website        = {http://www.researchgate.net/publication/268368670_Pathways_of_Alignment_between_Gesture_and_Speech_Assessing_Information_Transmission_in_Multimodal_Ensembles},
year           = 2012
}
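
The "information transmission" claim can be operationalized as the mutual information between lexical affiliates and gesture representation techniques; the paired observations below are hypothetical stand-ins for annotated data.

```python
# Mutual information between two categorical variables, here
# (lexical affiliate, gesture representation technique) pairs.
import math
from collections import Counter

pairs = [("ball", "shaping"), ("ball", "shaping"), ("route", "drawing"),
         ("route", "drawing"), ("house", "modelling"), ("ball", "drawing")]

def mutual_information(pairs):
    n = len(pairs)
    pxy = Counter(pairs)
    px = Counter(x for x, _ in pairs)
    py = Counter(y for _, y in pairs)
    return sum((c / n) * math.log2((c / n) / ((px[x] / n) * (py[y] / n)))
               for (x, y), c in pxy.items())

print(mutual_information(pairs))  # > 0 bits: knowing the word reduces
                                  # uncertainty about the gesture technique
```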
• A. Lücking, “Towards a Conceptual, Unification-based Speech-Gesture Interface,” in Proceedings of the International Workshop on Formal and Computational Approaches to Multimodal Communication under the auspices of ESSLLI 2012, Opole, Poland, 6-10 August, 2012.
[Abstract] [BibTeX]

A framework for grounding the semantics of co-verbal iconic gestures is presented. A resemblance account to iconicity is discarded in favor of an exemplification approach. It is sketched how exemplification can be captured within a unification-based grammar that provides a conceptual interface. Gestures modeled as vector sequences are the exemplificational base. Some hypotheses that follow from the general account are pointed at and remaining challenges are discussed.
@InProceedings{Luecking:2012,
Author         = {Lücking, Andy},
Title          = {Towards a Conceptual, Unification-based Speech-Gesture
Interface},
BookTitle      = {Proceedings of the International Workshop on Formal
and Computational Approaches to Multimodal
Communication under the auspices of ESSLLI 2012, Opole,
Poland, 6-10 August},
Editor         = {Gianluca Giorgolo and Katya Alahverdzhieva},
abstract       = {A framework for grounding the semantics of co-verbal
iconic gestures is presented. A resemblance account to
iconicity is discarded in favor of an exemplification
approach. It is sketched how exemplification can be
captured within a unification-based grammar that
provides a conceptual interface. Gestures modeled as
vector sequences are the exemplificational base. Some
hypotheses that follow from the general account are
pointed at and remaining challenges are discussed.},
year           = 2012
}
• A. Mehler and A. Lücking, “WikiNect: Towards a Gestural Writing System for Kinetic Museum Wikis,” in Proceedings of the International Workshop On User Experience in e-Learning and Augmented Technologies in Education (UXeLATE 2012) in Conjunction with ACM Multimedia 2012, 29 October- 2 November, Nara, Japan, 2012, pp. 7-12.
[Abstract] [BibTeX]

We introduce WikiNect as a kinetic museum information system that allows museum visitors to give on-site feedback about exhibitions. To this end, WikiNect integrates three approaches to Human-Computer Interaction (HCI): games with a purpose, wiki-based collaborative writing and kinetic text-technologies. Our aim is to develop kinetic technologies as a new paradigm of HCI. They dispense with classical interfaces (e.g., keyboards) in that they build on non-contact modes of communication like gestures or facial expressions as input displays. In this paper, we introduce the notion of gestural writing as a kinetic text-technology that underlies WikiNect to enable museum visitors to communicate their feedback. The basic idea is to explore sequences of gestures that share the semantic expressivity of verbally manifested speech acts. Our task is to identify such gestures that are learnable on-site in the usage scenario of WikiNect. This is done by referring to so-called transient gestures as part of multimodal ensembles, which are candidate gestures of the desired functionality.
@InProceedings{Mehler:Luecking:2012:c,
Author         = {Mehler, Alexander and Lücking, Andy},
Title          = {WikiNect: Towards a Gestural Writing System for
Kinetic Museum Wikis},
BookTitle      = {Proceedings of the International Workshop On User
Experience in e-Learning and Augmented Technologies in
Education (UXeLATE 2012) in Conjunction with ACM
Multimedia 2012, 29 October- 2 November, Nara, Japan},
Pages          = {7-12},
abstract       = {We introduce WikiNect as a kinetic museum information
system that allows museum visitors to give on-site
feedback about exhibitions. To this end, WikiNect
integrates three approaches to Human-Computer
Interaction (HCI): games with a purpose, wiki-based
collaborative writing and kinetic text-technologies.
Our aim is to develop kinetic technologies as a new
paradigm of HCI. They dispense with classical
interfaces (e.g., keyboards) in that they build on
non-contact modes of communication like gestures or
facial expressions as input displays. In this paper, we
introduce the notion of gestural writing as a kinetic
text-technology that underlies WikiNect to enable
museum visitors to communicate their feedback. The
basic idea is to explore sequences of gestures that
share the semantic expressivity of verbally manifested
speech acts. Our task is to identify such gestures that
are learnable on-site in the usage scenario of
WikiNect. This is done by referring to so-called
transient gestures as part of multimodal ensembles,
which are candidate gestures of the desired
functionality. },
keywords       = {wikinect},
website        = {http://www.researchgate.net/publication/262319200_WikiNect_towards_a_gestural_writing_system_for_kinetic_museum_wikis},
year           = 2012
}
• R. Gleim, A. Mehler, and A. Ernst, “SOA implementation of the eHumanities Desktop,” in Proceedings of the Workshop on Service-oriented Architectures (SOAs) for the Humanities: Solutions and Impacts, Digital Humanities 2012, Hamburg, Germany, 2012.
[Abstract] [BibTeX]

The eHumanities Desktop is a system which allows users to upload, organize and share resources using a web interface. Furthermore, resources can be processed, annotated and analyzed in various ways. Registered users can organize themselves in groups and collaboratively work on their data. The eHumanities Desktop is platform independent and runs in a web browser. This paper presents the system focusing on its service orientation and process management.
@InProceedings{Gleim:Mehler:Ernst:2012,
Author         = {Gleim, Rüdiger and Mehler, Alexander and Ernst,
Alexandra},
Title          = {SOA implementation of the eHumanities Desktop},
BookTitle      = {Proceedings of the Workshop on Service-oriented
Architectures (SOAs) for the Humanities: Solutions and
Impacts, Digital Humanities 2012, Hamburg, Germany},
abstract       = {The eHumanities Desktop is a system which allows users
to upload, organize and share resources using a web
interface. Furthermore resources can be processed,
annotated and analyzed in various ways. Registered
users can organize themselves in groups and
collaboratively work on their data. The eHumanities
Desktop is platform independent and runs in a web
browser. This paper presents the system focusing on its
service orientation and process management.},
year           = 2012
}
• A. Mehler and C. Stegbauer, “On the Self-similarity of Intertextual Structures in Wikipedia,” in Proceedings of the HotSocial ’12: The First ACM International Workshop on Hot Topics on Interdisciplinary Social Networks Research, Beijing, China, 2012, pp. 65-68.
[BibTeX]

@InProceedings{Mehler:Stegbauer:2012,
Author         = {Mehler, Alexander and Stegbauer, Christian},
Title          = {On the Self-similarity of Intertextual Structures in
Wikipedia},
BookTitle      = {Proceedings of the HotSocial '12: The First ACM
International Workshop on Hot Topics on
Interdisciplinary Social Networks Research},
Editor         = {Xiaoming Fu and Peter Gloor and Jie Tang},
Pages          = {65-68},
pdf            = {http://wan.poly.edu/KDD2012/forms/workshop/HotSocial12/doc/p64_mehler.pdf},
website        = {http://dl.acm.org/citation.cfm?id=2392633&bnc=1},
year           = 2012
}
• A. Mehler, S. Schwandt, R. Gleim, and A. Ernst, “Inducing Linguistic Networks from Historical Corpora: Towards a New Method in Historical Semantics,” in Proceedings of the Conference on New Methods in Historical Corpora, P. Bennett, M. Durrell, S. Scheible, and R. J. Whitt, Eds., Tübingen: Narr, 2012, vol. 3, pp. 257-274.
[BibTeX]

@InCollection{Mehler:Schwandt:Gleim:Ernst:2012,
Author         = {Mehler, Alexander and Schwandt, Silke and Gleim,
Rüdiger and Ernst, Alexandra},
Title          = {Inducing Linguistic Networks from Historical Corpora:
Towards a New Method in Historical Semantics},
BookTitle      = {Proceedings of the Conference on New Methods in
Historical Corpora},
Publisher      = {Narr},
Editor         = {Paul Bennett and Martin Durrell and Silke Scheible and
Richard J. Whitt},
Volume         = {3},
Series         = {Corpus linguistics and Interdisciplinary perspectives
on language (CLIP)},
Pages          = {257--274},
year           = 2012
}
• A. Lücking, S. Ptock, and K. Bergmann, “Assessing Agreement on Segmentations by Means of Staccato, the Segmentation Agreement Calculator according to Thomann,” in Gesture and Sign Language in Human-Computer Interaction and Embodied Communication, E. Efthimiou, G. Kouroupetroglou, and S. Fotina, Eds., Berlin and Heidelberg: Springer, 2012, vol. 7206, pp. 129-138.
[Abstract] [BibTeX]

Staccato, the Segmentation Agreement Calculator According to Thomann, is a software tool for assessing the degree of agreement of multiple segmentations of some time-related data (e.g., gesture phases or sign language constituents). The software implements an assessment procedure developed by Bruno Thomann and will be made publicly available. The article discusses the rationale of the agreement assessment procedure and points at future extensions of Staccato.
@InCollection{Luecking:Ptock:Bergmann:2012,
Author         = {Lücking, Andy and Ptock, Sebastian and Bergmann,
Kirsten},
Title          = {Assessing Agreement on Segmentations by Means of
Staccato, the Segmentation Agreement Calculator
according to Thomann},
BookTitle      = {Gesture and Sign Language in Human-Computer
Interaction and Embodied Communication},
Publisher      = {Springer},
Editor         = {Eleni Efthimiou and Georgios Kouroupetroglou and
Stavroula-Evita Fotina},
Volume         = {7206},
Series         = {Lecture Notes in Artificial Intelligence},
Pages          = {129-138},
abstract       = {Staccato, the Segmentation Agreement Calculator
According to Thomann , is a software tool for assessing
the degree of agreement of multiple segmentations of
some time-related data (e.g., gesture phases or sign
language constituents). The software implements an
assessment procedure developed by Bruno Thomann and
will be made publicly available. The article discusses
the rationale of the agreement assessment procedure and
points at future extensions of Staccato.},
booksubtitle   = {9th International Gesture Workshop, GW 2011, Athens,
Greece, May 2011, Revised Selected Papers},
year           = 2012
}
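
Thomann's procedure is only referenced, not spelled out, in the abstract, and it is not reproduced here; as a much simpler stand-in for the general task — scoring how far two segmentations of a time line agree — the sketch below uses total interval overlap.

```python
# A crude segmentation-agreement score based on interval overlap.
# This is an illustrative substitute, not Thomann's procedure.
def overlap(a, b):
    return max(0.0, min(a[1], b[1]) - max(a[0], b[0]))

def agreement(seg1, seg2):
    inter = sum(overlap(s, t) for s in seg1 for t in seg2)
    covered = sum(e - s for s, e in seg1) + sum(e - s for s, e in seg2)
    return 2 * inter / covered  # 1.0 for identical segmentations

ann1 = [(0.0, 1.2), (1.5, 2.0)]  # gesture phases, annotator 1 (toy data)
ann2 = [(0.1, 1.1), (1.5, 2.1)]  # gesture phases, annotator 2 (toy data)
print(agreement(ann1, ann2))
```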
• A. Mehler, A. Lücking, and P. Menke, “Assessing Cognitive Alignment in Different Types of Dialog by means of a Network Model,” Neural Networks, vol. 32, pp. 159-164, 2012.
[Abstract] [BibTeX]

We present a network model of dialog lexica, called TiTAN (Two-layer Time-Aligned Network) series. TiTAN series capture the formation and structure of dialog lexica in terms of serialized graph representations. The dynamic update of TiTAN series is driven by the dialog-inherent timing of turn-taking. The model provides a link between neural, connectionist underpinnings of dialog lexica on the one hand and observable symbolic behavior on the other. On the neural side, priming and spreading activation are modeled in terms of TiTAN networking. On the symbolic side, TiTAN series account for cognitive alignment in terms of the structural coupling of the linguistic representations of dialog partners. This structural stance allows us to apply TiTAN in machine learning of data of dialogical alignment. In previous studies, it has been shown that aligned dialogs can be distinguished from non-aligned ones by means of TiTAN-based modeling. Now, we simultaneously apply this model to two types of dialog: task-oriented, experimentally controlled dialogs on the one hand and more spontaneous, direction giving dialogs on the other. We ask whether it is possible to separate aligned dialogs from non-aligned ones in a type-crossing way. Starting from a recent experiment (Mehler, Lücking, & Menke, 2011a), we show that such a type-crossing classification is indeed possible. This hints at a structural fingerprint left by alignment in networks of linguistic items that are routinely co-activated during conversation.
@Article{Mehler:Luecking:Menke:2012,
Author         = {Mehler, Alexander and Lücking, Andy and Menke, Peter},
Title          = {Assessing Cognitive Alignment in Different Types of
Dialog by means of a Network Model},
Journal        = {Neural Networks},
Volume         = {32},
Pages          = {159-164},
abstract       = {We present a network model of dialog lexica, called
TiTAN (Two-layer Time-Aligned Network) series. TiTAN
series capture the formation and structure of dialog
lexica in terms of serialized graph representations.
The dynamic update of TiTAN series is driven by the
dialog-inherent timing of turn-taking. The model
provides a link between neural, connectionist
underpinnings of dialog lexica on the one hand and
observable symbolic behavior on the other. On the
neural side, priming and spreading activation are
modeled in terms of TiTAN networking. On the symbolic
side, TiTAN series account for cognitive alignment in
terms of the structural coupling of the linguistic
representations of dialog partners. This structural
stance allows us to apply TiTAN in machine learning of
data of dialogical alignment. In previous studies, it
has been shown that aligned dialogs can be
distinguished from non-aligned ones by means of TiTAN
-based modeling. Now, we simultaneously apply this
model to two types of dialog: task-oriented,
experimentally controlled dialogs on the one hand and
more spontaneous, direction giving dialogs on the
other. We ask whether it is possible to separate
aligned dialogs from non-aligned ones in a
type-crossing way. Starting from a recent experiment
(Mehler, Lücking, \& Menke, 2011a), we show that such
a type-crossing classification is indeed possible. This
hints at a structural fingerprint left by alignment in
networks of linguistic items that are routinely
co-activated during conversation.},
doi            = {10.1016/j.neunet.2012.02.013},
website        = {http://www.sciencedirect.com/science/article/pii/S0893608012000421},
year           = 2012
}
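
The TiTAN construction — one lexical layer per interlocutor, updated turn by turn, with inter-layer links for shared items — can be sketched with networkx. The turns and the unweighted linking policy are toy assumptions; the paper's weighting and serialization are richer.

```python
# Toy two-layer time-aligned network: nodes are (speaker, word) pairs,
# intra-layer edges link co-occurring words within a turn, inter-layer
# edges link the same word across speakers.
import networkx as nx

turns = [("A", ["left", "church", "tower"]),     # hypothetical dialog turns
         ("B", ["church", "tower", "right"]),
         ("A", ["right", "crossing"])]

G = nx.Graph()
for speaker, words in turns:                     # update driven by turn-taking
    for w in words:
        G.add_node((speaker, w))
    for u, v in zip(words, words[1:]):           # intra-layer co-occurrence edges
        G.add_edge((speaker, u), (speaker, v))
    for (s, w) in list(G.nodes):                 # inter-layer alignment edges
        if s != speaker and w in words:
            G.add_edge((s, w), (speaker, w))

aligned = [e for e in G.edges if e[0][0] != e[1][0]]
print(len(aligned), "inter-speaker links")       # a crude alignment signal
```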
• M. Z. Islam and A. Mehler, “Customization of the Europarl Corpus for Translation Studies,” in Proceedings of the 8th International Conference on Language Resources and Evaluation (LREC), 2012.
[Abstract] [BibTeX]

Currently, the area of translation studies lacks corpora by which translation scholars can validate their theoretical claims, for example, regarding the scope of the characteristics of the translation relation. In this paper, we describe a customized resource in the area of translation studies that mainly addresses research on the properties of the translation relation. Our experimental results show that the Type-Token-Ratio (TTR) is not a universally valid indicator of the simplification of translation.
@InProceedings{Islam:Mehler:2012:a,
Author         = {Islam, Md. Zahurul and Mehler, Alexander},
Title          = {Customization of the Europarl Corpus for Translation
Studies},
BookTitle      = {Proceedings of the 8th International Conference on
Language Resources and Evaluation (LREC)},
abstract       = {Currently, the area of translation studies lacks
corpora by which translation scholars can validate
their theoretical claims, for example, regarding the
scope of the characteristics of the translation
relation. In this paper, we describe a customized
resource in the area of translation studies that mainly
addresses research on the properties of the translation
relation. Our experimental results show that the
Type-Token-Ratio (TTR) is not a universally valid
indicator of the simplification of translation.},
owner          = {zahurul},
pdf            = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/729_Paper.pdf},
timestamp      = {2012.02.02},
year           = 2012
}
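
The finding concerns the Type-Token-Ratio, a one-line statistic; the sketch below shows the comparison one would run on originals versus their translations (toy sentences stand in for the customized Europarl data).

```python
# Type-Token-Ratio comparison between an original and its translation.
# The simplification hypothesis predicts a lower TTR for translations;
# the paper's result is that this does not hold universally.
def ttr(tokens):
    return len(set(tokens)) / len(tokens)

original = "the debate showed that the member states disagree on the budget".split()
translation = "the debate showed that the countries do not agree on the budget".split()
print(ttr(original), ttr(translation))
```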
• A. Lücking and T. Pfeiffer, “Framing Multimodal Technical Communication. With Focal Points in Speech-Gesture-Integration and Gaze Recognition,” in Handbook of Technical Communication, A. Mehler, L. Romary, and D. Gibbon, Eds., De Gruyter Mouton, 2012, vol. 8, pp. 591-644.
[BibTeX]

@InCollection{Luecking:Pfeiffer:2012,
Author         = {Lücking, Andy and Pfeiffer, Thies},
Title          = {Framing Multimodal Technical Communication. With Focal
Points in Speech-Gesture-Integration and Gaze
Recognition},
BookTitle      = {Handbook of Technical Communication},
Publisher      = {De Gruyter Mouton},
Editor         = {Alexander Mehler and Laurent Romary and Dafydd Gibbon},
Volume         = {8},
Series         = {Handbooks of Applied Linguistics},
Chapter        = {18},
Pages          = {591-644},
website        = {http://www.degruyter.com/view/books/9783110224948/9783110224948.591/9783110224948.591.xml},
year           = 2012
}
• P. Kubina, O. Abramov, and A. Lücking, “Barrier-free Communication,” in Handbook of Technical Communication, A. Mehler and L. Romary, Eds., Berlin and Boston: De Gruyter Mouton, 2012, vol. 8, pp. 645-706.
[BibTeX]

@InCollection{Kubina:Abramov:Luecking:2012,
Author         = {Kubina, Petra and Abramov, Olga and Lücking, Andy},
Title          = {Barrier-free Communication},
BookTitle      = {Handbook of Technical Communication},
Publisher      = {De Gruyter Mouton},
Editor         = {Alexander Mehler and Laurent Romary},
Volume         = {8},
Series         = {Handbooks of Applied Linguistics},
Chapter        = {19},
Pages          = {645-706},
editora        = {Dafydd Gibbon},
editoratype    = {collaborator},
website        = {http://www.degruyter.com/view/books/9783110224948/9783110224948.645/9783110224948.645.xml},
year           = 2012
}
• A. Lücking and A. Mehler, “What’s the Scope of the Naming Game? Constraints on Semantic Categorization,” in Proceedings of the 9th International Conference on the Evolution of Language, Kyoto, Japan, 2012, pp. 196-203.
[Abstract] [BibTeX]

The Naming Game (NG) has become a vivid research paradigm for simulation studies on language evolution and the establishment of naming conventions. Recently, NGs were used for reconstructing the creation of linguistic categories, most notably for color terms. We recap the functional principle of NGs and the latter Categorization Games (CGs) and evaluate them in the light of semantic data of linguistic categorization outside the domain of colors. This comparison reveals two specifics of the CG paradigm: Firstly, the emerging categories draw basically on the predefined topology of the learning domain. Secondly, the kind of categories that can be learnt in CGs is bound to context-independent intersective categories. This suggests that the NG and the CG focus on a special aspect of natural language categorization, which disregards context-sensitive categories used in a non-compositional manner.
@InProceedings{Luecking:Mehler:2012,
Author         = {Lücking, Andy and Mehler, Alexander},
Title          = {What's the Scope of the Naming Game? Constraints on
Semantic Categorization},
BookTitle      = {Proceedings of the 9th International Conference on the
Evolution of Language},
Pages          = {196-203},
abstract       = {The Naming Game (NG) has become a vivid research
paradigm for simulation studies on language evolution
and the establishment of naming conventions. Recently,
NGs were used for reconstructing the creation of
linguistic categories, most notably for color terms. We
recap the functional principle of NGs and the latter
Categorization Games (CGs) and evaluate them in the
light of semantic data of linguistic categorization
outside the domain of colors. This comparison reveals
two specifics of the CG paradigm: Firstly, the emerging
categories draw basically on the predefined topology of
the learning domain. Secondly, the kind of categories
that can be learnt in CGs is bound to
context-independent intersective categories. This
suggests that the NG and the CG focus on a special
aspect of natural language categorization, which
disregards context-sensitive categories used in a
non-compositional manner.},
url            = {http://kyoto.evolang.org/},
website        = {https://www.researchgate.net/publication/267858061_WHAT'S_THE_SCOPE_OF_THE_NAMING_GAME_CONSTRAINTS_ON_SEMANTIC_CATEGORIZATION},
year           = 2012
}
• M. Sukhareva, M. Z. Islam, A. Hoenen, and A. Mehler, “A Three-step Model of Language Detection in Multilingual Ancient Texts,” in Proceedings of Workshop on Annotation of Corpora for Research in the Humanities, Heidelberg, Germany, 2012.
[Abstract] [BibTeX]

Ancient corpora contain various multilingual patterns. This imposes numerous problems on their manual annotation and automatic processing. We introduce a lexicon building system, called Lexicon Expander, that has an integrated language detection module, Language Detection (LD) Toolkit. The Lexicon Expander post-processes the output of the LD Toolkit which leads to the improvement of f-score and accuracy values. Furthermore, the functionality of the Lexicon Expander also includes manual editing of lexical entries and automatic morphological expansion by means of a morphological grammar.
@InProceedings{Sukhareva:Islam:Hoenen:Mehler:2012,
Author         = {Sukhareva, Maria and Islam, Md. Zahurul and Hoenen,
Armin and Mehler, Alexander},
Title          = {A Three-step Model of Language Detection in
Multilingual Ancient Texts},
BookTitle      = {Proceedings of Workshop on Annotation of Corpora for
Research in the Humanities},
abstract       = {Ancient corpora contain various multilingual patterns.
This imposes numerous problems on their manual
annotation and automatic processing. We introduce a
lexicon building system, called Lexicon Expander, that
has an integrated language detection module, Language
Detection (LD) Toolkit. The Lexicon Expander
post-processes the output of the LD Toolkit which leads
to the improvement of f-score and accuracy values.
Furthermore, the functionality of the Lexicon Expander
also includes manual editing of lexical entries and
automatic morphological expansion by means of a
morphological grammar.},
year           = 2012
}

### 2011 (25)

• A. Lücking and A. Mehler, “A Model of Complexity Levels of Meaning Constitution in Simulation Models of Language Evolution,” International Journal of Signs and Semiotic Systems, vol. 1, iss. 1, pp. 18-38, 2011.
[Abstract] [BibTeX]

Currently, some simulative accounts exist within dynamic or evolutionary frameworks that are concerned with the development of linguistic categories within a population of language users. Although these studies mostly emphasize that their models are abstract, the paradigm categorization domain is preferably that of colors. In this paper, the authors argue that color adjectives are special predicates in both linguistic and metaphysical terms: semantically, they are intersective predicates, metaphysically, color properties can be empirically reduced onto purely physical properties. The restriction of categorization simulations to the color paradigm systematically leads to ignoring two ubiquitous features of natural language predicates, namely relativity and context-dependency. Therefore, the models for simulation models of linguistic categories are not able to capture the formation of categories like perspective-dependent predicates ‘left’ and ‘right’, subsective predicates like ‘small’ and ‘big’, or predicates that make reference to abstract objects like ‘I prefer this kind of situation’. The authors develop a three-dimensional grid of ascending complexity that is partitioned according to the semiotic triangle. They also develop a conceptual model in the form of a decision grid by means of which the complexity level of simulation models of linguistic categorization can be assessed in linguistic terms.
@Article{Luecking:Mehler:2011,
Author         = {Lücking, Andy and Mehler, Alexander},
Title          = {A Model of Complexity Levels of Meaning Constitution
in Simulation Models of Language Evolution},
Journal        = {International Journal of Signs and Semiotic Systems},
Volume         = {1},
Number         = {1},
Pages          = {18-38},
abstract       = {Currently, some simulative accounts exist within
dynamic or evolutionary frameworks that are concerned
with the development of linguistic categories within a
population of language users. Although these studies
mostly emphasize that their models are abstract, the
paradigm categorization domain is preferably that of
colors. In this paper, the authors argue that color
adjectives are special predicates in both linguistic
and metaphysical terms: semantically, they are
intersective predicates, metaphysically, color
properties can be empirically reduced onto purely
physical properties. The restriction of categorization
simulations to the color paradigm systematically leads
to ignoring two ubiquitous features of natural language
predicates, namely relativity and context-dependency.
Therefore, the models for simulation models of
linguistic categories are not able to capture the
formation of categories like perspective-dependent
predicates ‘left’ and ‘right’, subsective
predicates like ‘small’ and ‘big’, or
predicates that make reference to abstract objects like
‘I prefer this kind of situation’. The authors
develop a three-dimensional grid of ascending
complexity that is partitioned according to the
semiotic triangle. They also develop a conceptual model
in the form of a decision grid by means of which the
complexity level of simulation models of linguistic
categorization can be assessed in linguistic terms.},
year           = 2011
}
• A. Mehler, O. Abramov, and N. Diewald, “Geography of Social Ontologies: Testing a Variant of the Sapir-Whorf Hypothesis in the Context of Wikipedia,” Computer Speech and Language, vol. 25, iss. 3, pp. 716-740, 2011.
[Abstract] [BibTeX]

In this article, we test a variant of the Sapir-Whorf Hypothesis in the area of complex network theory. This is done by analyzing social ontologies as a new resource for automatic language classification. Our method is to solely explore structural features of social ontologies in order to predict family resemblances of languages used by the corresponding communities to build these ontologies. This approach is based on a reformulation of the Sapir-Whorf Hypothesis in terms of distributed cognition. Starting from a corpus of 160 Wikipedia-based social ontologies, we test our variant of the Sapir-Whorf Hypothesis by several experiments, and find out that we outperform the corresponding baselines. All in all, the article develops an approach to classify linguistic networks of tens of thousands of vertices by exploring a small range of mathematically well-established topological indices.
@Article{Mehler:Abramov:Diewald:2011:a,
Author         = {Mehler, Alexander and Abramov, Olga and Diewald, Nils},
Title          = {Geography of Social Ontologies: Testing a Variant of
the Sapir-Whorf Hypothesis in the Context of Wikipedia},
Journal        = {Computer Speech and Language},
Volume         = {25},
Number         = {3},
Pages          = {716-740},
abstract       = {In this article, we test a variant of the Sapir-Whorf
Hypothesis in the area of complex network theory. This
is done by analyzing social ontologies as a new
resource for automatic language classification. Our
method is to solely explore structural features of
social ontologies in order to predict family
resemblances of languages used by the corresponding
communities to build these ontologies. This approach is
based on a reformulation of the Sapir-Whorf Hypothesis
in terms of distributed cognition. Starting from a
corpus of 160 Wikipedia-based social ontologies, we
test our variant of the Sapir-Whorf Hypothesis by
several experiments, and find out that we outperform
the corresponding baselines. All in all, the article
develops an approach to classify linguistic networks of
tens of thousands of vertices by exploring a small
range of mathematically well-established topological
indices.},
doi            = {10.1016/j.csl.2010.05.006},
website        = {http://www.sciencedirect.com/science/article/pii/S0885230810000434},
year           = 2011
}
• A. Mehler, “Social Ontologies as Generalized Nearly Acyclic Directed Graphs: A Quantitative Graph Model of Social Ontologies by Example of Wikipedia,” in Towards an Information Theory of Complex Networks: Statistical Methods and Applications, M. Dehmer, F. Emmert-Streib, and A. Mehler, Eds., Boston/Basel: Birkhäuser, 2011, pp. 259-319.
[BibTeX]

@InCollection{Mehler:2011:c,
Author         = {Mehler, Alexander},
Title          = {Social Ontologies as Generalized Nearly Acyclic
Directed Graphs: A Quantitative Graph Model of Social
Ontologies by Example of Wikipedia},
BookTitle      = {Towards an Information Theory of Complex Networks:
Statistical Methods and Applications},
Publisher      = {Birkh{\"a}user},
Editor         = {Dehmer, Matthias and Emmert-Streib, Frank and Mehler,
Alexander},
Pages          = {259-319},
year           = 2011
}
• A. Lücking, S. Ptock, and K. Bergmann, “Staccato: Segmentation Agreement Calculator,” in Gesture in Embodied Communication and Human-Computer Interaction. Proceedings of the 9th International Gesture Workshop, Athens, Greece, 2011, pp. 50-53.
[BibTeX]

@InProceedings{Luecking:Ptock:Bergmann:2011,
Author         = {Lücking, Andy and Ptock, Sebastian and Bergmann,
Kirsten},
Title          = {Staccato: Segmentation Agreement Calculator},
BookTitle      = {Gesture in Embodied Communication and Human-Computer
Interaction. Proceedings of the 9th International
Gesture Workshop},
Editor         = {Eleni Efthimiou and Georgios Kouroupetroglou},
Series         = {GW 2011},
Pages          = {50--53},
Publisher      = {National and Kapodistrian University of Athens},
month          = {5},
year           = 2011
}
• A. Mehler and A. Lücking, “A Graph Model of Alignment in Multilog,” in Proceedings of IEEE Africon 2011, Zambia, 2011.
[BibTeX]

@InProceedings{Mehler:Luecking:2011,
Author         = {Mehler, Alexander and Lücking, Andy},
Title          = {A Graph Model of Alignment in Multilog},
BookTitle      = {Proceedings of IEEE Africon 2011},
Series         = {IEEE Africon},
Organization   = {IEEE},
month          = {9},
website        = {https://www.researchgate.net/publication/267941012_A_Graph_Model_of_Alignment_in_Multilog},
year           = 2011
}
• C. Stegbauer and A. Mehler, “Positionssensitive Dekomposition von Potenzgesetzen am Beispiel von Wikipedia-basierten Kollaborationsnetzwerken,” in Proceedings of the 4th Workshop Digital Social Networks at INFORMATIK 2011: Informatik schafft Communities, Oct 4-7, 2011, Berlin, 2011.
[BibTeX]

@InProceedings{Stegbauer:Mehler:2011,
Author         = {Stegbauer, Christian and Mehler, Alexander},
Title          = {Positionssensitive Dekomposition von Potenzgesetzen am
Beispiel von Wikipedia-basierten
Kollaborationsnetzwerken},
BookTitle      = {Proceedings of the 4th Workshop Digital Social
Networks at INFORMATIK 2011: Informatik schafft
Communities, Oct 4-7, 2011, Berlin},
pdf            = {http://www.user.tu-berlin.de/komm/CD/paper/090423.pdf},
specialnote    = {Best Paper Award},
specialnotewebsite = {http://www.digitale-soziale-netze.de/gi-workshop/index.php?site=review2011},
year           = 2011
}
• M. Lösch, U. Waltinger, W. Horstmann, and A. Mehler, “Building a DDC-annotated Corpus from OAI Metadata,” Journal of Digital Information, vol. 12, iss. 2, 2011.
[Abstract] [BibTeX]

Checking for readability or simplicity of texts is important for many institutional and individual users. Formulas for approximately measuring text readability have a long tradition. Usually, they exploit surface-oriented indicators like sentence length, word length, word frequency, etc. However, in many cases, this information is not adequate to realistically approximate the cognitive difficulties a person can have to understand a text. Therefore we use deep syntactic and semantic indicators in addition. The syntactic information is represented by a dependency tree, the semantic information by a semantic network. Both representations are automatically generated by a deep syntactico-semantic analysis. A global readability score is determined by applying a nearest neighbor algorithm on 3,000 ratings of 300 test persons. The evaluation showed that the deep syntactic and semantic indicators lead to promising results comparable to the best surface-based indicators. The combination of deep and shallow indicators leads to an improvement over shallow indicators alone. Finally, a graphical user interface was developed which highlights difficult passages, depending on the individual indicator values, and displays a global readability score.
@Article{Loesch:Waltinger:Horstmann:Mehler:2011,
Author         = {Lösch, Mathias and Waltinger, Ulli and Horstmann,
Wolfram and Mehler, Alexander},
Title          = {Building a DDC-annotated Corpus from OAI Metadata},
Journal        = {Journal of Digital Information},
Volume         = {12},
Number         = {2},
abstract       = {Checking for readability or simplicity of texts is
important for many institutional and individual users.
Formulas for approximately measuring text readability
have a long tradition. Usually, they exploit
surface-oriented indicators like sentence length, word
length, word frequency, etc. However, in many cases,
this information is not adequate to realistically
approximate the cognitive difficulties a person can
have to understand a text. Therefore we use deep
syntactic and semantic indicators in addition. The
syntactic information is represented by a dependency
tree, the semantic information by a semantic network.
Both representations are automatically generated by a
deep syntactico-semantic analysis. A global readability
score is determined by applying a nearest neighbor
algorithm on 3,000 ratings of 300 test persons. The
evaluation showed that the deep syntactic and semantic
indicators lead to promising results comparable to the
best surface-based indicators. The combination of deep
and shallow indicators leads to an improvement over
shallow indicators alone. Finally, a graphical user
interface was developed which highlights difficult
passages, depending on the individual indicator values,
and displays a global readability score.},
bibsource      = {DBLP, http://dblp.uni-trier.de},
website        = {http://journals.tdl.org/jodi/article/view/1765},
year           = 2011
}
• M. Lux, J. Laußmann, A. Mehler, and C. Menßen, “An Online Platform for Visualizing Time Series in Linguistic Networks,” in Proceedings of the Demonstrations Session of the 2011 IEEE / WIC / ACM International Conferences on Web Intelligence and Intelligent Agent Technology, 22 – 27 August 2011, Lyon, France, 2011.
[Poster][BibTeX]

@InProceedings{Lux:Laussmann:Mehler:Menssen:2011,
Author         = {Lux, Markus and Lau{\ss}mann, Jan and Mehler,
Alexander and Men{\ss}en, Christian},
Title          = {An Online Platform for Visualizing Time Series in
Linguistic Networks},
BookTitle      = {Proceedings of the Demonstrations Session of the 2011
IEEE / WIC / ACM International Conferences on Web
Intelligence and Intelligent Agent Technology, 22 - 27
August 2011, Lyon, France},
website        = {http://dl.acm.org/citation.cfm?id=2052396},
year           = 2011
}
• A. Mehler, N. Diewald, U. Waltinger, R. Gleim, D. Esch, B. Job, T. Küchelmann, O. Abramov, and P. Blanchard, “Evolution of Romance Language in Written Communication: Network Analysis of Late Latin and Early Romance Corpora,” Leonardo, vol. 44, iss. 3, 2011.
[BibTeX]

@Article{Mehler:Diewald:Waltinger:et:al:2010,
Author         = {Mehler, Alexander and Diewald, Nils and Waltinger,
Ulli and Gleim, Rüdiger and Esch, Dietmar and Job,
Barbara and Küchelmann, Thomas and Abramov, Olga and
Blanchard, Philippe},
Title          = {Evolution of Romance Language in Written
Communication: Network Analysis of Late Latin and Early
Romance Corpora},
Journal        = {Leonardo},
Volume         = {44},
Number         = {3},
publisher      = {MIT Press},
year           = 2011
}
• A. Mehler, A. Lücking, and P. Menke, “From Neural Activation to Symbolic Alignment: A Network-Based Approach to the Formation of Dialogue Lexica,” in Proceedings of the International Joint Conference on Neural Networks (IJCNN 2011), San Jose, California, July 31 — August 5, 2011.
[BibTeX]

@InProceedings{Mehler:Luecking:Menke:2011,
Author         = {Mehler, Alexander and Lücking, Andy and Menke, Peter},
Title          = {From Neural Activation to Symbolic Alignment: A
Network-Based Approach to the Formation of Dialogue
Lexica},
BookTitle      = {Proceedings of the International Joint Conference on
Neural Networks (IJCNN 2011), San Jose, California,
July 31 -- August 5},
website        = {http://dx.doi.org/10.1109/IJCNN.2011.6033266},
year           = 2011
}
• A. Lücking, O. Abramov, A. Mehler, and P. Menke, “The Bielefeld Jigsaw Map Game (JMG) Corpus,” in Abstracts of the Corpus Linguistics Conference 2011, Birmingham, 2011.
[BibTeX]

@InProceedings{Luecking:Abramov:Mehler:Menke:2011,
Author         = {Lücking, Andy and Abramov, Olga and Mehler, Alexander
and Menke, Peter},
Title          = {The Bielefeld Jigsaw Map Game (JMG) Corpus},
BookTitle      = {Abstracts of the Corpus Linguistics Conference 2011},
Series         = {CL2011},
pdf            = {http://www.birmingham.ac.uk/documents/college-artslaw/corpus/conference-archives/2011/Paper-137.pdf},
website        = {http://www.birmingham.ac.uk/research/activity/corpus/publications/conference-archives/2011-birmingham.aspx},
year           = 2011
}
• R. Gleim, A. Hoenen, N. Diewald, A. Mehler, and A. Ernst, “Modeling, Building and Maintaining Lexica for Corpus Linguistic Studies by Example of Late Latin,” in Corpus Linguistics 2011, 20-22 July, Birmingham, 2011.
[BibTeX]

@InProceedings{Gleim:Hoenen:Diewald:Mehler:Ernst:2011,
Author         = {Gleim, Rüdiger and Hoenen, Armin and Diewald, Nils
and Mehler, Alexander and Ernst, Alexandra},
Title          = {Modeling, Building and Maintaining Lexica for Corpus
Linguistic Studies by Example of Late Latin},
BookTitle      = {Corpus Linguistics 2011, 20-22 July, Birmingham},
year           = 2011
}
• P. Menke and A. Mehler, “From experiments to corpora: The Ariadne Corpus Management System,” in Corpus Linguistics 2011, 20-22 July, Birmingham, 2011.
[BibTeX]

@InProceedings{Menke:Mehler:2011,
Author         = {Menke, Peter and Mehler, Alexander},
Title          = {From experiments to corpora: The Ariadne Corpus
Management System},
BookTitle      = {Corpus Linguistics 2011, 20-22 July, Birmingham},
year           = 2011
}
• Towards an Information Theory of Complex Networks: Statistical Methods and Applications, M. Dehmer, F. Emmert-Streib, and A. Mehler, Eds., Boston/Basel: Birkhäuser, 2011.
[BibTeX]

@Book{Dehmer:EmmertStreib:Mehler:2009:a,
Editor         = {Dehmer, Matthias and Emmert-Streib, Frank and Mehler,
Alexander},
Title          = {Towards an Information Theory of Complex Networks:
Statistical Methods and Applications},
Publisher      = {Birkh{\"a}user},
pagetotal      = {395},
year           = 2011
}
• A. Mehler, A. Lücking, and P. Menke, “Assessing Lexical Alignment in Spontaneous Direction Dialogue Data by Means of a Lexicon Network Model,” in Proceedings of 12th International Conference on Intelligent Text Processing and Computational Linguistics (CICLing), February 20–26, Tokyo, Berlin/New York, 2011, pp. 368-379.
[BibTeX]

@InProceedings{Mehler:Luecking:Menke:2011:a,
Author         = {Mehler, Alexander and Lücking, Andy and Menke, Peter},
Title          = {Assessing Lexical Alignment in Spontaneous Direction
Dialogue Data by Means of a Lexicon Network Model},
BookTitle      = {Proceedings of 12th International Conference on
Intelligent Text Processing and Computational
Linguistics (CICLing), February 20--26, Tokyo},
Series         = {CICLing'11},
Pages          = {368-379},
Publisher      = {Springer},
year           = 2011
}
• P. Geibel, A. Mehler, and K. Kühnberger, “Learning Methods for Graph Models of Document Structure,” in Modeling, Learning and Processing of Text Technological Data Structures, A. Mehler, K. Kühnberger, H. Lobin, H. Lüngen, A. Storrer, and A. Witt, Eds., Berlin/New York: Springer, 2011.
[BibTeX]

@InCollection{Geibel:Mehler:Kuehnberger:2011:a,
Author         = {Geibel, Peter and Mehler, Alexander and Kühnberger,
Kai-Uwe},
Title          = {Learning Methods for Graph Models of Document
Structure},
BookTitle      = {Modeling, Learning and Processing of Text
Technological Data Structures},
Publisher      = {Springer},
Editor         = {Mehler, Alexander and Kühnberger, Kai-Uwe and Lobin,
Henning and Lüngen, Harald and Storrer, Angelika and
Witt, Andreas},
Series         = {Studies in Computational Intelligence},
year           = 2011
}
• A. Mehler and U. Waltinger, “Integrating Content and Structure Learning: A Model of Hypertext Zoning and Sounding,” in Modeling, Learning and Processing of Text Technological Data Structures, A. Mehler, K. Kühnberger, H. Lobin, H. Lüngen, A. Storrer, and A. Witt, Eds., Berlin/New York: Springer, 2011.
[BibTeX]

@InCollection{Mehler:Waltinger:2011:a,
Author         = {Mehler, Alexander and Waltinger, Ulli},
Title          = {Integrating Content and Structure Learning: A Model of
Hypertext Zoning and Sounding},
BookTitle      = {Modeling, Learning and Processing of Text
Technological Data Structures},
Publisher      = {Springer},
Editor         = {Mehler, Alexander and Kühnberger, Kai-Uwe and Lobin,
Henning and Lüngen, Harald and Storrer, Angelika and
Witt, Andreas},
Series         = {Studies in Computational Intelligence},
website        = {http://rd.springer.com/chapter/10.1007/978-3-642-22613-7_15},
year           = 2011
}
• O. Abramov and A. Mehler, “Automatic Language Classification by Means of Syntactic Dependency Networks,” Journal of Quantitative Linguistics, vol. 18, iss. 4, pp. 291-336, 2011.
[Abstract] [BibTeX]

This article presents an approach to automatic language classification by means of linguistic networks. Networks of 11 languages were constructed from dependency treebanks, and the topology of these networks serves as input to the classification algorithm. The results match the genealogical similarities of these languages. In addition, we test two alternative approaches to automatic language classification – one based on n-grams and the other on quantitative typological indices. All three methods show good results in identifying genealogical groups. Beyond genetic similarities, network features (and feature combinations) offer a new source of typological information about languages. This information can contribute to a better understanding of the interplay of single linguistic phenomena observed in language.
@Article{Abramov:Mehler:2011:a,
Author         = {Abramov, Olga and Mehler, Alexander},
Title          = {Automatic Language Classification by Means of
Syntactic Dependency Networks},
Journal        = {Journal of Quantitative Linguistics},
Volume         = {18},
Number         = {4},
Pages          = {291-336},
abstract       = {This article presents an approach to automatic
language classification by means of linguistic
networks. Networks of 11 languages were constructed
from dependency treebanks, and the topology of these
networks serves as input to the classification
algorithm. The results match the genealogical
similarities of these languages. In addition, we test
two alternative approaches to automatic language
classification – one based on n-grams and the other
on quantitative typological indices. All three methods
show good results in identifying genealogical groups.
Beyond genetic similarities, network features (and
feature combinations) offer a new source of typological
information about languages. This information can
contribute to a better understanding of the interplay
of single linguistic phenomena observed in language.},
website        = {http://www.researchgate.net/publication/220469321_Automatic_Language_Classification_by_means_of_Syntactic_Dependency_Networks},
year           = 2011
}
• A. Mehler, K. Kühnberger, H. Lobin, H. Lüngen, A. Storrer, and A. Witt, Modeling, Learning and Processing of Text Technological Data Structures, A. Mehler, K. Kühnberger, H. Lobin, H. Lüngen, A. Storrer, and A. Witt, Eds., Berlin/New York: Springer, 2011.
[BibTeX]

@Book{Mehler:Kuehnberger:Lobin:Luengen:Storrer:Witt:2011,
Author         = {Mehler, Alexander and Kühnberger, Kai-Uwe and Lobin,
Henning and Lüngen, Harald and Storrer, Angelika and
Witt, Andreas},
Editor         = {Mehler, Alexander and Kühnberger, Kai-Uwe and Lobin,
Henning and Lüngen, Harald and Storrer, Angelika and
Witt, Andreas},
Title          = {Modeling, Learning and Processing of Text
Technological Data Structures},
Publisher      = {Springer},
Series         = {Studies in Computational Intelligence},
pagetotal      = {400},
website        = {/books/texttechnologybook/},
year           = 2011
}
• U. Waltinger, On Social Semantics in Information Retrieval, Saarbrücken: Südwestdeutscher Verlag für Hochschulschriften, 2011. Zugl. Diss Univ. Bielefeld (2010)
[Abstract] [BibTeX]

In this thesis we analyze the performance of social semantics in textual information retrieval. By means of collaboratively constructed knowledge derived from web-based social networks, inducing both common-sense and domain-specific knowledge as constructed by a multitude of users, we will establish an improvement in performance of selected tasks within different areas of information retrieval. This work connects the concepts and the methods of social networks and the semantic web to support the analysis of a social semantic web that combines human intelligence with machine learning and natural language processing. In this context, social networks, as instances of the social web, are capable in delivering social network data and document collections on a tremendous scale, inducing thematic dynamics that cannot be achieved by traditional expert resources. The question of an automatic conversion, annotation and processing, however, is central to the debate of the benefits of the social semantic web. Which kind of technologies and methods are available, adequate and contribute to the processing of this rapidly rising flood of information and at the same time being capable of using the wealth of information in this large, but more importantly decentralized internet. The present work researches the performance of social semantic-induced categorization by means of different document models. We will shed light on the question, to which level social networks and social ontologies contribute to selected areas within the information retrieval area, such as automatically determining term and text associations, identifying topics, text and web genre categorization, and also the domain of sentiment analysis. We will show in extensive evaluations, comparing the classical apparatus of text categorization -- Vector Space Model, Latent Semantic Analysis and Support Vector Machine -- that significant improvements can be obtained by considering the collaborative knowledge derived from the social web.
@Book{Waltinger:2011,
Author         = {Waltinger, Ulli},
Title          = {On Social Semantics in Information Retrieval},
Publisher      = {Südwestdeutscher Verlag für Hochschulschriften},
Note           = {Zugl. Diss Univ. Bielefeld (2010)},
abstract       = {In this thesis we analyze the performance of social
semantics in textual information retrieval. By means of
collaboratively constructed knowledge derived from
web-based social networks, inducing both common-sense
and domain-specific knowledge as constructed by a
multitude of users, we will establish an improvement in
performance of selected tasks within different areas of
information retrieval. This work connects the concepts
and the methods of social networks and the semantic web
to support the analysis of a social semantic web that
combines human intelligence with machine learning and
natural language processing. In this context, social
networks, as instances of the social web, are capable
in delivering social network data and document
collections on a tremendous scale, inducing thematic
dynamics that cannot be achieved by traditional expert
resources. The question of an automatic conversion,
annotation and processing, however, is central to the
debate of the benefits of the social semantic web.
Which kind of technologies and methods are available,
adequate and contribute to the processing of this
rapidly rising flood of information and at the same
time being capable of using the wealth of information
in this large, but more importantly decentralized
internet. The present work researches the performance
of social semantic-induced categorization by means of
different document models. We will shed light on the
question, to which level social networks and social
ontologies contribute to selected areas within the
information retrieval area, such as automatically
determining term and text associations, identifying
topics, text and web genre categorization, and also the
domain of sentiment analysis. We will show in extensive
evaluations, comparing the classical apparatus of text
categorization -- Vector Space Model, Latent Semantic
Analysis and Support Vector Machine -- that
significant improvements can be obtained by considering
the collaborative knowledge derived from the social
web.},
website        = {http://www.ulliwaltinger.de/on-social-semantics-in-information-retrieval/},
year           = 2011
}
• G. Doeben-Henisch, G. Abrami, M. Pfaff, and M. Struwe, “Conscious learning semiotics systems to assist human persons (CLS2H),” in AFRICON, 2011, 2011, pp. 1-7.
[Abstract] [BibTeX]

Challenged by the growing societal demand for Ambient Assistive Living (AAL) technologies, we are dedicated to develop intelligent technical devices which are able to communicate with human persons in a truly human-like manner. The core of the project is a simulation environment which enables the development of conscious learning semiotic agents which will be able to assist human persons in their daily life. We are reporting first results and future perspectives.
@InProceedings{Doebenhenisch:Abrami:Pfaff:Struwe:2011,
Author         = {Doeben-Henisch, Gerd and Abrami, Giuseppe and Pfaff,
Marcus and Struwe, Marvin},
Title          = {Conscious learning semiotics systems to assist human
persons (CLS2H)},
BookTitle      = {AFRICON, 2011},
Pages          = {1-7},
abstract       = {Challenged by the growing societal demand for Ambient
Assistive Living (AAL) technologies, we are dedicated
to develop intelligent technical devices which are able
to communicate with human persons in a truly human-like
manner. The core of the project is a simulation
environment which enables the development of conscious
learning semiotic agents which will be able to assist
human persons in their daily life. We are reporting
first results and future perspectives.},
doi            = {10.1109/AFRCON.2011.6072043},
issn           = {2153-0025},
keywords       = {ambient assistive living;conscious learning semiotic
agents;conscious learning semiotics systems;human
persons;intelligent technical devices;simulation
environment;learning (artificial
intelligence);multi-agent systems;},
month          = {sept.},
pdf            = {http://www.doeben-henisch.de/gdhnp/csg/africon2011.pdf},
website        = {http://www.researchgate.net/publication/261451874_Conscious_Learning_Semiotics_Systems_to_Assist_Human_Persons_(CLS(2)H)},
year           = 2011
}
• U. Waltinger, A. Mehler, M. Lösch, and W. Horstmann, “Hierarchical Classification of OAI Metadata Using the DDC Taxonomy,” in Advanced Language Technologies for Digital Libraries (ALT4DL), R. Bernardi, S. Chambers, B. Gottfried, F. Segond, and I. Zaihrayeu, Eds., Berlin: Springer, 2011, pp. 29-40.
[Abstract] [BibTeX]

In the area of digital library services, the access to subject-specific metadata of scholarly publications is of utmost interest. One of the most prevalent approaches for metadata exchange is the XML-based Open Archive Initiative (OAI) Protocol for Metadata Harvesting (OAI-PMH). However, due to its loose requirements regarding metadata content there is no strict standard for consistent subject indexing specified, which is furthermore needed in the digital library domain. This contribution addresses the problem of automatic enhancement of OAI metadata by means of the most widely used universal classification schemes in libraries—the Dewey Decimal Classification (DDC). To be more specific, we automatically classify scientific documents according to the DDC taxonomy within three levels using a machine learning-based classifier that relies solely on OAI metadata records as the document representation. The results show an asymmetric distribution of documents across the hierarchical structure of the DDC taxonomy and issues of data sparseness. However, the performance of the classifier shows promising results on all three levels of the DDC.
@InCollection{Waltinger:Mehler:Loesch:Horstmann:2011,
Author         = {Waltinger, Ulli and Mehler, Alexander and Lösch,
Mathias and Horstmann, Wolfram},
Title          = {Hierarchical Classification of OAI Metadata Using the
DDC Taxonomy},
BookTitle      = {Advanced Language Technologies for Digital Libraries
(ALT4DL)},
Publisher      = {Springer},
Editor         = {Raffaella Bernardi and Sally Chambers and Bjoern
Gottfried and Frederique Segond and Ilya Zaihrayeu},
Series         = {LNCS},
Pages          = {29-40},
abstract       = {In the area of digital library services, the access to
subject-specific metadata of scholarly publications is
of utmost interest. One of the most prevalent
approaches for metadata exchange is the XML-based Open
Archive Initiative (OAI) Protocol for Metadata
Harvesting (OAI-PMH). However, due to its loose
requirements regarding metadata content there is no
strict standard for consistent subject indexing
specified, which is furthermore needed in the digital
library domain. This contribution addresses the problem
of automatic enhancement of OAI metadata by means of
the most widely used universal classification schemes
in libraries—the Dewey Decimal Classification (DDC).
To be more specific, we automatically classify
scientific documents according to the DDC taxonomy
within three levels using a machine learning-based
classifier that relies solely on OAI metadata records
as the document representation. The results show an
asymmetric distribution of documents across the
hierarchical structure of the DDC taxonomy and issues
of data sparseness. However, the performance of the
classifier shows promising results on all three levels
of the DDC.},
year           = 2011
}
• A. Mehler, S. Schwandt, R. Gleim, and B. Jussen, “Der eHumanities Desktop als Werkzeug in der historischen Semantik: Funktionsspektrum und Einsatzszenarien,” Journal for Language Technology and Computational Linguistics (JLCL), vol. 26, iss. 1, pp. 97-117, 2011.
[Abstract] [BibTeX]

The digital humanities, or computational humanities, are developing into independent disciplines at the interface of the humanities and computer science. This development increasingly affects teaching in the field of humanities-oriented computer science. In this contribution we discuss the eHumanities Desktop as a tool for this area of teaching. More precisely, the aim is to bridge history and computer science: using historical semantics as an example, we present three teaching scenarios in which the eHumanities Desktop is used in the teaching of history. The contribution closes with an analysis of requirements for future developments in this area.
@Article{Mehler:Schwandt:Gleim:Jussen:2011,
Author         = {Mehler, Alexander and Schwandt, Silke and Gleim,
Rüdiger and Jussen, Bernhard},
Title          = {Der eHumanities Desktop als Werkzeug in der
historischen Semantik: Funktionsspektrum und
Einsatzszenarien},
Journal        = {Journal for Language Technology and Computational
Linguistics (JLCL)},
Volume         = {26},
Number         = {1},
Pages          = {97-117},
abstract       = {Die Digital Humanities bzw. die Computational
Humanities entwickeln sich zu eigenst{\"a}ndigen
Disziplinen an der Nahtstelle von Geisteswissenschaft
und Informatik. Diese Entwicklung betrifft zunehmend
auch die Lehre im Bereich der geisteswissenschaftlichen
Fachinformatik. In diesem Beitrag thematisieren wir den
eHumanities Desktop als ein Werkzeug für diesen
Bereich der Lehre. Dabei geht es genauer um einen
Brückenschlag zwischen Geschichtswissenschaft und
Informatik: Am Beispiel der historischen Semantik
stellen wir drei Lehrszenarien vor, in denen der
eHumanities Desktop in der geschichtswissenschaftlichen
Lehre zum Einsatz kommt. Der Beitrag schliesst mit
einer Anforderungsanalyse an zukünftige Entwicklungen
in diesem Bereich.},
pdf            = {http://media.dwds.de/jlcl/2011_Heft1/8.pdf},
year           = 2011
}
• T. Dong and T. vor der Brück, “Qualitative Spatial Knowledge Acquisition Based on the Connection Relation,” in Proceedings of the 3rd International Conference on Advanced Cognitive Technologies and Applications (COGNITIVE), Rome, Italy, 2011, pp. 70-75.
[Abstract] [BibTeX]

Research in cognitive psychology shows that the connection relation is the primitive spatial relation. This paper proposes a novel spatial knowledge representation of indoor environments based on the connection relation, and demonstrates how deictic orientation relations can be acquired from a map, which is constructed purely on connection relations between extended objects. Without loss of generality, we restrict indoor environments to be constructed by a set of rectangles, each representing either a room or a corridor. The term fiat cell is coined to represent a subjective partition along a corridor. Spatial knowledge includes rectangles, sides information of rectangles, connection relations among rectangles, and fiat cells of rectangles. Efficient algorithms are given for identifying one shortest path between two locations, transforming paths into fiat paths, and acquiring deictic orientations.
@InProceedings{Dong:vor:der:Brueck:2011,
Author         = {Dong, Tiansi and vor der Brück, Tim},
Title          = {Qualitative Spatial Knowledge Acquisition Based on the
Connection Relation},
BookTitle      = {Proceedings of the 3rd International Conference on
Advanced Cognitive Technologies and Applications
(COGNITIVE)},
Editor         = {Terry Bossomaier and Pascal Lorenz},
Pages          = {70--75},
abstract       = {Research in cognitive psychology shows that the
connection relation is the primitive spatial relation.
This paper proposes a novel spatial knowledge
representation of indoor environments based on the
connection relation, and demonstrates how deictic
orientation relations can be acquired from a map, which
is constructed purely on connection relations between
extended objects. Without loss of generality, we
restrict indoor environments to be constructed by a set
of rectangles, each representing either a room or a
corridor. The term fiat cell is coined to represent a
subjective partition along a corridor. Spatial
knowledge includes rectangles, sides information of
rectangles, connection relations among rectangles, and
fiat cells of rectangles. Efficient algorithms are
given for identifying one shortest path between two
locations, transforming paths into fiat paths, and
acquiring deictic orientations.},
website        = {http://www.thinkmind.org/index.php?view=article&articleid=cognitive_2011_3_40_40123},
year           = 2011
}
• M. Z. Islam, R. Mittmann, and A. Mehler, “Multilingualism in Ancient Texts: Language Detection by Example of Old High German and Old Saxon,” in GSCL conference on Multilingual Resources and Multilingual Applications (GSCL 2011), 28-30 September, Hamburg, Germany, 2011.
[Abstract] [BibTeX]

In this paper, we present an approach to language detection in streams of multilingual ancient texts. We introduce a supervised classifier that detects, amongst others, Old High German (OHG) and Old Saxon (OS). We evaluate our model by means of three experiments that show that language detection is possible even for dead languages. Finally, we present an experiment in unsupervised language detection as a tertium comparationis for our supervised classifier.
@InProceedings{Zahurul:Mittmann:Mehler:2011,
Author         = {Islam, Md. Zahurul and Mittmann, Roland and Mehler,
Alexander},
Title          = {Multilingualism in Ancient Texts: Language Detection
by Example of Old High German and Old Saxon},
BookTitle      = {GSCL conference on Multilingual Resources and
Multilingual Applications (GSCL 2011), 28-30 September,
Hamburg, Germany},
abstract       = {In this paper, we present an approach to language
detection in streams of multilingual ancient texts. We
introduce a supervised classifier that detects, amongst
others, Old High German (OHG) and Old Saxon (OS). We
evaluate our model by means of three experiments that
show that language detection is possible even for dead
languages. Finally, we present an experiment in
unsupervised language detection as a tertium
comparationis for our supervised classifier.},
timestamp      = {2011.08.25},
year           = 2011
}

### 2010 (26)

• A. Mehler, “Minimum Spanning Markovian Trees: Introducing Context-Sensitivity into the Generation of Spanning Trees,” in Structural Analysis of Complex Networks, M. Dehmer, Ed., Basel: Birkhäuser Publishing, 2010, pp. 381-401.
[Abstract] [BibTeX]

This chapter introduces a novel class of graphs: Minimum Spanning Markovian Trees (MSMTs). The idea behind MSMTs is to provide spanning trees that minimize the costs of edge traversals in a Markovian manner, that is, in terms of the path starting with the root of the tree and ending at the vertex under consideration. In a second part, the chapter generalizes this class of spanning trees in order to allow for damped Markovian effects in the course of spanning. These two effects, (1) the sensitivity to the contexts generated by consecutive edges and (2) the decreasing impact of more antecedent (or 'weakly remembered') vertices, are well known in cognitive modeling [6, 10, 21, 23]. In this sense, the chapter can also be read as an effort to introduce a graph model to support the simulation of cognitive systems. Note that MSMTs are not to be confused with branching Markov chains or Markov trees [20] as we focus on generating spanning trees from given weighted undirected networks.
@InCollection{Mehler:2010:a,
Author         = {Mehler, Alexander},
Title          = {Minimum Spanning Markovian Trees: Introducing
Context-Sensitivity into the Generation of Spanning
Trees},
BookTitle      = {Structural Analysis of Complex Networks},
Publisher      = {Birkh{\"a}user Publishing},
Editor         = {Dehmer, Matthias},
Pages          = {381-401},
abstract       = {This chapter introduces a novel class of graphs:
Minimum Spanning Markovian Trees (MSMTs). The idea
behind MSMTs is to provide spanning trees that minimize
the costs of edge traversals in a Markovian manner,
that is, in terms of the path starting with the root of
the tree and ending at the vertex under consideration.
In a second part, the chapter generalizes this class of
spanning trees in order to allow for damped Markovian
effects in the course of spanning. These two effects,
(1) the sensitivity to the contexts generated by
consecutive edges and (2) the decreasing impact of more
antecedent (or 'weakly remembered') vertices, are well
known in cognitive modeling [6, 10, 21, 23]. In this
sense, the chapter can also be read as an effort to
introduce a graph model to support the simulation of
cognitive systems. Note that MSMTs are not to be
confused with branching Markov chains or Markov trees
[20] as we focus on generating spanning trees from
given weighted undirected networks.},
website        = {https://www.researchgate.net/publication/226700676_Minimum_Spanning_Markovian_Trees_Introducing_Context-Sensitivity_into_the_Generation_of_Spanning_Trees},
year           = 2010
}
• R. Gleim and A. Mehler, “Computational Linguistics for Mere Mortals – Powerful but Easy-to-use Linguistic Processing for Scientists in the Humanities,” in Proceedings of LREC 2010, Malta, 2010.
[Abstract] [BibTeX]

Delivering linguistic resources and easy-to-use methods to a broad public in the humanities is a challenging task. On the one hand users rightly demand easy to use interfaces but on the other hand want to have access to the full flexibility and power of the functions being offered. Even though a growing number of excellent systems exist which offer convenient means to use linguistic resources and methods, they usually focus on a specific domain, as for example corpus exploration or text categorization. Architectures which address a broad scope of applications are still rare. This article introduces the eHumanities Desktop, an online system for corpus management, processing and analysis which aims at bridging the gap between powerful command line tools and intuitive user interfaces.
@InProceedings{Gleim:Mehler:2010:b,
Author         = {Gleim, Rüdiger and Mehler, Alexander},
Title          = {Computational Linguistics for Mere Mortals –
Powerful but Easy-to-use Linguistic Processing for
Scientists in the Humanities},
BookTitle      = {Proceedings of LREC 2010},
Publisher      = {ELDA},
abstract       = {Delivering linguistic resources and easy-to-use
methods to a broad public in the humanities is a
challenging task. On the one hand users rightly demand
easy to use interfaces but on the other hand want to
have access to the full flexibility and power of the
functions being offered. Even though a growing number
of excellent systems exist which offer convenient means
to use linguistic resources and methods, they usually
focus on a specific domain, as for example corpus
exploration or text categorization. Architectures which
address a broad scope of applications are still rare.
This article introduces the eHumanities Desktop, an
online system for corpus management, processing and
analysis which aims at bridging the gap between
powerful command line tools and intuitive user
interfaces. },
year           = 2010
}
• A. Mehler, A. Lücking, and P. Weiß, “A Network Model of Interpersonal Alignment,” Entropy, vol. 12, iss. 6, pp. 1440-1483, 2010.
[Abstract] [BibTeX]

In dyadic communication, both interlocutors adapt to each other linguistically, that is, they align interpersonally. In this article, we develop a framework for modeling interpersonal alignment in terms of the structural similarity of the interlocutors’ dialog lexica. This is done by means of so-called two-layer time-aligned network series, that is, a time-adjusted graph model. The graph model is partitioned into two layers, so that the interlocutors’ lexica are captured as subgraphs of an encompassing dialog graph. Each constituent network of the series is updated utterance-wise. Thus, both the inherent bipartition of dyadic conversations and their gradual development are modeled. The notion of alignment is then operationalized within a quantitative model of structure formation based on the mutual information of the subgraphs that represent the interlocutor’s dialog lexica. By adapting and further developing several models of complex network theory, we show that dialog lexica evolve as a novel class of graphs that have not been considered before in the area of complex (linguistic) networks. Additionally, we show that our framework allows for classifying dialogs according to their alignment status. To the best of our knowledge, this is the first approach to measuring alignment in communication that explores the similarities of graph-like cognitive representations.
@Article{Mehler:Weiss:Luecking:2010:a,
Author         = {Mehler, Alexander and Lücking, Andy and Wei{\ss},
Petra},
Title          = {A Network Model of Interpersonal Alignment},
Journal        = {Entropy},
Volume         = {12},
Number         = {6},
Pages          = {1440-1483},
abstract       = {In dyadic communication, both interlocutors adapt to
each other linguistically, that is, they align
interpersonally. In this article, we develop a
framework for modeling interpersonal alignment in terms
of the structural similarity of the interlocutors’
dialog lexica. This is done by means of so-called
two-layer time-aligned network series, that is, a
time-adjusted graph model. The graph model is
partitioned into two layers, so that the
interlocutors’ lexica are captured as subgraphs of an
encompassing dialog graph. Each constituent network of
the series is updated utterance-wise. Thus, both the
inherent bipartition of dyadic conversations and their
gradual development are modeled. The notion of
alignment is then operationalized within a quantitative
model of structure formation based on the mutual
information of the subgraphs that represent the
interlocutor’s dialog lexica. By adapting and further
developing several models of complex network theory, we
show that dialog lexica evolve as a novel class of
graphs that have not been considered before in the area
of complex (linguistic) networks. Additionally, we show
that our framework allows for classifying dialogs
according to their alignment status. To the best of our
knowledge, this is the first approach to measuring
alignment in communication that explores the
similarities of graph-like cognitive representations.},
doi            = {10.3390/e12061440},
pdf            = {http://www.mdpi.com/1099-4300/12/6/1440/pdf},
website        = {http://www.mdpi.com/1099-4300/12/6/1440/},
year           = 2010
}
• A. Mehler, S. Sharoff, and M. Santini, Genres on the Web: Computational Models and Empirical Studies, A. Mehler, S. Sharoff, and M. Santini, Eds., Dordrecht: Springer, 2010.
[Abstract] [BibTeX]

The volume 'Genres on the Web' has been designed for a wide audience, from the expert to the novice. It is a required book for scholars, researchers and students who want to become acquainted with the latest theoretical, empirical and computational advances in the expanding field of web genre research. The study of web genre is an overarching and interdisciplinary novel area of research that spans from corpus linguistics, computational linguistics, NLP, and text-technology, to web mining, webometrics, social network analysis and information studies. This book gives readers a thorough grounding in the latest research on web genres and emerging document types. The book covers a wide range of web-genre focussed subjects, such as: the identification of the sources of web genres, automatic web genre identification, the presentation of structure-oriented models, and empirical case studies. One of the driving forces behind genre research is the idea of a genre-sensitive information system, which incorporates genre cues complementing the current keyword-based search and retrieval applications.
@Book{Mehler:Sharoff:Santini:2010:a,
Author         = {Mehler, Alexander and Sharoff, Serge and Santini,
Marina},
Editor         = {Mehler, Alexander and Sharoff, Serge and Santini,
Marina},
Title          = {Genres on the Web: Computational Models and Empirical
Studies},
Publisher      = {Springer},
abstract       = {The volume 'Genres on the Web' has been designed for a
wide audience, from the expert to the novice. It is a
required book for scholars, researchers and students
who want to become acquainted with the latest
theoretical, empirical and computational advances in
the expanding field of web genre research. The study of
web genre is an overarching and interdisciplinary novel
area of research that spans from corpus linguistics,
computational linguistics, NLP, and text-technology, to
web mining, webometrics, social network analysis and
information studies. This book gives readers a thorough
grounding in the latest research on web genres and
emerging document types. The book covers a wide range
of web-genre focussed subjects, such as: -The
identification of the sources of web genres -Automatic
web genre identification -The presentation of
structure-oriented models -Empirical case studies One
of the driving forces behind genre research is the idea
of a genre-sensitive information system, which
incorporates genre cues complementing the current
keyword-based search and retrieval applications.},
booktitle      = {Genres on the Web: Computational Models and Empirical
Studies},
pagetotal      = {376},
website        = {http://www.springer.com/computer/ai/book/978-90-481-9177-2},
year           = 2010
}
• T. Sutter and A. Mehler, Medienwandel als Wandel von Interaktionsformen – von frühen Medienkulturen zum Web 2.0, T. Sutter and A. Mehler, Eds., Wiesbaden: Verlag für Sozialwissenschaften, 2010.
[Abstract] [BibTeX]

Die Beiträge des Bandes untersuchen den Medienwandel von frühen europäischen Medienkulturen bis zu aktuellen Formen der Internetkommunikation unter soziologischer, kulturwissenschaftlicher und linguistischer Perspektive. Zwar haben sich die Massenmedien von den Beschränkungen sozialer Interaktionen gelöst, sie weisen dem Publikum aber eine distanzierte, bloß rezipierende Rolle zu. Dagegen eröffnen neue Formen 'interaktiver' Medien gesteigerte Möglichkeiten der Rückmeldung und der Mitgestaltung für die Nutzer. Der vorliegende Band fragt nach der Qualität dieses Medienwandels: Werden Medien tatsächlich interaktiv? Was bedeutet die Interaktivität neuer Medien? Werden die durch neue Medien eröffneten Beteiligungsmöglichkeiten realisiert?
@Book{Sutter:Mehler:2010,
Author         = {Sutter, Tilmann and Mehler, Alexander},
Editor         = {Sutter, Tilmann and Mehler, Alexander},
Title          = {Medienwandel als Wandel von Interaktionsformen – von
frühen Medienkulturen zum Web 2.0},
Publisher      = {Verlag für Sozialwissenschaften},
abstract       = {Die Beitr{\"a}ge des Bandes untersuchen den
Medienwandel von frühen europ{\"a}ischen
Medienkulturen bis zu aktuellen Formen der
Internetkommunikation unter soziologischer,
kulturwissenschaftlicher und linguistischer
Perspektive. Zwar haben sich die Massenmedien von den
Beschr{\"a}nkungen sozialer Interaktionen gelöst, sie
weisen dem Publikum aber eine distanzierte, blo{\ss}
rezipierende Rolle zu. Dagegen eröffnen neue Formen
'interaktiver' Medien gesteigerte Möglichkeiten der
Rückmeldung und der Mitgestaltung für die Nutzer. Der
vorliegende Band fragt nach der Qualit{\"a}t dieses
Medienwandels: Werden Medien tats{\"a}chlich
interaktiv? Was bedeutet die Interaktivit{\"a}t neuer
Medien? Werden die durch neue Medien eröffneten
Beteiligungsmöglichkeiten realisiert?},
pagetotal      = {289},
website        = {http://www.springer.com/de/book/9783531156422},
year           = 2010
}
• T. vor der Brück and H. Stenzhorn, “Logical Ontology Validation Using an Automatic Theorem Prover,” in Proceedings of the 19th European Conference on Artificial Intelligence (ECAI), Lisbon, Portugal, 2010, pp. 491-496.
[Abstract] [BibTeX]

Ontologies are utilized for a wide range of tasks, like information retrieval/extraction or text generation, and in a multitude of domains, such as biology, medicine or business and commerce. To be actually usable in such real-world scenarios, ontologies usually have to encompass a large number of factual statements. However, with increasing size, it becomes very difficult to ensure their complete correctness. This is particularly true in the case when an ontology is not hand-crafted but constructed (semi)automatically through text mining, for example. As a consequence, when inference mechanisms are applied on these ontologies, even minimal inconsistencies oftentimes lead to serious errors and are hard to trace back and find. This paper addresses this issue and describes a method to validate ontologies using an automatic theorem prover and MultiNet axioms. This logic-based approach allows to detect many inconsistencies, which are difficult or even impossible to identify through statistical methods or by manual investigation in reasonable time. To make this approach accessible for ontology developers, a graphical user interface is provided that highlights erroneous axioms directly in the ontology for quicker fixing.
@InProceedings{vor:der:Brueck:Stenzhorn:2010,
Author         = {vor der Brück, Tim and Stenzhorn, Holger},
Title          = {Logical Ontology Validation Using an Automatic Theorem
Prover},
BookTitle      = {Proceedings of the 19th European Conference on
Artificial Intelligence (ECAI)},
Pages          = {491--496},
abstract       = {Ontologies are utilized for a wide range of tasks,
like information retrieval/extraction or text
generation, and in a multitude of domains, such as
biology, medicine or business and commerce. To be
actually usable in such real-world scenarios,
ontologies usually have to encompass a large number of
factual statements. However, with increasing size, it
becomes very difficult to ensure their complete
correctness. This is particularly true in the case when
an ontology is not hand-crafted but constructed
(semi)automatically through text mining, for example.
As a consequence, when inference mechanisms are applied
on these ontologies, even minimal inconsistencies
oftentimes lead to serious errors and are hard to trace
back and find. This paper addresses this issue and
describes a method to validate ontologies using an
automatic theorem prover and MultiNet axioms. This
logic-based approach allows to detect many
inconsistencies, which are difficult or even impossible
to identify through statistical methods or by manual
investigation in reasonable time. To make this approach
accessible for ontology developers, a graphical user
interface is provided that highlights erroneous axioms
directly in the ontology for quicker fixing.},
year           = 2010
}
• T. vor der Brück, “Hypernymy Extraction Using a Semantic Network Representation,” International Journal of Computational Linguistics and Applications, vol. 1, iss. 1, pp. 105-119, 2010.
[Abstract] [BibTeX]

There are several approaches to detect hypernymy relations from texts by text mining. Usually these approaches are based on supervised learning and in a first step are extracting several patterns. These patterns are then applied to previously unseen texts and used to recognize hypernym/hyponym pairs. Normally these approaches are only based on a surface representation or a syntactical tree structure, i.e., constituency or dependency trees derived by a syntactical parser. In this work, however, we present an approach that operates directly on a semantic network (SN), which is generated by a deep syntactico-semantic analysis. Hyponym/hypernym pairs are then extracted by the application of graph matching. This algorithm is combined with a shallow approach enriched with semantic information.
@Article{vor:der:Brueck:2010,
Author         = {vor der Brück, Tim},
Title          = {Hypernymy Extraction Using a Semantic Network
Representation},
Journal        = {International Journal of Computational Linguistics and
Applications},
Volume         = {1},
Number         = {1},
Pages          = {105--119},
abstract       = {There are several approaches to detect hypernymy
relations from texts by text mining. Usually these
approaches are based on supervised learning and in a
first step are extracting several patterns. These
patterns are then applied to previously unseen texts
and used to recognize hypernym/hyponym pairs. Normally
these approaches are only based on a surface
representation or a syntactical tree structure, i.e.,
constituency or dependency trees derived by a
syntactical parser. In this work, however, we present
an approach that operates directly on a semantic
network (SN), which is generated by a deep
syntactico-semantic analysis. Hyponym/hypernym pairs
are then extracted by the application of graph
matching. This algorithm is combined with a shallow
approach enriched with semantic information.},
pdf            = {http://www.gelbukh.com/ijcla/2010-1-2/Hypernymy%20Extraction%20Using.pdf},
website        = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.358.533},
year           = 2010
}
• T. vor der Brück, “Learning Deep Semantic Patterns for Hypernymy Extraction Following the Minimum Description Length Principle,” in Proceedings of the 29th International Conference on Lexis and Grammar (LGC), Belgrade, Serbia, 2010, pp. 39-49.
[Abstract] [BibTeX]

Current approaches of hypernymy acquisition are mostly based on syntactic or surface representations and extract hypernymy relations between surface word forms and not word readings. In this paper we present a purely semantic approach for hypernymy extraction based on semantic networks (SNs). This approach employs a set of patterns sub0(a1,a2) <-- premise where the premise part of a pattern is given by a SN. Furthermore this paper describes how the patterns can be derived by relational statistical learning following the Minimum Description Length principle (MDL). The evaluation demonstrates the usefulness of the learned patterns and also of the entire hypernymy extraction system.
@InProceedings{vor:der:Brueck:2010:a,
Author         = {vor der Brück, Tim},
Title          = {Learning Deep Semantic Patterns for Hypernymy
Extraction Following the Minimum Description Length
Principle},
BookTitle      = {Proceedings of the 29th International Conference on
Lexis and Grammar (LGC)},
Pages          = {39--49},
abstract       = {Current approaches of hypernymy acquisition are mostly
based on syntactic or surface representations and
extract hypernymy relations between surface word forms
and not word readings. In this paper we present a
purely semantic approach for hypernymy extraction based
on semantic networks (SNs). This approach employs a set
of patterns sub0 (a1,a2) <-- premise where the premise
part of a pattern is given by a SN. Furthermore this
paper describes how the patterns can be derived by
relational statistical learning following the Minimum
Description Length principle (MDL). The evaluation
demonstrates the usefulness of the learned patterns and
also of the entire hypernymy extraction system.},
year           = 2010
}
• T. vor der Brück, "Learning Semantic Network Patterns for Hypernymy Extraction," in Proceedings of the 6th Workshop on Ontologies and Lexical Resources (OntoLex), Beijing, China, 2010, pp. 38-47.
[BibTeX]

@InProceedings{vor:der:Brueck:2010:b,
Author         = {vor der Brück, Tim},
Title          = {Learning Semantic Network Patterns for Hypernymy
Extraction},
BookTitle      = {Proceedings of the 6th Workshop on Ontologies and
Lexical Resources (OntoLex)},
Pages          = {38--47},
website        = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.358.533},
year           = 2010
}
• S. Hartrumpf, T. vor der Brück, and C. Eichhorn, "Detecting Duplicates with Shallow and Parser-based Methods," in Proceedings of the 6th International Conference on Natural Language Processing and Knowledge Engineering (NLPKE), Beijing, China, 2010, pp. 142-149.
[Abstract] [BibTeX]

Identifying duplicate texts is important in many areas like plagiarism detection, information retrieval, text summarization, and question answering. Current approaches are mostly surface-oriented (or use only shallow syntactic representations) and see each text only as a token list. In this work however, we describe a deep, semantically oriented method based on semantic networks which are derived by a syntactico-semantic parser. Semantically identical or similar semantic networks for each sentence of a given base text are efficiently retrieved by using a specialized semantic network index. In order to detect many kinds of paraphrases the current base semantic network is varied by applying inferences: lexico-semantic relations, relation axioms, and meaning postulates. Some important phenomena occurring in difficult-to-detect duplicates are discussed. The deep approach profits from background knowledge, whose acquisition from corpora like Wikipedia is explained briefly. This deep duplicate recognizer is combined with two shallow duplicate recognizers in order to guarantee high recall for texts which are not fully parsable. The evaluation shows that the combined approach preserves recall and increases precision considerably, in comparison to traditional shallow methods. For the evaluation, a standard corpus of German plagiarisms was extended by four diverse components with an emphasis on duplicates (and not just plagiarisms), e.g., news feed articles from different web sources and two translations of the same short story.
@InProceedings{vor:der:Brueck:Hartrumpf:Eichhorn:2010:a,
Author         = {Hartrumpf, Sven and vor der Brück, Tim and Eichhorn,
Christian},
Title          = {Detecting Duplicates with Shallow and Parser-based
Methods},
BookTitle      = {Proceedings of the 6th International Conference on
Natural Language Processing and Knowledge Engineering
(NLPKE)},
Pages          = {142--149},
abstract       = {Identifying duplicate texts is important in many areas
like plagiarism detection, information retrieval, text
summarization, and question answering. Current
approaches are mostly surface-oriented (or use only
shallow syntactic representations) and see each text
only as a token list. In this work however, we describe
a deep, semantically oriented method based on semantic
networks which are derived by a syntactico-semantic
parser. Semantically identical or similar semantic
networks for each sentence of a given base text are
efficiently retrieved by using a specialized semantic
network index. In order to detect many kinds of
paraphrases the current base semantic network is varied
by applying inferences: lexico-semantic relations,
relation axioms, and meaning postulates. Some important
phenomena occurring in difficult-to-detect duplicates
are discussed. The deep approach profits from
background knowledge, whose acquisition from corpora
like Wikipedia is explained briefly. This deep
duplicate recognizer is combined with two shallow
duplicate recognizers in order to guarantee high recall
for texts which are not fully parsable. The evaluation
shows that the combined approach preserves recall and
increases precision considerably, in comparison to
traditional shallow methods. For the evaluation, a
standard corpus of German plagiarisms was extended by
four diverse components with an emphasis on duplicates
(and not just plagiarisms), e.g., news feed articles
from different web sources and two translations of the
same short story.},
website        = {http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=5587838&abstractAccess=no&userType=inst},
year           = 2010
}
• S. Hartrumpf, T. vor der Brück, and C. Eichhorn, "Semantic Duplicate Identification with Parsing and Machine Learning," in Proceedings of the 13th International Conference on Text, Speech and Dialogue (TSD 2010), Brno, Czech Republic, 2010, pp. 84-92.
[Abstract] [BibTeX]

Identifying duplicate texts is important in many areas like plagiarism detection, information retrieval, text summarization, and question answering. Current approaches are mostly surface-oriented (or use only shallow syntactic representations) and see each text only as a token list. In this work however, we describe a deep, semantically oriented method based on semantic networks which are derived by a syntactico-semantic parser. Semantically identical or similar semantic networks for each sentence of a given base text are efficiently retrieved by using a specialized index. In order to detect many kinds of paraphrases the semantic networks of a candidate text are varied by applying inferences: lexico-semantic relations, relation axioms, and meaning postulates. Important phenomena occurring in difficult duplicates are discussed. The deep approach profits from background knowledge, whose acquisition from corpora is explained briefly. The deep duplicate recognizer is combined with two shallow duplicate recognizers in order to guarantee a high recall for texts which are not fully parsable. The evaluation shows that the combined approach preserves recall and increases precision considerably in comparison to traditional shallow methods.
@InProceedings{vor:der:Brueck:Hartrumpf:Eichhorn:2010:b,
Author         = {Hartrumpf, Sven and vor der Brück, Tim and Eichhorn,
Christian},
Title          = {Semantic Duplicate Identification with Parsing and
Machine Learning},
BookTitle      = {Proceedings of the 13th International Conference on
Text, Speech and Dialogue (TSD 2010)},
Editor         = {Petr Sojka and Aleš Horák and Ivan Kopeček and
Karel Pala},
Volume         = {6231},
Series         = {Lecture Notes in Artificial Intelligence},
Pages          = {84--92},
abstract       = {Identifying duplicate texts is important in many areas
like plagiarism detection, information retrieval, text
summarization, and question answering. Current
approaches are mostly surface-oriented (or use only
shallow syntactic representations) and see each text
only as a token list. In this work however, we describe
a deep, semantically oriented method based on semantic
networks which are derived by a syntactico-semantic
parser. Semantically identical or similar semantic
networks for each sentence of a given base text are
efficiently retrieved by using a specialized index. In
order to detect many kinds of paraphrases the semantic
networks of a candidate text are varied by applying
inferences: lexico-semantic relations, relation
axioms, and meaning postulates. Important phenomena
occurring in difficult duplicates are discussed. The
deep approach profits from background knowledge, whose
acquisition from corpora is explained briefly. The deep
duplicate recognizer is combined with two shallow
duplicate recognizers in order to guarantee a high
recall for texts which are not fully parsable. The
evaluation shows that the combined approach preserves
recall and increases precision considerably in
comparison to traditional shallow methods.},
month          = {September},
year           = 2010
}
• T. vor der Brück and H. Helbig, "Retrieving Meronyms from Texts Using An Automated Theorem Prover," Journal for Language Technology and Computational Linguistics (JLCL), vol. 25, iss. 1, pp. 57-81, 2010.
[Abstract] [BibTeX]

In this paper we present a truly semantic-oriented approach for meronymy relation extraction. It directly operates, instead of syntactic trees or surface representations, on semantic networks (SNs). These SNs are derived from texts (in our case, the German Wikipedia) by a deep linguistic syntactico-semantic analysis. The extraction of meronym/holonym pairs is carried out by using, among other components, an automated theorem prover, whose work is based on a set of logical axioms. The corresponding algorithm is combined with a shallow approach enriched with semantic information. Through the employment of logical methods, the recall and precision of the semantic patterns pertinent to the extracted relations can be increased considerably.
@Article{vor:der:Brueck:Helbig:2010:b,
Author         = {vor der Brück, Tim and Helbig, Hermann},
Title          = {Retrieving Meronyms from Texts Using An Automated
Theorem Prover},
Journal        = {Journal for Language Technology and Computational
Linguistics (JLCL)},
Volume         = {25},
Number         = {1},
Pages          = {57--81},
abstract       = {In this paper we present a truly semantic-oriented
approach for meronymy relation extraction. It directly
operates, instead of syntactic trees or surface
representations, on semantic networks (SNs). These SNs
are derived from texts (in our case, the German
Wikipedia) by a deep linguistic syntactico-semantic
analysis. The extraction of meronym/holonym pairs is
carried out by using, among other components, an
automated theorem prover, whose work is based on a set
of logical axioms. The corresponding algorithm is
combined with a shallow approach enriched with semantic
information. Through the employment of logical methods,
the recall and precision of the semantic patterns
pertinent to the extracted relations can be increased
considerably.},
pdf            = {http://www.jlcl.org/2010_Heft1/tim_vorderbrueck.pdf},
year           = 2010
}
• A. Lücking and K. Bergmann, "Introducing the Bielefeld SaGA Corpus," Talk given at Gesture: Evolution, Brain, and Linguistic Structures, 4th Conference of the International Society for Gesture Studies (ISGS), Europa Universität Viadrina Frankfurt/Oder, 2010.
[Abstract] [BibTeX]

People communicate multimodally. Most prominently, they co-produce speech and gesture. How do they do that? Studying the interplay of both modalities has to be informed by empirically observed communication behavior. We present a corpus built of speech and gesture data gained in a controlled study. We describe 1) the setting underlying the data; 2) annotation of the data; 3) reliability evaluation methods and results; and 4) applications of the corpus in the research domain of speech and gesture alignment.
@Misc{Luecking:Bergmann:2010,
Author         = {Andy L\"{u}cking and Kirsten Bergmann},
Title          = {Introducing the {B}ielefeld {SaGA} Corpus},
HowPublished   = {Talk given at \textit{Gesture: Evolution, Brain, and
Linguistic Structures.} 4th Conference of the
International Society for Gesture Studies (ISGS)},
abstract       = {People communicate multimodally. Most prominently,
they co-produce speech and gesture. How do they do
that? Studying the interplay of both modalities has to
be informed by empirically observed communication
behavior. We present a corpus built of speech and
gesture data gained in a controlled study. We describe
1) the setting underlying the data; 2) annotation of
the data; 3) reliability evaluation methods and results;
and 4) applications of the corpus in the research
domain of speech and gesture alignment.},
day            = {28},
month          = {07},
year           = 2010
}
• A. Lücking, "A Semantic Account for Iconic Gestures," in Gesture: Evolution, Brain, and Linguistic Structures, Europa Universität Viadrina Frankfurt/Oder, 2010, p. 210.
[BibTeX]

@InProceedings{Luecking:2010,
Author         = {Lücking, Andy},
Title          = {A Semantic Account for Iconic Gestures},
BookTitle      = {Gesture: Evolution, Brain, and Linguistic Structures},
Pages          = {210},
Organization   = {4th Conference of the International Society for
Gesture Studies (ISGS)},
keywords       = {own},
month          = {7},
website        = {http://pub.uni-bielefeld.de/publication/2318565},
year           = 2010
}
• A. Lücking, K. Bergmann, F. Hahn, S. Kopp, and H. Rieser, "The Bielefeld Speech and Gesture Alignment Corpus (SaGA)," in Multimodal Corpora: Advances in Capturing, Coding and Analyzing Multimodality, Malta, 2010, pp. 92-98.
[Abstract] [BibTeX]

People communicate multimodally. Most prominently, they co-produce speech and gesture. How do they do that? Studying the interplay of both modalities has to be informed by empirically observed communication behavior. We present a corpus built of speech and gesture data gained in a controlled study. We describe 1) the setting underlying the data; 2) annotation of the data; 3) reliability evaluation methods and results; and 4) applications of the corpus in the research domain of speech and gesture alignment.
@InProceedings{Luecking:et:al:2010,
Author         = {Lücking, Andy and Bergmann, Kirsten and Hahn, Florian
and Kopp, Stefan and Rieser, Hannes},
Title          = {The Bielefeld Speech and Gesture Alignment Corpus
(SaGA)},
BookTitle      = {Multimodal Corpora: Advances in Capturing, Coding and
Analyzing Multimodality},
Pages          = {92--98},
Organization   = {7th International Conference for Language Resources
and Evaluation (LREC 2010)},
abstract       = {People communicate multimodally. Most prominently,
they co-produce speech and gesture. How do they do
that? Studying the interplay of both modalities has to
be informed by empirically observed communication
behavior. We present a corpus built of speech and
gesture data gained in a controlled study. We describe
1) the setting underlying the data; 2) annotation of
the data; 3) reliability evaluation methods and results;
and 4) applications of the corpus in the research
domain of speech and gesture alignment.},
keywords       = {own},
month          = {5},
website        = {http://pub.uni-bielefeld.de/publication/2001935},
year           = 2010
}
• M. Z. Islam, J. Tiedemann, and A. Eisele, "English to Bangla Phrase-Based Machine Translation," in The 14th Annual Conference of The European Association for Machine Translation, Saint-Raphaël, France, 27-28 May, 2010.
[BibTeX]

@InProceedings{Zahurul:Tiedemann:Eisele:2010,
Author         = {Islam, Md. Zahurul and Tiedemann, Jörg and Eisele,
Andreas},
Title          = {English to Bangla Phrase-Based Machine Translation},
BookTitle      = {The 14th Annual Conference of The European Association
for Machine Translation. Saint-Raphaël, France, 27-28
May},
owner          = {zahurul},
timestamp      = {2011.08.02},
year           = 2010
}
• U. Waltinger, "GermanPolarityClues: A Lexical Resource for German Sentiment Analysis," in Proceedings of the Seventh conference on International Language Resources and Evaluation (LREC '10), Valletta, Malta, 2010.
[BibTeX]

@InProceedings{Waltinger:2010:a,
Author         = {Waltinger, Ulli},
Title          = {GermanPolarityClues: A Lexical Resource for German
Sentiment Analysis},
BookTitle      = {Proceedings of the Seventh conference on International
Language Resources and Evaluation (LREC '10)},
Editor         = {Nicoletta Calzolari (Conference Chair), Khalid
Choukri, Bente Maegaard, Joseph Mariani, Jan Odijk,
Stelios Piperidis, Mike Rosner, Daniel Tapias},
Publisher      = {European Language Resources Association (ELRA)},
date_0         = {2010-05},
isbn           = {2-9517408-6-7},
language       = {english},
month          = {may},
pdf            = {http://www.ulliwaltinger.de/pdf/91_Paper.pdf},
website        = {http://www.ulliwaltinger.de/sentiment/},
year           = 2010
}
• A. Mehler, P. Weiß, P. Menke, and A. Lücking, "Towards a Simulation Model of Dialogical Alignment," in Proceedings of the 8th International Conference on the Evolution of Language (Evolang8), 14-17 April 2010, Utrecht, 2010, pp. 238-245.
[BibTeX]

@InProceedings{Mehler:Weiss:Menke:Luecking:2010,
Author         = {Mehler, Alexander and Wei{\ss}, Petra and Menke, Peter
and Lücking, Andy},
Title          = {Towards a Simulation Model of Dialogical Alignment},
BookTitle      = {Proceedings of the 8th International Conference on the
Evolution of Language (Evolang8), 14-17 April 2010,
Utrecht},
Pages          = {238-245},
website        = {http://www.let.uu.nl/evolang2010.nl/},
year           = 2010
}
• F. Foscarini, Y. Kim, C. A. Lee, A. Mehler, G. Oliver, and S. Ross, "On the Notion of Genre in Digital Preservation," in Automation in Digital Preservation, Dagstuhl, Germany, 2010.
[BibTeX]

@InProceedings{Foscarini:Kim:Lee:Mehler:Oliver:Ross:2010,
Author         = {Foscarini, Fiorella and Kim, Yunhyong and Lee,
Christopher A. and Mehler, Alexander and Oliver,
Gillian and Ross, Seamus},
Title          = {On the Notion of Genre in Digital Preservation},
BookTitle      = {Automation in Digital Preservation},
Editor         = {Chanod, Jean-Pierre and Dobreva, Milena and Rauber,
Andreas and Ross, Seamus},
Number         = {10291},
Series         = {Dagstuhl Seminar Proceedings},
Publisher      = {Schloss Dagstuhl - Leibniz-Zentrum fuer Informatik,
Germany},
annote         = {Keywords: Digital preservation, genre analysis,
context modeling, diplomatics, information retrieval},
issn           = {1862-4405},
pdf            = {http://drops.dagstuhl.de/opus/volltexte/2010/2763/pdf/10291.MehlerAlexander.Paper.2763.pdf},
website        = {http://drops.dagstuhl.de/opus/volltexte/2010/2763},
year           = 2010
}
• A. Mehler, R. Gleim, U. Waltinger, and N. Diewald, "Time Series of Linguistic Networks by Example of the Patrologia Latina," in Proceedings of INFORMATIK 2010: Service Science, September 27 - October 01, 2010, Leipzig, 2010, pp. 609-616.
[BibTeX]

@InProceedings{Mehler:Gleim:Waltinger:Diewald:2010,
Author         = {Mehler, Alexander and Gleim, Rüdiger and Waltinger,
Ulli and Diewald, Nils},
Title          = {Time Series of Linguistic Networks by Example of the
Patrologia Latina},
BookTitle      = {Proceedings of INFORMATIK 2010: Service Science,
September 27 - October 01, 2010, Leipzig},
Editor         = {F{\"a}hnrich, Klaus-Peter and Franczyk, Bogdan},
Volume         = {2},
Series         = {Lecture Notes in Informatics},
Pages          = {609-616},
Publisher      = {GI},
pdf            = {http://subs.emis.de/LNI/Proceedings/Proceedings176/586.pdf},
year           = 2010
}
• R. Gleim, P. Warner, and A. Mehler, "eHumanities Desktop - An Architecture for Flexible Annotation in Iconographic Research," in Proceedings of the 6th International Conference on Web Information Systems and Technologies (WEBIST '10), April 7-10, 2010, Valencia, 2010.
[BibTeX]

@InProceedings{Gleim:Warner:Mehler:2010,
Author         = {Gleim, Rüdiger and Warner, Paul and Mehler, Alexander},
Title          = {eHumanities Desktop - An Architecture for Flexible
Annotation in Iconographic Research},
BookTitle      = {Proceedings of the 6th International Conference on Web
Information Systems and Technologies (WEBIST '10),
April 7-10, 2010, Valencia},
website        = {https://www.researchgate.net/publication/220724277_eHumanities_Desktop_-_An_Architecture_for_Flexible_Annotation_in_Iconographic_Research},
year           = 2010
}
• P. Menke and A. Mehler, "The Ariadne System: A flexible and extensible framework for the modeling and storage of experimental data in the humanities," in Proceedings of LREC 2010, Malta, 2010.
[Abstract] [BibTeX]

This paper introduces the Ariadne Corpus Management System. First, the underlying data model is presented which enables users to represent and process heterogeneous data sets within a single, consistent framework. Secondly, a set of automatized procedures is described that offers assistance to researchers in various data-related use cases. Finally, an approach to easy yet powerful data retrieval is introduced in form of a specialised querying language for multimodal data.
@InProceedings{Menke:Mehler:2010,
Author         = {Menke, Peter and Mehler, Alexander},
Title          = {The Ariadne System: A flexible and extensible
framework for the modeling and storage of experimental
data in the humanities},
BookTitle      = {Proceedings of LREC 2010},
Publisher      = {ELDA},
abstract       = {This paper introduces the Ariadne Corpus Management
System. First, the underlying data model is presented
which enables users to represent and process
heterogeneous data sets within a single, consistent
framework. Secondly, a set of automatized procedures is
described that offers assistance to researchers in
various data-related use cases. Finally, an approach to
easy yet powerful data retrieval is introduced in form
of a specialised querying language for multimodal data.},
year           = 2010
}
• T. Sutter and A. Mehler, "Einleitung: Der aktuelle Medienwandel im Blick einer interdisziplinären Medienwissenschaft," in Medienwandel als Wandel von Interaktionsformen, T. Sutter and A. Mehler, Eds., Wiesbaden: VS Verlag für Sozialwissenschaften, 2010, pp. 7-16.
[Abstract] [BibTeX]

Die Herausforderung, die der Wandel von Kommunikationsmedien für die Medienwissenschaft darstellt, resultiert nicht nur aus der ungeheuren Beschleunigung des Medienwandels. Die Herausforderung stellt sich auch mit der Frage, welches die neuen Formen und Strukturen sind, die aus dem Wandel der Medien hervorgehen. Rückt man diese Frage in den Fokus der Überlegungen, kommen erstens Entwicklungen im Wechsel von Massenmedien zu neuen, „interaktiven“ Medien in den Blick. Dies betrifft den Wandel von den alten Medien in Form von Einwegkommunikation zu den neuen Medien in Form von Netzkommunikation. Dieser Wandel wurde in zahlreichen Analysen als eine Revolution beschrieben: Im Unterschied zur einseitigen, rückkopplungsarmen Kommunikationsform der Massenmedien sollen neue, computergestützte Formen der Medienkommunikation „interaktiv“ sein, d.h. gesteigerte Rückkopplungs- und Eingriffsmöglichkeiten für die Adressaten und Nutzer bieten. Sozialwissenschaftlich bedeutsam ist dabei die Einschätzung der Qualität und des Umfangs dieser neuen Möglichkeiten und Leistungen. Denn bislang bedeutete Medienwandel im Kern eine zunehmende Ausdifferenzierung alter und neuer Medien mit je spezifischen Leistungen, d.h. neue Medien ersetzen die älteren nicht, sondern sie ergänzen und erweitern sie. Allerdings wird im Zuge des aktuellen Medienwandels immer deutlicher, dass die neuen Medien durchaus imstande sind, die Leistungen massenmedialer Verbreitung von Kommunikation zu übernehmen. Stehen wir also, wie das schon seit längerem kühn vorhergesagt wird, vor der Etablierung eines Universalmediums, das in der Lage ist, die Formen und Funktionen anderer Medien zu übernehmen?
@InBook{Sutter2010,
Author         = {Sutter, Tilmann and Mehler, Alexander},
Editor         = {Sutter, Tilmann and Mehler, Alexander},
Title          = {Einleitung: Der aktuelle Medienwandel im Blick einer
interdisziplin{\"a}ren Medienwissenschaft},
Pages          = {7--16},
Publisher      = {VS Verlag f{\"u}r Sozialwissenschaften},
abstract       = {Die Herausforderung, die der Wandel von
Kommunikationsmedien f{\"u}r die Medienwissenschaft
darstellt, resultiert nicht nur aus der ungeheuren
Beschleunigung des Medienwandels. Die Herausforderung
stellt sich auch mit der Frage, welches die neuen
Formen und Strukturen sind, die aus dem Wandel der
Medien hervorgehen. R{\"u}ckt man diese Frage in den
Fokus der {\"U}berlegungen, kommen erstens
Entwicklungen im Wechsel von Massenmedien zu neuen,
„interaktiven“ Medien in den Blick. Dies betrifft
den Wandel von den alten Medien in Form von
Einwegkommunikation zu den neuen Medien in Form von
Netzkommunikation. Dieser Wandel wurde in zahlreichen
Analysen als eine Revolution beschrieben: Im
Unterschied zur einseitigen, r{\"u}ckkopplungsarmen
Kommunikationsform der Massenmedien sollen neue,
computergest{\"u}tzte Formen der Medienkommunikation
„interaktiv“ sein, d.h. gesteigerte
R{\"u}ckkopplungs- und Eingriffsm{\"o}glichkeiten
f{\"u}r die Adressaten und Nutzer bieten.
Sozialwissenschaftlich bedeutsam ist dabei die
Einsch{\"a}tzung der Qualit{\"a}t und des Umfangs
dieser neuen M{\"o}glichkeiten und Leistungen. Denn
bislang bedeutete Medienwandel im Kern eine zunehmende
Ausdifferenzierung alter und neuer Medien mit je
spezifischen Leistungen, d.h. neue Medien ersetzen die
{\"a}lteren nicht, sondern sie erg{\"a}nzen und
erweitern sie. Allerdings wird im Zuge des aktuellen
Medienwandels immer deutlicher, dass die neuen Medien
durchaus imstande sind, die Leistungen massenmedialer
Verbreitung von Kommunikation zu {\"u}bernehmen. Stehen
wir also, wie das schon seit l{\"a}ngerem k{\"u}hn
vorhergesagt wird, vor der Etablierung eines
Universalmediums, das in der Lage ist, die Formen und
Funktionen anderer Medien zu {\"u}bernehmen?},
booktitle      = {Medienwandel als Wandel von Interaktionsformen},
doi            = {10.1007/978-3-531-92292-8_1},
isbn           = {978-3-531-92292-8},
url            = {https://doi.org/10.1007/978-3-531-92292-8_1},
year           = 2010
}
• S. Eger and I. Sejane, "Computing Semantic Similarity from Bilingual Dictionaries," in Proceedings of the 10th International Conference on the Statistical Analysis of Textual Data (JADT-2010), Rome, Italy, 2010, pp. 1217-1225.
[BibTeX]

@InProceedings{Eger:Sejane:2010,
Author         = {Eger, Steffen and Sejane, Ineta},
Title          = {Computing Semantic Similarity from Bilingual
Dictionaries},
BookTitle      = {Proceedings of the 10th International Conference on
the Statistical Analysis of Textual Data (JADT-2010)},
Pages          = {1217-1225},
year           = 2010
}
• T. vor der Brück and H. Helbig, "Validating Meronymy Hypotheses with Support Vector Machines and Graph Kernels," in Proceedings of the Ninth International Conference on Machine Learning and Applications (ICMLA), Washington, D.C., 2010, pp. 243-250.
[Abstract] [BibTeX]

There is a substantial body of work on the extraction of relations from texts, most of which is based on pattern matching or on applying tree kernel functions to syntactic structures. Whereas pattern application is usually more efficient, tree kernels can be superior when assessed by the F-measure. In this paper, we introduce a hybrid approach to extracting meronymy relations, which is based on both patterns and kernel functions. In a first step, meronymy relation hypotheses are extracted from a text corpus by applying patterns. In a second step these relation hypotheses are validated by using several shallow features and a graph kernel approach. In contrast to other meronymy extraction and validation methods which are based on surface or syntactic representations we use a purely semantic approach based on semantic networks. This involves analyzing each sentence of the Wikipedia corpus by a deep syntactico-semantic parser and converting it into a semantic network. Meronymy relation hypotheses are extracted from the semantic networks by means of an automated theorem prover, which employs a set of logical axioms and patterns in the form of semantic networks. The meronymy candidates are then validated by means of a graph kernel approach based on common walks. The evaluation shows that this method achieves considerably higher accuracy, recall, and F-measure than a method using purely shallow validation.
@InProceedings{vor:der:Brueck:Helbig:2010:a,
Author         = {vor der Brück, Tim and Helbig, Hermann},
Title          = {Validating Meronymy Hypotheses with Support Vector
Machines and Graph Kernels},
BookTitle      = {Proceedings of the Ninth International Conference on
Machine Learning and Applications (ICMLA)},
Pages          = {243--250},
Publisher      = {IEEE Press},
abstract       = {There is a substantial body of work on the extraction
of relations from texts, most of which is based on
pattern matching or on applying tree kernel functions
to syntactic structures. Whereas pattern application is
usually more efficient, tree kernels can be superior
when assessed by the F-measure. In this paper, we
introduce a hybrid approach to extracting meronymy
relations, which is based on both patterns and kernel
functions. In a first step, meronymy relation
hypotheses are extracted from a text corpus by applying
patterns. In a second step these relation hypotheses
are validated by using several shallow features and a
graph kernel approach. In contrast to other meronymy
extraction and validation methods which are based on
surface or syntactic representations we use a purely
semantic approach based on semantic networks. This
involves analyzing each sentence of the Wikipedia
corpus by a deep syntactico-semantic parser and
converting it into a semantic network. Meronymy
relation hypotheses are extracted from the semantic
networks by means of an automated theorem prover, which
employs a set of logical axioms and patterns in the
form of semantic networks. The meronymy candidates are
then validated by means of a graph kernel approach
based on common walks. The evaluation shows that this
method achieves considerably higher accuracy, recall,
and F-measure than a method using purely shallow
validation.},
website        = {http://www.computer.org/csdl/proceedings/icmla/2010/4300/00/4300a243-abs.html},
year           = 2010
}

### 2009 (23)

• M. Santini, A. Mehler, and S. Sharoff, "Riding the Rough Waves of Genre on the Web: Concepts and Research Questions," in Genres on the Web: Computational Models and Empirical Studies, A. Mehler, S. Sharoff, and M. Santini, Eds., Berlin/New York: Springer, 2009, pp. 3-32.
[Abstract] [BibTeX]

This chapter outlines the state of the art of empirical and computational webgenre research. First, it highlights why the concept of genre is profitable for a range of disciplines. At the same time, it lists a number of recent interpretations that can inform and influence present and future genre research. Last but not least, it breaks down a series of open issues that relate to the modelling of the concept of webgenre in empirical and computational studies.
@InCollection{Santini:Mehler:Sharoff:2009,
Author         = {Santini, Marina and Mehler, Alexander and Sharoff,
Serge},
Title          = {Riding the Rough Waves of Genre on the Web: Concepts
and Research Questions},
BookTitle      = {Genres on the Web: Computational Models and Empirical
Studies},
Publisher      = {Springer},
Editor         = {Mehler, Alexander and Sharoff, Serge and Santini,
Marina},
Pages          = {3-32},
abstract       = {This chapter outlines the state of the art of
empirical and computational webgenre research. First,
it highlights why the concept of genre is profitable
for a range of disciplines. At the same time, it lists
a number of recent interpretations that can inform and
influence present and future genre research. Last but
not least, it breaks down a series of open issues that
relate to the modelling of the concept of webgenre in
empirical and computational studies.},
crossref       = {Genres on the Web: Computational Models and Empirical
Studies},
year           = 2009
}
• A. Mehler, R. Gleim, U. Waltinger, A. Ernst, D. Esch, and T. Feith, "eHumanities Desktop – eine webbasierte Arbeitsumgebung für die geisteswissenschaftliche Fachinformatik," in Proceedings of the Symposium "Sprachtechnologie und eHumanities", 26.–27. Februar, Duisburg-Essen University, 2009.
[BibTeX]

@InProceedings{Mehler:Gleim:Waltinger:Ernst:Esch:Feith:2009,
Author         = {Mehler, Alexander and Gleim, Rüdiger and Waltinger,
Ulli and Ernst, Alexandra and Esch, Dietmar and Feith,
Tobias},
Title          = {eHumanities Desktop – eine webbasierte
Arbeitsumgebung für die geisteswissenschaftliche
Fachinformatik},
BookTitle      = {Proceedings of the Symposium "Sprachtechnologie und
eHumanities", 26.–27. Februar, Duisburg-Essen
University},
website        = {http://duepublico.uni-duisburg-essen.de/servlets/DocumentServlet?id=37041},
year           = 2009
}
• B. Wagner, A. Mehler, C. Wolff, and B. Dotzler, "Bausteine eines Literary Memory Information System (LiMeS) am Beispiel der Kafka-Forschung," in Proceedings of the Symposium "Sprachtechnologie und eHumanities", 26.–27. Februar, Duisburg-Essen University, 2009.
[Abstract] [BibTeX]

In dem Paper beschreiben wir Bausteine eines Literary Memory Information System (LiMeS), das die literaturwissenschaftliche Erforschung von so genannten Matrixtexten – das sind Primärtexte eines bestimmten literarischen Gesamtwerks – unter dem Blickwinkel großer Mengen so genannter Echotexte (Topia 1984; Wagner/Reinhard 2007) – das sind Subtexte im Sinne eines literaturwissenschaftlichen Intertextualitätsbegriffs – ermöglicht. Den Ausgangspunkt dieses computerphilologischen Informationssystems bildet ein Text-Mining-Modell basierend auf dem Intertextualitätsbegriff in Verbindung mit dem Begriff des Semantic Web (Mehler, 2004b, 2005a, b, Wolff 2005). Wir zeigen, inwiefern dieses Modell über bestehende Informationssystemarchitekturen hinausgeht und schließen einen Brückenschlag zur derzeitigen Entwicklung von Arbeitsumgebungen in der geisteswissenschaftlichen Fachinformatik in Form eines eHumanities Desktop.
@InProceedings{Wagner:Mehler:Wolff:Dotzler:2009,
Author         = {Wagner, Benno and Mehler, Alexander and Wolff,
Christian and Dotzler, Bernhard},
Title          = {Bausteine eines Literary Memory Information System
(LiMeS) am Beispiel der Kafka-Forschung},
BookTitle      = {Proceedings of the Symposium "Sprachtechnologie und
eHumanities", 26.–27. Februar, Duisburg-Essen
University},
abstract       = {In dem Paper beschreiben wir Bausteine eines Literary
Memory Information System (LiMeS), das die
literaturwissenschaftliche Erforschung von so genannten
Matrixtexten – das sind Prim{\"a}rtexte eines
bestimmten literarischen Gesamtwerks – unter dem
Blickwinkel gro{\ss}er Mengen so genannter Echotexte
(Topia 1984; Wagner/Reinhard 2007) – das sind
Subtexte im Sinne eines literaturwissenschaftlichen
Intertextualit{\"a}tsbegriffs – ermöglicht. Den
Ausgangspunkt dieses computerphilologischen
Informationssystems bildet ein Text-Mining-Modell
basierend auf dem Intertextualit{\"a}tsbegriff in
Verbindung mit dem Begriff des Semantic Web (Mehler,
2004b, 2005a, b, Wolff 2005). Wir zeigen, inwiefern
dieses Modell über bestehende
Informationssystemarchitekturen hinausgeht und
schlie{\ss}en einen Brückenschlag zur derzeitigen
Entwicklung von Arbeitsumgebungen in der
geisteswissenschaftlichen Fachinformatik in Form eines
eHumanities Desktop.},
website        = {http://epub.uni-regensburg.de/6795/},
year           = 2009
}
• U. Waltinger, A. Mehler, and A. Wegner, "A Two-Level Approach to Web Genre Classification," in Proceedings of the 5th International Conference on Web Information Systems and Technologies (WEBIST '09), March 23-26, 2009, Lisboa, 2009.
[Abstract] [BibTeX]

This paper presents an approach of two-level categorization of web pages. In contrast to related approaches the model additionally explores and categorizes functionally and thematically demarcated segments of the hypertext types to be categorized. By classifying these segments conclusions can be drawn about the type of the corresponding compound web document.
@InProceedings{Waltinger:Mehler:Wegner:2009,
Author         = {Waltinger, Ulli and Mehler, Alexander and Wegner,
Armin},
Title          = {A Two-Level Approach to Web Genre Classification},
BookTitle      = {Proceedings of the 5th International Conference on Web
Information Systems and Technologies (WEBIST '09),
March 23-26, 2009, Lisboa},
abstract       = {This paper presents an approach of two-level
categorization of web pages. In contrast to related
approaches the model additionally explores and
categorizes functionally and thematically demarcated
segments of the hypertext types to be categorized. By
classifying these segments conclusions can be drawn
about the type of the corresponding compound web
document.},
pdf            = {http://www.ulliwaltinger.de/pdf/Webist_2009_TwoLevel_Genre_Classification_WaltingerMehlerWegner.pdf},
year           = 2009
}
• A. Mehler, "Structure Formation in the Web. A Graph-Theoretical Model of Hypertext Types," in Linguistic Modeling of Information and Markup Languages. Contributions to Language Technology, A. Witt and D. Metzing, Eds., Dordrecht: Springer, 2009.
[Abstract] [BibTeX]

In this chapter we develop a representation model of web document networks. Based on the notion of uncertain web document structures, the model is defined as a template which grasps nested manifestation levels of hypertext types. Further, we specify the model on the conceptual, formal and physical level and exemplify it by reconstructing competing web document models.
@InCollection{Mehler:2009:b,
Author         = {Mehler, Alexander},
Title          = {Structure Formation in the Web. A Graph-Theoretical
Model of Hypertext Types},
BookTitle      = {Linguistic Modeling of Information and Markup
Languages. Contributions to Language Technology},
Publisher      = {Springer},
Editor         = {Witt, Andreas and Metzing, Dieter},
Series         = {Text, Speech and Language Technology},
abstract       = {In this chapter we develop a representation model of
web document networks. Based on the notion of uncertain
web document structures, the model is defined as a
template which grasps nested manifestation levels of
hypertext types. Further, we specify the model on the
conceptual, formal and physical level and exemplify it
by reconstructing competing web document models.},
year           = 2009
}
• R. Gleim, A. Mehler, U. Waltinger, and P. Menke, "eHumanities Desktop – An extensible Online System for Corpus Management and Analysis," in 5th Corpus Linguistics Conference, University of Liverpool, 2009.
[Abstract] [BibTeX]

This paper presents the eHumanities Desktop - an online system for corpus management and analysis in support of computing in the humanities. Design issues and the overall architecture are described, as well as an outline of the applications offered by the system.
@InProceedings{Gleim:Mehler:Waltinger:Menke:2009,
Author         = {Gleim, Rüdiger and Mehler, Alexander and Waltinger,
Ulli and Menke, Peter},
Title          = {eHumanities Desktop – An extensible Online System
for Corpus Management and Analysis},
BookTitle      = {5th Corpus Linguistics Conference, University of
Liverpool},
abstract       = {This paper presents the eHumanities Desktop - an
online system for corpus management and analysis in
support of computing in the humanities. Design issues
and the overall architecture are described, as well as
an outline of the applications offered by the system.},
pdf            = {http://www.ulliwaltinger.de/pdf/eHumanitiesDesktop-AnExtensibleOnlineSystem-CL2009.pdf},
website        = {http://www.ulliwaltinger.de/ehumanities-desktop-an-extensible-online-system-for-corpus-management-and-analysis/},
year           = 2009
}
• A. Mehler and A. Lücking, "A Structural Model of Semiotic Alignment: The Classification of Multimodal Ensembles as a Novel Machine Learning Task," in Proceedings of IEEE Africon 2009, September 23-25, Nairobi, Kenya, 2009.
[Abstract] [BibTeX]

In addition to the well-known linguistic alignment processes in dyadic communication – e.g., phonetic, syntactic, semantic alignment – we provide evidence for a genuine multimodal alignment process, namely semiotic alignment. Communicative elements from different modalities 'routinize into' cross-modal 'super-signs', which we call multimodal ensembles. Computational models of human communication are in need of expressive models of multimodal ensembles. In this paper, we exemplify semiotic alignment by means of empirical examples of the building of multimodal ensembles. We then propose a graph model of multimodal dialogue that is expressive enough to capture multimodal ensembles. In line with this model, we define a novel task in machine learning with the aim of training classifiers that can detect semiotic alignment in dialogue. This model is in support of approaches which need to gain insights into realistic human-machine communication.
@InProceedings{Mehler:Luecking:2009,
Author         = {Mehler, Alexander and Lücking, Andy},
Title          = {A Structural Model of Semiotic Alignment: The
Classification of Multimodal Ensembles as a Novel
Machine Learning Task},
BookTitle      = {Proceedings of IEEE Africon 2009, September 23-25,
Nairobi, Kenya},
Publisher      = {IEEE},
abstract       = {In addition to the well-known linguistic alignment
processes in dyadic communication – e.g., phonetic,
syntactic, semantic alignment – we provide evidence
for a genuine multimodal alignment process, namely
semiotic alignment. Communicative elements from
different modalities 'routinize into' cross-modal
'super-signs', which we call multimodal ensembles.
Computational models of human communication are in need
of expressive models of multimodal ensembles. In this
paper, we exemplify semiotic alignment by means of
empirical examples of the building of multimodal
ensembles. We then propose a graph model of multimodal
dialogue that is expressive enough to capture
multimodal ensembles. In line with this model, we
define a novel task in machine learning with the aim of
training classifiers that can detect semiotic alignment
in dialogue. This model is in support of approaches
which need to gain insights into realistic
human-machine communication.},
year           = 2009
}
• A. Mehler, "Generalized Shortest Paths Trees: A Novel Graph Class Applied to Semiotic Networks," in Analysis of Complex Networks: From Biology to Linguistics, M. Dehmer and F. Emmert-Streib, Eds., Weinheim: Wiley-VCH, 2009, pp. 175-220.
[BibTeX]

@InCollection{Mehler:2009:c,
Author         = {Mehler, Alexander},
Title          = {Generalized Shortest Paths Trees: A Novel Graph Class
Applied to Semiotic Networks},
BookTitle      = {Analysis of Complex Networks: From Biology to
Linguistics},
Publisher      = {Wiley-VCH},
Editor         = {Dehmer, Matthias and Emmert-Streib, Frank},
Pages          = {175-220},
website        = {https://www.researchgate.net/publication/255666602_1_Generalised_Shortest_Paths_Trees_A_Novel_Graph_Class_Applied_to_Semiotic_Networks},
year           = 2009
}
• T. vor der Brück and S. Hartrumpf, "A Readability Checker Based on Deep Semantic Indicators," in Human Language Technology. Challenges of the Information Society, Z. Vetulani and H. Uszkoreit, Eds., Berlin, Germany: Springer, 2009, vol. 5603, pp. 232-244.
[Abstract] [BibTeX]

One major reason that readability checkers are still far away from judging the understandability of texts consists in the fact that no semantic information is used. Syntactic, lexical, or morphological information can only give limited access for estimating the cognitive difficulties for a human being to comprehend a text. In this paper however, we present a readability checker which uses semantic information in addition. This information is represented as semantic networks and is derived by a deep syntactico-semantic analysis. We investigate in which situations a semantic readability indicator can lead to superior results in comparison with ordinary surface indicators like sentence length. Finally, we compute the weights of our semantic indicators in the readability function based on the user ratings collected in an online evaluation.
@InCollection{vor:der:Brueck:Hartrumpf:2009,
Author         = {vor der Brück, Tim and Hartrumpf, Sven},
Title          = {A Readability Checker Based on Deep Semantic
Indicators},
BookTitle      = {Human Language Technology. Challenges of the
Information Society},
Publisher      = {Springer},
Editor         = {Zygmunt Vetulani and Hans Uszkoreit},
Volume         = {5603},
Series         = {Lecture Notes in Computer Science (LNCS)},
Pages          = {232--244},
abstract       = {One major reason that readability checkers are still
far away from judging the understandability of texts
consists in the fact that no semantic information is
used. Syntactic, lexical, or morphological information
can only give limited access for estimating the
cognitive difficulties for a human being to comprehend
a text. In this paper however, we present a readability
checker which uses semantic information in addition.
This information is represented as semantic networks
and is derived by a deep syntactico-semantic analysis.
We investigate in which situations a semantic
readability indicator can lead to superior results in
comparison with ordinary surface indicators like
sentence length. Finally, we compute the weights of our
semantic indicators in the readability function based
on the user ratings collected in an online evaluation.},
website        = {http://rd.springer.com/chapter/10.1007/978-3-642-04235-5_20},
year           = 2009
}
• T. vor der Brück, "Hypernymy Extraction Based on Shallow and Deep Patterns," in From Form To Meaning: Processing Texts Automatically, Proceedings of the Biennial GSCL Conference 2009, Potsdam, Germany, 2009, pp. 41-52.
[Abstract] [BibTeX]

There exist various approaches to construct taxonomies by text mining. Usually these approaches are based on supervised learning and extract in a first step several patterns. These patterns are then applied to previously unseen texts and used to recognize hypernym/hyponym pairs. Normally these approaches are only based on a surface representation or a syntactic tree structure, i.e., a constituency or dependency tree derived by a syntactical parser. In this work we present an approach which, additionally to shallow patterns, directly operates on semantic networks which are derived by a deep linguistic syntactico-semantic analysis. Furthermore, the shallow approach heavily depends on semantic information, too. It is shown that recall and precision can be improved considerably compared with relying on shallow patterns alone.
@InProceedings{vor:der:Brueck:2009:b,
Author         = {vor der Brück, Tim},
Title          = {Hypernymy Extraction Based on Shallow and Deep
Patterns},
BookTitle      = {From Form To Meaning: Processing Texts Automatically,
Proceedings of the Biennial GSCL Conference 2009},
Editor         = {Christian Chiarcos and Richard Eckart de Castilho},
Pages          = {41--52},
abstract       = {There exist various approaches to construct taxonomies
by text mining. Usually these approaches are based on
supervised learning and extract in a first step several
patterns. These patterns are then applied to previously
unseen texts and used to recognize hypernym/hyponym
pairs. Normally these approaches are only based on a
surface representation or a syntactic tree structure,
i.e., a constituency or dependency tree derived by a
syntactical parser. In this work we present an approach
which, additionally to shallow patterns, directly
operates on semantic networks which are derived by a
deep linguistic syntactico-semantic analysis.
Furthermore, the shallow approach heavily depends on
semantic information, too. It is shown that recall and
precision can be improved considerably compared with relying
on shallow patterns alone.},
year           = 2009
}
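The shallow half of the approach above can be illustrated with Hearst-style lexico-syntactic patterns. What follows is a minimal Python sketch, not the paper's system (which additionally applies deep patterns over semantic networks); the pattern set, names and example sentence are invented for illustration.

import re

# Two classic shallow hypernymy patterns (Hearst-style); illustrative only.
SUCH_AS = re.compile(r"(?P<hyper>\w+)\s+such\s+as\s+(?P<hypos>\w+(?:\s*,\s*\w+)*)")
AND_OTHER = re.compile(r"(?P<hypo>\w+)\s+and\s+other\s+(?P<hyper>\w+)")

def extract_hypernym_pairs(text):
    """Return candidate (hyponym, hypernym) pairs found in `text`."""
    pairs = set()
    for m in SUCH_AS.finditer(text):
        for hypo in re.split(r"\s*,\s*", m.group("hypos")):
            pairs.add((hypo, m.group("hyper")))
    for m in AND_OTHER.finditer(text):
        pairs.add((m.group("hypo"), m.group("hyper")))
    return pairs

print(extract_hypernym_pairs("animals such as dogs, cats"))
# {('dogs', 'animals'), ('cats', 'animals')}

Such surface patterns alone yield many false positives, which is why the paper combines them with deep, semantically informed patterns and semantic filtering.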
• G. Bouma, S. Duarte, and M. Z. Islam, "Cross-lingual Alignment and Completion of Wikipedia Templates," in Third International Workshop on Cross Lingual Information Access: Addressing the Information Need of Multilingual Societies (CLIAWS3), Boulder, Colorado, USA, June 4, 2009.
[Abstract] [BibTeX]

For many languages, the size of Wikipedia is an order of magnitude smaller than the English Wikipedia. We present a method for cross-lingual alignment of template and infobox attributes in Wikipedia. The alignment is used to add and complete templates and infoboxes in one language with information derived from Wikipedia in another language. We show that alignment between English and Dutch Wikipedia is accurate and that the result can be used to expand the number of template attribute-value pairs in Dutch Wikipedia by 50%. Furthermore, the alignment provides valuable information for normalization of template and attribute names and can be used to detect potential inconsistencies.
@InProceedings{Bouma:Duarte:Zahurul:2009,
Author         = {Bouma, Gosse and Duarte, Sergio and Islam, Md. Zahurul},
Title          = {Cross-lingual Alignment and Completion of Wikipedia
Templates},
BookTitle      = {Third International Workshop on Cross Lingual
Information Access: Addressing the Information Need of
Multilingual Societies (CLIAWS3), Boulder, Colorado,
USA, June 4},
abstract       = {For many languages, the size of Wikipedia is an order
of magnitude smaller than the English Wikipedia. We
present a method for cross-lingual alignment of
template and infobox attributes in Wikipedia. The
alignment is used to add and complete templates and
infoboxes in one language with information derived from
Wikipedia in another language. We show that alignment
between English and Dutch Wikipedia is accurate and
that the result can be used to expand the number of
template attribute-value pairs in Dutch Wikipedia by
50%. Furthermore, the alignment provides valuable
information for normalization of template and attribute
names and can be used to detect potential
inconsistencies},
owner          = {zahurul},
timestamp      = {2011.08.02},
website        = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.148.1418},
year           = 2009
}
• U. Waltinger, "Polarity Reinforcement: Sentiment Polarity Identification By Means Of Social Semantics," in Proceedings of the IEEE Africon 2009, September 23-25, Nairobi, Kenya, 2009.
[BibTeX]

@InProceedings{Waltinger:2009:a,
Author         = {Waltinger, Ulli},
Title          = {Polarity Reinforcement: Sentiment Polarity
Identification By Means Of Social Semantics},
BookTitle      = {Proceedings of the IEEE Africon 2009, September 23-25,
Nairobi, Kenya},
date_0         = {2009},
pdf            = {http://www.ulliwaltinger.de/pdf/AfriconIEEE_2009_SentimentPolarity_Waltinger.pdf},
website        = {http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=5308104},
year           = 2009
}
• U. Waltinger, I. Cramer, and T. Wandmacher, "From Social Networks To Distributional Properties: A Comparative Study On Computing Semantic Relatedness," in Proceedings of the 31st Annual Conference of the Cognitive Science Society, Austin, TX, 2009, pp. 3016-3021.
[BibTeX]

@InProceedings{Waltinger:Cramer:Wandmacher:2009:a,
Author         = {Waltinger, Ulli and Cramer, Irene and Wandmacher,
Tonio},
Title          = {From Social Networks To Distributional Properties: A
Comparative Study On Computing Semantic Relatedness},
BookTitle      = {Proceedings of the 31st Annual Conference of the
Cognitive Science Society},
Editor         = {Taatgen, N.A. and van Rijn, H.},
Pages          = {3016-3021},
Publisher      = {Cognitive Science Society},
date_0         = {2009},
pdf            = {http://csjarchive.cogsci.rpi.edu/proceedings/2009/papers/661/paper661.pdf},
year           = 2009
}
• U. Waltinger, "Polarity Reinforcement: Sentiment Polarity Identification By Means Of Social Semantics," in Proceedings of the IEEE Africon 2009, September 23-25, Nairobi, Kenya, 2009.
[BibTeX]

@InProceedings{Waltinger:2009:b,
Author         = {Waltinger, Ulli},
Title          = {Polarity Reinforcement: Sentiment Polarity
Identification By Means Of Social Semantics},
BookTitle      = {Proceedings of the IEEE Africon 2009, September 23-25,
Nairobi, Kenya},
date_0         = {2009},
year           = 2009
}
• U. Waltinger, I. Cramer, and T. Wandmacher, "From Social Networks To Distributional Properties: A Comparative Study On Computing Semantic Relatedness," in Proceedings of the 31th Annual Conference of the Cognitive Science Society, Austin, TX, 2009, pp. 3016-3021.
[BibTeX]

@InProceedings{Waltinger:Cramer:Wandmacher:2009:b,
Author         = {Waltinger, Ulli and Cramer, Irene and Wandmacher,
Tonio},
Title          = {From Social Networks To Distributional Properties: A
Comparative Study On Computing Semantic Relatedness},
BookTitle      = {Proceedings of the 31th Annual Conference of the
Cognitive Science Society},
Editor         = {N.A. Taatgen and H. van Rijn},
Pages          = {3016-3021},
Publisher      = {Cognitive Science Society},
date_0         = {2009},
year           = 2009
}
• A. Mehler and U. Waltinger, "Enhancing Document Modeling by Means of Open Topic Models: Crossing the Frontier of Classification Schemes in Digital Libraries by Example of the DDC," Library Hi Tech, vol. 27, iss. 4, pp. 520-539, 2009.
[Abstract] [BibTeX]

Purpose: We present a topic classification model using the Dewey Decimal Classification (DDC) as the target scheme. This is done by exploring metadata as provided by the Open Archives Initiative (OAI) to derive document snippets as minimal document representations. The reason is to reduce the effort of document processing in digital libraries. Further, we perform feature selection and extension by means of social ontologies and related web-based lexical resources. This is done to provide reliable topic-related classifications while circumventing the problem of data sparseness. Finally, we evaluate our model by means of two language-specific corpora. This paper bridges digital libraries on the one hand and computational linguistics on the other. The aim is to make accessible computational linguistic methods to provide thematic classifications in digital libraries based on closed topic models as the DDC. Design/methodology/approach: text classification, text-technology, computational linguistics, computational semantics, social semantics. Findings: We show that SVM-based classifiers perform best by exploring certain selections of OAI document metadata. Research limitations/implications: The findings show that it is necessary to further develop SVM-based DDC-classifiers by using larger training sets possibly for more than two languages in order to get better F-measure values. Practical implications: We can show that DDC-classifications come into reach which primarily explore OAI metadata. Originality/value: We provide algorithmic and formal-mathematical information how to build DDC-classifiers for digital libraries.
@Article{Mehler:Waltinger:2009:b,
Author         = {Mehler, Alexander and Waltinger, Ulli},
Title          = {Enhancing Document Modeling by Means of Open Topic
Models: Crossing the Frontier of Classification Schemes
in Digital Libraries by Example of the DDC},
Journal        = {Library Hi Tech},
Volume         = {27},
Number         = {4},
Pages          = {520-539},
abstract       = {Purpose: We present a topic classification model using
the Dewey Decimal Classification (DDC) as the target
scheme. This is done by exploring metadata as provided
by the Open Archives Initiative (OAI) to derive
document snippets as minimal document representations.
The reason is to reduce the effort of document
processing in digital libraries. Further, we perform
feature selection and extension by means of social
ontologies and related web-based lexical resources.
This is done to provide reliable topic-related
classifications while circumventing the problem of data
sparseness. Finally, we evaluate our model by means of
two language-specific corpora. This paper bridges
digital libraries on the one hand and computational
linguistics on the other. The aim is to make accessible
computational linguistic methods to provide thematic
classifications in digital libraries based on closed
topic models as the DDC. Design/methodology/approach:
text classification, text-technology, computational
linguistics, computational semantics, social semantics.
Findings: We show that SVM-based classifiers perform
best by exploring certain selections of OAI document
metadata. Research limitations/implications: The
findings show that it is necessary to further develop
SVM-based DDC-classifiers by using larger training sets
possibly for more than two languages in order to get
better F-measure values. Practical implications: We can
show that DDC-classifications come into reach which
primarily explore OAI metadata. Originality/value: We
provide algorithmic and formal-mathematical information
how to build DDC-classifiers for digital libraries.},
website        = {http://biecoll.ub.uni-bielefeld.de/frontdoor.php?source_opus=5001&la=de},
year           = 2009
}
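The classification setup described in this abstract (metadata-derived document snippets as input, SVMs as classifiers, DDC classes as targets) can be outlined in a few lines. This is a minimal illustrative sketch, not the authors' implementation: the snippets and DDC labels are made up, and scikit-learn is assumed merely as a convenient SVM library.

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC

# Hypothetical OAI metadata snippets (e.g., titles plus keywords) paired
# with DDC top-level classes; real training data would be far larger.
snippets = [
    "prime numbers and modular arithmetic",
    "baroque opera and orchestration",
    "graph algorithms for network analysis",
    "twelve-tone composition techniques",
]
ddc = ["510", "780", "510", "780"]  # 510 = mathematics, 780 = music

clf = make_pipeline(TfidfVectorizer(), LinearSVC())
clf.fit(snippets, ddc)
print(clf.predict(["spectral graph theory lecture notes"]))  # -> ['510']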
• R. Gleim, U. Waltinger, A. Ernst, A. Mehler, D. Esch, and T. Feith, "The eHumanities Desktop – An Online System for Corpus Management and Analysis in Support of Computing in the Humanities," in Proceedings of the Demonstrations Session of the 12th Conference of the European Chapter of the Association for Computational Linguistics EACL 2009, 30 March – 3 April, Athens, 2009.
[BibTeX]

@InProceedings{Gleim:Waltinger:Ernst:Mehler:Esch:Feith:2009,
Author         = {Gleim, Rüdiger and Waltinger, Ulli and Ernst,
Alexandra and Mehler, Alexander and Esch, Dietmar and
Feith, Tobias},
Title          = {The eHumanities Desktop – An Online System for
Corpus Management and Analysis in Support of Computing
in the Humanities},
BookTitle      = {Proceedings of the Demonstrations Session of the 12th
Conference of the European Chapter of the Association
for Computational Linguistics EACL 2009, 30 March – 3
April, Athens},
year           = 2009
}
• A. Mehler, "Artifizielle Interaktivität. Eine semiotische Betrachtung," in Medienwandel als Wandel von Interaktionsformen – von frühen Medienkulturen zum Web 2.0, T. Sutter and A. Mehler, Eds., Wiesbaden: VS, 2009.
[BibTeX]

@InCollection{Mehler:2009:d,
Author         = {Mehler, Alexander},
Title          = {Artifizielle Interaktivit{\"a}t. Eine semiotische
Betrachtung},
BookTitle      = {Medienwandel als Wandel von Interaktionsformen – von
frühen Medienkulturen zum Web 2.0},
Publisher      = {VS},
Editor         = {Sutter, Tilmann and Mehler, Alexander},
year           = 2009
}
• U. Waltinger and A. Mehler, "The Feature Difference Coefficient: Classification by Means of Feature Distributions," in Proceedings of the Conference on Text Mining Services (TMS 2009), Leipzig, 2009, p. 159–168.
[BibTeX]

@InProceedings{Waltinger:Mehler:2009:a,
Author         = {Waltinger, Ulli and Mehler, Alexander},
Title          = {The Feature Difference Coefficient: Classification by
Means of Feature Distributions},
BookTitle      = {Proceedings of the Conference on Text Mining Services
(TMS 2009)},
Series         = {Leipziger Beitr{\"a}ge zur Informatik: Band XIV},
Pages          = {159–168},
Publisher      = {Leipzig University},
year           = 2009
}
• M. Santini, G. Rehm, S. Sharoff, and A. Mehler, Automatic Genre Identification: Issues and Prospects, M. Santini, G. Rehm, S. Sharoff, and A. Mehler, Eds., GSCL, 2009, vol. 24(1).
[BibTeX]

@Book{Santini:Rehm:Sharoff:Mehler:2009,
Author         = {Santini, Marina and Rehm, Georg and Sharoff, Serge and
Mehler, Alexander},
Editor         = {Santini, Marina and Rehm, Georg and Sharoff, Serge and
Mehler, Alexander},
Title          = {Automatic Genre Identification: Issues and Prospects},
Publisher      = {GSCL},
Volume         = {24(1)},
Series         = {Journal for Language Technology and Computational
Linguistics (JLCL)},
pagetotal      = {148},
pdf            = {http://www.jlcl.org/2009_Heft1/JLCL24(1).pdf},
year           = 2009
}
• U. Waltinger, A. Mehler, and R. Gleim, "Social Semantics And Its Evaluation By Means of Closed Topic Models: An SVM-Classification Approach Using Semantic Feature Replacement By Topic Generalization," in Proceedings of the Biennial GSCL Conference 2009, September 30 – October 2, Universität Potsdam, 2009.
[BibTeX]

@InProceedings{Waltinger:Mehler:Gleim:2009:a,
Author         = {Waltinger, Ulli and Mehler, Alexander and Gleim,
Rüdiger},
Title          = {Social Semantics And Its Evaluation By Means of Closed
Topic Models: An SVM-Classification Approach Using
Semantic Feature Replacement By Topic Generalization},
BookTitle      = {Proceedings of the Biennial GSCL Conference 2009,
September 30 – October 2, Universit{\"a}t Potsdam},
year           = 2009
}
• U. Waltinger and A. Mehler, "Social Semantics and Its Evaluation By Means Of Semantic Relatedness And Open Topic Models," in IEEE/WIC/ACM International Conference on Web Intelligence, September 15–18, Milano, 2009.
[Abstract] [BibTeX]

This paper presents an approach using social semantics for the task of topic labelling by means of Open Topic Models. Our approach utilizes a social ontology to create an alignment of documents within a social network. Comprised category information is used to compute a topic generalization. We propose a feature-frequency-based method for measuring semantic relatedness which is needed in order to reduce the number of document features for the task of topic labelling. This method is evaluated against multiple human judgement experiments comprising two languages and three different resources. Overall the results show that social ontologies provide a rich source of terminological knowledge. The performance of the semantic relatedness measure, with correlation values of up to .77, is quite promising. Results on the topic labelling experiment show, with an accuracy of up to .79, that our approach can be a valuable method for various NLP applications.
@InProceedings{Waltinger:Mehler:2009:c,
Author         = {Waltinger, Ulli and Mehler, Alexander},
Title          = {Social Semantics and Its Evaluation By Means Of
Semantic Relatedness And Open Topic Models},
BookTitle      = {IEEE/WIC/ACM International Conference on Web
Intelligence, September 15–18, Milano},
abstract       = {This paper presents an approach using social semantics
for the task of topic labelling by means of Open Topic
Models. Our approach utilizes a social ontology to
create an alignment of documents within a social
network. Comprised category information is used to
compute a topic generalization. We propose a
feature-frequency-based method for measuring semantic
relatedness which is needed in order to reduce the
number of document features for the task of topic
labelling. This method is evaluated against multiple
human judgement experiments comprising two languages
and three different resources. Overall the results show
that social ontologies provide a rich source of
terminological knowledge. The performance of the
semantic relatedness measure with correlation values of
up to .77 are quite promising. Results on the topic
labelling experiment show, with an accuracy of up to
.79, that our approach can be a valuable method for
various NLP applications.},
website        = {http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=5284920&abstractAccess=no&userType=inst},
year           = 2009
}
• T. vor der Brück, "Approximation of the Parameters of a Readability Formula by Robust Regression," in Machine Learning and Data Mining in Pattern recognition: Poster Proceedings of the International Conference on Machine Learning and Data Mining (MLDM), Leipzig, Germany, 2009, pp. 115-125.
[Abstract] [BibTeX]

Most readability formulas calculate a global readability score by combining several indicator values by a linear combination. Typical indicators are average sentence length, average number of syllables per word, etc. Usually the parameters of the linear combination are determined by a linear OLS (ordinary least square estimation) minimizing the sum of the squared residuals in comparison with human ratings for a given set of texts. The usage of OLS leads to several drawbacks. First, the parameters are not constrained in any way and are therefore not intuitive and difficult to interpret. Second, if the number of parameters becomes large, the effect of overfitting easily occurs. Finally, OLS is quite sensitive to outliers. Therefore, an alternative method is presented which avoids these drawbacks and is based on robust regression.
@InProceedings{vor:der:Brueck:2009,
Author         = {vor der Brück, Tim},
Title          = {Approximation of the Parameters of a Readability
Formula by Robust Regression},
BookTitle      = {Machine Learning and Data Mining in Pattern
Recognition: Poster Proceedings of the International
Conference on Machine Learning and Data Mining (MLDM)},
Pages          = {115--125},
abstract       = {Most readability formulas calculate a global
readability score by combining several indicator values
by a linear combination. Typical indicators are Average
sentence length, Average number of syllables per word,
etc. Usually the parameters of the linear combination
are determined by a linear OLS (ordinary least square
estimation) minimizing the sum of the squared residuals
in comparison with human ratings for a given set of
texts. The usage of OLS leads to several drawbacks.
First, the parameters are not constrained in any way and
are therefore not intuitive and difficult to interpret.
Second, if the number of parameters becomes large, the
effect of overfitting easily occurs. Finally, OLS is
quite sensitive to outliers. Therefore, an alternative
method is presented which avoids these drawbacks and is
based on robust regression.},
year           = 2009
}
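To make the contrast concrete: both estimators below fit the weights of a linear readability formula to indicator values and human ratings, but the robust fit down-weights the outlying rating. A minimal sketch assuming scikit-learn and invented numbers; the paper's method additionally constrains the parameters, which this sketch does not reproduce.

import numpy as np
from sklearn.linear_model import HuberRegressor, LinearRegression

# Indicator matrix: average sentence length, average syllables per word.
# Ratings y are invented; the last one is a deliberate outlier.
X = np.array([[12, 1.3], [18, 1.5], [25, 1.8], [30, 2.1], [14, 1.4]])
y = np.array([4.5, 3.8, 2.9, 2.1, 9.0])

ols = LinearRegression().fit(X, y)    # sensitive to the outlier
robust = HuberRegressor().fit(X, y)   # down-weights the outlier

print("OLS weights:   ", ols.coef_)
print("robust weights:", robust.coef_)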

### 2008 (21)

• M. Stührenberg, M. Beißwenger, K. Kühnberger, A. Mehler, H. Lüngen, D. Metzing, and U. Mönnich, "Sustainability of Text-Technological Resources," in Proceedings of the Post LREC-2008 Workshop: Sustainability of Language Resources and Tools for Natural Language Processing, Marrakech, Morocco, 2008.
[Abstract] [BibTeX]

We consider that there are obvious relationships between research on sustainability of language and linguistic resources on the one hand and work undertaken in the Research Unit 'Text-Technological Modelling of Information' on the other. Currently the main focus in sustainability research is concerned with archiving methods of textual resources, i.e. methods for sustainability of primary and secondary data; these aspects are addressed in our work as well. However, we believe that there are certain additional aspects of sustainability on which new light is shed by procedures, algorithms and dynamic processes undertaken in our Research Unit.
@InProceedings{Stuehrenberg:Beisswenger:Kuehnberger:Mehler:Luengen:Metzing:Moennich:2008,
Author         = {Stührenberg, Maik and Bei{\ss}wenger, Michael and
Kühnberger, Kai-Uwe and Mehler, Alexander and Lüngen,
Harald and Metzing, Dieter and Mönnich, Uwe},
Title          = {Sustainability of Text-Technological Resources},
BookTitle      = {Proceedings of the Post LREC-2008 Workshop:
Sustainability of Language Resources and Tools for
Natural Language Processing, Marrakech, Morocco},
abstract       = {We consider that there are obvious relationships
between research on sustainability of language and
linguistic resources on the one hand and work
undertaken in the Research Unit 'Text-Technological
Modelling of Information' on the other. Currently the
main focus in sustainability research is concerned with
archiving methods of textual resources, i.e. methods
for sustainability of primary and secondary data; these
aspects are addressed in our work as well. However, we
believe that there are certain additional aspects of
sustainability on which new light is shed by
procedures, algorithms and dynamic processes undertaken
in our Research Unit.},
pdf            = {http://www.michael-beisswenger.de/pub/lrec-sustainability.pdf},
year           = 2008
}
• A. Mehler, B. Job, P. Blanchard, and H. Eikmeyer, "Sprachliche Netzwerke," in Netzwerkanalyse und Netzwerktheorie, C. Stegbauer, Ed., Wiesbaden: VS, 2008, pp. 413-427.
[Abstract] [BibTeX]

In this chapter we describe so-called linguistic networks: networks of linguistic units which are analyzed with respect to their embedding in the network of the language community that produced these units and their interconnections. We discuss a three-level model for the analysis of such networks and exemplify this model by means of several special-purpose wikis. A main focus of the chapter is a multi-level network model, departing from the unipartite graph models of complex network theory.
@InCollection{Mehler:Job:Blanchard:Eikmeyer:2008,
Author         = {Mehler, Alexander and Job, Barbara and Blanchard,
Philippe and Eikmeyer, Hans-Jürgen},
Title          = {Sprachliche Netzwerke},
BookTitle      = {Netzwerkanalyse und Netzwerktheorie},
Publisher      = {VS},
Editor         = {Stegbauer, Christian},
Pages          = {413-427},
abstract       = {In diesem Kapitel beschreiben wir so genannte
sprachliche Netzwerke. Dabei handelt es sich um
Netzwerke sprachlicher Einheiten, die in Zusammenhang
mit ihrer Einbettung in das Netzwerk jener
Sprachgemeinschaft analysiert werden, welche diese
Einheiten und deren Vernetzung hervorgebracht hat. Wir
erörtern ein Dreistufenmodell zur Analyse solcher
Netzwerke und exemplifizieren dieses Modell anhand
mehrerer Spezialwikis. Ein Hauptaugenmerk des Kapitels
liegt dabei auf einem Mehrebenennetzwerkmodell, und
zwar in Abkehr von den unipartiten Graphmodellen der
Theorie komplexer Netzwerke.},
year           = 2008
}
• O. Abramov, A. Mehler, and R. Gleim, "A Unified Database of Dependency Treebanks. Integrating, Quantifying and Evaluating Dependency Data," in Proceedings of the 6th Language Resources and Evaluation Conference (LREC 2008), Marrakech (Morocco), 2008.
[Abstract] [BibTeX]

This paper describes a database of 11 dependency treebanks which were unified by means of a two-dimensional graph format. The format was evaluated with respect to storage-complexity on the one hand, and efficiency of data access on the other hand. An example of how the treebanks can be integrated within a unique interface is given by means of the DTDB interface.
@InProceedings{Pustylnikov:Mehler:Gleim:2008,
Author         = {Abramov, Olga and Mehler, Alexander and Gleim,
Rüdiger},
Title          = {A Unified Database of Dependency Treebanks.
Integrating, Quantifying and Evaluating Dependency Data},
BookTitle      = {Proceedings of the 6th Language Resources and
Evaluation Conference (LREC 2008), Marrakech (Morocco)},
abstract       = {This paper describes a database of 11 dependency
treebanks which were unified by means of a
two-dimensional graph format. The format was evaluated
with respect to storage-complexity on the one hand, and
efficiency of data access on the other hand. An example
of how the treebanks can be integrated within a unique
interface is given by means of the DTDB interface. },
pdf            = {http://wwwhomes.uni-bielefeld.de/opustylnikov/pustylnikov/pdfs/LREC08_full.pdf},
year           = 2008
}
• A. Mehler, "Structural Similarities of Complex Networks: A Computational Model by Example of Wiki Graphs," Applied Artificial Intelligence, vol. 22, iss. 7&8, p. 619–683, 2008.
[Abstract] [BibTeX]

This article elaborates a framework for representing and classifying large complex networks by example of wiki graphs. By means of this framework we reliably measure the similarity of document, agent, and word networks by solely regarding their topology. In doing so, the article departs from classical approaches to complex network theory which focuses on topological characteristics in order to check their small world property. This does not only include characteristics that have been studied in complex network theory, but also some of those which were invented in social network analysis and hypertext theory. We show that network classifications come into reach which go beyond the hypertext structures traditionally analyzed in web mining. The reason is that we focus on networks as a whole as units to be classified—above the level of websites and their constitutive pages. As a consequence, we bridge classical approaches to text and web mining on the one hand and complex network theory on the other hand. Last but not least, this approach also provides a framework for quantifying the linguistic notion of intertextuality.
@Article{Mehler:2008:a,
Author         = {Mehler, Alexander},
Title          = {Structural Similarities of Complex Networks: A
Computational Model by Example of Wiki Graphs},
Journal        = {Applied Artificial Intelligence},
Volume         = {22},
Number         = {7\&8},
Pages          = {619–683},
abstract       = {This article elaborates a framework for representing
and classifying large complex networks by example of
wiki graphs. By means of this framework we reliably
measure the similarity of document, agent, and word
networks by solely regarding their topology. In doing
so, the article departs from classical approaches to
complex network theory which focuses on topological
characteristics in order to check their small world
property. This does not only include characteristics
that have been studied in complex network theory, but
also some of those which were invented in social
network analysis and hypertext theory. We show that
network classifications come into reach which go beyond
the hypertext structures traditionally analyzed in web
mining. The reason is that we focus on networks as a
whole as units to be classified—above the level of
websites and their constitutive pages. As a
consequence, we bridge classical approaches to text and
web mining on the one hand and complex network theory
on the other hand. Last but not least, this approach
also provides a framework for quantifying the
linguistic notion of intertextuality.},
doi            = {10.1080/08839510802164085},
website        = {https://www.researchgate.net/publication/200772675_Structural_similarities_of_complex_networks_A_computational_model_by_example_of_wiki_graphs},
year           = 2008
}
• A. Mehler, Lexical-Semantic Resources in Automated Discourse Analysis, H. Lüngen, A. Mehler, and A. Storrer, Eds., GSCL, 2008, vol. 23(2).
[BibTeX]

@Book{Luengen:Mehler:Storrer:2008:a,
Author         = {Mehler, Alexander},
Editor         = {Lüngen, Harald and Mehler, Alexander and Storrer,
Angelika},
Title          = {Lexical-Semantic Resources in Automated Discourse
Analysis},
Publisher      = {GSCL},
Volume         = {23(2)},
Series         = {Journal for Language Technology and Computational
Linguistics (JLCL)},
pagetotal      = {111},
pdf            = {{http://www.jlcl.org/2008_Heft2/JLCL23(2).pdf}},
website        = {https://www.researchgate.net/publication/228956889_Lexical-Semantic_Resources_in_Automated_Discourse_Analysis},
year           = 2008
}
• A. Mehler, "Large Text Networks as an Object of Corpus Linguistic Studies," in Corpus Linguistics. An International Handbook of the Science of Language and Society, A. Lüdeling and M. Kytö, Eds., Berlin/New York: De Gruyter, 2008, p. 328–382.
[BibTeX]

@InCollection{Mehler:2008:b,
Author         = {Mehler, Alexander},
Title          = {Large Text Networks as an Object of Corpus Linguistic
Studies},
BookTitle      = {Corpus Linguistics. An International Handbook of the
Science of Language and Society},
Publisher      = {De Gruyter},
Editor         = {Lüdeling, Anke and Kytö, Merja},
Pages          = {328–382},
year           = 2008
}
• T. vor der Brück and H. Stenzhorn, "A Dynamic Approach for Automatic Error Detection in Generation Grammars," in Proceedings of the 18th European Conference on Artificial Intelligence (ECAI), Patras, Greece, 2008.
[Abstract] [BibTeX]

In any real world application scenario, natural language generation (NLG) systems have to employ grammars consisting of tremendous amounts of rules. Detecting and fixing errors in such grammars is therefore a highly tedious task. In this work we present a data mining algorithm which deduces incorrect grammar rules by abductive reasoning out of positive and negative training examples. More specifically, the constituency trees belonging to successful generation processes and the incomplete trees of failed ones are analyzed. From this a quality score is derived for each grammar rule by analyzing the occurrences of the rules in the trees and by spotting the exact error locations in the incomplete trees. In prior work on automatic error detection v.d.Brück et al. [5] proposed a static error detection algorithm for generation grammars. The approach of Cussens et al. creates missing grammar rules for parsing using abduction [1]. Zeller introduced a dynamic approach in the related area of detecting errors in computer programs [6].
@InProceedings{vor:der:Brueck:Stenzhorn:2008,
Author         = {vor der Brück, Tim and Stenzhorn, Holger},
Title          = {A Dynamic Approach for Automatic Error Detection in
Generation Grammars},
BookTitle      = {Proceedings of the 18th European Conference on
Artificial Intelligence (ECAI)},
abstract       = {In any real world application scenario, natural
language generation (NLG) systems have to employ
grammars consisting of tremendous amounts of rules.
Detecting and fixing errors in such grammars is
therefore a highly tedious task. In this work we
present a data mining algorithm which deduces incorrect
grammar rules by abductive reasoning out of positive
and negative training examples. More specifically, the
constituency trees belonging to successful generation
processes and the incomplete trees of failed ones are
analyzed. From this a quality score is derived for each
grammar rule by analyzing the occurrences of the rules
in the trees and by spotting the exact error locations
in the incomplete trees. In prior work on automatic
error detection v.d.Brück et al. [5] proposed a static
error detection algorithm for generation grammars. The
approach of Cussens et al. creates missing grammar
rules for parsing using abduction [1]. Zeller
introduced a dynamic approach in the related area of
detecting errors in computer programs [6].},
isbn           = {978-1-58603-891-5},
month          = {July},
year           = 2008
}
• T. vor der Brück, S. Hartrumpf, and H. Helbig, "A Readability Checker with Supervised Learning using Deep Syntactic and Semantic Indicators," in Proceedings of the 11th International Multiconference: Information Society - IS 2008 - Language Technologies, Ljubljana, Slovenia, 2008, pp. 92-97.
[Abstract] [BibTeX]

Checking for readability or simplicity of texts is important for many institutional and individual users. Formulas for approximately measuring text readability have a long tradition. Usually, they exploit surface-oriented indicators like sentence length, word length, word frequency, etc. However, in many cases, this information is not adequate to realistically approximate the cognitive difficulties a person can have to understand a text. Therefore we use deep syntactic and semantic indicators in addition. The syntactic information is represented by a dependency tree, the semantic information by a semantic network. Both representations are automatically generated by a deep syntactico-semantic analysis. A global readability score is determined by applying a nearest neighbor algorithm on 3,000 ratings of 300 test persons. The evaluation showed that the deep syntactic and semantic indicators lead to promising results comparable to the best surface-based indicators. The combination of deep and shallow indicators leads to an improvement over shallow indicators alone. Finally, a graphical user interface was developed which highlights difficult passages, depending on the individual indicator values, and displays a global readability score. Povzetek (Slovenian abstract): machine learning with dependency trees is used to determine the readability of texts.
@InProceedings{vor:der:Brueck:Hartrumpf:Helbig:2008:a,
Author         = {vor der Brück, Tim and Hartrumpf, Sven and Helbig,
Hermann},
Title          = {A Readability Checker with Supervised Learning using
Deep Syntactic and Semantic Indicators},
BookTitle      = {Proceedings of the 11th International Multiconference:
Information Society - IS 2008 - Language Technologies},
Editor         = {Erjavec, Tomaž and Gros, Jerneja Žganec},
Pages          = {92--97},
abstract       = {Checking for readability or simplicity of texts is
important for many institutional and individual users.
Formulas for approximately measuring text readability
have a long tradition. Usually, they exploit
surface-oriented indicators like sentence length, word
length, word frequency, etc. However, in many cases,
this information is not adequate to realistically
approximate the cognitive difficulties a person can
have to understand a text. Therefore we use deep
syntactic and semantic indicators in addition. The
syntactic information is represented by a dependency
tree, the semantic information by a semantic network.
Both representations are automatically generated by a
deep syntactico-semantic analysis. A global readability
score is determined by applying a nearest neighbor
algorithm on 3,000 ratings of 300 test persons. The
evaluation showed that the deep syntactic and semantic
indicators lead to promising results comparable to the
best surface-based indicators. The combination of deep
and shallow indicators leads to an improvement over
shallow indicators alone. Finally, a graphical user
interface was developed which highlights difficult
passages, depending on the individual indicator values,
and displays a global readability score. Povzetek:
Strojno učenje z odvisnostnimi drevesi je uporabljeno
za ugotavljanje berljivosti besedil.},
isbn           = {978-961-264-006-4},
month          = {October},
url            = {http://pi7.fernuni-hagen.de/brueck/papers/brueck_hartrumpf_helbig08.pdf},
website        = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.160.5878},
year           = 2008
}
• T. vor der Brück, S. Hartrumpf, and H. Helbig, "A Readability Checker with Supervised Learning using Deep Indicators," Informatica, vol. 32, iss. 4, pp. 429-435, 2008.
[Abstract] [BibTeX]

Checking for readability or simplicity of texts is important for many institutional and individual users. Formulas for approximately measuring text readability have a long tradition. Usually, they exploit surface-oriented indicators like sentence length, word length, word frequency, etc. However, in many cases, this information is not adequate to realistically approximate the cognitive difficulties a person can have to understand a text. Therefore we use deep syntactic and semantic indicators in addition. The syntactic information is represented by a dependency tree, the semantic information by a semantic network. Both representations are automatically generated by a deep syntactico-semantic analysis. A global readability score is determined by applying a nearest neighbor algorithm on 3,000 ratings of 300 test persons. The evaluation showed that the deep syntactic and semantic indicators lead to promising results comparable to the best surface-based indicators. The combination of deep and shallow indicators leads to an improvement over shallow indicators alone. Finally, a graphical user interface was developed which highlights difficult passages, depending on the individual indicator values, and displays a global readability score.
@Article{vor:der:Brueck:Hartrumpf:Helbig:2008:b,
Author         = {vor der Brück, Tim and Hartrumpf, Sven and Helbig,
Hermann},
Title          = {A Readability Checker with Supervised Learning using
Deep Indicators},
Journal        = {Informatica},
Volume         = {32},
Number         = {4},
Pages          = {429--435},
abstract       = {Checking for readability or simplicity of texts is
important for many institutional and individual users.
Formulas for approximately measuring text readability
have a long tradition. Usually, they exploit
surface-oriented indicators like sentence length, word
length, word frequency, etc. However, in many cases,
this information is not adequate to realistically
approximate the cognitive difficulties a person can
have to understand a text. Therefore we use deep
syntactic and semantic indicators in addition. The
syntactic information is represented by a dependency
tree, the semantic information by a semantic network.
Both representations are automatically generated by a
deep syntactico-semantic analysis. A global readability
score is determined by applying a nearest neighbor
algorithm on 3,000 ratings of 300 test persons. The
evaluation showed that the deep syntactic and semantic
indicators lead to promising results comparable to the
best surface-based indicators. The combination of deep
and shallow indicators leads to an improvement over
shallow indicators alone. Finally, a graphical user
interface was developed which highlights difficult
passages, depending on the individual indicator values,
and displays a global readability score.},
year           = 2008
}
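The scoring step described in this abstract, a nearest-neighbor regression over indicator vectors and human ratings, might look as follows. Purely illustrative: the three-dimensional indicator vectors and ratings are invented and tiny next to the 3,000 ratings used in the paper, and scikit-learn is assumed only as a convenient library.

import numpy as np
from sklearn.neighbors import KNeighborsRegressor

# Each row represents one text as a vector of deep and shallow
# indicator values; y holds the corresponding human readability ratings.
X = np.array([[0.2, 0.7, 12.0], [0.8, 0.3, 25.0], [0.5, 0.5, 18.0]])
y = np.array([4.2, 1.9, 3.1])

knn = KNeighborsRegressor(n_neighbors=2).fit(X, y)
print(knn.predict([[0.3, 0.6, 14.0]]))  # averaged rating of 2 nearest texts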
• O. Pustylnikov and A. Mehler, "Text classification by means of structural features. What kind of information about texts is captured by their structure?," in Proceedings of RUSSIR '08, September 1-5, Taganrog, Russia, 2008.
[BibTeX]

@InProceedings{Pustylnikov:Mehler:2008:c,
Author         = {Pustylnikov, Olga and Mehler, Alexander},
Title          = {Text classification by means of structural features.
What kind of information about texts is captured by
their structure?},
BookTitle      = {Proceedings of RUSSIR '08, September 1-5, Taganrog,
Russia},
pdf            = {http://www.www.texttechnologylab.org/data/pdf/mehler_geibel_pustylnikov_2007.pdf},
year           = 2008
}
• U. Waltinger, A. Mehler, and M. Stührenberg, "An Integrated Model of Lexical Chaining: Applications, Resources and their Format," in Proceedings of KONVENS 2008 – Ergänzungsband Textressourcen und lexikalisches Wissen, 2008, pp. 59-70.
[BibTeX]

@InProceedings{Waltinger:Mehler:Stuehrenberg:2008,
Author         = {Waltinger, Ulli and Mehler, Alexander and
Stührenberg, Maik},
Title          = {An Integrated Model of Lexical Chaining: Applications,
Resources and their Format},
BookTitle      = {Proceedings of KONVENS 2008 – Erg{\"a}nzungsband
Textressourcen und lexikalisches Wissen},
Editor         = {Storrer, Angelika and Geyken, Alexander and Siebert,
Alexander and Würzner, Kay-Michael},
Pages          = {59-70},
pdf            = {http://www.ulliwaltinger.de/pdf/Konvens_2008_Integrated_Model_of_Lexical_Chaining_WaltingerMehlerStuehrenberg.pdf},
year           = 2008
}
• A. Mehler, "A Model of the Distribution of the Distances of Alike Elements in Dialogical Communication," in Proceedings of the International Conference on Information Theory and Statistical Learning (ITSL '08), July 14-15, 2008, Las Vegas, 2008, pp. 45-50.
[BibTeX]

@InProceedings{Mehler:2008:c,
Author         = {Mehler, Alexander},
Title          = {A Model of the Distribution of the Distances of Alike
Elements in Dialogical Communication},
BookTitle      = {Proceedings of the International Conference on
Information Theory and Statistical Learning (ITSL '08),
July 14-15, 2008, Las Vegas},
Pages          = {45-50},
year           = 2008
}
• U. Waltinger, A. Mehler, and G. Heyer, "Towards Automatic Content Tagging: Enhanced Web Services in Digital Libraries Using Lexical Chaining," in 4th Int. Conf. on Web Information Systems and Technologies (WEBIST '08), 4-7 May, Funchal, Portugal, Barcelona, 2008, pp. 231-236.
[BibTeX]

@InProceedings{Waltinger:Mehler:Heyer:2008,
Author         = {Waltinger, Ulli and Mehler, Alexander and Heyer,
Gerhard},
Title          = {Towards Automatic Content Tagging: Enhanced Web
Services in Digital Libraries Using Lexical Chaining},
BookTitle      = {4th Int. Conf. on Web Information Systems and
Technologies (WEBIST '08), 4-7 May, Funchal, Portugal},
Editor         = {Cordeiro, José and Filipe, Joaquim and Hammoudi,
Slimane},
Pages          = {231-236},
Publisher      = {INSTICC Press},
pdf            = {http://www.ulliwaltinger.de/pdf/Webist_2008_Towards_Automatic_Content_Tagging_WaltingerMehlerHeyer.pdf},
url            = {http://dblp.uni-trier.de/db/conf/webist/webist2008-2.html#WaltingerMH08},
website        = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.463.3097},
year           = 2008
}
• A. Mehler, "A Short Note on Social-Semiotic Networks from the Point of View of Quantitative Semantics," in Proceedings of the Dagstuhl Seminar on Social Web Communities, September 21-26, Dagstuhl, 2008.
[BibTeX]

@InProceedings{Mehler:2008:f,
Author         = {Mehler, Alexander},
Title          = {A Short Note on Social-Semiotic Networks from the
Point of View of Quantitative Semantics},
BookTitle      = {Proceedings of the Dagstuhl Seminar on Social Web
Communities, September 21-26, Dagstuhl},
Editor         = {Alani, Harith and Staab, Steffen and Stumme, Gerd},
pdf            = {http://drops.dagstuhl.de/opus/volltexte/2008/1788/pdf/08391.MehlerAlexander.ExtAbstract.1788.pdf},
year           = 2008
}
• A. Mehler, R. Gleim, A. Ernst, and U. Waltinger, "WikiDB: Building Interoperable Wiki-Based Knowledge Resources for Semantic Databases," Sprache und Datenverarbeitung. International Journal for Language Data Processing, vol. 32, iss. 1, pp. 47-70, 2008.
[Abstract] [BibTeX]

This article describes an API for exploring the logical document and the logical network structure of wikis. It introduces an algorithm for the semantic preprocessing, filtering and typing of these building blocks. Further, this article models the process of wiki generation based on a unified format of syntactic, semantic and pragmatic representations. This three-level approach to make accessible syntactic, semantic and pragmatic aspects of wiki-based structure formation is complemented by a corresponding database model – called WikiDB – and an API operating thereon. Finally, the article provides an empirical study of using the three-fold representation format in conjunction with WikiDB.
@Article{Mehler:Gleim:Ernst:Waltinger:2008,
Author         = {Mehler, Alexander and Gleim, Rüdiger and Ernst,
Alexandra and Waltinger, Ulli},
Title          = {WikiDB: Building Interoperable Wiki-Based Knowledge
Resources for Semantic Databases},
Journal        = {Sprache und Datenverarbeitung. International Journal
for Language Data Processing},
Volume         = {32},
Number         = {1},
Pages          = {47-70},
abstract       = {This article describes an API for exploring the
logical document and the logical network structure of
wikis. It introduces an algorithm for the semantic
preprocessing, filtering and typing of these building
blocks. Further, this article models the process of
wiki generation based on a unified format of syntactic,
semantic and pragmatic representations. This
three-level approach to make accessible syntactic,
semantic and pragmatic aspects of wiki-based structure
formation is complemented by a corresponding database
model – called WikiDB – and an API operating
thereon. Finally, the article provides an empirical
study of using the three-fold representation format in
conjunction with WikiDB.},
pdf            = {http://www.ulliwaltinger.de/pdf/Konvens_2008_WikiDB_Building_Semantic_Databases_MehlerGleimErnstWaltinger.pdf},
year           = 2008
}
• U. Waltinger and A. Mehler, "Who is it? Context sensitive named entity and instance recognition by means of Wikipedia," in Proceedings of the 2008 IEEE/WIC/ACM International Conference on Web Intelligence (WI-2008), 2008, p. 381–384.
[BibTeX]

@InProceedings{Waltinger:Mehler:2008:a,
Author         = {Waltinger, Ulli and Mehler, Alexander},
Title          = {Who is it? Context sensitive named entity and instance
recognition by means of Wikipedia},
BookTitle      = {Proceedings of the 2008 IEEE/WIC/ACM International
Conference on Web Intelligence (WI-2008)},
Pages          = {381–384},
Publisher      = {IEEE Computer Society},
pdf            = {http://www.ulliwaltinger.de/pdf/WI_2008_Context_Sensitive_Instance_Recognition_WaltingerMehler.pdf},
website        = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.324.5881},
year           = 2008
}
• A. Lücking, A. Mehler, and P. Menke, "Taking Fingerprints of Speech-and-Gesture Ensembles: Approaching Empirical Evidence of Intrapersonal Alignment in Multimodal Communication," in LONDIAL 2008: Proceedings of the 12th Workshop on the Semantics and Pragmatics of Dialogue (SEMDIAL), King's College London, 2008, p. 157–164.
[BibTeX]

@InProceedings{Luecking:Mehler:Menke:2008,
Author         = {Lücking, Andy and Mehler, Alexander and Menke, Peter},
Title          = {Taking Fingerprints of Speech-and-Gesture Ensembles:
Approaching Empirical Evidence of Intrapersonal
Alignment in Multimodal Communication},
BookTitle      = {LONDIAL 2008: Proceedings of the 12th Workshop on the
Semantics and Pragmatics of Dialogue (SEMDIAL)},
Pages          = {157–164},
month          = {June 2–4},
website        = {https://www.researchgate.net/publication/237305375_Taking_Fingerprints_of_Speech-and-Gesture_Ensembles_Approaching_Empirical_Evidence_of_Intrapersonal_Alignment_in_Multimodal_Communication},
year           = 2008
}
• A. Mehler and T. Sutter, "Interaktive Textproduktion in Wiki-basierten Kommunikationssystemen," in Kommunikation, Partizipation und Wirkungen im Social Web – Weblogs, Wikis, Podcasts und Communities aus interdisziplinärer Sicht, A. Zerfaß, M. Welker, and J. Schmidt, Eds., Köln: Herbert von Halem, 2008, pp. 267-300.
[Abstract] [BibTeX]

This article addresses challenges in maintaining and annotating image resources in the field of iconographic research. We focus on the task of bringing together generic and extensible techniques for resource and annotation management with the highly specific demands in this area of research. Special emphasis is put on the interrelation of images, image segments and textual contents. In addition, we describe the architecture, data model and user interface of the open annotation system used in the image database application that is a part of the eHumanities Desktop.
@InCollection{Mehler:Sutter:2008,
Author         = {Mehler, Alexander and Sutter, Tilmann},
Title          = {Interaktive Textproduktion in Wiki-basierten
Kommunikationssystemen},
BookTitle      = {Kommunikation, Partizipation und Wirkungen im Social
Web – Weblogs, Wikis, Podcasts und Communities aus
interdisziplin{\"a}rer Sicht},
Publisher      = {Herbert von Halem},
Editor         = {Zerfa{\ss}, Ansgar and Welker, Martin and Schmidt, Jan},
Pages          = {267-300},
abstract       = {This article addresses challenges in maintaining and
annotating image resources in the field of iconographic
research. We focus on the task of bringing together
generic and extensible techniques for resource and
annotation management with the highly specific
demands in this area of research. Special emphasis is
put on the interrelation of images, image segments and
textual contents. In addition, we describe the
architecture, data model and user interface of the open
annotation system used in the image database
application that is a part of the eHumanities Desktop.},
year           = 2008
}
• A. Mehler, "On the Impact of Community Structure on Self-Organizing Lexical Networks," in Proceedings of the 7th Evolution of Language Conference (Evolang 2008), March 11-15, 2008, Barcelona, 2008, pp. 227-234.
[Abstract] [BibTeX]

This paper presents a simulation model of self-organizing lexical networks. Its starting point is the notion of an association game in which the impact of varying community models is studied on the emergence of lexical networks. The paper reports on experiments whose results are in accordance with findings in the framework of the naming game. This is done by means of a multilevel network model in which the correlation of social and of linguistic networks is studied.
@InProceedings{Mehler:2008:e,
Author         = {Mehler, Alexander},
Title          = {On the Impact of Community Structure on
Self-Organizing Lexical Networks},
BookTitle      = {Proceedings of the 7th Evolution of Language
Conference (Evolang 2008), March 11-15, 2008, Barcelona},
Editor         = {Smith, Andrew D. M. and Smith, Kenny and Cancho, Ramon
Ferrer i},
Pages          = {227-234},
Publisher      = {World Scientific},
abstract       = {This paper presents a simulation model of
self-organizing lexical networks. Its starting point is
the notion of an association game in which the impact
of varying community models is studied on the emergence
of lexical networks. The paper reports on experiments
whose results are in accordance with findings in the
framework of the naming game. This is done by means of
a multilevel network model in which the correlation of
social and of linguistic networks is studied.},
website        = {http://stel.ub.edu/evolang2008/evo10.htm},
year           = 2008
}
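
To make such a setup concrete, here is a toy naming-game-style simulation over a community-structured population. The community bias, update rule and all parameters are invented for illustration and do not reproduce the paper's association game:

# Toy simulation in the spirit of a naming game on a community
# structure; all details below are illustrative assumptions.
import random

random.seed(1)
N_COMMUNITIES, SIZE, STEPS = 4, 25, 20000
agents = [{"community": c, "lexicon": set()}
          for c in range(N_COMMUNITIES) for _ in range(SIZE)]

def pick_partner(i):
    if random.random() < 0.9:  # community bias: mostly local talk
        pool = [j for j in range(len(agents)) if j != i
                and agents[j]["community"] == agents[i]["community"]]
    else:
        pool = [j for j in range(len(agents)) if j != i]
    return random.choice(pool)

for step in range(STEPS):
    s = random.randrange(len(agents))
    speaker, hearer = agents[s], agents[pick_partner(s)]
    if not speaker["lexicon"]:
        speaker["lexicon"].add(f"w{step}")  # invent a new word
    word = random.choice(sorted(speaker["lexicon"]))
    if word in hearer["lexicon"]:
        # success: both collapse their lexicons to the agreed word
        speaker["lexicon"] = {word}
        hearer["lexicon"] = {word}
    else:
        hearer["lexicon"].add(word)

words = set().union(*[a["lexicon"] for a in agents])
print("distinct words in circulation:", len(words))
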
• O. Abramov and A. Mehler, "Towards a Uniform Representation of Treebanks: Providing Interoperability for Dependency Tree Data," in Proceedings of First International Conference on Global Interoperability for Language Resources (ICGL 2008), Hong Kong SAR, January 9-11, 2008.
[Abstract] [BibTeX]

In this paper we present a corpus representation format which unifies the representation of a wide range of dependency treebanks within a single model. This approach provides interoperability and reusability of annotated syntactic data which in turn extends its applicability within various research contexts. We demonstrate our approach by means of dependency treebanks of 11 languages. Further, we perform a comparative quantitative analysis of these treebanks in order to demonstrate the interoperability of our approach.
@InProceedings{Pustylnikov:Mehler:2008:a,
Author         = {Abramov, Olga and Mehler, Alexander},
Title          = {Towards a Uniform Representation of Treebanks:
Providing Interoperability for Dependency Tree Data},
BookTitle      = {Proceedings of First International Conference on
Global Interoperability for Language Resources (ICGL
2008), Hong Kong SAR, January 9-11},
abstract       = {In this paper we present a corpus representation
format which unifies the representation of a wide range
of dependency treebanks within a single model. This
approach provides interoperability and reusability of
annotated syntactic data which in turn extends its
applicability within various research contexts. We
demonstrate our approach by means of dependency
treebanks of 11 languages. Further, we perform a
comparative quantitative analysis of these treebanks in
order to demonstrate the interoperability of our
approach. },
pdf            = {http://wwwhomes.uni-bielefeld.de/opustylnikov/pustylnikov/pdfs/acl07.1.0.pdf},
website        = {https://www.researchgate.net/publication/242681771_Towards_a_Uniform_Representation_of_Treebanks_Providing_Interoperability_for_Dependency_Tree_Data},
year           = 2008
}
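
A minimal sketch of what a unified in-memory treebank format can look like. The field names and the tab-separated toy input are illustrative assumptions, not the representation format proposed in the paper:

# Sketch of a unified dependency-tree format: once different treebanks
# are loaded into one structure, queries become language-independent.
from dataclasses import dataclass
from typing import List

@dataclass
class Token:
    index: int   # 1-based position in the sentence
    form: str    # surface form
    head: int    # index of governing token, 0 = root
    deprel: str  # dependency label

def parse_conll_like(lines: List[str]) -> List[Token]:
    """Read a minimal tab-separated format: index, form, head, deprel."""
    toks = []
    for line in lines:
        idx, form, head, rel = line.split("\t")
        toks.append(Token(int(idx), form, int(head), rel))
    return toks

sentence = parse_conll_like([
    "1\tthe\t2\tdet",
    "2\tcat\t3\tnsubj",
    "3\tsleeps\t0\troot",
])
# The same query works on any treebank loaded this way:
print([t.form for t in sentence if t.head == 0])  # ['sleeps']
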
• G. Rehm, M. Santini, A. Mehler, P. Braslavski, R. Gleim, A. Stubbe, S. Symonenko, M. Tavosanis, and V. Vidulin, "Towards a Reference Corpus of Web Genres for the Evaluation of Genre Identification Systems," in Proceedings of the 6th Language Resources and Evaluation Conference (LREC 2008), Marrakech (Morocco), 2008.
[Abstract] [BibTeX]

We present initial results from an international and multi-disciplinary research collaboration that aims at the construction of a reference corpus of web genres. The primary application scenario for which we plan to build this resource is the automatic identification of web genres. Web genres are rather difficult to capture and to describe in their entirety, but we plan for the finished reference corpus to contain multi-level tags of the respective genre or genres a web document or a website instantiates. As the construction of such a corpus is by no means a trivial task, we discuss several alternatives that are, for the time being, mostly based on existing collections. Furthermore, we discuss a shared set of genre categories and a multi-purpose tool as two additional prerequisites for a reference corpus of web genres.
@InProceedings{Rehm:Santini:Mehler:Braslavski:Gleim:Stubbe:Symonenko:Tavosanis:Vidulin:2008,
Author         = {Rehm, Georg and Santini, Marina and Mehler, Alexander
and Braslavski, Pavel and Gleim, Rüdiger and Stubbe,
Andrea and Symonenko, Svetlana and Tavosanis, Mirko and
Vidulin, Vedrana},
Title          = {Towards a Reference Corpus of Web Genres for the
Evaluation of Genre Identification Systems},
BookTitle      = {Proceedings of the 6th Language Resources and
Evaluation Conference (LREC 2008), Marrakech (Morocco)},
abstract       = {We present initial results from an international and
multi-disciplinary research collaboration that aims at
the construction of a reference corpus of web genres.
The primary application scenario for which we plan to
build this resource is the automatic identification of
web genres. Web genres are rather difficult to capture
and to describe in their entirety, but we plan for the
finished reference corpus to contain multi-level tags
of the respective genre or genres a web document or a
website instantiates. As the construction of such a
corpus is by no means a trivial task, we discuss
several alternatives that are, for the time being,
mostly based on existing collections. Furthermore, we
discuss a shared set of genre categories and a
multi-purpose tool as two additional prerequisites for
a reference corpus of web genres. },
website        = {http://www.lrec-conf.org/proceedings/lrec2008/summaries/94.html},
year           = 2008
}

### 2007 (29)

• R. Gleim, A. Mehler, M. Dehmer, and O. Abramov, "Aisles through the Category Forest – Utilising the Wikipedia Category System for Corpus Building in Machine Learning," in 3rd International Conference on Web Information Systems and Technologies (WEBIST '07), March 3-6, 2007, Barcelona, 2007, pp. 142-149.
[Abstract] [BibTeX]

The World Wide Web is a continuous challenge to machine learning. Established approaches have to be enhanced and new methods be developed in order to tackle the problem of finding and organising relevant information. It has often been motivated that semantic classifications of input documents help solving this task. But while approaches of supervised text categorisation perform quite well on genres found in written text, newly evolved genres on the web are much more demanding. In order to successfully develop approaches to web mining, respective corpora are needed. However, the composition of genre- or domain-specific web corpora is still an unsolved problem. It is time consuming to build large corpora of good quality because web pages typically lack reliable meta information. Wikipedia along with similar approaches of collaborative text production offers a way out of this dilemma. We examine how social tagging, as supported by the MediaWiki software, can be utilised as a source of corpus building. Further, we describe a representation format for social ontologies and present the Wikipedia Category Explorer, a tool which supports categorical views to browse through the Wikipedia and to construct domain specific corpora for machine learning.
@InProceedings{Gleim:Mehler:Dehmer:Abramov:2007,
Author         = {Gleim, Rüdiger and Mehler, Alexander and Dehmer,
Matthias and Abramov, Olga},
Title          = {Aisles through the Category Forest – Utilising the
Wikipedia Category System for Corpus Building in
Machine Learning},
BookTitle      = {3rd International Conference on Web Information
Systems and Technologies (WEBIST '07), March 3-6, 2007,
Barcelona},
Editor         = {Filipe, Joaquim and Cordeiro, José and Encarnação,
Bruno and Pedrosa, Vitor},
Pages          = {142-149},
abstract       = {The World Wide Web is a continuous challenge to machine
learning. Established approaches have to be enhanced
and new methods be developed in order to tackle the
problem of finding and organising relevant information.
It has often been motivated that semantic
classifications of input documents help solving this
task. But while approaches of supervised text
categorisation perform quite well on genres found in
written text, newly evolved genres on the web are much
more demanding. In order to successfully develop
approaches to web mining, respective corpora are
needed. However, the composition of genre- or
domain-specific web corpora is still an unsolved
problem. It is time consuming to build large corpora of
good quality because web pages typically lack reliable
meta information. Wikipedia along with similar
approaches of collaborative text production offers a
way out of this dilemma. We examine how social tagging,
as supported by the MediaWiki software, can be utilised
as a source of corpus building. Further, we describe a
representation format for social ontologies and present
the Wikipedia Category Explorer, a tool which supports
categorical views to browse through the Wikipedia and
to construct domain specific corpora for machine
learning.},
year           = 2007
}
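
As a toy illustration of corpus building over a category system, the following breadth-first traversal collects pages reachable from a seed category. The hand-made graph stands in for the MediaWiki category tables, and the depth bound is an invented parameter:

# Toy corpus building via a wiki category graph: collect all member
# pages reachable from a seed category by breadth-first search.
from collections import deque

subcats = {  # category -> subcategories (stand-in for MediaWiki data)
    "Linguistics": ["Syntax", "Semantics"],
    "Syntax": [], "Semantics": ["Lexical semantics"],
    "Lexical semantics": [],
}
pages = {    # category -> member pages
    "Linguistics": ["Language"], "Syntax": ["Parse tree"],
    "Semantics": ["Meaning"], "Lexical semantics": ["Polysemy"],
}

def collect_corpus(seed, max_depth=2):
    seen, corpus = {seed}, []
    queue = deque([(seed, 0)])
    while queue:
        cat, depth = queue.popleft()
        corpus.extend(pages.get(cat, []))
        if depth < max_depth:
            for sub in subcats.get(cat, []):
                if sub not in seen:
                    seen.add(sub)
                    queue.append((sub, depth + 1))
    return corpus

print(collect_corpus("Linguistics"))
# ['Language', 'Parse tree', 'Meaning', 'Polysemy']
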
• A. Mehler, R. Gleim, and A. Wegner, "Structural Uncertainty of Hypertext Types. An Empirical Study," in Proceedings of the Workshop "Towards Genre-Enabled Search Engines: The Impact of NLP", September 30, 2007, in conjunction with RANLP 2007, Borovets, Bulgaria, 2007, pp. 13-19.
[BibTeX]

@InProceedings{Mehler:Gleim:Wegner:2007,
Author         = {Mehler, Alexander and Gleim, Rüdiger and Wegner,
Armin},
Title          = {Structural Uncertainty of Hypertext Types. An
Empirical Study},
BookTitle      = {Proceedings of the Workshop "Towards Genre-Enabled
Search Engines: The Impact of NLP", September, 30,
2007, in conjunction with RANLP 2007, Borovets,
Bulgaria},
Editor         = {Rehm, Georg and Santini, Marina},
Pages          = {13-19},
year           = 2007
}
• A. Mehler, "Evolving Lexical Networks. A Simulation Model of Terminological Alignment," in Proceedings of the Workshop on Language, Games, and Evolution at the 9th European Summer School in Logic, Language and Information (ESSLLI 2007), Trinity College, Dublin, 6-17 August, 2007, pp. 57-67.
[Abstract] [BibTeX]

In this paper we describe a simulation model of terminological alignment in a multiagent community. It is based on the notion of an association game which is used instead of the classical notion of a naming game (Steels, 1996). The simulation model integrates a small world-like agent community which restricts agent communication. We hypothesize that this restriction is decisive when it comes to simulate terminological alignment based on lexical priming. The paper presents preliminary experimental results in support of this hypothesis.
@InProceedings{Mehler:2007:d,
Author         = {Mehler, Alexander},
Title          = {Evolving Lexical Networks. A Simulation Model of
Terminological Alignment},
BookTitle      = {Proceedings of the Workshop on Language, Games, and
Evolution at the 9th European Summer School in Logic,
Language and Information (ESSLLI 2007), Trinity
College, Dublin, 6-17 August},
Editor         = {Benz, Anton and Ebert, Christian and van Rooij, Robert},
Pages          = {57-67},
abstract       = {In this paper we describe a simulation model of
terminological alignment in a multiagent community. It
is based on the notion of an association game which is
used instead of the classical notion of a naming game
(Steels, 1996). The simulation model integrates a small
world-like agent community which restricts agent
communication. We hypothesize that this restriction is
decisive when it comes to simulate terminological
alignment based on lexical priming. The paper presents
preliminary experimental results in support of this
hypothesis.},
year           = 2007
}
• A. Mehler, P. Geibel, R. Gleim, S. Herold, B. Jain, and O. Abramov, "Much Ado About Text Content. Learning Text Types Solely by Structural Differentiae," in Proceedings of OTT '06 – Ontologies in Text Technology: Approaches to Extract Semantic Knowledge from Structured Information, Osnabrück, 2007, pp. 63-71.
[Abstract] [BibTeX]

In this paper, we deal with classifying texts into classes which denote text types whose textual instances serve more or less homogeneous functions. Other than mainstream approaches to text classification, which rely on the vector space model [30] or some of its descendants [2] and, thus, on content-related lexical features, we solely refer to structural differentiae, that is, to patterns of text structure as determinants of class membership. Further, we suppose that text types span a type hierarchy based on the type-subtype relation [31]. Thus, although we admit that class membership is fuzzy so that overlapping classes are inevitable, we suppose a non-overlapping type system structured into a rooted tree – whether solely based on functional or additionally on, e.g., content- or media-based criteria [1]. As regards criteria of goodness of classification, we perform a classical supervised categorization experiment [30] based on cross-validation as a method of model selection [11]. That is, we perform a categorization experiment in which for all training and test cases class membership is known ex ante. In summary, we perform a supervised experiment of text classification in order to learn functionally grounded text types where membership to these types is solely based on structural criteria.
@InProceedings{Mehler:Geibel:Gleim:Herold:Jain:Pustylnikov:2007,
Author         = {Mehler, Alexander and Geibel, Peter and Gleim,
Rüdiger and Herold, Sebastian and Jain,
Brijnesh-Johannes and Abramov, Olga},
Title          = {Much Ado About Text Content. Learning Text Types
Solely by Structural Differentiae},
BookTitle      = {Proceedings of OTT '06 – Ontologies in Text
Technology: Approaches to Extract Semantic Knowledge
from Structured Information},
Editor         = {Mönnich, Uwe and Kühnberger, Kai-Uwe},
Series         = {Publications of the Institute of Cognitive Science
(PICS)},
Pages          = {63-71},
abstract       = {In this paper, we deal with classifying texts into
classes which denote text types whose textual instances
serve more or less homogeneous functions. Other than
mainstream approaches to text classification, which
rely on the vector space model [30] or some of its
descendants [2] and, thus, on content-related lexical
features, we solely refer to structural differentiae,
that is, to patterns of text structure as determinants
of class membership. Further, we suppose that text
types span a type hierarchy based on the type-subtype
relation [31]. Thus, although we admit that class
membership is fuzzy so that overlapping classes are
inevitable, we suppose a non-overlapping type system
structured into a rooted tree – whether solely based
on functional or additionally on, e.g., content- or
media-based criteria [1]. As regards criteria of
goodness of classification, we perform a classical
supervised categorization experiment [30] based on
cross-validation as a method of model selection [11].
That is, we perform a categorization experiment in
which for all training and test cases class membership
is known ex ante. In summary, we perform a supervised
experiment of text classification in order to learn
functionally grounded text types where membership to
these types is solely based on structural criteria.},
pdf            = {http://ikw.uni-osnabrueck.de/~ott06/ott06-abstracts/Mehler_Geibel_abstract.pdf},
year           = 2007
}
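
For a flavor of what purely structural differentiae can look like in practice, this sketch derives content-free features from a document tree. The concrete feature set is an invented example, not the paper's:

# Sketch: structural (content-free) features of a document tree,
# as opposed to lexical features. Feature choice is illustrative.
from statistics import mean

doc = ("body", [  # (label, children) pairs
    ("section", [("p", []), ("p", []),
                 ("list", [("item", []), ("item", [])])]),
    ("section", [("p", [])]),
])

def nodes(tree):
    yield tree
    for child in tree[1]:
        yield from nodes(child)

def depth(tree):
    return 1 + max((depth(c) for c in tree[1]), default=0)

all_nodes = list(nodes(doc))
features = {
    "n_nodes": len(all_nodes),
    "depth": depth(doc),
    "mean_branching": mean(len(children) for _, children in all_nodes),
}
print(features)  # usable by any standard classifier
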
• M. Dehmer, A. Mehler, and F. Emmert-Streib, "Graph-theoretical Characterizations of Generalized Trees," in Proceedings of the 2007 International Conference on Machine Learning: Models, Technologies & Applications (MLMTA '07), June 25-28, 2007, Las Vegas, 2007, pp. 113-117.
[BibTeX]

@InProceedings{Dehmer:Mehler:Emmert-Streib:2007:a,
Author         = {Dehmer, Matthias and Mehler, Alexander and
Emmert-Streib, Frank},
Title          = {Graph-theoretical Characterizations of Generalized
Trees},
BookTitle      = {Proceedings of the 2007 International Conference on
Machine Learning: Models, Technologies \& Applications
(MLMTA '07), June 25-28, 2007, Las Vegas},
Pages          = {113-117},
website        = {https://www.researchgate.net/publication/221188591_Graph-theoretical_Characterizations_of_Generalized_Trees},
year           = 2007
}
• R. Gleim, A. Mehler, and H. Eikmeyer, "Representing and Maintaining Large Corpora," in Proceedings of the Corpus Linguistics 2007 Conference, Birmingham (UK), 2007.
[BibTeX]

@InProceedings{Gleim:Mehler:Eikmeyer:2007:a,
Author         = {Gleim, Rüdiger and Mehler, Alexander and Eikmeyer,
Hans-Jürgen},
Title          = {Representing and Maintaining Large Corpora},
BookTitle      = {Proceedings of the Corpus Linguistics 2007 Conference,
Birmingham (UK)},
year           = 2007
}
• P. Geibel, O. Abramov, A. Mehler, H. Gust, and K. Kühnberger, "Classification of Documents Based on the Structure of Their DOM Trees," in Proceedings of ICONIP 2007 (14th International Conference on Neural Information Processing), 2007, pp. 779–788.
[Abstract] [BibTeX]

In this paper, we discuss kernels that can be applied for the classification of XML documents based on their DOM trees. DOM trees are ordered trees in which every node might be labeled by a vector of attributes including its XML tag and the textual content. We describe five new kernels suitable for such structures: a kernel based on predefined structural features, a tree kernel derived from the well-known parse tree kernel, the set tree kernel that allows permutations of children, the string tree kernel being an extension of the so-called partial tree kernel, and the soft tree kernel as a more efficient alternative. We evaluate the kernels experimentally on a corpus containing the DOM trees of newspaper articles and on the well-known SUSANNE corpus.
@InProceedings{Geibel:Pustylnikov:Mehler:Gust:Kuehnberger:2007,
Author         = {Geibel, Peter and Abramov, Olga and Mehler, Alexander
and Gust, Helmar and Kühnberger, Kai-Uwe},
Title          = {Classification of Documents Based on the Structure of
Their DOM Trees},
BookTitle      = {Proceedings of ICONIP 2007 (14th International
Conference on Neural Information Processing)},
Series         = {Lecture Notes in Computer Science 4985},
Pages          = {779–788},
Publisher      = {Springer},
abstract       = {In this paper, we discuss kernels that can be applied
for the classification of XML documents based on their
DOM trees. DOM trees are ordered trees in which every
node might be labeled by a vector of attributes
including its XML tag and the textual content. We
describe five new kernels suitable for such structures:
a kernel based on predefined structural features, a
tree kernel derived from the well-known parse tree
kernel, the set tree kernel that allows permutations of
children, the string tree kernel being an extension of
the so-called partial tree kernel, and the soft tree
kernel as a more efficient alternative. We evaluate the
kernels experimentally on a corpus containing the DOM
trees of newspaper articles and on the well-known
SUSANNE corpus.},
year           = 2007
}
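
As background, here is a compact version of the classical subset-tree (parse-tree) kernel that such approaches build on: it counts tree fragments shared by two ordered labeled trees. This is the textbook Collins-Duffy recursion (with decay factor 1), not any of the five kernels of the paper:

# Subset-tree kernel: K(t1, t2) = sum over node pairs of the number
# of common fragments rooted there (Collins & Duffy style).
def walk(t):
    yield t
    for child in t[1]:
        yield from walk(child)

def C(n1, n2):
    """Count common tree fragments rooted at n1 and n2."""
    (lab1, kids1), (lab2, kids2) = n1, n2
    if lab1 != lab2 or [k[0] for k in kids1] != [k[0] for k in kids2]:
        return 0       # productions differ: no shared fragment here
    if not kids1:      # identical leaves
        return 1
    prod = 1
    for k1, k2 in zip(kids1, kids2):
        prod *= 1 + C(k1, k2)  # each child subtree: stop or descend
    return prod

def tree_kernel(t1, t2):
    return sum(C(a, b) for a in walk(t1) for b in walk(t2))

a = ("S", [("NP", []), ("VP", [("V", []), ("NP", [])])])
b = ("S", [("NP", []), ("VP", [("V", []), ("PP", [])])])
print(tree_kernel(a, b))  # 5 shared fragments
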
• B. Jussen, A. Mehler, and A. Ernst, "A Corpus Management System for Historical Semantics," Sprache und Datenverarbeitung. International Journal for Language Data Processing, vol. 31, iss. 1-2, pp. 81-89, 2007.
[Abstract] [BibTeX]

This article describes a corpus management system for historical semantics. It is based on a notion of meaning that, methodologically speaking, rests on the analysis of diachronic corpora. The aim of analyzing these corpora is to investigate semantic change as a reference point for the change of social systems. The corpus management system presented here supports this kind of corpus-based historical semantics.
@Article{Jussen:Mehler:Ernst:2007,
Author         = {Jussen, Bernhard and Mehler, Alexander and Ernst,
Alexandra},
Title          = {A Corpus Management System for Historical Semantics},
Journal        = {Sprache und Datenverarbeitung. International Journal
for Language Data Processing},
Volume         = {31},
Number         = {1-2},
Pages          = {81-89},
abstract       = {Der Beitrag beschreibt ein Korpusmanagementsystem für
die historische Semantik. Die Grundlage hierfür bildet
ein Bedeutungsbegriff, der – methodologisch
gesprochen – auf der Analyse diachroner Korpora
beruht. Das Ziel der Analyse dieser Korpora besteht
darin, Bedeutungswandel als eine Bezugsgrö{\ss}e für
den Wandel sozialer Systeme zu untersuchen. Das
vorgestellte Korpusmanagementsystem unterstützt diese
Art der korpusbasierten historischen Semantik.},
year           = 2007
}
• A. Mehler and R. Köhler, "Machine Learning in a Semiotic Perspective," in Aspects of Automatic Text Analysis, A. Mehler and R. Köhler, Eds., Berlin/New York: Springer, 2007, pp. 1-29.
[Abstract] [BibTeX]

The subject of the following article is the connotative aspect of the meanings of texts. The starting point of the considerations on the connotation of texts is the view that the constitution of word and text meaning results from a circular process which is responsible for the emergence of a hierarchy of nested linguistic units. The process of sign articulation proceeds along these levels and, by connecting the (connotative) content plane and the expression plane at the level of the text, produces the text sign. In contrast to a strict interpretation of Frege's principle of compositionality, according to which the meanings of linguistic units have to be presupposed as fixed, context-free entities, the present approach treats already lexical meaning as an entity that can vary depending on its context. From a semiotic perspective it is above all their gestalt character that exempts connotative text meanings from an application of the Frege principle. In other words: the connotative meaning of a text can by no means be decomposed into a structure of 'atomic' representations. The hierarchical organization of texts proves to be complex insofar as their meanings result from a circular process that confirms and/or modifies the meanings of the text constituents. Because of this circularity, texts are not only to be seen as places where word meaning structures manifest themselves, but at the same time serve as starting points for the modification and emergence of such structures. In the following, drawing on Copenhagen structuralism, a model of the connotative meaning of texts is developed which is oriented, among other things, towards the glossematic notion of the constant. The model is formalized by means of the concept of the fuzzy set. To this end, the fuzzy usage regularities of words are analyzed on the basis of a two-stage procedure which takes into account the syntagmatic and paradigmatic regularities of word usage. The role of the sentence level within the process of the constitution of connotative text meaning is sketched. Finally, the algorithm is exemplified by the automatic analysis of a text corpus.
@InCollection{Mehler:Koehler:2007:b,
Author         = {Mehler, Alexander and Köhler, Reinhard},
Title          = {Machine Learning in a Semiotic Perspective},
BookTitle      = {Aspects of Automatic Text Analysis},
Publisher      = {Springer},
Editor         = {Mehler, Alexander and Köhler, Reinhard},
Series         = {Studies in Fuzziness and Soft Computing},
Pages          = {1-29},
abstract       = {Gegenstand des folgenden Aufsatzes ist der konnotative
Aspekt der Bedeutungen von Texten. Den Ausgangspunkt
der {\"U}berlegungen zur Konnotation des Textes bildet
die Auffassung, wonach Wort- und
Textbedeutungskonstitution Ergebnis eines
zirkul{\"a}ren Prozesses sind, der für die Emergenz
einer Hierarchie ineinander geschachtelter
Spracheinheiten verantwortlich zeichnet. Der Proze{\ss}
der Zeichenartikulation erfolgt entlang dieser Ebenen
und erzeugt durch Verbindung von (konnotativer)
Inhalts- und Ausdrucksseite auf Textebene das
Textzeichen. Im Gegensatz zu einer strikten
Interpretation des Fregeschen
Kompositionalit{\"a}tsprinzips, derzufolge die
Bedeutungen sprachlicher Einheiten als fixierte,
kontextfreie Grö{\ss}en vorauszusetzen sind, behandelt
der vorliegende Ansatz bereits die lexikalische
Bedeutung als Grö{\ss}e, die in Abh{\"a}ngigkeit von
ihrem Kontext variieren kann. Aus semiotischer
Perspektive ist es vor allem der Gestaltcharakter,
welcher die konnotative Textbedeutung einer Anwendung
des FregePrinzips entzieht. Anders ausgedrückt: Die
konnotative Bedeutung eines Textes ist keineswegs in
eine Struktur 'atomarer' Repr{\"a}sentationen
zerlegbar. Die hierarchische Organisation von Texten
erweist sich insofern als komplex, als ihre Bedeutungen
aus einem zirkul{\"a}ren Proze{\ss} resultieren, der
best{\"a}tigend und/oder ver{\"a}ndernd auf die
Bedeutungen der Textkonstituenten einwirkt. Diese
Zirkularit{\"a}t bedingt, da{\ss} Texte nicht nur als
Orte der Manifestation von Wortbedeutungsstrukturen
anzusehen sind, sondern zugleich als Ausgangspunkte
für die Modifikation und Emergenz solcher Strukturen
dienen. Im folgenden wird unter Rekurs auf den
Kopenhagener Strukturalismus ein Modell der
konnotativen Bedeutung von Texten entwickelt, das sich
unter anderem an dem glossematischen Begriff der
Konstante orientiert. Die Formalisierung des Modells
erfolgt mit Hilfe des Konzeptes der unscharfen Menge.
Zu diesem Zweck werden die unscharfen
Verwendungsregularit{\"a}ten von Wörtern auf der Basis
eines zweistufigen Verfahrens analysiert, welches die
syntagmatischen und paradigmatischen Regularit{\"a}ten
des Wortgebrauches berücksichtigt. Die Rolle der
Satzebene innerhalb des Prozesses der konnotativen
Textbedeutungskonstitution wird angedeutet.
Abschlie{\ss}end erfolgt eine Exemplifizierung des
Algorithmus anhand der automatischen Analyse eines
Textcorpus.},
website        = {http://rd.springer.com/chapter/10.1007/978-3-540-37522-7_1},
year           = 2007
}
• A. Mehler, U. Waltinger, and A. Wegner, "A Formal Text Representation Model Based on Lexical Chaining," in Proceedings of the KI 2007 Workshop on Learning from Non-Vectorial Data (LNVD 2007) September 10, Osnabrück, 2007, pp. 17-26.
[Abstract] [BibTeX]

This paper presents a formal text representation model as an alternative to the vector space model. It combines a tree-like model with graph-inducing lexical relations. The paper aims at formalizing two yet unrelated approaches, i.e. lexical chaining [3] and quantitative structure analysis [9], in order to combine content and structure modeling.
@InProceedings{Mehler:Waltinger:Wegner:2007:a,
Author         = {Mehler, Alexander and Waltinger, Ulli and Wegner,
Armin},
Title          = {A Formal Text Representation Model Based on Lexical
Chaining},
BookTitle      = {Proceedings of the KI 2007 Workshop on Learning from
Non-Vectorial Data (LNVD 2007) September 10, Osnabrück},
Editor         = {Geibel, Peter and Jain, Brijnesh J.},
Pages          = {17-26},
Publisher      = {Universit{\"a}t Osnabrück},
abstract       = {This paper presents a formal text representation model
as an alternative to the vector space model. It
combines a tree-like model with graph-inducing lexical
relations. The paper aims at formalizing two yet
unrelated approaches, i.e. lexical chaining [3] and
quantitative structure analysis [9], in order to
combine content and structure modeling.},
pdf            = {http://www.ulliwaltinger.de/pdf/LNVD07MehlerWaltingerWegner.pdf},
year           = 2007
}
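
A toy illustration of the lexical-chaining side of such a model: a token is appended to a chain as soon as a lexical relation links it to one of the chain's members. The relation table is a stand-in for a real lexical resource such as WordNet, and the chaining policy is an invented simplification:

# Toy lexical chaining over a symmetric relatedness table.
related = {
    ("car", "vehicle"), ("vehicle", "car"),
    ("car", "wheel"), ("wheel", "car"),
    ("bank", "money"), ("money", "bank"),
}

def lexical_chains(tokens):
    chains = []
    for tok in tokens:
        for chain in chains:
            if any((tok, member) in related for member in chain):
                chain.append(tok)  # join the first related chain
                break
        else:
            chains.append([tok])   # start a new chain
    return chains

text = ["car", "money", "wheel", "bank", "vehicle"]
print(lexical_chains(text))
# [['car', 'wheel', 'vehicle'], ['money', 'bank']]
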
• T. vor der Brück and S. Hartrumpf, "A Semantically Oriented Readability Checker for German," in Proceedings of the 3rd Language & Technology Conference, Z. Vetulani, Ed., Poznań, Poland: Wydawnictwo Poznańskie, 2007, pp. 270-274.
[Abstract] [BibTeX]

One major reason that readability checkers are still far away from judging the understandability of texts consists in the fact that no semantic information is used. Syntactic, lexical, or morphological information can only give limited access for estimating the cognitive difficulties for a human being to comprehend a text. In this paper however, we present a readability checker which uses semantic information in addition. This information is represented as semantic networks and is derived by a deep syntactico-semantic analysis. We investigate in which situations a semantic readability indicator can lead to superior results in comparison with ordinary surface indicators like sentence length. Finally, we compute the correlations and absolute errors for our semantic indicators related to user ratings collected in an online evaluation.
@InCollection{vor:der:Brueck:Hartrumpf:2007,
Author         = {vor der Brück, Tim and Hartrumpf, Sven},
Title          = {A Semantically Oriented Readability Checker for German},
BookTitle      = {Proceedings of the 3rd Language \& Technology
Conference},
Publisher      = {Wydawnictwo Poznańskie},
Editor         = {Zygmunt Vetulani},
Pages          = {270--274},
abstract       = {One major reason that readability checkers are still
far away from judging the understandability of texts
consists in the fact that no semantic information is
used. Syntactic, lexical, or morphological information
can only give limited access for estimating the
cognitive difficulties for a human being to comprehend
a text. In this paper however, we present a readability
checker which uses semantic information in addition.
This information is represented as semantic networks
and is derived by a deep syntactico-semantic analysis.
We investigate in which situations a semantic
readability indicator can lead to superior results in
comparison with ordinary surface indicators like
sentence length. Finally, we compute the correlations
and absolute errors for our semantic indicators related
to user ratings collected in an online evaluation.},
isbn           = {978-83-7177-407-2},
month          = {October},
url            = {http://pi7.fernuni-hagen.de/papers/brueck_hartrumpf07_online.pdf},
year           = 2007
}
• T. vor der Brück and S. Busemann, "Suggesting Error Corrections of Path Expressions and Categories for Tree-Mapping Grammars," Zeitschrift für Sprachwissenschaft, vol. 26, iss. 2, 2007.
[Abstract] [BibTeX]

Tree mapping grammars are used in natural language generation (NLG) to map non-linguistic input onto a derivation tree from which the target text can be trivially read off as the terminal yield. Such grammars may consist of a large number of rules. Finding errors is quite tedious and sometimes very time-consuming. Often the generation fails because the relevant input subtree is not specified correctly. This work describes a method to detect and correct wrong assignments of input subtrees to grammar categories by cross-validating grammar rules with the given input structures. The method also detects and corrects the usage of a category in a grammar rule. The result is implemented in a grammar development workbench and accelerates the grammar writer's work considerably. The paper suggests the algorithms can be ported to other areas in which tree mapping is required.
@Article{vor:der:Brueck:Busemann:2007,
Author         = {vor der Brück, Tim and Busemann, Stephan},
Title          = {Suggesting Error Corrections of Path Expressions and
Categories for Tree-Mapping Grammars},
Journal        = {Zeitschrift für Sprachwissenschaft},
Volume         = {26},
Number         = {2},
abstract       = {Tree mapping grammars are used in natural language
generation (NLG) to map non-linguistic input onto a
derivation tree from which the target text can be
trivially read off as the terminal yield. Such grammars
may consist of a large number of rules. Finding errors
is quite tedious and sometimes very time-consuming.
Often the generation fails because the relevant input
subtree is not specified correctly. This work describes
a method to detect and correct wrong assignments of
input subtrees to grammar categories by
cross-validating grammar rules with the given input
structures. The method also detects and corrects the
usage of a category in a grammar rule. The result is
implemented in a grammar development workbench and
accelerates the grammar writer's work considerably. The
paper suggests the algorithms can be ported to other
areas in which tree mapping is required.},
url            = {http://www.reference-global.com/doi/pdfplus/10.1515/ZFS.2007.021},
year           = 2007
}
• T. vor der Brück and J. Leveling, "Parameter Learning for a Readability Checking Tool," in Proceedings of the LWA 2007 (Lernen-Wissen-Adaption), Workshop KDML, A. Hinneburg, Ed., Halle/Saale, Germany: Gesellschaft für Informatik, 2007.
[Abstract] [BibTeX]

This paper describes the application of machine learning methods to determine parameters for DeLite, a readability checking tool. DeLite pinpoints text segments that are difficult to understand and computes for a given text a global readability score, which is a weighted sum of normalized indicator values. Indicator values are numeric properties derived from linguistic units in the text, such as the distance between a verb and its complements or the number of possible antecedents for a pronoun. Indicators are normalized by means of a derivation of the Fermi function with two parameters. DeLite requires individual parameters for this normalization function and a weight for each indicator to compute the global readability score. Several experiments to determine these parameters were conducted, using different machine learning approaches. The training data consists of more than 300 user ratings of texts from the municipality domain. The weights for the indicators are learned using two approaches: i) robust regression with linear optimization and ii) an approximative iterative linear regression algorithm. For evaluation, the computed readability scores are compared to user ratings. The evaluation showed that iterative linear regression yields a smaller square error than robust regression although this method is only approximative. Both methods yield results outperforming a first manual setting, and for both methods, basically the same set of non-zero weights remain.
@InCollection{vor:der:Brueck:Leveling:2007,
Author         = {vor der Brück, Tim and Leveling, Johannes},
Title          = {Parameter Learning for a Readability Checking Tool},
BookTitle      = {Proceedings of the LWA 2007 (Lernen-Wissen-Adaption),
Workshop KDML},
Publisher      = {Gesellschaft für Informatik},
Editor         = {Alexander Hinneburg},
abstract       = {This paper describes the application of machine
learning methods to determine parameters for DeLite, a
readability checking tool. DeLite pinpoints text
segments that are difficult to understand and computes
for a given text a global readability score, which is a
weighted sum of normalized indicator values. Indicator
values are numeric properties derived from linguistic
units in the text, such as the distance between a verb
and its complements or the number of possible
antecedents for a pronoun. Indicators are normalized by
means of a derivation of the Fermi function with two
parameters. DeLite requires individual parameters for
this normalization function and a weight for each
indicator to compute the global readability score.
Several experiments to determine these parameters were
conducted, using different machine learning approaches.
The training data consists of more than 300 user
ratings of texts from the municipality domain. The
weights for the indicators are learned using two
approaches: i) robust regression with linear
optimization and ii) an approximative iterative linear
regression algorithm. For evaluation, the computed
readability scores are compared to user ratings. The
evaluation showed that iterative linear regression
yields a smaller square error than robust regression
although this method is only approximative. Both
methods yield results outperforming a first manual
setting, and for both methods, basically the same set
of non-zero weights remain.},
website        = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.69.6079},
year           = 2007
}
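
To make the scoring scheme concrete: each raw indicator value is mapped to (0, 1) by a two-parameter Fermi (logistic) function, and the global score is the weighted sum of the normalized values. All parameter values below are invented placeholders, not DeLite's learned parameters:

# Sketch of a Fermi-normalized, weighted readability score.
import math

def fermi(x, x0, slope):
    """Two-parameter logistic: x0 is the midpoint, slope the spread."""
    return 1.0 / (1.0 + math.exp(-(x - x0) / slope))

# (raw value, midpoint, slope, weight) per indicator -- all assumed
indicators = [
    (34.0, 22.0, 4.0, 0.5),  # e.g. sentence length
    (7.0,  4.0,  1.5, 0.3),  # e.g. verb-complement distance
    (3.0,  2.0,  1.0, 0.2),  # e.g. antecedent candidates per pronoun
]

score = sum(w * fermi(x, x0, s) for x, x0, s, w in indicators)
print(f"global readability score: {score:.3f}")
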
• C. Borr, M. Hielscher-Fastabend, and A. Lücking, "Reliability and Validity of Cervical Auscultation," Dysphagia, vol. 22, pp. 225-234, 2007.
[Abstract] [BibTeX]

We conducted a two-part study that contributes to the discussion about cervical auscultation (CA) as a scientifically justifiable and medically useful tool to identify patients with a high risk of aspiration/penetration. We sought to determine (1) acoustic features that mark a deglutition act as dysphagic; (2) acoustic changes in healthy older deglutition profiles compared with those of younger adults; (3) the correctness and concordance of rater judgments based on CA; and (4) if education in CA improves individual reliability. The first part of the study focused on a comparison of the swallow morphology of dysphagic as opposed to healthy subjects' deglutition in terms of structure properties of the pharyngeal phase of deglutition. We obtained the following results. The duration of deglutition apnea is significantly higher in the older group than in the younger one. Comparing the younger group and the dysphagic group we found significant differences in duration of deglutition apnea, onset time, and number of gulps. Just one parameter, number of gulps, distinguishes significantly between the older and the dysphagic groups. The second part of the study aimed at evaluating the reliability of CA in detecting dysphagia measured as the concordance and the correctness of CA experts in classifying swallowing sounds. The interrater reliability coefficient AC1 resulted in a value of 0.46, which is to be interpreted as fair agreement. Furthermore, we found that comparison with radiologically defined aspiration/penetration for the group of experts (speech and language therapists) yielded 70% specificity and 94% sensitivity. We conclude that the swallowing sounds contain audible cues that should, in principle, permit reliable classification and view CA as an early warning system for identifying patients with a high risk of aspiration/penetration; however, it is not appropriate as a stand-alone tool.
@Article{Borr:Luecking:Hierlscher:2007,
Author         = {Borr, Christiane and Hielscher-Fastabend, Martina and
Lücking, Andy},
Title          = {Reliability and Validity of Cervical Auscultation},
Journal        = {Dysphagia},
Volume         = {22},
Pages          = {225--234},
abstract       = {We conducted a two-part study that contributes to the
discussion about cervical auscultation (CA) as a
scientifically justifiable and medically useful tool to
identify patients with a high risk of
aspiration/penetration. We sought to determine (1)
acoustic features that mark a deglutition act as
dysphagic; (2) acoustic changes in healthy older
deglutition profiles compared with those of younger
adults; (3) the correctness and concordance of rater
judgments based on CA; and (4) if education in CA
improves individual reliability. The first part of the
study focused on a comparison of the swallow morphology
of dysphagic as opposed to healthy subjects'
deglutition in terms of structure properties of the
pharyngeal phase of deglutition. We obtained the
following results. The duration of deglutition apnea is
significantly higher in the older group than in the
younger one. Comparing the younger group and the
dysphagic group we found significant differences in
duration of deglutition apnea, onset time, and number
of gulps. Just one parameter, number of gulps,
distinguishes significantly between the older and the
dysphagic groups. The second part of the study aimed at
evaluating the reliability of CA in detecting dysphagia
measured as the concordance and the correctness of CA
experts in classifying swallowing sounds. The
interrater reliability coefficient AC1 resulted in a
value of 0.46, which is to be interpreted as fair
agreement. Furthermore, we found that comparison with
radiologically defined aspiration/penetration for the
group of experts (speech and language therapists)
yielded 70% specificity and 94% sensitivity. We
conclude that the swallowing sounds contain audible
cues that should, in principle, permit reliable
classification and view CA as an early warning system
for identifying patients with a high risk of
aspiration/penetration; however, it is not appropriate
as a stand-alone tool.},
doi            = {10.1007/s00455-007-9078-3},
issue          = {3},
pdf            = {http://www.shkim.eu/cborr/ca5manuscript.pdf},
publisher      = {Springer New York},
url            = {http://dx.doi.org/10.1007/s00455-007-9078-3},
year           = 2007
}
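
For reference, a small sketch of the AC1 statistic (Gwet's first-order agreement coefficient) reported in the study above, in its simplest form for two raters and two categories. The example ratings are invented:

# Gwet's AC1 for two raters, binary ratings: chance-corrected
# agreement with chance term 2*pi*(1-pi), pi = mean prevalence.
def ac1(rater1, rater2):
    n = len(rater1)
    pa = sum(a == b for a, b in zip(rater1, rater2)) / n  # raw agreement
    pi = (sum(rater1) + sum(rater2)) / (2 * n)            # mean prevalence
    pe = 2 * pi * (1 - pi)                                # chance agreement
    return (pa - pe) / (1 - pe)

# 1 = "aspiration/penetration heard", 0 = "inconspicuous" (invented)
r1 = [1, 0, 1, 1, 0, 0, 1, 0]
r2 = [1, 0, 0, 1, 0, 1, 1, 0]
print(round(ac1(r1, r2), 2))  # 0.5
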
• A. Kranstedt, A. Lücking, T. Pfeiffer, H. Rieser, and M. Staudacher, Locating Objects by Pointing, 2007.
[BibTeX]

@Misc{Kranstedt:et:al:2007,
Author         = {Kranstedt, Alfred and Lücking, Andy and Pfeiffer,
Thies and Rieser, Hannes and Staudacher, Marc},
Title          = {Locating Objects by Pointing},
HowPublished   = {3rd International Conference of the International
Society for Gesture Studies. Evanston, IL, USA},
keywords       = {own},
month          = {6},
year           = 2007
}
• M. Asadullah, M. Z. Islam, and M. Khan, "Error-tolerant Finite-state Recognizer and String Pattern Similarity Based Spell-Checker for Bengali," in 5th International Conference on Natural Language Processing (ICON) as a poster, Hyderabad, India, January 2007, 2007.
[Abstract] [BibTeX]

A crucial figure of merit for a spelling checker is not just whether it can detect misspelled words, but also in how it ranks the suggestions for the word. Spelling checker algorithms using edit distance methods tend to produce a large number of possibilities for misspelled words. We propose an alternative approach to checking the spelling of Bangla text that uses a finite state automaton (FSA) to probabilistically create the suggestion list for a misspelled word. FSA has proven to be an effective method for problems requiring probabilistic solution and high error tolerance. We start by using a finite state representation for all the words in the Bangla dictionary; the algorithm then uses the state tables to test a string, and in case of an erroneous string, try to find all possible solutions by attempting singular and multi-step transitions to consume one or more characters and using the subsequent characters as look-ahead; and finally, we use backtracking to add each possible solution to the suggestion list. The use of finite state representation for the word implies that the algorithm is much more efficient in the case of non-inflected forms; in case of nouns, it is even more significant as Bangla nouns are heavily used in the non-inflected form. In terms of error detection and correction, the algorithm uses the statistics of Bangla error patterns and thus produces a small number of significant suggestions. One notable limitation is the inability to handle transposition errors as single edit distance errors. This is not as significant as it may seem since transposition errors are not as common as other errors in Bangla. This paper presents the structure and the algorithm to implement a practical Bangla spell-checker, and discusses the results obtained from the prototype implementation.
@InProceedings{Asadullah:Zahurul:Khan:2007,
Author         = {Asadullah, Munshi and Islam, Md. Zahurul and Khan,
Mumit},
Title          = {Error-tolerant Finite-state Recognizer and String
Pattern Similarity Based Spell-Checker for Bengali},
BookTitle      = {5th International Conference on Natural Language
Processing (ICON) as a poster,Hyderabad, India, January
2007},
abstract       = {A crucial figure of merit for a spelling checker is
not just whether it can detect misspelled words, but
also in how it ranks the suggestions for the word.
Spelling checker algorithms using edit distance methods
tend to produce a large number of possibilities for
misspelled words. We propose an alternative approach to
checking the spelling of Bangla text that uses a finite
state automaton (FSA) to probabilistically create the
suggestion list for a misspelled word. FSA has proven
to be an effective method for problems requiring
probabilistic solution and high error tolerance. We
start by using a finite state representation for all
the words in the Bangla dictionary; the algorithm then
uses the state tables to test a string, and in case of
an erroneous string, try to find all possible solutions
by attempting singular and multi-step transitions to
consume one or more characters and using the subsequent
characters as look-ahead; and finally, we use
backtracking to add each possible solution to the
suggestion list. The use of finite state representation
for the word implies that the algorithm is much more
efficient in the case of non-inflected forms; in case
of nouns, it is even more significant as Bangla nouns
are heavily used in the non-inflected form. In terms of
error detection and correction, the algorithm uses the
statistics of Bangla error patterns and thus produces a
small number of significant suggestions. One notable
limitation is the inability to handle transposition
errors as single edit distance errors. This is not as
significant as it may seem since transposition errors
are not as common as other errors in Bangla. This paper
presents the structure and the algorithm to implement a
practical Bangla spell-checker, and discusses the
results obtained from the prototype implementation.},
owner          = {zahurul},
timestamp      = {2011.08.02},
year           = 2007
}
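
As an illustration of error-tolerant lookup of this general kind, the following sketch walks a dictionary trie while admitting a bounded number of character edits and collects the reachable words as suggestions. The transliterated toy dictionary and the edit budget are assumptions, and the naive recursion may revisit states, unlike an optimized finite-state recognizer:

# Toy error-tolerant dictionary lookup over a trie (budget = max edits).
def build_trie(words):
    root = {}
    for w in words:
        node = root
        for ch in w:
            node = node.setdefault(ch, {})
        node["$"] = True  # end-of-word marker
    return root

def suggest(node, word, budget, prefix="", out=None):
    if out is None:
        out = set()
    if not word:
        if "$" in node:
            out.add(prefix)
        if budget > 0:  # allow trailing insertions within budget
            for ch, child in node.items():
                if ch != "$":
                    suggest(child, "", budget - 1, prefix + ch, out)
        return out
    for ch, child in node.items():
        if ch == "$":
            continue
        cost = 0 if ch == word[0] else 1  # match or substitution
        if budget - cost >= 0:
            suggest(child, word[1:], budget - cost, prefix + ch, out)
        if budget > 0:
            suggest(child, word, budget - 1, prefix + ch, out)  # insertion
    if budget > 0:
        suggest(node, word[1:], budget - 1, prefix, out)  # deletion
    return out

trie = build_trie(["amar", "amra", "tumi", "tomar"])
print(sorted(suggest(trie, "amr", budget=1)))  # ['amar', 'amra']
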
• M. Z. Islam, M. N. Uddin, and M. Khan, "A Light Weight Stemmer for Bengali and Its Use in Spelling Checker," in 1st International Conference on Digital Communications and Computer Applications (DCCA2007), 2007.
[Abstract] [BibTeX]

Stemming is an operation that splits a word into the constituent root part and affix without doing complete morphological analysis. It is used to improve the performance of spelling checkers and information retrieval applications, where morphological analysis would be too computationally expensive. For spelling checkers specifically, using stemming may drastically reduce the dictionary size, often a bottleneck for mobile and embedded devices. This paper presents a computationally inexpensive stemming algorithm for Bengali, which handles suffix removal in a domain independent way. The evaluation of the proposed algorithm in a Bengali spelling checker indicates that it can be effectively used in information retrieval applications in general.
@InProceedings{Zahurul:Uddin:Khan:2007,
Author         = {Islam, Md. Zahurul and Uddin, Md. Nizam and Khan,
Mumit},
Title          = {A Light Weight Stemmer for Bengali and Its Use in
Spelling Checker},
BookTitle      = {1st International Conference on Digital Communications
and Computer Applications (DCCA2007)},
abstract       = {Stemming is an operation that splits a word into the
constituent root part and affix without doing complete
morphological analysis. It is used to improve the
performance of spelling checkers and information
retrieval applications, where morphological analysis
would be too computationally expensive. For spelling
checkers specifically, using stemming may drastically
reduce the dictionary size, often a bottleneck for
mobile and embedded devices. This paper presents a
computationally inexpensive stemming algorithm for
Bengali, which handles suffix removal in a domain
independent way. The evaluation of the proposed
algorithm in a Bengali spelling checker indicates that
it can be effectively used in information retrieval
applications in general.},
owner          = {zahurul},
timestamp      = {2011.08.02},
year           = 2007
}
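
A minimal longest-match suffix stripper of the kind the abstract describes. The transliterated suffix list is an invented placeholder rather than the paper's Bengali rule set:

# Lightweight stemming: strip the longest matching suffix, keeping a
# minimum stem length; no full morphological analysis involved.
SUFFIXES = sorted(["der", "ra", "ke", "ta", "ti", "gulo"],
                  key=len, reverse=True)  # try longest suffixes first

def stem(word, min_stem=2):
    for suf in SUFFIXES:
        if word.endswith(suf) and len(word) - len(suf) >= min_stem:
            return word[: -len(suf)]
    return word  # no rule applies: word is its own stem

for w in ["chhelera", "chheleder", "boita", "boi"]:
    print(w, "->", stem(w))
# chhelera -> chhele, chheleder -> chhele, boita -> boi, boi -> boi
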
• M. Z. Islam and M. Khan, "Bangla Verb Morphology and a Multilingual Computational Morphology FrameWork for PC-KIMMO," in The Proceedings of Workshop on Morpho-Syntactic Analysis by the School of Asian Applied Natural Language Processing for Language Diversity and Language Resource Development (ADD), Bangkok, Thailand, 2007.
[BibTeX]

@InProceedings{Zahurul:Khan:2007,
Author         = {Islam, Md. Zahurul and Khan, Mumit},
Title          = {Bangla Verb Morphology and a Multilingual
Computational Morphology FrameWork for PC-KIMMO},
BookTitle      = {The Proceedings of Workshop on Morpho-Syntactic
Analysis by the School of Asian Applied Natural
Language Processing for Language Diversity and Language
Resource Development (ADD), Bangkok, Thailand},
owner          = {zahurul},
timestamp      = {2011.08.02},
year           = 2007
}
• A. Mehler, P. Geibel, and O. Abramov, "Structural Classifiers of Text Types: Towards a Novel Model of Text Representation," Journal for Language Technology and Computational Linguistics (JLCL), vol. 22, iss. 2, pp. 51-66, 2007.
[Abstract] [BibTeX]

Texts can be distinguished in terms of their content, function, structure or layout (Brinker, 1992; Bateman et al., 2001; Joachims, 2002; Power et al., 2003). These reference points do not open necessarily orthogonal perspectives on text classification. As part of explorative data analysis, text classification aims at automatically dividing sets of textual objects into classes of maximum internal homogeneity and external heterogeneity. This paper deals with classifying texts into text types whose instances serve more or less homogeneous functions. Other than mainstream approaches, which rely on the vector space model (Sebastiani, 2002) or some of its descendants (Baeza-Yates and Ribeiro-Neto, 1999) and, thus, on content-related lexical features, we solely refer to structural differentiae. That is, we explore patterns of text structure as determinants of class membership. Our starting point is tree-like text representations which induce feature vectors and tree kernels. These kernels are utilized in supervised learning based on cross-validation as a method of model selection (Hastie et al., 2001) by example of a corpus of press communication. For a subset of categories we show that classification can be performed very well by structural differentia only.
@Article{Mehler:Geibel:Pustylnikov:2007,
Author         = {Mehler, Alexander and Geibel, Peter and Abramov, Olga},
Title          = {Structural Classifiers of Text Types: Towards a Novel
Model of Text Representation},
Journal        = {Journal for Language Technology and Computational
Linguistics (JLCL)},
Volume         = {22},
Number         = {2},
Pages          = {51-66},
abstract       = {Texts can be distinguished in terms of their content,
function, structure or layout (Brinker, 1992; Bateman
et al., 2001; Joachims, 2002; Power et al., 2003).
These reference points do not open necessarily
orthogonal perspectives on text classification. As part
of explorative data analysis, text classification aims
at automatically dividing sets of textual objects into
classes of maximum internal homogeneity and external
heterogeneity. This paper deals with classifying texts
into text types whose instances serve more or less
homogeneous functions. Other than mainstream
approaches, which rely on the vector space model
(Sebastiani, 2002) or some of its descendants
(Baeza-Yates and Ribeiro-Neto, 1999) and, thus, on
content-related lexical features, we solely refer to
structural differentiae. That is, we explore patterns
of text structure as determinants of class membership.
Our starting point is tree-like text representations
which induce feature vectors and tree kernels. These
kernels are utilized in supervised learning based on
cross-validation as a method of model selection (Hastie
et al., 2001) by example of a corpus of press
communication. For a subset of categories we show that
classification can be performed very well by structural
differentia only.},
website        = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.154.604},
year           = 2007
}
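
The entry above classifies texts by structural features and tree kernels over tree-like text representations. A much reduced relative of such a kernel (a toy sketch, not the model evaluated in the paper) simply counts pairs of identical complete subtrees:

```python
from collections import Counter


def subtrees(tree):
    """Yield every complete subtree of a tree encoded as the tuple
    (label, child, child, ...)."""
    yield tree
    for child in tree[1:]:
        yield from subtrees(child)


def subtree_kernel(t1, t2) -> int:
    """Count pairs of identical complete subtrees in t1 and t2; trees
    with similar structure score higher, regardless of their words."""
    c1, c2 = Counter(subtrees(t1)), Counter(subtrees(t2))
    return sum(c1[s] * c2[s] for s in c1.keys() & c2.keys())


# Two tiny document structures: section -> paragraphs -> sentences.
doc_a = ("sec", ("par", ("s",), ("s",)), ("par", ("s",)))
doc_b = ("sec", ("par", ("s",)), ("par", ("s",)))
print(subtree_kernel(doc_a, doc_b))  # -> 8
```
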
• O. Abramov and A. Mehler, "Structural Differentiae of Text Types. A Quantitative Model," in Proceedings of the 31st Annual Conference of the German Classification Society on Data Analysis, Machine Learning, and Applications (GfKl), 2007, pp. 655–662.
[BibTeX]

@InProceedings{Abramov:Mehler:2007:b,
Author         = {Abramov, Olga and Mehler, Alexander},
Title          = {Structural Differentiae of Text Types. A Quantitative
Model},
BookTitle      = {Proceedings of the 31st Annual Conference of the
German Classification Society on Data Analysis, Machine
Learning, and Applications (GfKl)},
Pages          = {655–662},
pdf            = {http://wwwhomes.uni-bielefeld.de/opustylnikov/pustylnikov/pdfs/gfkl.pdf},
website        = {http://www.springerprofessional.de/077---structural-differentiae-of-text-types--a-quantitative-model/1957362.html},
year           = 2007
}
• A. Mehler and R. Köhler, Aspects of Automatic Text Analysis: Festschrift in Honor of Burghard Rieger, A. Mehler and R. Köhler, Eds., Berlin/New York: Springer, 2007.
[BibTeX]

@Book{Mehler:Koehler:2007:a,
Author         = {Mehler, Alexander and Köhler, Reinhard},
Editor         = {Mehler, Alexander and Köhler, Reinhard},
Title          = {Aspects of Automatic Text Analysis: Festschrift in
Honor of Burghard Rieger},
Publisher      = {Springer},
Series         = {Studies in Fuzziness and Soft Computing},
pagetotal      = {464},
review         = {http://www.degruyter.com/view/j/zrs.2011.3.issue-2/zrs.2011.050/zrs.2011.050.xml},
review2        = {http://irsg.bcs.org/informer/Informer27.pdf},
website        = {http://www.springer.com/de/book/9783540375203},
year           = 2007
}
• A. Mehler and A. Storrer, "What are Ontologies Good For? Evaluating Terminological Ontologies in the Framework of Text Graph Classification," in Proceedings of OTT '06 – Ontologies in Text Technology: Approaches to Extract Semantic Knowledge from Structured Information, Osnabrück, 2007, pp. 11-18.
[BibTeX]

@InProceedings{Mehler:Storrer:2007,
Author         = {Mehler, Alexander and Storrer, Angelika},
Title          = {What are Ontologies Good For? Evaluating
Terminological Ontologies in the Framework of Text
Graph Classification},
BookTitle      = {Proceedings of OTT '06 – Ontologies in Text
Technology: Approaches to Extract Semantic Knowledge
from Structured Information},
Editor         = {Mönnich, Uwe and Kühnberger, Kai-Uwe},
Series         = {Publications of the Institute of Cognitive Science
(PICS)},
Pages          = {11-18},
pdf            = {http://cogsci.uni-osnabrueck.de/~ott06/ott06-abstracts/Mehler_Storrer_abstract.pdf},
website        = {http://citeseer.uark.edu:8080/citeseerx/viewdoc/summary?doi=10.1.1.91.2979},
year           = 2007
}
• M. Stührenberg, D. Goecke, N. Diewald, A. Mehler, and I. Cramer, "Web-based Annotation of Anaphoric Relations and Lexical Chains," in Proceedings of the Linguistic Annotation Workshop, ACL 2007, 2007, pp. 140–147.
[BibTeX]

@InProceedings{Stuehrenberg:Goecke:Diewald:Mehler:Cramer:2007:a,
Author         = {Stührenberg, Maik and Goecke, Daniela and Diewald,
Nils and Mehler, Alexander and Cramer, Irene},
Title          = {Web-based Annotation of Anaphoric Relations and
Lexical Chains},
BookTitle      = {Proceedings of the Linguistic Annotation Workshop, ACL
2007},
Pages          = {140–147},
pdf            = {http://www.aclweb.org/anthology/W07-1523},
website        = {https://www.researchgate.net/publication/234800610_Web-based_annotation_of_anaphoric_relations_and_lexical_chains},
year           = 2007
}
• R. Ferrer i Cancho, A. Mehler, O. Abramov, and A. Díaz-Guilera, "Correlations in the organization of large-scale syntactic dependency networks," in Proceedings of Graph-based Methods for Natural Language Processing (TextGraphs-2) at the Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL-HLT 2007), Rochester, New York, 2007, pp. 65-72.
[BibTeX]

@InProceedings{Ferrer:i:Cancho:Mehler:Pustylnikov:Diaz-Guilera:2007:a,
Author         = {Ferrer i Cancho, Ramon and Mehler, Alexander and
Abramov, Olga and Díaz-Guilera, Albert},
Title          = {Correlations in the organization of large-scale
syntactic dependency networks},
BookTitle      = {Proceedings of Graph-based Methods for Natural
Language Processing (TextGraphs-2) at the Annual
Conference of the North American Chapter of the
Association for Computational Linguistics (NAACL-HLT
2007), Rochester, New York},
Pages          = {65-72},
year           = 2007
}
• R. Gleim, A. Mehler, H. Eikmeyer, and H. Rieser, "Ein Ansatz zur Repräsentation und Verarbeitung großer Korpora multimodaler Daten," in Data Structures for Linguistic Resources and Applications. Proceedings of the Biennial GLDV Conference 2007, 11.–13. April, Universität Tübingen, Tübingen, 2007, pp. 275-284.
[BibTeX]

@InProceedings{Gleim:Mehler:Eikmeyer:Rieser:2007,
Author         = {Gleim, Rüdiger and Mehler, Alexander and Eikmeyer,
Hans-Jürgen and Rieser, Hannes},
Title          = {Ein Ansatz zur Repr{\"a}sentation und Verarbeitung
gro{\ss}er Korpora multimodaler Daten},
BookTitle      = {Data Structures for Linguistic Resources and
Applications. Proceedings of the Biennial GLDV
Conference 2007, 11.–13. April, Universit{\"a}t
Tübingen},
Editor         = {Rehm, Georg and Witt, Andreas and Lemnitzer, Lothar},
Pages          = {275-284},
Publisher      = {Narr},
year           = 2007
}
• A. Mehler, "Aspectos Metodológicos da Semiótica Computacional," in Computação, Cognição e Semiose, J. Queiroz, R. Gudwin, and A. Loula, Eds., Federal University of Bahia: EDUFBA, 2007, pp. 145-157.
[BibTeX]

@InCollection{Mehler:2004:2007,
Author         = {Mehler, Alexander},
Title          = {Aspectos Metodológicos da Semiótica Computacional},
BookTitle      = {Computação, Cognição e Semiose},
Publisher      = {EDUFBA},
Editor         = {Queiroz, João and Gudwin, Ricardo and Loula, Angelo},
Pages          = {145-157},
Address        = {Federal University of Bahia},
year           = 2007
}
• A. Mehler, "Compositionality in Quantitative Semantics. A Theoretical Perspective on Text Mining," in Aspects of Automatic Text Analysis, A. Mehler and R. Köhler, Eds., Berlin/New York: Springer, 2007, pp. 139-167.
[Abstract] [BibTeX]

This chapter introduces a variant of the principle of compositionality in quantitative text semantics as an alternative to the bag-of-features approach. The variant includes effects of context-sensitive interpretation as well as processes of meaning constitution and change in the sense of usage-based semantics. Its starting point is a combination of semantic space modeling and text structure analysis. The principle is implemented by means of a hierarchical constraint satisfaction process which utilizes the notion of hierarchical text structure superimposed by graph-inducing coherence relations. The major contribution of the chapter is a conceptualization and formalization of the principle of compositionality in terms of semantic spaces which tackles some well known deficits of existing approaches. In particular this relates to the missing linguistic interpretability of statistical meaning representations.
@InCollection{Mehler:2007:b,
Author         = {Mehler, Alexander},
Title          = {Compositionality in Quantitative Semantics. A
Theoretical Perspective on Text Mining},
BookTitle      = {Aspects of Automatic Text Analysis},
Publisher      = {Springer},
Editor         = {Mehler, Alexander and Köhler, Reinhard},
Series         = {Studies in Fuzziness and Soft Computing},
Pages          = {139-167},
abstract       = {This chapter introduces a variant of the principle of
compositionality in quantitative text semantics as an
alternative to the bag-of-features approach. The
variant includes effects of context-sensitive
interpretation as well as processes of meaning
constitution and change in the sense of usage-based
semantics. Its starting point is a combination of
semantic space modeling and text structure analysis.
The principle is implemented by means of a hierarchical
constraint satisfaction process which utilizes the
notion of hierarchical text structure superimposed by
graph-inducing coherence relations. The major
contribution of the chapter is a conceptualization and
formalization of the principle of compositionality in
terms of semantic spaces which tackles some well known
deficits of existing approaches. In particular this
relates to the missing linguistic interpretability of
statistical meaning representations. },
year           = 2007
}
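
The hierarchical, structure-sensitive composition described in this abstract can be caricatured numerically: if word vectors are combined along the text's tree structure rather than pooled into one bag, the tree shape itself changes the resulting text vector. The vectors and the averaging rule below are invented for illustration and are not the chapter's formalism:

```python
# Invented three-dimensional "semantic space" coordinates.
WORD_SPACE = {
    "bank":  [0.8, 0.1, 0.4],
    "river": [0.1, 0.9, 0.2],
    "money": [0.7, 0.0, 0.9],
}


def compose(node):
    """Vector for a text node: a word (leaf) or a tuple of child nodes
    (phrase, sentence, section, ...); inner nodes average their children."""
    if isinstance(node, str):
        return WORD_SPACE[node]
    vecs = [compose(child) for child in node]
    dim = len(vecs[0])
    return [sum(v[i] for v in vecs) / len(vecs) for i in range(dim)]


# The same three words under two different text structures:
flat   = compose(("money", "river", "bank"))    # bag-of-features reading
nested = compose(("money", ("river", "bank")))  # hierarchical reading
print(flat)    # -> roughly [0.53, 0.33, 0.50]
print(nested)  # -> [0.575, 0.25, 0.6]
```
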
• M. Dehmer and A. Mehler, "A New Method of Measuring the Similarity for a Special Class of Directed Graphs," Tatra Mountains Mathematical Publications, vol. 36, pp. 39-59, 2007.
[BibTeX]

@Article{Dehmer:Mehler:2007:a,
Author         = {Dehmer, Matthias and Mehler, Alexander},
Title          = {A New Method of Measuring the Similarity for a Special
Class of Directed Graphs},
Journal        = {Tatra Mountains Mathematical Publications},
Volume         = {36},
Pages          = {39-59},
website        = {https://www.researchgate.net/publication/228905939_A_new_method_of_measuring_similarity_for_a_special_class_of_directed_graphs},
year           = 2007
}
• P. Geibel, U. Krumnack, O. Abramov, A. Mehler, H. Gust, and K. Kühnberger, "Structure-Sensitive Learning of Text Types," in Proceedings of AI 2007: Advances in Artificial Intelligence, 20th Australian Joint Conference on Artificial Intelligence, Gold Coast, Australia, December 2-6, 2007, 2007, pp. 642-646.
[Abstract] [BibTeX]

In this paper, we discuss the structure based classification of documents based on their logical document structure, i.e., their DOM trees. We describe a method using predefined structural features and also four tree kernels suitable for such structures. We evaluate the methods experimentally on a corpus containing the DOM trees of newspaper articles, and on the well-known SUSANNE corpus. We will demonstrate that, for the two corpora, many text types can be learned based on structural features only.
@InProceedings{Geibel:Krumnack:Pustylnikov:Mehler:Gust:Kuehnberger:2007,
Author         = {Geibel, Peter and Krumnack, Ulf and Abramov, Olga and
Mehler, Alexander and Gust, Helmar and Kühnberger,
Kai-Uwe},
Title          = {Structure-Sensitive Learning of Text Types},
BookTitle      = {Proceedings of AI 2007: Advances in Artificial
Intelligence, 20th Australian Joint Conference on
Artificial Intelligence, Gold Coast, Australia,
December 2-6, 2007},
Editor         = {Orgun, Mehmet A. and Thornton, John},
Volume         = {4830},
Series         = {Lecture Notes in Computer Science},
Pages          = {642-646},
Publisher      = {Springer},
abstract       = {In this paper, we discuss the structure based
classification of documents based on their logical
document structure, i.e., their DOM trees. We describe
a method using predefined structural features and also
four tree kernels suitable for such structures. We
evaluate the methods experimentally on a corpus
containing the DOM trees of newspaper articles, and on
the well-known SUSANNE corpus. We will demonstrate
that, for the two corpora, many text types can be
learned based on structural features only.},
year           = 2007
}

### 2006 (22)

• A. Mehler, R. Gleim, and M. Dehmer, "Towards Structure-Sensitive Hypertext Categorization," in Proceedings of the 29th Annual Conference of the German Classification Society, March 9-11, 2005, Universität Magdeburg, Berlin/New York, 2006, pp. 406-413.
[Abstract] [BibTeX]

Hypertext categorization is the task of automatically assigning category labels to hypertext units. Comparable to text categorization it stays in the area of function learning based on the bag-of-features approach. This scenario faces the problem of a many-to-many relation between websites and their hidden logical document structure. The paper argues that this relation is a prevalent characteristic which interferes with any effort of applying the classical apparatus of categorization to web genres. This is confirmed by a threefold experiment in hypertext categorization. In order to outline a solution to this problem, the paper sketches an alternative method of unsupervised learning which aims at bridging the gap between statistical and structural pattern recognition (Bunke et al. 2001) in the area of web mining.
@InProceedings{Mehler:Gleim:Dehmer:2006,
Author         = {Mehler, Alexander and Gleim, Rüdiger and Dehmer,
Matthias},
Title          = {Towards Structure-Sensitive Hypertext Categorization},
BookTitle      = {Proceedings of the 29th Annual Conference of the
German Classification Society, March 9-11, 2005,
Universit{\"a}t Magdeburg},
Editor         = {Spiliopoulou, Myra and Kruse, Rudolf and Borgelt,
Christian and Nürnberger, Andreas and Gaul, Wolfgang},
Pages          = {406-413},
Publisher      = {Springer},
abstract       = {Hypertext categorization is the task of automatically
assigning category labels to hypertext units.
Comparable to text categorization it stays in the area
of function learning based on the bag-of-features
approach. This scenario faces the problem of a
many-to-many relation between websites and their hidden
logical document structure. The paper argues that this
relation is a prevalent characteristic which interferes with
any effort of applying the classical apparatus of
categorization to web genres. This is confirmed by a
threefold experiment in hypertext categorization. In
order to outline a solution to this problem, the paper
sketches an alternative method of unsupervised learning
which aims at bridging the gap between statistical and
structural pattern recognition (Bunke et al. 2001) in
the area of web mining.},
year           = 2006
}
• A. Mehler, "A Network Perspective on Intertextuality," in Exact Methods in the Study of Language and Text, P. Grzybek and R. Köhler, Eds., Berlin/New York: De Gruyter, 2006, pp. 437-446.
[BibTeX]

@InCollection{Mehler:2006:d,
Author         = {Mehler, Alexander},
Title          = {A Network Perspective on Intertextuality},
BookTitle      = {Exact Methods in the Study of Language and Text},
Publisher      = {De Gruyter},
Editor         = {Grzybek, Peter and Köhler, Reinhard},
Series         = {Quantitative Linguistics},
Pages          = {437-446},
year           = 2006
}
• M. Dehmer, F. Emmert-Streib, A. Mehler, and J. Kilian, "Measuring the Structural Similarity of Web-based Documents: A Novel Approach," International Journal of Computational Intelligence, vol. 3, iss. 1, pp. 1-7, 2006.
[Abstract] [BibTeX]

Most known methods for measuring the structural similarity of document structures are based on, e.g., tag measures, path metrics and tree measures in terms of their DOM-Trees. Other methods measure the similarity in the framework of the well known vector space model. In contrast to these we present a new approach to measuring the structural similarity of web-based documents represented by so called generalized trees which are more general than DOM-Trees which represent only directed rooted trees. We will design a new similarity measure for graphs representing web-based hypertext structures. Our similarity measure is mainly based on a novel representation of a graph as strings of linear integers, whose components represent structural properties of the graph. The similarity of two graphs is then defined as the optimal alignment of the underlying property strings. In this paper we apply the well known technique of sequence alignments to solve a novel and challenging problem: Measuring the structural similarity of generalized trees. More precisely, we first transform our graphs considered as high dimensional objects in linear structures. Then we derive similarity values from the alignments of the property strings in order to measure the structural similarity of generalized trees. Hence, we transform a graph similarity problem to a string similarity problem. We demonstrate that our similarity measure captures important structural information by applying it to two different test sets consisting of graphs representing web-based documents.
@Article{Dehmer:Emmert:Streib:Mehler:Kilian:2006,
Author         = {Dehmer, Matthias and Emmert-Streib, Frank and Mehler,
Alexander and Kilian, Jürgen},
Title          = {Measuring the Structural Similarity of Web-based
Documents: A Novel Approach},
Journal        = {International Journal of Computational Intelligence},
Volume         = {3},
Number         = {1},
Pages          = {1-7},
abstract       = {Most known methods for measuring the structural
similarity of document structures are based on, e.g.,
tag measures, path metrics and tree measures in terms
of their DOM-Trees. Other methods measure the
similarity in the framework of the well known vector
space model. In contrast to these we present a new
approach to measuring the structural similarity of
web-based documents represented by so called
generalized trees which are more general than DOM-Trees
which represent only directed rooted trees. We will
design a new similarity measure for graphs representing
web-based hypertext structures. Our similarity measure
is mainly based on a novel representation of a graph as
strings of linear integers, whose components represent
structural properties of the graph. The similarity of
two graphs is then defined as the optimal alignment of
the underlying property strings. In this paper we apply
the well known technique of sequence alignments to
solve a novel and challenging problem: Measuring the
structural similarity of generalized trees. More
precisely, we first transform our graphs considered as
high dimensional objects in linear structures. Then we
derive similarity values from the alignments of the
property strings in order to measure the structural
similarity of generalized trees. Hence, we transform a
graph similarity problem to a string similarity
problem. We demonstrate that our similarity measure
captures important structural information by applying
it to two different test sets consisting of graphs
representing web-based documents.},
pdf            = {http://waset.org/publications/15928/measuring-the-structural-similarity-of-web-based-documents-a-novel-approach},
website        = {http://connection.ebscohost.com/c/articles/24839145/measuring-structural-similarity-web-based-documents-novel-approach},
year           = 2006
}
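
To give the flavor of this string-based graph comparison (a simplification, not the published measure), a tree can be serialized into a breadth-first out-degree sequence, one crude "property string", and two such sequences scored with a standard alignment-style dynamic program:

```python
def degree_string(tree) -> list:
    """Breadth-first out-degree sequence of a tree given as nested
    lists of children; a crude structural 'property string'."""
    seq, queue = [], [tree]
    while queue:
        node = queue.pop(0)
        seq.append(len(node))
        queue.extend(node)
    return seq


def alignment_similarity(s: list, t: list) -> float:
    """Alignment (edit-distance) score of two sequences, normalized
    so that 1.0 means identical and 0.0 maximally different."""
    m, n = len(s), len(t)
    d = [[0] * (n + 1) for _ in range(m + 1)]
    for i in range(m + 1):
        d[i][0] = i
    for j in range(n + 1):
        d[0][j] = j
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            cost = 0 if s[i - 1] == t[j - 1] else 1
            d[i][j] = min(d[i - 1][j] + 1, d[i][j - 1] + 1,
                          d[i - 1][j - 1] + cost)
    return 1 - d[m][n] / max(m, n)


# Two invented page structures: a root with two sections.
page_a = [[[], []], [[]]]  # sections with two leaves and one leaf
page_b = [[[], []], []]    # similar, but the second section is empty
print(alignment_similarity(degree_string(page_a), degree_string(page_b)))
# -> 0.833..., i.e. one edit over a length-6 property string
```
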
• A. Mehler and R. Gleim, "The Net for the Graphs – Towards Webgenre Representation for Corpus Linguistic Studies," in WaCky! Working Papers on the Web as Corpus, M. Baroni and S. Bernardini, Eds., Bologna: Gedit, 2006, pp. 191-224.
[BibTeX]

@InCollection{Mehler:Gleim:2006:b,
Author         = {Mehler, Alexander and Gleim, Rüdiger},
Title          = {The Net for the Graphs – Towards Webgenre
Representation for Corpus Linguistic Studies},
BookTitle      = {WaCky! Working Papers on the Web as Corpus},
Publisher      = {Gedit},
Editor         = {Baroni, Marco and Bernardini, Silvia},
Pages          = {191-224},
website        = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.510.4125},
year           = 2006
}
• R. Gleim, A. Mehler, and M. Dehmer, "Web Corpus Mining by Instance of Wikipedia," in Proceedings of the EACL 2006 Workshop on Web as Corpus, April 3-7, 2006, Trento, Italy, 2006, pp. 67-74.
[Abstract] [BibTeX]

Workshop organizer: Adam Kilgarriff
@InProceedings{Gleim:Mehler:Dehmer:2006:a,
Author         = {Gleim, Rüdiger and Mehler, Alexander and Dehmer,
Matthias},
Title          = {Web Corpus Mining by Instance of Wikipedia},
BookTitle      = {Proceedings of the EACL 2006 Workshop on Web as
Corpus, April 3-7, 2006, Trento, Italy},
Editor         = {Kilgarriff, Adam and Baroni, Marco},
Pages          = {67-74},
abstract       = {Workshop organizer: Adam Kilgarriff},
pdf            = {http://www.aclweb.org/anthology/W06-1710},
website        = {http://pub.uni-bielefeld.de/publication/1773538},
year           = 2006
}
• A. Mehler, "In Search of a Bridge Between Network Analysis in Computational Linguistics and Computational Biology-A Conceptual Note.," in BIOCOMP, 2006, pp. 496-502.
[BibTeX]

@InProceedings{mehler:2006,
Author         = {Mehler, Alexander},
Title          = {In Search of a Bridge Between Network Analysis in
Computational Linguistics and Computational Biology-A
Conceptual Note.},
BookTitle      = {BIOCOMP},
Pages          = {496--502},
pdf            = {https://pdfs.semanticscholar.org/81aa/0b840ed413089d69908cff60628a92609ccd.pdf},
year           = 2006
}
• T. vor der Brück and S. Busemann, "Automatic Error Correction for Tree-Mapping Grammars," in Proceedings of KONVENS 2006, Konstanz, Germany, 2006, pp. 1-8.
[Abstract] [BibTeX]

Tree mapping grammars are used in natural language generation (NLG) to map non-linguistic input onto a derivation tree from which the target text can be trivially read off as the terminal yield. Such grammars may consist of a large number of rules. Finding errors is quite tedious and sometimes very time-consuming. Often the generation fails because the relevant input subtree is not specified correctly. This work describes a method to detect and correct wrong assignments of input subtrees to grammar categories by cross-validating grammar rules with the given input structures. The result is implemented in a grammar development workbench and helps accelerate the grammar writer's work considerably.
@InProceedings{vor:der:Brueck:Busemann:2006,
Author         = {vor der Brück, Tim and Busemann, Stephan},
Title          = {Automatic Error Correction for Tree-Mapping Grammars},
BookTitle      = {Proceedings of KONVENS 2006},
Pages          = {1--8},
abstract       = {Tree mapping grammars are used in natural language
generation (NLG) to map non-linguistic input onto a
derivation tree from which the target text can be
trivially read off as the terminal yield. Such grammars
may consist of a large number of rules. Finding errors
is quite tedious and sometimes very time-consuming.
Often the generation fails because the relevant input
subtree is not specified correctly. This work describes
a method to detect and correct wrong assignments of
input subtrees to grammar categories by
cross-validating grammar rules with the given input
structures. The result is implemented in a grammar
development workbench and helps accelerate the
grammar writer's work considerably.},
annote         = {editor: Miriam Butt},
isbn           = {3-89318-050-8},
month          = {October},
url            = {http://pi7.fernuni-hagen.de/brueck/papers/brueck-busemann-konvens06.pdf},
website        = {http://www.dfki.de/lt/publication_show.php?id=3602},
year           = 2006
}
• A. Kranstedt, A. Lücking, T. Pfeiffer, H. Rieser, and M. Staudacher, "Measuring and Reconstructing Pointing in Visual Contexts," in brandial '06 -- Proceedings of the 10th Workshop on the Semantics and Pragmatics of Dialogue, Potsdam, 2006, pp. 82-89.
[Abstract] [BibTeX]

We describe an experiment to gather original data on geometrical aspects of pointing. In particular, we are focusing upon the concept of the pointing cone, a geometrical model of a pointing’s extension. In our setting we employed methodological and technical procedures of a new type to integrate data from annotations as well as from tracker recordings. We combined exact information on position and orientation with raters’ classifications. Our first results seem to challenge classical linguistic and philosophical theories of demonstration in that they advise to separate pointings from reference.
@InProceedings{Kranstedt:et:al:2006:c,
Author         = {Kranstedt, Alfred and Lücking, Andy and Pfeiffer,
Thies and Rieser, Hannes and Staudacher, Marc},
Title          = {Measuring and Reconstructing Pointing in Visual
Contexts},
BookTitle      = {brandial '06 -- Proceedings of the 10th Workshop on
the Semantics and Pragmatics of Dialogue},
Editor         = {David Schlangen and Raquel Fernández},
Pages          = {82--89},
Publisher      = {Universit{\"a}tsverlag Potsdam},
abstract       = {We describe an experiment to gather original data on
geometrical aspects of pointing. In particular, we are
focusing upon the concept of the pointing cone, a
geometrical model of a pointing’s extension. In our
setting we employed methodological and technical
procedures of a new type to integrate data from
annotations as well as from tracker recordings. We
combined exact information on position and orientation
with raters’ classifications. Our first results seem
to challenge classical linguistic and philosophical
theories of demonstration in that they advise to
separate pointings from reference.},
keywords       = {own},
month          = {9},
website        = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.144.8472},
year           = 2006
}
• A. Lücking, H. Rieser, and M. Staudacher, "Multi-modal Integration for Gesture and Speech," in brandial '06 -- Proceedings of the 10th Workshop on the Semantics and Pragmatics of Dialogue, Potsdam, 2006, pp. 106-113.
[Abstract] [BibTeX]

Demonstratives, in particular gestures that 'only' accompany speech, are not a big issue in current theories of grammar. If we deal with gestures, fixing their function is one big problem, the other one is how to integrate the representations originating from different channels and, ultimately, how to determine their composite meanings. The growing interest in multi-modal settings, computer simulations, human-machine interfaces and VR-applications increases the need for theories of multi-modal structures and events. In our workshop-contribution we focus on the integration of multi-modal contents and investigate different approaches dealing with this problem such as Johnston et al. (1997) and Johnston (1998), Johnston and Bangalore (2000), Chierchia (1995), Asher (2005), and Rieser (2005).
@InProceedings{Luecking:Rieser:Staudacher:2006:a,
Author         = {Lücking, Andy and Rieser, Hannes and Staudacher, Marc},
Title          = {Multi-modal Integration for Gesture and Speech},
BookTitle      = {brandial '06 -- Proceedings of the 10th Workshop on
the Semantics and Pragmatics of Dialogue},
Editor         = {David Schlangen and Raquel Fernández},
Pages          = {106--113},
Publisher      = {Universit{\"a}tsverlag Potsdam},
abstract       = {Demonstratives, in particular gestures that 'only'
accompany speech, are not a big issue in current
theories of grammar. If we deal with gestures, fixing
their function is one big problem, the other one is how
to integrate the representations originating from
different channels and, ultimately, how to determine
their composite meanings. The growing interest in
multi-modal settings, computer simulations,
human-machine interfaces and VR-applications increases
the need for theories of multi-modal structures and
events. In our workshop-contribution we focus on the
integration of multi-modal contents and investigate
different approaches dealing with this problem such as
Johnston et al. (1997) and Johnston (1998), Johnston
and Bangalore (2000), Chierchia (1995), Asher (2005),
and Rieser (2005).},
keywords       = {own},
month          = {9},
year           = 2006
}
• A. Kranstedt, A. Lücking, T. Pfeiffer, H. Rieser, and I. Wachsmuth, "Deictic Object Reference in Task-oriented Dialogue," in Situated Communication, G. Rickheit and I. Wachsmuth, Eds., Berlin: De Gruyter Mouton, 2006, pp. 155-207.
[Abstract] [BibTeX]

This chapter presents an original approach towards a detailed understanding of the usage of pointing gestures accompanying referring expressions. This effort is undertaken in the context of human-machine interaction integrating empirical studies, theory of grammar and logics, and simulation techniques. In particular, we take steps to classify the role of pointing in deictic expressions and to model the focussed area of pointing gestures, the so-called pointing cone. This pointing cone serves as a central concept in a formal account of multi-modal integration at the linguistic speech-gesture interface as well as in a computational model of processing multi-modal deictic expressions.
@InCollection{Kranstedt:et:al:2006:b,
Author         = {Kranstedt, Alfred and Lücking, Andy and Pfeiffer,
Thies and Rieser, Hannes and Wachsmuth, Ipke},
Title          = {Deictic Object Reference in Task-oriented Dialogue},
BookTitle      = {Situated Communication},
Publisher      = {De Gruyter Mouton},
Editor         = {Gert Rickheit and Ipke Wachsmuth},
Pages          = {155--207},
abstract       = {This chapter presents an original approach towards a
detailed understanding of the usage of pointing
gestures accompanying referring expressions. This
effort is undertaken in the context of human-machine
interaction integrating empirical studies, theory of
grammar and logics, and simulation techniques. In
particular, we take steps to classify the role of
pointing in deictic expressions and to model the
focussed area of pointing gestures, the so-called
pointing cone. This pointing cone serves as a central
concept in a formal account of multi-modal integration
at the linguistic speech-gesture interface as well as
in a computational model of processing multi-modal
deictic expressions.},
keywords       = {own},
website        = {http://pub.uni-bielefeld.de/publication/1894485},
year           = 2006
}
• A. Kranstedt, A. Lücking, T. Pfeiffer, H. Rieser, and I. Wachsmuth, "Deixis: How to Determine Demonstrated Objects Using a Pointing Cone," in Gesture in Human-Computer Interaction and Simulation, S. Gibet, N. Courty, and J. Kamp, Eds., Berlin: Springer, 2006, pp. 300-311.
[Abstract] [BibTeX]

We present a collaborative approach towards a detailed understanding of the usage of pointing gestures accompanying referring expressions. This effort is undertaken in the context of human-machine interaction integrating empirical studies, theory of grammar and logics, and simulation techniques. In particular, we attempt to measure the precision of the focussed area of a pointing gesture, the so-called pointing cone. The pointing cone serves as a central concept in a formal account of multi-modal integration at the linguistic speech-gesture interface as well as in a computational model of processing multi-modal deictic expressions.
@InCollection{Kranstedt:et:al:2006:a,
Author         = {Kranstedt, Alfred and Lücking, Andy and Pfeiffer,
Thies and Rieser, Hannes and Wachsmuth, Ipke},
Title          = {Deixis: How to Determine Demonstrated Objects Using a
Pointing Cone},
BookTitle      = {Gesture in Human-Computer Interaction and Simulation},
Publisher      = {Springer},
Editor         = {Sylvie Gibet and Nicolas Courty and Jean-Francois Kamp},
Pages          = {300--311},
abstract       = {We present a collaborative approach towards a detailed
understanding of the usage of pointing gestures
accompanying referring expressions. This effort is
undertaken in the context of human-machine interaction
integrating empirical studies, theory of grammar and
logics, and simulation techniques. In particular, we
attempt to measure the precision of the focussed area
of a pointing gesture, the so-called pointing cone. The
pointing cone serves as a central concept in a formal
account of multi-modal integration at the linguistic
speech-gesture interface as well as in a computational
model of processing multi-modal deictic expressions.},
anote          = {6th International Gesture Workshop, Berder Island,
France, 2005, Revised Selected Papers},
keywords       = {own},
year           = 2006
}
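
The pointing cone recurring in the two preceding entries is a geometric construct: an apex at the pointing hand, an axis along the pointing direction, and an opening angle within which candidate referents are sought. A minimal geometric sketch (apex, direction and aperture values are invented; the published model is empirically calibrated) tests whether an object falls inside such a cone:

```python
import math


def in_pointing_cone(apex, direction, aperture_deg, target) -> bool:
    """True if target lies within the cone given by apex, axis
    direction, and full opening angle aperture_deg (all 3D)."""
    to_target = [t - a for t, a in zip(target, apex)]
    norm_d = math.sqrt(sum(c * c for c in direction))
    norm_t = math.sqrt(sum(c * c for c in to_target))
    if norm_t == 0:
        return True  # the target coincides with the apex
    cos_angle = (sum(d * t for d, t in zip(direction, to_target))
                 / (norm_d * norm_t))
    angle = math.degrees(math.acos(max(-1.0, min(1.0, cos_angle))))
    return angle <= aperture_deg / 2


# Invented example: hand at the origin pointing along the x-axis with
# a 30 degree opening angle; only the first object is demonstrated.
print(in_pointing_cone((0, 0, 0), (1, 0, 0), 30, (2.0, 0.3, 0.1)))  # True
print(in_pointing_cone((0, 0, 0), (1, 0, 0), 30, (2.0, 1.5, 0.0)))  # False
```
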
• T. Pfeiffer, A. Kranstedt, and A. Lücking, "Sprach-Gestik Experimente mit IADE, dem Interactive Augmented Data Explorer," in Proceedings: Dritter Workshop Virtuelle und Erweiterte Realität der GI-Fachgruppe VR/AR, Koblenz, 2006.
[Abstract] [BibTeX]

For the empirical study of natural human communication we rely on the acquisition and analysis of extensive data. The modalities through which people can express themselves are very diverse, and so are the representations by which they can be made accessible to empirical work. For a study of pointing behavior in object reference we developed IADE, a framework for recording, analyzing and re-simulating speech and gesture data. It enables decisive advances in linguistic experimental methodology for our research.
@InProceedings{Pfeiffer:Kranstedt:Luecking:2006,
Author         = {Pfeiffer, Thies and Kranstedt, Alfred and Lücking,
Andy},
Title          = {Sprach-Gestik Experimente mit IADE, dem Interactive
Augmented Data Explorer},
BookTitle      = {Proceedings: Dritter Workshop Virtuelle und Erweiterte
Realit{\"a}t der GI-Fachgruppe VR/AR},
abstract       = {Für die empirische Erforschung natürlicher
menschlicher Kommunikation sind wir auf die Akquise und
Auswertung umfangreicher Daten angewiesen. Die
Modalit{\"a}ten, über die sich Menschen ausdrücken
können, sind sehr unterschiedlich - und genauso
verschieden sind die Repr{\"a}sentationen, mit denen
sie für die Empirie verfügbar gemacht werden können.
Für eine Untersuchung des Zeigeverhaltens bei der
Referenzierung von Objekten haben wir mit IADE ein
Framework für die Aufzeichnung, Analyse und
Resimulation von Sprach-Gestik Daten entwickelt. Mit
dessen Hilfe können wir für unsere Forschung
entscheidende Fortschritte in der linguistischen
Experimentalmethodik machen.},
keywords       = {own},
website        = {http://pub.uni-bielefeld.de/publication/2426853},
year           = 2006
}
• A. Lücking, H. Rieser, and M. Staudacher, "SDRT and Multi-modal Situated Communication," in brandial '06 -- Proceedings of the 10th Workshop on the Semantics and Pragmatics of Dialogue, 2006, pp. 72-79.
[BibTeX]

@InProceedings{Luecking:Rieser:Stauchdacher:2006:b,
Author         = {Lücking, Andy and Rieser, Hannes and Staudacher, Marc},
Title          = {SDRT and Multi-modal Situated Communication},
BookTitle      = {brandial '06 -- Proceedings of the 10th Workshop on
the Semantics and Pragmatics of Dialogue},
Editor         = {David Schlangen and Raquel Fernández},
Pages          = {72--79},
Publisher      = {Universit{\"a}tsverlag Potsdam},
keywords       = {own},
month          = {9},
year           = 2006
}
• M. Z. Islam and M. Khan, "JKimmo: A Multilingual Computational Morphology Framework for PC-KIMMO," in 9th International Conference on Computer and Information Technology (ICCIT 2006), Dhaka, Bangladesh, 2006.
[Abstract] [BibTeX]

Morphological analysis is of fundamental interest in computational linguistics and language processing. While there are established morphological analyzers for mostly Western and a few other languages using localized interfaces, the same cannot be said for Indic and other less-studied languages for which language processing is just beginning. There are three primary obstacles to computational morphological analysis of these less-studied languages: the generative rules that define the language morphology, the morphological processor, and the computational interface that a linguist can use to experiment with the generative rules. In this paper, we present JKimmo, a multilingual morphological open-source framework that uses the PC-KIMMO two-level morphological processor and provides a localized interface for Bangla morphological analysis. We then apply Jkimmo to Bangla computational morphology, demonstrating both its recognition and generation capabilities. Jkimmo’s internationalization (i18n) framework allows easy localization in other languages as well, using a property file for the interface definitions and a transliteration scheme for the analysis.
@InProceedings{Zahurul:Khan:2006,
Author         = {Islam, Md. Zahurul and Khan, Mumit},
Title          = {JKimmo: A Multilingual Computational Morphology
Framework for PC-KIMMO},
BookTitle      = {9th International Conference on Computer and
Information Technology (ICCIT 2006), Dhaka, Bangladesh},
abstract       = {Morphological analysis is of fundamental interest in
computational linguistics and language processing.
While there are established morphological analyzers for
mostly Western and a few other languages using
localized interfaces, the same cannot be said for Indic
and other less-studied languages for which language
processing is just beginning. There are three primary
obstacles to computational morphological analysis of
these less-studied languages: the generative rules that
define the language morphology, the morphological
processor, and the computational interface that a
linguist can use to experiment with the generative
rules. In this paper, we present JKimmo, a multilingual
morphological open-source framework that uses the
PC-KIMMO two-level morphological processor and provides
a localized interface for Bangla morphological
analysis. We then apply Jkimmo to Bangla computational
morphology, demonstrating both its recognition and
generation capabilities. Jkimmo’s
internationalization (i18n) framework allows easy
localization in other languages as well, using a
property file for the interface definitions and a
transliteration scheme for the analysis.},
owner          = {zahurul},
timestamp      = {2011.08.02},
website        = {https://www.researchgate.net/publication/237728403_JKimmo_A_Multilingual_Computational_Morphology_Framework_for_PC-KIMMO},
year           = 2006
}
• T. Rownok, M. Z. Islam, and M. Khan, "Bangla Text Input and Rendering Support for Short Message Service on Mobile Devices," in 9th International Conference on Computer and Information Technology (ICCIT 2006), Dhaka, Bangladesh, 2006.
[Abstract] [BibTeX]

Technology is involved in almost every aspect of our everyday lives, including communication, work, shopping and recreation. Communication through mobile devices is now among the most effective and convenient channels: it is fast, easy, and available anywhere at any time. Mobile messaging, or the short message service, is one of the most popular ways to communicate using mobile devices. Writing and displaying Bangla characters on mobile devices, however, remains a big challenge. In this paper, we describe a Bangla text input method and rendering support for short message service on mobile devices.
@InProceedings{Rownok:Zahurul:Khan:2006,
Author         = {Rownok, Tofazzal and Islam, Md. Zahurul and Khan,
Mumit},
Title          = {Bangla Text Input and Rendering Support for Short
Message Service on Mobile Devices},
BookTitle      = {9th International Conference on Computer and
Information Technology (ICCIT 2006), Dhaka, Bangladesh},
abstract       = {Technology is involved in almost every aspect of our
everyday lives, including communication, work, shopping
and recreation. Communication through mobile devices is
now among the most effective and convenient channels:
it is fast, easy, and available anywhere at any time.
Mobile messaging, or the short message service, is one
of the most popular ways to communicate using mobile
devices. Writing and displaying Bangla characters on
mobile devices, however, remains a big challenge. In
this paper, we describe a Bangla text input method and
rendering support for short message service on mobile
devices.},
owner          = {zahurul},
timestamp      = {2011.08.02},
year           = 2006
}
• Y. Arafat, M. Z. Islam, and M. Khan, "Analysis and Observations From a Bangla news corpus," in 9th International Conference on Computer and Information Technology (ICCIT 2006), Dhaka, Bangladesh, 2006.
[BibTeX]

@InProceedings{Arafat:Zahurul:Khan:2006,
Author         = {Arafat, Yeasir and Islam, Md. Zahurul and Khan, Mumit},
Title          = {Analysis and Observations From a Bangla news corpus},
BookTitle      = {9th International Conference on Computer and
Information Technology (ICCIT 2006), Dhaka, Bangladesh},
owner          = {zahurul},
timestamp      = {2011.08.02},
year           = 2006
}
• R. Gleim, "HyGraph - Ein Framework zur Extraktion, Repräsentation und Analyse webbasierter Hypertextstrukturen," in Sprachtechnologie, mobile Kommunikation und linguistische Ressourcen. Beiträge zur GLDV-Tagung 2005, Universität Bonn, Frankfurt a. M., 2006, pp. 42-53.
[BibTeX]

@InProceedings{Gleim:2006,
Author         = {Gleim, Rüdiger},
Title          = {HyGraph - Ein Framework zur Extraktion,
Repr{\"a}sentation und Analyse webbasierter
Hypertextstrukturen},
BookTitle      = {Sprachtechnologie, mobile Kommunikation und
linguistische Ressourcen. Beitr{\"a}ge zur GLDV-Tagung
2005, Universit{\"a}t Bonn},
Editor         = {Fisseni, Bernhard and Schmitz, Hans-Christian and
Schröder, Bernhard and Wagner, Petra},
Pages          = {42-53},
Publisher      = {Lang},
website        = {https://www.researchgate.net/publication/268294000_HyGraph__Ein_Framework_zur_Extraktion_Reprsentation_und_Analyse_webbasierter_Hypertextstrukturen},
year           = 2006
}
• A. Mehler, "Text Linkage in the Wiki Medium – A Comparative Study," in Proceedings of the EACL Workshop on New Text – Wikis and blogs and other dynamic text sources, April 3-7, 2006, Trento, Italy, 2006, pp. 1-8.
[Abstract] [BibTeX]

Workshop organizer: Jussi Karlgren
@InProceedings{Mehler:2006:c,
Author         = {Mehler, Alexander},
Title          = {Text Linkage in the Wiki Medium – A Comparative
Study},
BookTitle      = {Proceedings of the EACL Workshop on New Text – Wikis
and blogs and other dynamic text sources, April 3-7,
2006, Trento, Italy},
Editor         = {Karlgren, Jussi},
Pages          = {1-8},
abstract       = {Workshop organizer: Jussi Karlgren},
pdf            = {http://www.aclweb.org/anthology/W06-2801},
website        = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.6390},
year           = 2006
}
• A. Mehler, "Stratified Constraint Satisfaction Networks in Synergetic Multi-Agent Simulations of Language Evolution," in Artificial Cognition Systems, A. Loula, R. Gudwin, and J. Queiroz, Eds., Hershey: Idea Group Inc., 2006, pp. 140-174.
[Abstract] [BibTeX]

Formerly = Mehler:2005:e
@InCollection{Mehler:2006:e,
Author         = {Mehler, Alexander},
Title          = {Stratified Constraint Satisfaction Networks in
Synergetic Multi-Agent Simulations of Language
Evolution},
BookTitle      = {Artificial Cognition Systems},
Publisher      = {Idea Group Inc.},
Editor         = {Loula, Angelo and Gudwin, Ricardo and Queiroz, João},
Pages          = {140-174},
abstract       = {Ehedem = Mehler:2005:e},
year           = 2006
}
• A. Mehler and L. Sichelschmidt, "Reconceptualizing Latent Semantic Analysis in Terms of Complex Network Theory. A Corpus-Linguistic Approach," in 2nd International Conference of the German Cognitive Linguistics Association – Theme Session: Cognitive-Linguistic Approaches: What can we gain by computational treatment of data? 5.-7. Oktober 2006, Ludwig-Maximilians-Universität München, 2006, pp. 23-26.
[BibTeX]

@InProceedings{Mehler:Sichelschmidt:2006,
Author         = {Mehler, Alexander and Sichelschmidt, Lorenz},
Title          = {Reconceptualizing Latent Semantic Analysis in Terms of
Complex Network Theory. A Corpus-Linguistic Approach},
BookTitle      = {2nd International Conference of the German Cognitive
Linguistics Association – Theme Session:
Cognitive-Linguistic Approaches: What can we gain by
computational treatment of data? 5.-7. Oktober 2006,
Ludwig-Maximilians-Universit{\"a}t München},
Pages          = {23-26},
editors        = {Alonge, Antonietta and Lönneker-Rodman, Birte},
year           = 2006
}
• A. Mehler, M. Dehmer, and R. Gleim, "Towards Logical Hypertext Structure - A Graph-Theoretic Perspective," in Proceedings of the Fourth International Workshop on Innovative Internet Computing Systems (I2CS '04), Berlin/New York, 2006, pp. 136-150.
[Abstract] [BibTeX]

Facing the retrieval problem according to the overwhelming set of documents online the adaptation of text categorization to web units has recently been pushed. The aim is to utilize categories of web sites and pages as an additional retrieval criterion. In this context, the bag-of-words model has been utilized just as HTML tags and link structures. In spite of promising results this adaptation stays in the framework of IR specific models since it neglects the content-based structuring inherent to hypertext units. This paper approaches hypertext modelling from the perspective of graph-theory. It presents an XML-based format for representing websites as hypergraphs. These hypergraphs are used to shed light on the relation of hypertext structure types and their web-based instances. We place emphasis on two characteristics of this relation: In terms of realizational ambiguity we speak of functional equivalents to the manifestation of the same structure type. In terms of polymorphism we speak of a single web unit which manifests different structure types. It is shown that polymorphism is a prevalent characteristic of web-based units. This is done by means of a categorization experiment which analyses a corpus of hypergraphs representing the structure and content of pages of conference websites. On this background we plead for a revision of text representation models by means of hypergraphs which are sensitive to the manifold structuring of web documents.
@InProceedings{Mehler:Dehmer:Gleim:2006,
Author         = {Mehler, Alexander and Dehmer, Matthias and Gleim,
Rüdiger},
Title          = {Towards Logical Hypertext Structure - A
Graph-Theoretic Perspective},
BookTitle      = {Proceedings of the Fourth International Workshop on
Innovative Internet Computing Systems (I2CS '04)},
Editor         = {Böhme, Thomas and Heyer, Gerhard},
Series         = {Lecture Notes in Computer Science 3473},
Pages          = {136-150},
Publisher      = {Springer},
abstract       = {Facing the retrieval problem according to the
overwhelming set of documents online the adaptation of
text categorization to web units has recently been
pushed. The aim is to utilize categories of web sites
and pages as an additional retrieval criterion. In this
context, the bag-of-words model has been utilized just
as HTML tags and link structures. In spite of promising
results this adaptation stays in the framework of IR
specific models since it neglects the content-based
structuring inherent to hypertext units. This paper
approaches hypertext modelling from the perspective of
graph-theory. It presents an XML-based format for
representing websites as hypergraphs. These hypergraphs
are used to shed light on the relation of hypertext
structure types and their web-based instances. We place
emphasis on two characteristics of this relation: In
terms of realizational ambiguity we speak of functional
equivalents to the manifestation of the same structure
type. In terms of polymorphism we speak of a single web
unit which manifests different structure types. It is
shown that polymorphism is a prevalent characteristic
of web-based units. This is done by means of a
categorization experiment which analyses a corpus of
hypergraphs representing the structure and content of
pages of conference websites. On this background we
plead for a revision of text representation models by
means of hypergraphs which are sensitive to the
manifold structuring of web documents.},
website        = {http://rd.springer.com/chapter/10.1007/11553762_14},
year           = 2006
}
• A. Mehler, "In Search of a Bridge between Network Analysis in Computational Linguistics and Computational Biology – A Conceptual Note," in Proceedings of the 2006 International Conference on Bioinformatics & Computational Biology (BIOCOMP '06), June 26, 2006, Las Vegas, USA, 2006, pp. 496-500.
[BibTeX]

@InProceedings{Mehler:2006:a,
Author         = {Mehler, Alexander},
Title          = {In Search of a Bridge between Network Analysis in
Computational Linguistics and Computational Biology –
A Conceptual Note},
BookTitle      = {Proceedings of the 2006 International Conference on
Bioinformatics \& Computational Biology (BIOCOMP '06),
June 26, 2006, Las Vegas, USA},
Editor         = {Arabnia, Hamid R. and Valafar, Homayoun},
Pages          = {496-500},
year           = 2006
}

### 2005 (13)

• M. Dehmer, F. Emmert-Streib, A. Mehler, J. Kilian, and M. Mühlhäuser, "Application of a similarity measure for graphs to web-based document structures," in Proceedings of VI. International Conference on Enformatika, Systems Sciences and Engineering, Budapest, Hungary, October 2005, International Academy of Sciences: Enformatika 8 (2005), 2005, pp. 77-81.
[Abstract] [BibTeX]

Due to the tremendous amount of information provided by the World Wide Web (WWW) developing methods for mining the structure of web-based documents is of considerable interest. In this paper we present a similarity measure for graphs representing web-based hypertext structures. Our similarity measure is mainly based on a novel representation of a graph as linear integer strings, whose components represent structural properties of the graph. The similarity of two graphs is then defined as the optimal alignment of the underlying property strings. In this paper we apply the well known technique of sequence alignments for solving a novel and challenging problem: Measuring the structural similarity of generalized trees. In other words: We first transform our graphs considered as high dimensional objects in linear structures. Then we derive similarity values from the alignments of the property strings in order to measure the structural similarity of generalized trees. Hence, we transform a graph similarity problem to a string similarity problem for developing an efficient graph similarity measure. We demonstrate that our similarity measure captures important structural information by applying it to two different test sets consisting of graphs representing web-based document structures.
@InProceedings{Dehmer:Emmert:Streib:Mehler:Kilian:Muehlhaeuser:2005,
Author         = {Dehmer, Matthias and Emmert-Streib, Frank and Mehler,
Alexander and Kilian, Jürgen and Mühlh{\"a}user, Max},
Title          = {Application of a similarity measure for graphs to
web-based document structures},
BookTitle      = {Proceedings of VI. International Conference on
Enformatika, Systems Sciences and Engineering,
Budapest, Hungary, October 2005, International Academy
of Sciences: Enformatika 8 (2005)},
Pages          = {77-81},
abstract       = {Due to the tremendous amount of information provided
by the World Wide Web (WWW) developing methods for
mining the structure of web-based documents is of
considerable interest. In this paper we present a
similarity measure for graphs representing web-based
hypertext structures. Our similarity measure is mainly
based on a novel representation of a graph as linear
integer strings, whose components represent structural
properties of the graph. The similarity of two graphs
is then defined as the optimal alignment of the
underlying property strings. In this paper we apply the
well-known technique of sequence alignment for solving
a novel and challenging problem: measuring the
structural similarity of generalized trees. In other
words: we first transform our graphs, considered as
high-dimensional objects, into linear structures. Then we
derive similarity values from the alignments of the
property strings in order to measure the structural
similarity of generalized trees. Hence, we transform a
graph similarity problem into a string similarity problem
for developing an efficient graph similarity measure. We
demonstrate that our similarity measure captures
important structural information by applying it to two
different test sets consisting of graphs representing
web-based document structures.},
pdf            = {http://waset.org/publications/15299/application-of-a-similarity-measure-for-graphs-to-web-based-document-structures},
website        = {https://www.researchgate.net/publication/238687277_Application_of_a_Similarity_Measure_for_Graphs_to_Web-based_Document_Structures},
year           = 2005
}
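The abstract above reduces graph comparison to sequence alignment over integer property strings. The following is a minimal sketch of that idea, not the authors' actual measure: it uses the sorted degree sequence as a stand-in property string (an assumption for illustration) and a simple Needleman-Wunsch-style global alignment whose match score rewards numerically close components.

```python
# Sketch only: degree sequences stand in for the paper's property strings,
# and a basic global alignment turns graph similarity into string similarity.

from typing import Dict, List, Tuple

def degree_string(edges: List[Tuple[int, int]]) -> List[int]:
    """Sorted degree sequence as a stand-in structural property string."""
    deg: Dict[int, int] = {}
    for u, v in edges:
        deg[u] = deg.get(u, 0) + 1
        deg[v] = deg.get(v, 0) + 1
    return sorted(deg.values())

def align_score(a: List[int], b: List[int], gap: float = -1.0) -> float:
    """Global alignment score; matched components score by closeness."""
    n, m = len(a), len(b)
    dp = [[0.0] * (m + 1) for _ in range(n + 1)]
    for i in range(1, n + 1):
        dp[i][0] = i * gap
    for j in range(1, m + 1):
        dp[0][j] = j * gap
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            match = 1.0 - abs(a[i - 1] - b[j - 1]) / max(a[i - 1], b[j - 1])
            dp[i][j] = max(dp[i - 1][j - 1] + match,
                           dp[i - 1][j] + gap,
                           dp[i][j - 1] + gap)
    return dp[n][m]

def graph_similarity(e1: List[Tuple[int, int]],
                     e2: List[Tuple[int, int]]) -> float:
    """Normalize the alignment score by an upper bound on its value."""
    s1, s2 = degree_string(e1), degree_string(e2)
    best = max(len(s1), len(s2))  # a perfect match scores 1.0 per component
    return max(0.0, align_score(s1, s2)) / best if best else 1.0

# Two small "document structure" graphs: a chain and a star.
chain = [(1, 2), (2, 3), (3, 4)]
star = [(1, 2), (1, 3), (1, 4)]
print(round(graph_similarity(chain, chain), 3))  # 1.0
print(round(graph_similarity(chain, star), 3))   # < 1.0
```

Normalizing by the best possible self-alignment keeps the score in [0, 1], mirroring the paper's move from a graph similarity problem to a string similarity problem.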
• A. Mehler, "Preliminaries to an Algebraic Treatment of Lexical Associations," in Learning and Extending Lexical Ontologies. Proceedings of the Workshop at the 22nd International Conference on Machine Learning (ICML '05), August 7-11, 2005, Universität Bonn, Germany, 2005, pp. 41-47.
[BibTeX]

@InProceedings{Mehler:2005:c,
Author         = {Mehler, Alexander},
Title          = {Preliminaries to an Algebraic Treatment of Lexical
Associations},
BookTitle      = {Learning and Extending Lexical Ontologies. Proceedings
of the Workshop at the 22nd International Conference on
Machine Learning (ICML '05), August 7-11, 2005,
Universit{\"a}t Bonn, Germany},
Editor         = {Biemann, Chris and Paa{\ss}, Gerhard},
Pages          = {41-47},
year           = 2005
}
• A. Mehler and R. Gleim, "Polymorphism in Generic Web Units. A corpus linguistic study," in Proceedings of Corpus Linguistics '05, July 14-17, 2005, University of Birmingham, Great Britain, 2005.
[Abstract] [BibTeX]

Corpus linguistics and related disciplines which focus on statistical analyses of textual units have a substantial need for large corpora. More specifically, genre- or register-specific corpora are needed which allow studying variations in language use. Along with the incredible growth of the internet, the web became an important source of linguistic data. Of course, web corpora face the same problem of acquiring genre-specific corpora. Amongst other things, web mining is a framework of methods for automatically assigning category labels to web units and thus may be seen as a solution to this corpus acquisition problem as far as genre categories are applied. The paper argues that this approach is faced with the problem of a many-to-many relation between expression units on the one hand and content or function units on the other hand. A quantitative study is performed which supports the argument that functions of web-based communication are very often concentrated on single web pages and thus interfere with any effort to directly apply the classical apparatus of categorization at the web page level. The paper outlines a two-level algorithm as an alternative approach to category assignment which is sensitive to genre-specific structures and thus may be used to tackle the problem of acquiring genre-specific corpora.
@InProceedings{Mehler:Gleim:2005:a,
Author         = {Mehler, Alexander and Gleim, Rüdiger},
Title          = {Polymorphism in Generic Web Units. A corpus linguistic
study},
BookTitle      = {Proceedings of Corpus Linguistics '05, July 14-17,
2005, University of Birmingham, Great Britain},
Volume         = {Corpus Linguistics Conference Series 1(1)},
abstract       = {Corpus linguistics and related disciplines which focus
on statistical analyses of textual units have a
substantial need for large corpora. More specifically,
genre- or register-specific corpora are needed which
allow studying variations in language use. Along with
the incredible growth of the internet, the web became
an important source of linguistic data. Of course, web
corpora face the same problem of acquiring
genre-specific corpora. Amongst other things, web mining is
a framework of methods for automatically assigning
category labels to web units and thus may be seen as a
solution to this corpus acquisition problem as far as
genre categories are applied. The paper argues that
this approach is faced with the problem of a
many-to-many relation between expression units on the
one hand and content or function units on the other
hand. A quantitative study is performed which supports
the argument that functions of web-based
communication are very often concentrated on single web
pages and thus interfere with any effort to directly
apply the classical apparatus of categorization at
the web page level. The paper outlines a two-level
algorithm as an alternative approach to category
assignment which is sensitive to genre-specific
structures and thus may be used to tackle the problem
of acquiring genre-specific corpora.},
issn           = {1747-9398},
pdf            = {http://www.birmingham.ac.uk/Documents/college-artslaw/corpus/conference-archives/2005-journal/Thewebasacorpus/AlexanderMehlerandRuedigerGleimCorpusLinguistics2005.pdf},
year           = 2005
}
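The two-level algorithm is only outlined in the abstract above. Purely as a hypothetical sketch (the function names, rules, and labels below are invented, not the paper's method), a first stage labels page-internal units by their communicative function, and a second stage derives the page-level genre from the distribution of those unit labels:

```python
# Hypothetical two-stage sketch of "two-level" category assignment.
# Everything here is an illustrative assumption, not the published algorithm.

from collections import Counter
from typing import List

def classify_unit(text: str) -> str:
    """Stage 1 stand-in: label a page-internal unit by its function."""
    t = text.lower()
    if "submit" in t or "deadline" in t:
        return "call-for-papers"
    if "programme" in t or "schedule" in t:
        return "programme"
    return "other"

def classify_page(units: List[str]) -> str:
    """Stage 2: derive a page-level genre from the unit-label distribution."""
    counts = Counter(classify_unit(u) for u in units)
    label, _ = counts.most_common(1)[0]
    return label

page = ["Submit your paper before the deadline.",
        "Paper submission is open until the deadline.",
        "Conference schedule and programme overview.",
        "Contact the local organizers."]
print(classify_page(page))  # call-for-papers
```

The point of the two stages is that a single page may bundle several functions; classifying below page level first keeps that polymorphism visible instead of forcing one label onto the whole page directly.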
• A. Mehler and C. Wolff, "Einleitung: Perspektiven und Positionen des Text Mining," Journal for Language Technology and Computational Linguistics (JLCL), vol. 20, iss. 1, pp. 1-18, 2005.
[Abstract] [BibTeX]

Contributions on the topic of text mining frequently open by pointing to the enormous growth of documents available online, whether on the Internet or in intranets (Losiewicz et al. 2000; Merkl 2000; Feldman 2001; Mehler 2001; Joachims & Leopold 2002). This "information flood" is contrasted with the inadequacy of information retrieval (IR) and of its established methods of information processing and indexing. The criticism is that IR largely confines itself to retrieving subsets of text collections in response to search queries and, as a rule, merely arranging them as lists. The tension thus portrayed between the information explosion and the deficits of existing IR methods forms the background for the development of methods for the automatic processing of textual units that are more strongly oriented towards the needs of information seekers. Put differently: with the introduction of the new media, digitized documents gain importance as the primary medium for processing, distributing, and managing information in public and corporate organizations. Given the sheer number of units to be processed, intellectual document indexing is not an available alternative. At the same time, the demands on automatic text analysis grow beyond what classical IR can meet. The majority of the textual units concerned lack the explicit structuredness of formal data structures; rather, they exhibit quite different degrees of structuring depending on text or document type. Moreover, the flexibility of organizational goals correlates negatively with the degree of explicit structuring and positively with the number of those texts and text types (e-mails, memos, expert reports, technical documentation, etc.) that are produced or received in the course of their realization. Against this background there arises a need for text technologies that not only offer their users "intelligent" interfaces for text reception but also aim at content-oriented text analyses, so as to help explore task-relevant data and prepare it in a context-sensitive manner. Text mining comes with the promise of constituting, or developing into, such a technology. This uniform description of the problem is confronted with competing specifications of text mining, as the sheer variety of names already makes clear.
Thus, besides the designation text mining (Joachims & Leopold 2002; Tan 1999), one finds the alternatives text data mining (Hearst 1999b; Merkl 2000), textual data mining (Losiewicz et al. 2000), text knowledge engineering (Hahn & Schnattinger 1998), knowledge discovery in texts (Kodratoff 1999), and knowledge discovery in textual databases (Feldman & Dagan 1995). The names alone reveal that these are coinages by analogy with the (only marginally older) research field of data mining (DM, a component of knowledge discovery in databases, KDD). This variety of names is matched by conflicting assignments of tasks. Sebastiani (2002), for example, largely equates information extraction with text mining, while identifying an overlap between text mining and text categorization (see also Dörre et al. 1999). Kosala & Blockeel (2000), by contrast, regard information extraction and text categorization merely as subareas of what they take to be the more comprehensive field of text mining, whereas Hearst (1999a) explicitly excludes information extraction and text categorization from the domain of exploratory text mining.
@Article{Mehler:Wolff:2005:b,
Author         = {Mehler, Alexander and Wolff, Christian},
Title          = {Einleitung: Perspektiven und Positionen des Text
Mining},
Journal        = {Journal for Language Technology and Computational
Linguistics (JLCL)},
Volume         = {20},
Number         = {1},
Pages          = {1-18},
abstract       = {Beitr{\"a}ge zum Thema Text Mining beginnen vielfach
mit dem Hinweis auf die enorme Zunahme online
verfügbarer Dokumente, ob nun im Internet oder in
Intranets (Losiewicz et al. 2000; Merkl 2000; Feldman
2001; Mehler 2001; Joachims \& Leopold 2002). Der
hiermit einhergehenden „Informationsflut“ wird das
Ungenügen des Information Retrieval (IR) bzw. seiner
g{\"a}ngigen Verfahren der Informationsaufbereitung und
Informationserschlie{\ss}ung gegenübergestellt. Es
wird bem{\"a}ngelt, dass sich das IR weitgehend darin
erschöpft, Teilmengen von Textkollektionen auf
Suchanfragen hin aufzufinden und in der Regel blo{\ss}
listenförmig anzuordnen. Das auf diese Weise
dargestellte Spannungsverh{\"a}ltnis von
Informationsexplosion und Defiziten bestehender
IR-Verfahren bildet den Hintergrund für die
Entwicklung von Verfahren zur automatischen
Verarbeitung textueller Einheiten, die sich st{\"a}rker
an den Anforderungen von Informationssuchenden
orientieren. Anders ausgedrückt: Mit der Einführung
der Neuen Medien w{\"a}chst die Bedeutung
digitalisierter Dokumente als Prim{\"a}rmedium für die
Verarbeitung, Verbreitung und Verwaltung von
Information in öffentlichen und betrieblichen
Organisationen. Dabei steht wegen der Menge zu
verarbeitender Einheiten die Alternative einer
intellektuellen Dokumenterschlie{\ss}ung nicht zur
Verfügung. Andererseits wachsen die Anforderungen an
eine automatische Textanalyse, der das klassische IR
nicht gerecht wird. Der Mehrzahl der hiervon
betroffenen textuellen Einheiten fehlt die explizite
Strukturiertheit formaler Datenstrukturen. Vielmehr
weisen sie je nach Text- bzw. Dokumenttyp ganz
unterschiedliche Strukturierungsgrade auf. Dabei
korreliert die Flexibilit{\"a}t der Organisationsziele
negativ mit dem Grad an explizierter Strukturiertheit
und positiv mit der Anzahl jener Texte und Texttypen
(E-Mails, Memos, Expertisen, technische Dokumentationen
etc.), die im Zuge ihrer Realisierung produziert bzw.
rezipiert werden. Vor diesem Hintergrund entsteht ein
Bedarf an Texttechnologien, die ihren Benutzern nicht
nur „intelligente“ Schnittstellen zur Textrezeption
anbieten, sondern zugleich auf inhaltsorientierte
Textanalysen zielen, um auf diese Weise
aufgabenrelevante Daten explorieren und kontextsensitiv
aufbereiten zu helfen. Das Text Mining ist mit dem
Versprechen verbunden, eine solche Technologie
darzustellen bzw. sich als solche zu entwickeln. Dieser
einheitlichen Problembeschreibung stehen konkurrierende
Textmining-Spezifikationen gegenüber, was bereits die
Vielfalt der Namensgebungen verdeutlicht. So finden
sich neben der Bezeichnung Text Mining (Joachims \&
Leopold 2002; Tan 1999) die Alternativen Text Data
Mining (Hearst 1999b; Merkl 2000), Textual Data
Mining (Losiewicz et al. 2000), Text Knowledge
Engineering (Hahn \& Schnattinger 1998), Knowledge
Discovery in Texts (Kodratoff 1999) oder Knowledge
Discovery in Textual Databases (Feldman \& Dagan 1995).
Dabei l{\"a}sst bereits die Namensgebung erkennen, dass
es sich um Analogiebildungen zu dem (nur unwesentlich
{\"a}lteren) Forschungsgebiet des Data Mining (DM; als
Bestandteil des Knowledge Discovery in Databases –
KDD) handelt. Diese Namensvielfalt findet ihre
Entsprechung in widerstreitenden Aufgabenzuweisungen.
So setzt beispielsweise Sebastiani (2002)
Informationsextraktion und Text Mining weitgehend
gleich, wobei er eine Schnittmenge zwischen Text Mining
und Textkategorisierung ausmacht (siehe auch Dörre et
al. 1999). Demgegenüber betrachten Kosala \& Blockeel
(2000) Informationsextraktion und Textkategorisierung
lediglich als Teilbereiche des ihrer Ansicht nach
umfassenderen Text Mining, w{\"a}hrend Hearst (1999a)
im Gegensatz hierzu Informationsextraktion und
Textkategorisierung explizit aus dem Bereich des
explorativen Text Mining ausschlie{\ss}t.},
website        = {http://epub.uni-regensburg.de/6844/},
year           = 2005
}
• A. Mehler, Korpuslinguistik, A. Mehler, Ed., 2005, vol. 20(2).
[BibTeX]

@Book{Mehler:2005:e,
Author         = {Mehler, Alexander},
Editor         = {Mehler, Alexander},
Title          = {Korpuslinguistik},
Volume         = {20(2)},
Series         = {Journal for Language Technology and Computational
Linguistics (JLCL)},
pagetotal      = {97},
website        = {http://www.jlcl.org/2005_Heft2/LDV_Forum_Band_20_Heft_2.pdf},
year           = 2005
}
• A. Mehler, M. Dehmer, and R. Gleim, "Zur Automatischen Klassifikation von Webgenres," in Sprachtechnologie, mobile Kommunikation und linguistische Ressourcen. Beiträge zur GLDV-Frühjahrstagung '05, 30. März – 01. April 2005, Universität Bonn, Frankfurt a. M., 2005, pp. 158-174.
[BibTeX]

@InProceedings{Mehler:Dehmer:Gleim:2005,
Author         = {Mehler, Alexander and Dehmer, Matthias and Gleim,
Rüdiger},
Title          = {Zur Automatischen Klassifikation von Webgenres},
BookTitle      = {Sprachtechnologie, mobile Kommunikation und
linguistische Ressourcen. Beitr{\"a}ge zur
GLDV-Frühjahrstagung '05, 30. M{\"a}rz – 01. April
2005, Universit{\"a}t Bonn},
Editor         = {Fisseni, Bernhard and Schmitz, Hans-Christian and
Schröder, Bernhard and Wagner, Petra},
Pages          = {158-174},
Publisher      = {Lang},
year           = 2005
}
• A. Mehler and C. Wolff, Text Mining, A. Mehler and C. Wolff, Eds., GSCL, 2005, vol. 20(1).
[BibTeX]

@Book{Mehler:Wolff:2005:a,
Author         = {Mehler, Alexander and Wolff, Christian},
Editor         = {Mehler, Alexander and Wolff, Christian},
Title          = {Text Mining},
Publisher      = {GSCL},
Volume         = {20(1)},
Series         = {Journal for Language Technology and Computational
Linguistics (JLCL)},
image          = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/TextM