Total: 446
2025
2025.
DUUI: A Toolbox for the Construction of a new Kind of Natural
Language Processing. Proceedings of the DHd 2025: Under Construction. Geisteswissenschaften
und Data Humanities.
accepted.
BibTeX
@inproceedings{Abrami:et:al:2025,
  author    = {Abrami, Giuseppe and Baumartz, Daniel and Mehler, Alexander},
  title     = {{DUUI}: A Toolbox for the Construction of a new Kind of Natural
               Language Processing},
  year      = {2025},
  booktitle = {Proceedings of the DHd 2025: Under Construction. Geisteswissenschaften
               und Data Humanities},
  numpages  = {3},
  location  = {Bielefeld, Germany},
  series    = {DHd 2025},
  keywords  = {duui},
  note      = {accepted}
}
2024
2024.
Towards New Data Spaces for the Study of Multiple Documents with
Va.Si.Li-Lab: A Conceptual Analysis. In: Students', Graduates' and Young Professionals' Critical Use of
Online Information: Digital Performance Assessment and Training
within and across Domains, 259–303.
Springer Nature Switzerland.
BibTeX
@inbook{Mehler:et:al:2024:a,
  author    = {Mehler, Alexander and Bagci, Mevl{\"u}t and Schrottenbacher, Patrick
               and Henlein, Alexander and Konca, Maxim and Abrami, Giuseppe and B{\"o}nisch, Kevin
               and Stoeckel, Manuel and Spiekermann, Christian and Engel, Juliane},
  editor    = {Zlatkin-Troitschanskaia, Olga and Nagel, Marie-Theres and Klose, Verena
               and Mehler, Alexander},
  title     = {Towards New Data Spaces for the Study of Multiple Documents with
               {Va.Si.Li-Lab}: A Conceptual Analysis},
  booktitle = {Students', Graduates' and Young Professionals' Critical Use of
               Online Information: Digital Performance Assessment and Training
               within and across Domains},
  year      = {2024},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  pages     = {259--303},
  abstract  = {The constitution of multiple documents has so far been studied
               essentially as a process in which a single learner consults a
               number (of segments) of different documents in the context of
               the task at hand in order to construct a mental model for the
               purpose of completing the task. As a result of this research focus,
               the constitution of multiple documents appears predominantly as
               a monomodal, non-interactive process in which mainly textual units
               are studied, supplemented by images, text-image relations and
               comparable artifacts. This approach is reflected in the contextual
               fixity of the research design, in which the learners under study
               search for information using suitably equipped computers. If,
               on the other hand, we consider the openness of multi-agent learning
               situations, this scenario lacks the aspects of interactivity,
               contextual openness and, above all, the multimodality of information
               objects, information processing and information exchange. This
               is where the chapter comes in. It describes Va.Si.Li-Lab as an
               instrument for multimodal measurement for studying and modeling
               multiple documents in the context of interactive learning in a
               multi-agent environment. To this end, the chapter places Va.Si.Li-Lab
               in the spectrum of evolutionary approaches that vary the combination
               of human and machine innovation and selection. It also combines
               the requirements of multimodal representational learning with
               various aspects of contextual plasticity to prepare Va.Si.Li-Lab
               as a system that can be used for experimental research. The chapter
               is conceptual in nature, designing a system of requirements using
               the example of Va.Si.Li-Lab to outline an experimental environment
               in which the study of Critical Online Reasoning (COR) as a group
               process becomes possible. Although the chapter illustrates some
               of these requirements with realistic data from the field of simulation-based
               learning, the focus is still conceptual rather than experimental,
               hypothesis-driven. That is, the chapter is concerned with the
               design of a technology for future research into COR processes.},
  isbn      = {978-3-031-69510-0},
  doi       = {10.1007/978-3-031-69510-0_12},
  url       = {https://doi.org/10.1007/978-3-031-69510-0_12}
}
2024.
Visualizing Domain-specific and Generic Critical Online Reasoning
Related Structures of Online Texts: A Hybrid Approach. In: Students', Graduates' and Young Professionals' Critical Use of
Online Information: Digital Performance Assessment and Training
within and across Domains, 195–239.
Springer Nature Switzerland.
BibTeX
@inbook{Konca:et:al:2024:a,
  author    = {Konca, Maxim and Mehler, Alexander and L{\"u}cking, Andy and Baumartz, Daniel},
  editor    = {Zlatkin-Troitschanskaia, Olga and Nagel, Marie-Theres and Klose, Verena
               and Mehler, Alexander},
  title     = {Visualizing Domain-specific and Generic Critical Online Reasoning
               Related Structures of Online Texts: A Hybrid Approach},
  booktitle = {Students', Graduates' and Young Professionals' Critical Use of
               Online Information: Digital Performance Assessment and Training
               within and across Domains},
  year      = {2024},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  pages     = {195--239},
  isbn      = {978-3-031-69510-0},
  doi       = {10.1007/978-3-031-69510-0_10},
  url       = {https://doi.org/10.1007/978-3-031-69510-0_10},
  abstract  = {Besides ``traditional'' educational media, young professionals
               in higher education use the Internet to obtain information. To
               utilize their online research in professional contexts, they critically
               evaluate the information they access and its sources. One dimension
               of this evaluation is an assessment of the linguistic state of
               the online sources, either implicitly or explicitly. This computational
               educational linguistic study applies methods from computational
               linguistics to online sources visited by young professionals from
               three fields (law students, teacher trainees, and medicine student)
               and develops partly novel visualizations that allow to quickly
               discover similarities as well as differences between multi-heterogeneous
               Internet sources, that is, sources that exhibit various topics,
               genres, and textual structure, among others. The visualizations
               also allow a comparison of search behaviour between different
               professional fields. In this way, we found that (1) genre classification
               has a significant impact on reliability scores, (2) young professionals'
               search approaches vary by their professional field, and, (3) the
               best predictor of reliability is indeed the linguistic profile
               of an online source.}
}
2024.
Students’, Graduates’ and Young Professionals’ Critical Use of
Online Information: Digital Performance Assessment and Training
within and across Domains.
Springer Cham.
BibTeX
@book{Zlatkin-Troitschanskaia:et:al:2024,
  title     = {Students', Graduates' and Young Professionals' Critical Use of
               Online Information: Digital Performance Assessment and Training
               within and across Domains},
  editor    = {Zlatkin-Troitschanskaia, Olga and Nagel, Marie-Theres and Klose, Verena
               and Mehler, Alexander},
  isbn      = {9783031695100},
  url       = {https://doi.org/10.1007/978-3-031-69510-0},
  doi       = {10.1007/978-3-031-69510-0},
  publisher = {Springer Cham},
  year      = {2024},
  abstract  = {This book addresses the topic of online information for everyday
               personal and professional use by students, graduates, and young
               professionals. It focuses on the development of the job-related
               use of online information by young professionals in their practical
               phases of education (traineeship/practical year) in the domains
               of law, teaching, and medicine. The research conducted in this
               context investigates the general and domain-specific use of online
               resources in educational contexts and examines the effectiveness
               of an innovative digital training approach in enhancing skills
               required for the competent use of online information. For this
               purpose, the presented research uses a yet unprecedented approach
               of data triangulation, in which self-rated data, digitally and
               in vivo assessed response process data and expert ratings are
               integrated into a theoretically founded assessment framework and
               are examined from various interdisciplinary perspectives with
               different analysis methods. Overall, this work addresses key research
               questions related to the use of online information in practical
               tasks as well as to the impact of digital training. It provides
               in-depth multidisciplinary analyses of multimodal processes and
               performance data, allowing implications equally relevant for practitioners,
               policymakers, and researchers in the field of education.}
}
2024.
BA Thesis: Identifying toxic behaviour in online games.
Goethe University.
BibTeX
@bathesis{schrottenbacher:2024,
  author      = {Schrottenbacher, Patrick},
  title       = {Identifying toxic behaviour in online games},
  institution = {Goethe University},
  pages       = {35},
  year        = {2024},
  url         = {https://publikationen.ub.uni-frankfurt.de/files/81676/Toxic_video_game_classification.pdf},
  repository  = {https://github.com/TheBv/toxic-video-games-gnn}
}
2024.
Virtually Restricting Modalities in Interactions: Va.Si.Li-Lab
for Experimental Multimodal Research. Proceedings of the 2nd International Symposium on Multimodal Communication
(MMSYM 2024), Frankfurt, 25-27 September 2024, 96–97.
BibTeX
@inproceedings{Henlein:Luecking:Mehler:2024,
  author    = {Henlein, Alexander and L{\"u}cking, Andy and Mehler, Alexander},
  title     = {Virtually Restricting Modalities in Interactions: {Va.Si.Li-Lab}
               for Experimental Multimodal Research},
  booktitle = {Proceedings of the 2nd International Symposium on Multimodal Communication
               (MMSYM 2024), Frankfurt, 25-27 September 2024},
  year      = {2024},
  pages     = {96--97},
  pdf       = {http://mmsym.org/wp-content/uploads/2024/09/BookOfAbstractsMMSYM2024-3.pdf}
}
2024.
The Gesture–Prosody Link in Multimodal Grammar. Proceedings of the 2nd International Symposium on Multimodal Communication
(MMSYM 2024), Frankfurt, 25-27 September 2024, 128–129.
BibTeX
@inproceedings{Luecking:Mehler:Henlein:2024,
  author    = {L{\"u}cking, Andy and Mehler, Alexander and Henlein, Alexander},
  title     = {The Gesture–Prosody Link in Multimodal Grammar},
  booktitle = {Proceedings of the 2nd International Symposium on Multimodal Communication
               (MMSYM 2024), Frankfurt, 25-27 September 2024},
  year      = {2024},
  pages     = {128--129},
  pdf       = {http://mmsym.org/wp-content/uploads/2024/09/BookOfAbstractsMMSYM2024-3.pdf}
}
2024.
Swann's name: Towards a Dialogical Brain Semantics. Proceedings of the 28th Workshop on The Semantics and Pragmatics of Dialogue.
BibTeX
@inproceedings{Ginzburg:Eliasmith:Luecking:2024-swann,
  author    = {Ginzburg, Jonathan and Eliasmith, Chris and L{\"u}cking, Andy},
  title     = {Swann's name: {Towards} a Dialogical Brain Semantics},
  year      = {2024},
  booktitle = {Proceedings of the 28th Workshop on The Semantics and Pragmatics of Dialogue},
  series    = {SemDial'24 -- TrentoLogue},
  location  = {Universit{\`a} di Trento, Palazzo Piomarta, Rovereto},
  url       = {https://www.semdial.org/anthology/papers/Z/Z24/Z24-3007/},
  pdf       = {http://semdial.org/anthology/Z24-Ginzburg_semdial_0007.pdf}
}
2024.
The Linguistic Interpretation of Non-emblematic Gestures Must
be agreed in Dialogue: Combining Perceptual Classifiers and Grounding/Clarification
Mechanisms. Proceedings of the 28th Workshop on The Semantics and Pragmatics of Dialogue.
BibTeX
@inproceedings{Luecking:Mehler:Henlein:2024-classifier,
  author    = {L{\"u}cking, Andy and Mehler, Alexander and Henlein, Alexander},
  title     = {The Linguistic Interpretation of Non-emblematic Gestures Must
               be agreed in Dialogue: Combining Perceptual Classifiers and Grounding/Clarification
               Mechanisms},
  year      = {2024},
  booktitle = {Proceedings of the 28th Workshop on The Semantics and Pragmatics of Dialogue},
  series    = {SemDial'24 -- TrentoLogue},
  location  = {Universit{\`a} di Trento, Palazzo Piomarta, Rovereto},
  url       = {https://www.semdial.org/anthology/papers/Z/Z24/Z24-4031/},
  pdf       = {http://semdial.org/anthology/Z24-Lucking_semdial_0031.pdf}
}
Sep., 2024.
On German verb sense disambiguation: A three-part approach based
on linking a sense inventory (GermaNet) to a corpus through annotation
(TGVCorp) and using the corpus to train a VSD classifier (TTvSense). Journal of Language Modelling, 12(1):155–212.
BibTeX
@article{Mattern:Hemati:Lücking:Mehler:2024,
  author   = {Mattern, Dominik and Hemati, Wahed and L{\"u}cking, Andy and Mehler, Alexander},
  title    = {On German verb sense disambiguation: A three-part approach based
              on linking a sense inventory ({GermaNet}) to a corpus through annotation
              ({TGVCorp}) and using the corpus to train a {VSD} classifier ({TTvSense})},
  abstract = {We develop a three-part approach to Verb Sense Disambiguation (VSD) in German. After considering a set of lexical resources and corpora, we arrive at a statistically motivated selection of a subset of verbs and their senses from GermaNet. This sub-inventory is then used to disambiguate the occurrences of the corresponding verbs in a corpus resulting from the union of TüBa-D/Z, Salsa, and E-VALBU. The corpus annotated in this way is called TGVCorp. It is used in the third part of the paper for training a classifier for VSD and for its comparative evaluation with a state-of-the-art approach in this research area, namely EWISER. Our simple classifier outperforms the transformer-based approach on the same data in both accuracy and speed in German but not in English and we discuss possible reasons.},
  journal  = {Journal of Language Modelling},
  volume   = {12},
  number   = {1},
  year     = {2024},
  month    = sep,
  pages    = {155--212},
  url      = {https://jlm.ipipan.waw.pl/index.php/JLM/article/view/356}
}
2024.
Finding Needles in Emb(a)dding Haystacks: Legal Document Retrieval
via Bagging and SVR Ensembles. Proceedings of the 2nd Legal Information Retrieval meets Artificial
Intelligence Workshop LIRAI 2024.
accepted.
BibTeX
@inproceedings{Boenisch:Mehler:2024,
  author    = {B{\"o}nisch, Kevin and Mehler, Alexander},
  title     = {Finding Needles in Emb(a)dding Haystacks: Legal Document Retrieval
               via Bagging and {SVR} Ensembles},
  year      = {2024},
  booktitle = {Proceedings of the 2nd Legal Information Retrieval meets Artificial
               Intelligence Workshop LIRAI 2024},
  location  = {Poznan, Poland},
  publisher = {CEUR-WS.org},
  address   = {Aachen, Germany},
  series    = {CEUR Workshop Proceedings},
  note      = {accepted},
  abstract  = {We introduce a retrieval approach leveraging Support Vector Regression
               (SVR) ensembles, bootstrap aggregation (bagging), and embedding
               spaces on the German Dataset for Legal Information Retrieval (GerDaLIR).
               By conceptualizing the retrieval task in terms of multiple binary
               needle-in-a-haystack subtasks, we show improved recall over the
               baselines (0.849 > 0.803 | 0.829) using our voting ensemble, suggesting
               promising initial results, without training or fine-tuning any
               deep learning models. Our approach holds potential for further
               enhancement, particularly through refining the encoding models
               and optimizing hyperparameters.},
  keywords  = {legal information retrieval, support vector regression, word embeddings, bagging ensemble}
}
2024.
Geo-spatial hypertext in virtual reality: mapping and navigating
global news event spaces. New Review of Hypermedia and Multimedia, 0(0):1–30.
BibTeX
@article{Schrottenbacher:et:al:2024,
  author    = {Schrottenbacher, Patrick and Mehler, Alexander and Berg, Theresa
               and Hustedt, Jasper and Gagel, Julian and L{\"u}ttig, Timo and Abrami, Giuseppe},
  title     = {Geo-spatial hypertext in virtual reality: mapping and navigating
               global news event spaces},
  journal   = {New Review of Hypermedia and Multimedia},
  volume    = {0},
  number    = {0},
  pages     = {1--30},
  year      = {2024},
  publisher = {Taylor \& Francis},
  doi       = {10.1080/13614568.2024.2383601},
  url       = {https://doi.org/10.1080/13614568.2024.2383601},
  eprint    = {https://doi.org/10.1080/13614568.2024.2383601},
  abstract  = {Every day, a myriad of events take place that are documented and
               shared online through news articles from a variety of sources.
               As a result, as users navigate the Web, the volume of data can
               lead to information overload, making it difficult to find specific
               details about an event. We present News in Time and Space (NiTS)
               to address this issue: NiTS is a fully immersive system integrated
               into Va.Si.Li-Lab that organises textual information in a geospatial
               hypertext system in virtual reality. With NiTS, users can visualise,
               filter and interact with information currently based on GDELT
               on a virtual globe providing document networks to analyse global
               events and trends. The article describes NiTS, its event semantics
               and architecture. It evaluates NiTS in comparison to a classic
               search engine website, extended by NiTSs information filtering
               capabilities to make it comparable. Our comparison with this website
               technology, which is directly linked to the user's usage habits,
               shows that NiTS enables comparable information exploration even
               if the users have little or no experience with VR. That is, we
               observe an equivalent search result behaviour, but with the advantage
               that VR allows users to get their results with a higher level
               of usability without distracting them from their tasks. Through
               its integration with Va.Si.Li-Lab, a simulation-based learning
               environment, NiTS can be used in simulations of learning processes
               aimed at studying critical online reasoning, where Va.Si.Li-Lab
               guarantees that this can be done in relation to individual or
               groups of learners.}
}
2024.
Viki LibraRy: Collaborative Hypertext Browsing and Navigation
in Virtual Reality. New Review of Hypermedia and Multimedia, 0(0):1–31.
BibTeX
@article{Boenisch:et:al:2024:b,
  author    = {B{\"o}nisch, Kevin and Mehler, Alexander and Babbili, Shaduan
               and Heinrich, Yannick and Stephan, Philipp and Abrami, Giuseppe},
  title     = {{Viki LibraRy}: Collaborative Hypertext Browsing and Navigation
               in Virtual Reality},
  journal   = {New Review of Hypermedia and Multimedia},
  volume    = {0},
  number    = {0},
  pages     = {1--31},
  year      = {2024},
  publisher = {Taylor \& Francis},
  doi       = {10.1080/13614568.2024.2383581},
  url       = {https://doi.org/10.1080/13614568.2024.2383581},
  eprint    = {https://doi.org/10.1080/13614568.2024.2383581},
  abstract  = {We present Viki LibraRy, a dynamically built library in virtual
               reality (VR) designed to visualize hypertext systems, with an
               emphasis on collaborative interaction and spatial immersion. Viki
               LibraRy goes beyond traditional methods of text distribution by
               providing a platform where users can share, process, and engage
               with textual information. It operates at the interface of VR,
               collaborative learning and spatial data processing to make reading
               tangible and memorable in a spatially mediated way. The article
               describes the building blocks of Viki LibraRy, its underlying
               architecture, and several use cases. It evaluates Viki LibraRy
               in comparison to a conventional web interface for text retrieval
               and reading. The article shows that Viki LibraRy provides users
               with spatial references for structuring their recall, so that
               they can better remember consulted texts and their meta-information
               (e.g. in terms of subject areas and content categories)}
}
2024.
HyperCausal: Visualizing Causal Inference in 3D Hypertext. Proceedings of the 35th ACM Conference on Hypertext and Social Media, 330–336.
BibTeX
@inproceedings{Boenisch:et:al:2024,
  author    = {B{\"o}nisch, Kevin and Stoeckel, Manuel and Mehler, Alexander},
  title     = {{HyperCausal}: Visualizing Causal Inference in {3D} Hypertext},
  year      = {2024},
  isbn      = {9798400705953},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3648188.3677049},
  doi       = {10.1145/3648188.3677049},
  abstract  = {We present HyperCausal, a 3D hypertext visualization framework
               for exploring causal inference in generative Large Language Models
               (LLMs). HyperCausal maps the generative processes of LLMs into
               spatial hypertexts, where tokens are represented as nodes connected
               by probability-weighted edges. The edges are weighted by the prediction
               scores of next tokens, depending on the underlying language model.
               HyperCausal facilitates navigation through the causal space of
               the underlying LLM, allowing users to explore predicted word sequences
               and their branching. Through comparative analysis of LLM parameters
               such as token probabilities and search algorithms, HyperCausal
               provides insight into model behavior and performance. Implemented
               using the Hugging Face transformers library and Three.js, HyperCausal
               ensures cross-platform accessibility to advance research in natural
               language processing using concepts from hypertext research. We
               demonstrate several use cases of HyperCausal and highlight the
               potential for detecting hallucinations generated by LLMs using
               this framework. The connection with hypertext research arises
               from the fact that HyperCausal relies on user interaction to unfold
               graphs with hierarchically appearing branching alternatives in
               3D space. This approach refers to spatial hypertexts and early
               concepts of hierarchical hypertext structures. A third connection
               concerns hypertext fiction, since the branching alternatives mediated
               by HyperCausal manifest non-linearly organized reading threads
               along artificially generated texts that the user decides to follow
               optionally depending on the reading context.},
  booktitle = {Proceedings of the 35th ACM Conference on Hypertext and Social Media},
  pages     = {330--336},
  numpages  = {7},
  keywords  = {3D hypertext, large language models, visualization},
  location  = {Poznan, Poland},
  series    = {HT '24},
  video     = {https://www.youtube.com/watch?v=ANHFTupnKhI}
}
2024.
Measuring Group Creativity of Dialogic Interaction Systems by
Means of Remote Entailment Analysis. Proceedings of the 35th ACM Conference on Hypertext and Social Media, 153–166.
BibTeX
@inproceedings{Baumartz:et:al:2024,
  author    = {Baumartz, Daniel and Konca, Maxim and Mehler, Alexander and Schrottenbacher, Patrick
               and Braunheim, Dominik},
  title     = {Measuring Group Creativity of Dialogic Interaction Systems by
               Means of Remote Entailment Analysis},
  year      = {2024},
  isbn      = {9798400705953},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3648188.3675140},
  doi       = {10.1145/3648188.3675140},
  abstract  = {We present a procedure for assessing group creativity that allows
               us to compare the contributions of human interlocutors and chatbots
               based on generative AI such as ChatGPT. We focus on everyday creativity
               in terms of dialogic communication and test four hypotheses about
               the difference between human and artificial communication. Our
               procedure is based on a test that requires interlocutors to cooperatively
               interpret a sequence of sentences for which we control for coherence
               gaps with reference to the notion of entailment. Using NLP methods,
               we automatically evaluate the spoken or written contributions
               of interlocutors (human or otherwise). The paper develops a routine
               for automatic transcription based on Whisper, for sampling texts
               based on their entailment relations, for analyzing dialogic contributions
               along their semantic embeddings, and for classifying interlocutors
               and interaction systems based on them. In this way, we highlight
               differences between human and artificial conversations under conditions
               that approximate free dialogic communication. We show that despite
               their obvious classificatory differences, it is difficult to see
               clear differences even in the domain of dialogic communication
               given the current instruments of NLP.},
  booktitle = {Proceedings of the 35th ACM Conference on Hypertext and Social Media},
  pages     = {153--166},
  numpages  = {14},
  keywords  = {Creative AI, Creativity, Generative AI, Hermeneutics, NLP},
  location  = {Poznan, Poland},
  series    = {HT '24}
}
2024.
Va.Si.Li-ES: VR-based Dynamic Event Processing, Environment Change
and User Feedback in Va.Si.Li-Lab. Proceedings of the 35th ACM Conference on Hypertext and Social Media, 357–368.
BibTeX
@inproceedings{Abrami:et:al:2024:b,
  author    = {Abrami, Giuseppe and Wontke, Dominik Alexander and Singh, Gurpreet
               and Mehler, Alexander},
  title     = {{Va.Si.Li-ES}: {VR}-based Dynamic Event Processing, Environment Change
               and User Feedback in {Va.Si.Li-Lab}},
  year      = {2024},
  isbn      = {9798400705953},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3648188.3675154},
  doi       = {10.1145/3648188.3675154},
  abstract  = {Flexibility, adaptability, modularity, and extensibility in the
               context of a collaborative system are critical features for multi-user
               hypertext systems. In addition to facilitating acceptance and
               increasing reusability, these features simplify development cycles
               and enable a larger range of application areas. However, especially
               in virtual 3D hypertext systems, many of the features are only
               partially available or not available at all. To fill this gap,
               we present an approach to virtual hypertext systems for the realization
               of dynamic event systems. Such an event system can be created
               and serialized simultaneously at run time regarding the modification
               of situational, environmental parameters. This includes informing
               users and allowing them to participate in the environmental dynamics
               of the system. We present Va.Si.Li-ES as a module of Va.Si.Li-Lab,
               describe several environmental scenarios that can be adapted,
               and provide use cases in the context of 3D hypertext systems.},
  booktitle = {Proceedings of the 35th ACM Conference on Hypertext and Social Media},
  pages     = {357--368},
  numpages  = {12},
  keywords  = {Collaborative Simulation, Environmental Event System, Hypertext, Ubiq, Va.Si.Li-Lab, Virtual Reality},
  location  = {Poznan, Poland},
  series    = {HT '24}
}
2024.
An Outlook for AI Innovation in Multimodal Communication Research. Digital Human Modeling and Applications in Health, Safety, Ergonomics
and Risk Management., 182–234.
BibTeX
@inproceedings{Henlein:et:al:2024-vicom,
  author    = {Henlein, Alexander and Bauer, Anastasia and Bhattacharjee, Reetu
               and Ćwiek, Aleksandra and Kügler, Frank and Lemanski, Jens
               and Lücking, Andy and Mehler, Alexander and Prieto, Pilar and Sánchez-Ramón, Paula G.
               and Schepens, Job and Schulte-Rüther, Martin and Schweinberger, Stefan R.
               and von Eiff, Celina I.},
  editor    = {Duffy, Vincent G.},
  title     = {An Outlook for {AI} Innovation in Multimodal Communication Research},
  year      = {2024},
  booktitle = {Digital Human Modeling and Applications in Health, Safety, Ergonomics
               and Risk Management.},
  series    = {HCII 2024. Lecture Notes in Computer Science},
  publisher = {Springer},
  address   = {Cham},
  pages     = {182--234},
  isbn      = {978-3-031-61066-0}
}
August, 2024.
Efficient, uniform and scalable parallel NLP pre-processing with
DUUI: Perspectives and Best Practice for the Digital Humanities. Digital Humanities Conference 2024 - Book of Abstracts (DH 2024).
BibTeX
@inproceedings{Abrami:Mehler:2024,
  author    = {Abrami, Giuseppe and Mehler, Alexander},
  title     = {Efficient, uniform and scalable parallel {NLP} pre-processing with
               {DUUI}: Perspectives and Best Practice for the Digital Humanities},
  year      = {2024},
  month     = aug,
  editor    = {Karajgikar, Jajwalya and Janco, Andrew and Otis, Jessica},
  booktitle = {Digital Humanities Conference 2024 - Book of Abstracts (DH 2024)},
  location  = {Washington, DC, USA},
  series    = {DH},
  keywords  = {duui},
  publisher = {Zenodo},
  doi       = {10.5281/zenodo.13761079},
  url       = {https://doi.org/10.5281/zenodo.13761079},
  pages     = {15--18},
  numpages  = {4}
}
May, 2024.
Dependencies over Times and Tools (DoTT). Proceedings of the 2024 Joint International Conference on Computational
Linguistics, Language Resources and Evaluation (LREC-COLING 2024), 4641–4653.
BibTeX
@inproceedings{Luecking:et:al:2024,
  author    = {L{\"u}cking, Andy and Abrami, Giuseppe and Hammerla, Leon and Rahn, Marc
               and Baumartz, Daniel and Eger, Steffen and Mehler, Alexander},
  title     = {Dependencies over Times and Tools ({DoTT})},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational
               Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  editor    = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro
               and Sakti, Sakriani and Xue, Nianwen},
  year      = {2024},
  month     = may,
  pages     = {4641--4653},
  publisher = {ELRA and ICCL},
  address   = {Torino, Italy},
  url       = {https://aclanthology.org/2024.lrec-main.415},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2024/05/LREC_2024_Poster_DoTT.pdf},
  abstract  = {Purpose: Based on the examples of English and German, we investigate
               to what extent parsers trained on modern variants of these languages
               can be transferred to older language levels without loss. Methods:
               We developed a treebank called DoTT (https://github.com/texttechnologylab/DoTT)
               which covers, roughly, the time period from 1800 until today,
               in conjunction with the further development of the annotation
               tool DependencyAnnotator. DoTT consists of a collection of diachronic
               corpora enriched with dependency annotations using 3 parsers,
               6 pre-trained language models, 5 newly trained models for German,
               and two tag sets (TIGER and Universal Dependencies). To assess
               how the different parsers perform on texts from different time
               periods, we created a gold standard sample as a benchmark. Results:
               We found that the parsers/models perform quite well on modern
               texts (document-level LAS ranging from 82.89 to 88.54) and slightly
               worse on older texts, as expected (average document-level LAS
               84.60 vs. 86.14), but not significantly. For German texts, the
               (German) TIGER scheme achieved slightly better results than UD.
               Conclusion: Overall, this result speaks for the transferability
               of parsers to past language levels, at least dating back until
               around 1800. This very transferability, it is however argued,
               means that studies of language change in the field of dependency
               syntax can draw on dependency distance but miss out on some grammatical
               phenomena.}
}
May, 2024.
German SRL: Corpus Construction and Model Training. Proceedings of the 2024 Joint International Conference on Computational
Linguistics, Language Resources and Evaluation (LREC-COLING 2024), 7717–7727.
BibTeX
@inproceedings{Konca:et:al:2024,
  author    = {Konca, Maxim and L{\"u}cking, Andy and Mehler, Alexander},
  title     = {{German} {SRL}: Corpus Construction and Model Training},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational
               Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  editor    = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro
               and Sakti, Sakriani and Xue, Nianwen},
  year      = {2024},
  month     = may,
  pages     = {7717--7727},
  publisher = {ELRA and ICCL},
  address   = {Torino, Italy},
  url       = {https://aclanthology.org/2024.lrec-main.682},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2024/05/LREC_2024_Poster_GERMAN_SRL.pdf},
  abstract  = {A useful semantic role-annotated resource for training semantic
               role models for the German language is missing. We point out some
               problems of previous resources and provide a new one due to a
               combined translation and alignment process: The gold standard
               CoNLL-2012 semantic role annotations are translated into German.
               Semantic role labels are transferred due to alignment models.
               The resulting dataset is used to train a German semantic role
               model. With F1-scores around 0.7, the major roles achieve competitive
               evaluation scores, but avoid limitations of previous approaches.
               The described procedure can be applied to other languages as well.}
}
2024.
German Parliamentary Corpus (GerParCor) Reloaded. Proceedings of the 2024 Joint International Conference on Computational
Linguistics, Language Resources and Evaluation (LREC-COLING 2024), 7707–7716.
BibTeX
@inproceedings{Abrami:et:al:2024:a,
  author    = {Abrami, Giuseppe and Bagci, Mevl{\"u}t and Mehler, Alexander},
  title     = {{German} Parliamentary Corpus ({GerParCor}) Reloaded},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational
               Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  editor    = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro
               and Sakti, Sakriani and Xue, Nianwen},
  year      = {2024},
  pages     = {7707--7716},
  publisher = {ELRA and ICCL},
  address   = {Torino, Italy},
  url       = {https://aclanthology.org/2024.lrec-main.681},
  pdf       = {https://aclanthology.org/2024.lrec-main.681.pdf},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2024/05/GerParCor_Reloaded_Poster.pdf},
  video     = {https://www.youtube.com/watch?v=5X-w_oXOAYo},
  keywords  = {gerparcor,corpus},
  abstract  = {In 2022, the largest German-speaking corpus of parliamentary protocols
               from three different centuries, on a national and federal level
               from the countries of Germany, Austria, Switzerland and Liechtenstein,
               was collected and published - GerParCor. Through GerParCor, it
               became possible to provide for the first time various parliamentary
               protocols which were not available digitally and, moreover, could
               not be retrieved and processed in a uniform manner. Furthermore,
               GerParCor was additionally preprocessed using NLP methods and
               made available in XMI format. In this paper, GerParCor is significantly
               updated by including all new parliamentary protocols in the corpus,
               as well as adding and preprocessing further parliamentary protocols
               previously not covered, so that a period up to 1797 is now covered.
               Besides the integration of a new, state-of-the-art and appropriate
               NLP preprocessing for the handling of large text corpora, this
               update also provides an overview of the further reuse of GerParCor
               by presenting various provisioning capabilities such as API's,
               among others.}
}
2023
2023.
A Roadmap for Technological Innovation in Multimodal Communication Research. Digital Human Modeling and Applications in Health, Safety, Ergonomics
and Risk Management, 402–438.
BibTeX
@inproceedings{Gregori:et:al:2023-vicom,
author = {Gregori, Alina and Amici, Federica and Brilmayer, Ingmar and {\'{C}}wiek, Aleksandra
and Fritzsche, Lennart and Fuchs, Susanne and Henlein, Alexander and Herbort, Oliver
and K{\"u}gler, Frank and Lemanski, Jens and Liebal, Katja and L{\"u}cking, Andy
and Mehler, Alexander and Nguyen, Kim Tien and Pouw, Wim and Prieto, Pilar
and Rohrer, Patrick Louis and S{\'a}nchez-Ram{\'o}n, Paula G. and Schulte-R{\"u}ther, Martin
and Schumacher, Petra B. and Schweinberger, Stefan R. and Struckmeier, Volker
and Trettenbrein, Patrick C. and von Eiff, Celina I.},
editor = {Duffy, Vincent G.},
title = {A Roadmap for Technological Innovation in Multimodal Communication Research},
booktitle = {Digital Human Modeling and Applications in Health, Safety, Ergonomics
and Risk Management},
year = {2023},
publisher = {Springer Nature Switzerland},
address = {Cham},
pages = {402--438},
abstract = {Multimodal communication research focuses on how different means
of signalling coordinate to communicate effectively. This line
of research is traditionally influenced by fields such as cognitive
and neuroscience, human-computer interaction, and linguistics.
With new technologies becoming available in fields such as natural
language processing and computer vision, the field can increasingly
avail itself of new ways of analyzing and understanding multimodal
communication. As a result, there is a general hope that multimodal
research may be at the ``precipice of greatness'' due to technological
advances in computer science and resulting extended empirical
coverage. However, for this to come about there must be sufficient
guidance on key (theoretical) needs of innovation in the field
of multimodal communication. Absent such guidance, the research
focus of computer scientists might increasingly diverge from crucial
issues in multimodal communication. With this paper, we want to
further promote interaction between these fields, which may enormously
benefit both communities. The multimodal research community (represented
here by a consortium of researchers from the Visual Communication
[ViCom] Priority Programme) can engage in the innovation by clearly
stating which technological tools are needed to make progress
in the field of multimodal communication. In this article, we
try to facilitate the establishment of a much needed common ground
on feasible expectations (e.g., in terms of terminology and measures
to be able to train machine learning algorithms) and to critically
reflect possibly idle hopes for technical advances, informed by
recent successes and challenges in computer science, social signal
processing, and related domains.},
isbn = {978-3-031-35748-0},
pdf = {https://pure.mpg.de/rest/items/item_3511464_5/component/file_3520176/content}
}
2023.
BA Thesis: Dialog generation using language models.
Goethe University.
BibTeX
@bathesis{boenisch:2023,
author = {B{\"o}nisch, Kevin},
title = {Dialog generation using language models},
institution = {Goethe University},
pages = {28},
year = {2023},
url = {https://publikationen.ub.uni-frankfurt.de/opus4/frontdoor/index/index/docId/79165},
repository = {https://github.com/texttechnologylab/ROBERT}
}
2023.
Bundestags-Mine: Natural Language Processing for Extracting
Key Information from Government Documents. Legal Knowledge and Information Systems.
BibTeX
@inproceedings{Boenisch:et:al:2023,
title = {{Bundestags-Mine}: Natural Language Processing for Extracting
Key Information from Government Documents},
isbn = {9781643684734},
issn = {1879-8314},
url = {http://dx.doi.org/10.3233/FAIA230996},
doi = {10.3233/faia230996},
booktitle = {Legal Knowledge and Information Systems},
publisher = {IOS Press},
author = {B{\"o}nisch, Kevin and Abrami, Giuseppe and Wehnert, Sabine and Mehler, Alexander},
year = {2023}
}
2023.
Unlocking the Heterogeneous Landscape of Big Data NLP with DUUI. Findings of the Association for Computational Linguistics: EMNLP 2023, 385–399.
BibTeX
@inproceedings{Leonhardt:et:al:2023,
title = {Unlocking the Heterogeneous Landscape of Big Data {NLP} with {DUUI}},
author = {Leonhardt, Alexander and Abrami, Giuseppe and Baumartz, Daniel
and Mehler, Alexander},
editor = {Bouamor, Houda and Pino, Juan and Bali, Kalika},
booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2023},
year = {2023},
address = {Singapore},
publisher = {Association for Computational Linguistics},
url = {https://aclanthology.org/2023.findings-emnlp.29},
pages = {385--399},
pdf = {https://aclanthology.org/2023.findings-emnlp.29.pdf},
abstract = {Automatic analysis of large corpora is a complex task, especially
in terms of time efficiency. This complexity is increased by the
fact that flexible, extensible text analysis requires the continuous
integration of ever new tools. Since there are no adequate frameworks
for these purposes in the field of NLP, and especially in the
context of UIMA, that are not outdated or unusable for security
reasons, we present a new approach to address the latter task:
Docker Unified UIMA Interface (DUUI), a scalable, flexible, lightweight,
and feature-rich framework for automatic distributed analysis
of text corpora that leverages Big Data experience and virtualization
with Docker. We evaluate DUUI{'}s communication approach against
a state-of-the-art approach and demonstrate its outstanding behavior
in terms of time efficiency, enabling the analysis of big text
data.},
keywords = {duui}
}
2023.
Proceedings of the 27th Workshop On the Semantics and Pragmatics of Dialogue. SemDial 2023 – MariLogue.
University of Maribor.
BibTeX
@proceedings{SemDial:2023-marilogue,
title = {Proceedings of the 27th Workshop On the Semantics and Pragmatics of Dialogue},
keywords = {own,editor},
year = {2023},
editor = {L{\"u}cking, Andy and Mazzocconi, Chiara and Verdonik, Darinka},
editor+an = {1=highlight},
series = {SemDial 2023 -- MariLogue},
publisher = {University of Maribor},
url = {https://www.semdial.org/anthology/events/semdial-2023/}
}
2023.
Referential Transparency and Inquisitivity. Proceedings of the 4th Workshop on Inquisitiveness Below and Beyond
the Sentence Boundary, 11–20.
BibTeX
@inproceedings{Ginzburg:Luecking:2023-wh,
author = {Ginzburg, Jonathan and L{\"u}cking, Andy},
author+an = {2=highlight},
keywords = {own,conference},
title = {Referential Transparency and Inquisitivity},
booktitle = {Proceedings of the 4th Workshop on Inquisitiveness Below and Beyond
the Sentence Boundary},
series = {InqBnB4'23},
pages = {11--20},
location = {Nancy, France, hosted with IWCS 2023},
year = {2023},
url = {https://aclanthology.org/2023.inqbnb-1.2/},
pdf = {https://aclanthology.org/2023.inqbnb-1.2.pdf}
}
2023.
Towards Referential Transparent Annotations of Quantified Noun Phrases. Proceedings of the 2023 Joint ACL–ISO Workshop on Interoperable
Semantic Annotation, 47–55.
BibTeX
@inproceedings{Luecking:2023-rtt-annotation,
author = {L{\"u}cking, Andy},
author+an = {1=highlight},
keywords = {own,conference},
title = {Towards Referential Transparent Annotations of Quantified Noun Phrases},
booktitle = {Proceedings of the 2023 Joint ACL--ISO Workshop on Interoperable
Semantic Annotation},
series = {ISA-19},
pages = {47--55},
location = {Nancy, France, hosted with IWCS 2023},
year = {2023},
url = {https://aclanthology.org/2023.isa-1.7/},
pdf = {https://aclanthology.org/2023.isa-1.7.pdf}
}
2023.
TTR at the SPA: Relating type-theoretical semantics to neural
semantic pointers. Proceedings of Natural Logic Meets Machine Learning IV.
BibTeX
@inproceedings{Larsson:Cooper:Ginzburg:Luecking:2023-ttr-spa,
author = {Larsson, Staffan and Cooper, Robin and Ginzburg, Jonathan and L{\"u}cking, Andy},
author+an = {4=highlight},
keywords = {own,conference},
title = {{TTR} at the {SPA}: {Relating} type-theoretical semantics to neural
semantic pointers},
booktitle = {Proceedings of Natural Logic Meets Machine Learning IV},
series = {NALOMA'23},
location = {Nancy, France, hosted with IWCS 2023},
year = {2023},
url = {https://aclanthology.org/2023.naloma-1.5/},
pdf = {https://aclanthology.org/2023.naloma-1.5.pdf}
}
2023.
Towards grounding multimodal semantics in interaction data with Va.Si.Li-Lab. Proceedings of the 8th Conference on Gesture and Speech in Interaction (GESPIN).
BibTeX
@inproceedings{Henlein:et:al:2023c,
title = {Towards grounding multimodal semantics in interaction data with Va.Si.Li-Lab},
author = {Henlein, Alexander and L{\"u}cking, Andy and Bagci, Mevl{\"u}t and Mehler, Alexander},
booktitle = {Proceedings of the 8th Conference on Gesture and Speech in Interaction (GESPIN)},
location = {Nijmegen, Netherlands},
year = {2023},
keywords = {vasililab},
pdf = {https://www.gespin2023.nl/documents/talks_and_posters/GeSpIn_2023_papers/GeSpIn_2023_paper_1692.pdf}
}
2023.
Viki LibraRy: A Virtual Reality Library for Collaborative Browsing
and Navigation through Hypertext. Proceedings of the 34th ACM Conference on Hypertext and Social Media.
BibTeX
@inproceedings{Babbili:et:al:2023,
author = {Babbili, Shaduan and B{\"o}nisch, Kevin and Heinrich, Yannick
and Stephan, Philipp and Abrami, Giuseppe and Mehler, Alexander},
title = {Viki LibraRy: A Virtual Reality Library for Collaborative Browsing
and Navigation through Hypertext},
year = {2023},
isbn = {9798400702327},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3603163.3609079},
doi = {10.1145/3603163.3609079},
abstract = {We present Viki LibraRy, a virtual-reality-based system for generating
and exploring online information as a spatial hypertext. It creates
a virtual library based on Wikipedia in which Rooms are used to
make data available via a RESTful backend. In these Rooms, users
can browse through all articles of the corresponding Wikipedia
category in the form of Books. In addition, users can access different
Rooms, through virtual portals. Beyond that, the explorations
can be done alone or collaboratively, using Ubiq.},
booktitle = {Proceedings of the 34th ACM Conference on Hypertext and Social Media},
articleno = {6},
numpages = {3},
keywords = {virtual reality simulation, virtual reality, virtual hypertext, virtual museum},
location = {Rome, Italy},
series = {HT '23},
pdf = {https://dl.acm.org/doi/pdf/10.1145/3603163.3609079}
}
2023.
News in Time and Space: Global Event Exploration in Virtual Reality. Proceedings of the 34th ACM Conference on Hypertext and Social Media.
BibTeX
@inproceedings{Gagel:et:al:2023,
author = {Gagel, Julian and Hustedt, Jasper and L{\"u}ttig, Timo and Berg, Theresa
and Abrami, Giuseppe and Mehler, Alexander},
title = {News in Time and Space: Global Event Exploration in Virtual Reality},
year = {2023},
isbn = {9798400702327},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3603163.3609080},
doi = {10.1145/3603163.3609080},
abstract = {We present News in Time and Space (NiTS), a virtual reality application
for visualization, filtering and interaction with geo-referenced
events based on GDELT. It can be used both via VR glasses and
as a desktop solution for shared use by multiple users with Ubiq.
The aim of NiTS is to provide overviews of global events and trends
in order to create a resource for their monitoring and analysis.},
booktitle = {Proceedings of the 34th ACM Conference on Hypertext and Social Media},
articleno = {7},
numpages = {3},
keywords = {virtual hypertext, human data interaction, spatial computing, virtual reality simulation, geographic information systems, virtual reality},
location = {Rome, Italy},
series = {HT '23},
pdf = {https://dl.acm.org/doi/pdf/10.1145/3603163.3609080}
}
2023.
Va.Si.Li-Lab as a Collaborative Multi-User Annotation Tool in
Virtual Reality and Its Potential Fields of Application. Proceedings of the 34th ACM Conference on Hypertext and Social Media.
BibTeX
@inproceedings{Abrami:et:al:2023,
author = {Abrami, Giuseppe and Mehler, Alexander and Bagci, Mevl{\"u}t and Schrottenbacher, Patrick
and Henlein, Alexander and Spiekermann, Christian and Engel, Juliane
and Schreiber, Jakob},
title = {Va.Si.Li-Lab as a Collaborative Multi-User Annotation Tool in
Virtual Reality and Its Potential Fields of Application},
year = {2023},
isbn = {9798400702327},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3603163.3609076},
doi = {10.1145/3603163.3609076},
abstract = {During the last thirty years a variety of hypertext approaches
and virtual environments -- some virtual hypertext environments
-- have been developed and discussed. Although the development
of virtual and augmented reality technologies is rapid and improving,
and many technologies can be used at affordable conditions, their
usability for hypertext systems has not yet been explored. At
the same time, even for virtual three-dimensional virtual and
augmented environments, there is no generally accepted concept
that is similar or nearly as elegant as hypertext. This gap will
have to be filled in the next years and a good concept should
be developed; in this article we aim to contribute in this direction
and also introduce a prototype for a possible implementation of
criteria for virtual hypertext simulations.},
booktitle = {Proceedings of the 34th ACM Conference on Hypertext and Social Media},
articleno = {22},
numpages = {9},
keywords = {VaSiLiLab, virtual hypertext, virtual reality, virtual reality simulation, authoring system},
location = {Rome, Italy},
series = {HT '23},
pdf = {https://dl.acm.org/doi/pdf/10.1145/3603163.3609076}
}
2023.
PhD Thesis: Toward context-based text-to-3D scene generation.
BibTeX
@phdthesis{Henlein:2023,
author = {Henlein, Alexander},
title = {Toward context-based text-to-3D scene generation},
type = {{PhD} Thesis},
pages = {199},
school = {Johann Wolfgang Goethe-Universit{\"a}t},
doi = {10.21248/gups.73448},
year = {2023},
pdf = {https://publikationen.ub.uni-frankfurt.de/files/73448/main.pdf}
}
2023.
Grounding human-object interaction to affordance behavior in multimodal datasets. Frontiers in Artificial Intelligence, 6.
BibTeX
@article{Henlein:et:al:2023a,
author = {Henlein, Alexander and Gopinath, Anju and Krishnaswamy, Nikhil
and Mehler, Alexander and Pustejovsky, James},
doi = {10.3389/frai.2023.1084740},
issn = {2624-8212},
journal = {Frontiers in Artificial Intelligence},
title = {Grounding human-object interaction to affordance behavior in multimodal datasets},
url = {https://www.frontiersin.org/articles/10.3389/frai.2023.1084740},
volume = {6},
year = {2023}
}
2023.
Semantic Scene Builder: Towards a Context Sensitive Text-to-3D Scene Framework. Digital Human Modeling and Applications in Health, Safety, Ergonomics
and Risk Management, 461–479.
BibTeX
@inproceedings{Henlein:et:al:2023b,
author = {Henlein, Alexander and Kett, Attila and Baumartz, Daniel and Abrami, Giuseppe
and Mehler, Alexander and Bastian, Johannes and Blecher, Yannic and Budgenhagen, David
and Christof, Roman and Ewald, Tim-Oliver and Fauerbach, Tim and Masny, Patrick
and Mende, Julian and Schn{\"u}re, Paul and Viel, Marc},
editor = {Duffy, Vincent G.},
title = {Semantic Scene Builder: Towards a Context Sensitive Text-to-3D Scene Framework},
booktitle = {Digital Human Modeling and Applications in Health, Safety, Ergonomics
and Risk Management},
year = {2023},
publisher = {Springer Nature Switzerland},
address = {Cham},
pages = {461--479},
abstract = {We introduce Semantic Scene Builder (SeSB), a VR-based text-to-3D
scene framework using SemAF (Semantic Annotation Framework) as
a scheme for annotating discourse structures. SeSB integrates
a variety of tools and resources by using SemAF and UIMA as a
unified data structure to generate 3D scenes from textual descriptions.
Based on VR, SeSB allows its users to change annotations through
body movements instead of symbolic manipulations: from annotations
in texts to corrections in editing steps to adjustments in generated
scenes, all this is done by grabbing and moving objects. We evaluate
SeSB in comparison with a state-of-the-art open source text-to-scene
method (the only one which is publicly available) and find that
our approach not only performs better, but also allows for modeling
a greater variety of scenes.},
isbn = {978-3-031-35748-0},
doi = {10.1007/978-3-031-35748-0_32}
}
2023.
A Multimodal Data Model for Simulation-Based Learning with Va.Si.Li-Lab. Digital Human Modeling and Applications in Health, Safety, Ergonomics
and Risk Management, 539–565.
BibTeX
@inproceedings{Mehler:et:al:2023:a,
abstract = {Simulation-based learning is a method in which learners learn
to master real-life scenarios and tasks from simulated application
contexts. It is particularly suitable for the use of VR technologies,
as these allow immersive experiences of the targeted scenarios.
VR methods are also relevant for studies on online learning, especially
in groups, as they provide access to a variety of multimodal learning
and interaction data. However, VR leads to a trade-off between
technological conditions of the observability of such data and
the openness of learner behavior. We present Va.Si.Li-Lab, a VR-Lab
for Simulation-based Learning developed to address this trade-off.
Va.Si.Li-Lab uses a graph-theoretical model based on hypergraphs
to represent the data diversity of multimodal learning and interaction.
We develop this data model in relation to mono- and multimodal,
intra- and interpersonal data and interleave it with ISO-Space
to describe distributed multiple documents from the perspective
of their interactive generation. The paper adds three use cases
to motivate the broad applicability of Va.Si.Li-Lab and its data
model.},
address = {Cham},
author = {Mehler, Alexander and Bagci, Mevl{\"u}t and Henlein, Alexander
and Abrami, Giuseppe and Spiekermann, Christian and Schrottenbacher, Patrick
and Konca, Maxim and L{\"u}cking, Andy and Engel, Juliane and Quintino, Marc
and Schreiber, Jakob and Saukel, Kevin and Zlatkin-Troitschanskaia, Olga},
booktitle = {Digital Human Modeling and Applications in Health, Safety, Ergonomics
and Risk Management},
editor = {Duffy, Vincent G.},
isbn = {978-3-031-35741-1},
pages = {539--565},
publisher = {Springer Nature Switzerland},
title = {A Multimodal Data Model for Simulation-Based Learning with Va.Si.Li-Lab},
year = {2023},
doi = {10.1007/978-3-031-35741-1_39}
}
2022
2022.
Introduction to the 2nd Edition of “Semantic, Artificial and
Computational Interaction Studies”. HCI International 2022 - Late Breaking Papers. Multimodality in
Advanced Interaction Environments, 36–47.
BibTeX
@inproceedings{Ebert:et:al:2022,
abstract = {``Behavioromics'' is a term that has been invented to cover the
study of multimodal interaction from various disciplines and points
of view. These disciplines and points of view, however, lack a
platform for exchange. The workshop session on ``Semantic, artificial
and computational interaction studies'' provides such a platform.
We motivate behavioromics, sketch its historical background, and
summarize this year's contributions.},
address = {Cham},
author = {Ebert, Cornelia and L{\"u}cking, Andy and Mehler, Alexander},
booktitle = {HCI International 2022 - Late Breaking Papers. Multimodality in
Advanced Interaction Environments},
editor = {Kurosu, Masaaki and Yamamoto, Sakae and Mori, Hirohiko and Schmorrow, Dylan D.
and Fidopiastis, Cali M. and Streitz, Norbert A. and Konomi, Shin'ichi},
isbn = {978-3-031-17618-0},
pages = {36--47},
publisher = {Springer Nature Switzerland},
title = {Introduction to the 2nd Edition of ``Semantic, Artificial and
Computational Interaction Studies''},
doi = {10.1007/978-3-031-17618-0_3},
year = {2022}
}
October, 2022.
Tafsir Dataset: A Novel Multi-Task Benchmark for Named Entity
Recognition and Topic Modeling in Classical Arabic Literature. Proceedings of the 29th International Conference on Computational Linguistics, 3753–3768.
BibTeX
@inproceedings{Ahmed:et:al:2022,
title = {Tafsir Dataset: A Novel Multi-Task Benchmark for Named Entity
Recognition and Topic Modeling in Classical {A}rabic Literature},
author = {Ahmed, Sajawel and van der Goot, Rob and Rehman, Misbahur and Kruse, Carl
and {\"O}zsoy, {\"O}mer and Mehler, Alexander and Roig, Gemma},
booktitle = {Proceedings of the 29th International Conference on Computational Linguistics},
month = oct,
year = {2022},
address = {Gyeongju, Republic of Korea},
publisher = {International Committee on Computational Linguistics},
url = {https://aclanthology.org/2022.coling-1.330},
pages = {3753--3768},
abstract = {Various historical languages, which used to be lingua franca of
science and arts, deserve the attention of current NLP research.
In this work, we take the first data-driven steps towards this
research line for Classical Arabic (CA) by addressing named entity
recognition (NER) and topic modeling (TM) on the example of CA
literature. We manually annotate the encyclopedic work of Tafsir
Al-Tabari with span-based NEs, sentence-based topics, and span-based
subtopics, thus creating the Tafsir Dataset with over 51,000 sentences,
the first large-scale multi-task benchmark for CA. Next, we analyze
our newly generated dataset, which we make open-source available,
with current language models (lightweight BiLSTM, transformer-based
MaChAmP) along a novel script compression method, thereby achieving
state-of-the-art performance for our target task CA-NER. We also
show that CA-TM from the perspective of historical topic models,
which are central to Arabic studies, is very challenging. With
this interdisciplinary work, we lay the foundations for future
research on automatic analysis of CA literature.}
}
2022.
The Integrated Model of Memory: A Dialogical Perspective. Proceedings of SemDial 2022, 6–17.
BibTeX
@inproceedings{Ginzburg:Luecking:2022:a,
title = {The Integrated Model of Memory: {A} Dialogical Perspective},
author = {Ginzburg, Jonathan and L{\"u}cking, Andy},
booktitle = {Proceedings of SemDial 2022},
series = {SemDial 2022 -- DubDial},
location = {Dublin, Ireland},
year = {2022},
editor = {Gregoromichelaki, Eleni and Hough, Julian and Kelleher, John D.},
pages = {6--17},
url = {https://www.semdial.org/anthology/papers/Z/Z22/Z22-3004/},
pdf = {http://semdial.org/anthology/Z22-Ginzburg_semdial_0004.pdf}
}
2022.
Leading voices: Dialogue semantics, cognitive science, and the
polyphonic structure of multimodal interaction. Language and Cognition.
BibTeX
@article{Luecking:Ginzburg:2022:b,
title = {Leading voices: {Dialogue} semantics, cognitive science, and the
polyphonic structure of multimodal interaction},
author = {L{\"u}cking, Andy and Ginzburg, Jonathan},
journal = {Language and Cognition},
year = {2022},
doi = {10.1017/langcog.2022.30}
}
2022.
How to repair a slip of the tongue?. Proceedings of SemDial 2022, 35–46.
BibTeX
@inproceedings{Luecking:Ginzburg:2022:a,
title = {How to repair a slip of the tongue?},
author = {L{\"u}cking, Andy and Ginzburg, Jonathan},
booktitle = {Proceedings of SemDial 2022},
series = {SemDial 2022 -- DubDial},
location = {Dublin, Ireland},
year = {2022},
editor = {Gregoromichelaki, Eleni and Hough, Julian and Kelleher, John D.},
pages = {35--46},
url = {https://www.semdial.org/anthology/papers/Z/Z22/Z22-3007/},
pdf = {http://semdial.org/anthology/Z22-Lücking_semdial_0007.pdf}
}
April, 2022.
Computational educational linguistics for `Critical Online Reasoning'
among young professionals in medicine, law and teaching.
BibTeX
@misc{Konca:et:al:2022,
author = {Konca, Maxim and L{\"u}cking, Andy and Mehler, Alexander and Nagel, Marie-Theres
and Zlatkin-Troitschanskaia, Olga},
howpublished = {Presentation given at the AERA annual meeting, 21.-26.04. 2022, WERA symposium},
month = apr,
title = {Computational educational linguistics for `Critical Online Reasoning'
among young professionals in medicine, law and teaching},
year = {2022},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2022/10/BRIDGE_WERA_AERA-2022_reduce.pdf}
}
2022.
Referential transparency as the proper treatment of quantification. Semantics and Pragmatics, 15.
March, 2022.
On latent domain-specific textual preferences in solving Internet-based
generic tasks among graduates/young professionals from three domains.
BibTeX
@misc{Mehler:et:al:2022,
author = {Mehler, Alexander and Konca, Maxim and Nagel, Marie-Theres and L\"{u}cking, Andy
and Zlatkin-Troitschanskaia, Olga},
year = {2022},
month = mar,
howpublished = {Presentation at BEBF 2022},
title = {On latent domain-specific textual preferences in solving Internet-based
generic tasks among graduates/young professionals from three domains},
abstract = {Although Critical Online Reasoning (COR) is often viewed as a
general competency (e.g. Alexander et al. 2016), studies have
found evidence supporting their domain-specificity (Toplak et
al. 2002). To investigate this assumption, we focus on commonalities
and differences in textual preferences in solving COR-related
tasks between graduates/young professionals from three domains.
For this reason, we collected data by requiring participants to
solve domain-specific (DOM-COR) and generic (GEN-COR) tasks in
an authentic Internet-based COR performance assessment (CORA),
allowing us to disentangle the assumed components of COR abilities.
Here, we focus on GEN-COR to distinguish between different groups
of graduates from the three disciplines in the context of generic
COR tasks. We present a computational model for educationally
relevant texts that combines features at multiple levels (lexical,
syntactic, semantic). We use machine learning to predict domain-specific
group membership based on documents consulted during task solving.
A major contribution of our analyses is a multi-part text classification
system that contrasts human annotation and rating of the documents
used with a semi-automatic classification to predict the document
type of web pages. That is, we work with competing classifications
to support our findings. In this way, we develop a computational
linguistic model that correlates GEN-COR abilities with properties
of documents consulted for solving the GEN-COR tasks. Results
show that participants from different domains indeed inquire different
sets of online sources for the same task. Machine learning-based
classifications show that the distributional differences can be
reproduced by computational linguistic models.},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2022/04/On_latent_domain-specific_textual_preferences_in_solving_Internet-based_generic_tasks_among_graduates__young_professionals_from_three_domains.pdf}
}
2022.
What do Toothbrushes do in the Kitchen? How Transformers Think
our World is Structured. Proceedings of the 2022 Conference of the North American Chapter
of the Association for Computational Linguistics: Human Language
Technologies, 5791–5807.
BibTeX
@inproceedings{Henlein:Mehler:2022,
title = {What do Toothbrushes do in the Kitchen? How Transformers Think
our World is Structured},
author = {Henlein, Alexander and Mehler, Alexander},
booktitle = {Proceedings of the 2022 Conference of the North American Chapter
of the Association for Computational Linguistics: Human Language
Technologies},
year = {2022},
address = {Seattle, United States},
publisher = {Association for Computational Linguistics},
url = {https://aclanthology.org/2022.naacl-main.425},
doi = {10.18653/v1/2022.naacl-main.425},
pages = {5791--5807},
abstract = {Transformer-based models are now predominant in NLP. They outperform
approaches based on static models in many respects. This success
has in turn prompted research that reveals a number of biases
in the language models generated by transformers. In this paper
we utilize this research on biases to investigate to what extent
transformer-based language models allow for extracting knowledge
about object relations (X occurs in Y; X consists of Z; action
A involves using X). To this end, we compare contextualized models
with their static counterparts. We make this comparison dependent
on the application of a number of similarity measures and classifiers.
Our results are threefold: Firstly, we show that the models combined
with the different similarity measures differ greatly in terms
of the amount of knowledge they allow for extracting. Secondly,
our results suggest that similarity measures perform much worse
than classifier-based approaches. Thirdly, we show that, surprisingly,
static models perform almost as well as contextualized models
{--} in some cases even better.}
}
2022.
German Parliamentary Corpus (GerParCor). Proceedings of the Language Resources and Evaluation Conference, 1900–1906.
BibTeX
@inproceedings{Abrami:Bagci:Hammerla:Mehler:2022,
author = {Abrami, Giuseppe and Bagci, Mevl{\"u}t and Hammerla, Leon and Mehler, Alexander},
title = {German Parliamentary Corpus (GerParCor)},
booktitle = {Proceedings of the Language Resources and Evaluation Conference},
year = {2022},
address = {Marseille, France},
publisher = {European Language Resources Association},
pages = {1900--1906},
abstract = {Parliamentary debates represent a large and partly unexploited
treasure trove of publicly accessible texts. In the German-speaking
area, there is a certain deficit of uniformly accessible and annotated
corpora covering all German-speaking parliaments at the national
and federal level. To address this gap, we introduce the German
Parliamentary Corpus (GerParCor). GerParCor is a genre-specific
corpus of (predominantly historical) German-language parliamentary
protocols from three centuries and four countries, including state
and federal level data. In addition, GerParCor contains conversions
of scanned protocols and, in particular, of protocols in Fraktur
converted via an OCR process based on Tesseract. All protocols
were preprocessed by means of the NLP pipeline of spaCy3 and automatically
annotated with metadata regarding their session date. GerParCor
is made available in the XMI format of the UIMA project. In this
way, GerParCor can be used as a large corpus of historical texts
in the field of political communication for various tasks in NLP.},
url = {https://aclanthology.org/2022.lrec-1.202},
poster = {https://www.texttechnologylab.org/wp-content/uploads/2022/06/GerParCor_LREC_2022.pdf},
keywords = {gerparcor},
pdf = {http://www.lrec-conf.org/proceedings/lrec2022/pdf/2022.lrec-1.202.pdf}
}
2022.
I still have Time(s): Extending HeidelTime for German Texts. Proceedings of the 13th Language Resources and Evaluation Conference.
BibTeX
@inproceedings{Luecking:Stoeckel:Abrami:Mehler:2022,
author = {L{\"u}cking, Andy and Stoeckel, Manuel and Abrami, Giuseppe and Mehler, Alexander},
title = {I still have Time(s): Extending {HeidelTime} for {German} Texts},
booktitle = {Proceedings of the 13th Language Resources and Evaluation Conference},
series = {LREC 2022},
location = {Marseille, France},
year = {2022},
url = {https://aclanthology.org/2022.lrec-1.505},
pdf = {https://aclanthology.org/2022.lrec-1.505.pdf}
}
2021
2021.
Requesting clarifications with speech and gestures. Proceedings of the 1st Workshop on Multimodal Semantic Representations, 21–31.
BibTeX
@inproceedings{Ginzburg:Luecking:2021-clarifications,
title = {Requesting clarifications with speech and gestures},
author = {Ginzburg, Jonathan and L{\"u}cking, Andy},
series = {MMSR},
year = {2021},
booktitle = {Proceedings of the 1st Workshop on Multimodal Semantic Representations},
location = {Groningen, Netherlands (Online)},
publisher = {Association for Computational Linguistics},
url = {https://aclanthology.org/2021.mmsr-1.3},
pdf = {https://aclanthology.org/2021.mmsr-1.3.pdf},
pages = {21--31},
abstract = {In multimodal natural language interaction both speech and non-speech
gestures are involved in the basic mechanism of grounding and
repair. We discuss a couple of multimodal clarification requests
and argue that gestures, as well as speech expressions, underlie
comparable parallelism constraints. In order to make this precise,
we slightly extend the formal dialogue framework KoS to cover
also gestural counterparts of verbal locutionary propositions.}
}
2021.
From distinguishability to informativity. A quantitative text
model for detecting random texts.. Language and Text: Data, models, information and applications, 356:145–162.
BibTeX
@article{Konca:et:al:2021,
title = {From distinguishability to informativity. A quantitative text
model for detecting random texts.},
author = {Konca, Maxim and Mehler, Alexander and Baumartz, Daniel and Hemati, Wahed},
journal = {Language and Text: Data, models, information and applications},
volume = {356},
pages = {145--162},
year = {2021},
editor = {Paw{\l}owski, Adam and Ma{\v{c}}utek, Jan and Embleton, Sheila and Mikros, George},
publisher = {John Benjamins Publishing Company},
doi = {10.1075/cilt.356.10kon}
}
November, 2021.
On the asymptotic behavior of the average geodesic distance L
and the compactness CB of simple connected undirected graphs whose
order approaches infinity. PLOS ONE, 16(11):1–13.
BibTeX
@article{Lokot:Abramov:Mehler:2021,
doi = {10.1371/journal.pone.0259776},
author = {Lokot, Tatiana and Abramov, Olga and Mehler, Alexander},
journal = {PLOS ONE},
publisher = {Public Library of Science},
title = {On the asymptotic behavior of the average geodesic distance L
and the compactness CB of simple connected undirected graphs whose
order approaches infinity},
year = {2021},
month = nov,
volume = {16},
url = {https://doi.org/10.1371/journal.pone.0259776},
pages = {1--13},
abstract = {The average geodesic distance L Newman (2003) and the compactness
CB Botafogo (1992) are important graph indices in applications
of complex network theory to real-world problems. Here, for simple
connected undirected graphs G of order n, we study the behavior
of L(G) and CB(G), subject to the condition that their order |V(G)|
approaches infinity. We prove that the limit of L(G)/n and CB(G)
lies within the interval [0;1/3] and [2/3;1], respectively. Moreover,
for any not necessarily rational number β ∈ [0;1/3] (α ∈ [2/3;1])
we show how to construct the sequence of graphs {G}, |V(G)| =
n → ∞, for which the limit of L(G)/n (CB(G)) is exactly β (α)
(Theorems 1 and 2). Based on these results, our work points to
novel classification possibilities of graphs at the node level
as well as to the information-theoretic classification of the
structural complexity of graph indices.},
number = {11}
}
2021.
SemioGraphs: Visualizing Topic Networks as Multi-Codal Graphs. International Quantitative Linguistics Conference (QUALICO 2021).
BibTeX
@inproceedings{Mehler:Uslu:Baumartz:2021,
author = {Mehler, Alexander and Baumartz, Daniel and Uslu, Tolga},
title = {{SemioGraphs:} Visualizing Topic Networks as Multi-Codal Graphs},
booktitle = {International Quantitative Linguistics Conference (QUALICO 2021)},
series = {QUALICO 2021},
location = {Tokyo, Japan},
year = {2021},
poster = {https://www.texttechnologylab.org/files/Qualico_2021_Semiograph_Poster.pdf}
}
2021.
Grammar in dialogue. Head Driven Phrase Structure Grammar: The handbook, 1155–1199.
BibTeX
@incollection{Luecking:Ginzburg:Cooper:2021,
author = {L{\"u}cking, Andy and Ginzburg, Jonathan and Cooper, Robin},
title = {Grammar in dialogue},
chapter = {26},
pages = {1155--1199},
url = {https://langsci-press.org/catalog/book/259},
editor = {M{\"u}ller, Stefan and Abeill{\'e}, Anne and Borsley, Robert D.
and Koenig, Jean-Pierre},
booktitle = {{Head Driven Phrase Structure Grammar: The handbook}},
year = {2021},
series = {Empirically Oriented Theoretical Morphology and
Syntax},
number = {9},
address = {Berlin},
publisher = {Language Science Press},
doi = {10.5281/zenodo.5543318}
}
2021.
Gesture. Head Driven Phrase Structure Grammar: The handbook, 1201–1250.
BibTeX
@incollection{Luecking:2021,
author = {L{\"u}cking, Andy},
title = {Gesture},
pages = {1201--1250},
chapter = {27},
url = {https://langsci-press.org/catalog/book/259},
editor = {M{\"u}ller, Stefan and Abeill{\'e}, Anne and Borsley, Robert D.
and Koenig, Jean-Pierre},
booktitle = {{Head Driven Phrase Structure Grammar: The handbook}},
year = {2021},
series = {Empirically Oriented Theoretical Morphology and
Syntax},
number = {9},
address = {Berlin},
publisher = {Language Science Press},
doi = {10.5281/zenodo.5543318}
}
2021.
Saying and shaking `No'. Proceedings of the 28th International Conference on Head-Driven
Phrase Structure Grammar, Online (Frankfurt/Main), 283–299.
BibTeX
@inproceedings{Luecking:Ginzburg:2021:a,
author = {L{\"u}cking, Andy and Ginzburg, Jonathan},
title = {Saying and shaking `No'},
booktitle = {{Proceedings of the 28th International Conference on Head-Driven
Phrase Structure Grammar, Online (Frankfurt/Main)}},
editor = {M{\"u}ller, Stefan and Melnik, Nurit},
issn = {1535-1793},
doi = {10.21248/hpsg.2021.15},
publisher = {University Library},
address = {Frankfurt/Main},
pages = {283--299},
year = {2021}
}
2021.
Requesting clarifications with speech and gestures. Beyond Language: Multimodal Semantic Representations.
BibTeX
@inproceedings{Ginzburg:Luecking:2021:a,
title = {Requesting clarifications with speech and gestures},
author = {Ginzburg, Jonathan and L{\"u}cking, Andy},
booktitle = {Beyond Language: Multimodal Semantic Representations},
series = {MMSR I},
year = {2021},
location = {Virtually at the University of Groningen, held in
conjunction with IWCS 2021},
url = {https://iwcs2021.github.io/proceedings/mmsr/pdf/2021.mmsr-1.3.pdf}
}
2021.
I thought pointing is rude: A dialogue-semantic analysis of
pointing at the addressee. Proceedings of Sinn und Bedeutung 25, 276–291.
Special Session: Gestures and Natural Language
Semantics.
BibTeX
@inproceedings{Ginzburg:Luecking:2021:b,
author = {Ginzburg, Jonathan and L{\"u}cking, Andy},
title = {I thought pointing is rude: {A} dialogue-semantic analysis of
pointing at the addressee},
booktitle = {Proceedings of \textit{Sinn und Bedeutung 25}},
series = {SuB 25},
year = {2021},
pages = {276--291},
editor = {Grosz, Patrick and Mart{\'i}, Luisa and Pearson, Hazel and Sudo, Yasutada
and Zobel, Sarah},
note = {Special Session: Gestures and Natural Language
Semantics},
location = {University College London (Online)},
url = {https://ojs.ub.uni-konstanz.de/sub/index.php/sub/article/view/937}
}
2021.
Digital Learning, Teaching and Collaboration in an Era of ubiquitous Quarantine. Remote Learning in Times of Pandemic - Issues, Implications and Best Practice.
BibTeX
@incollection{Henlein:et:al:2021,
author = {Alexander Henlein and Giuseppe Abrami and Attila Kett and Christian Spiekermann
and Alexander Mehler},
title = {Digital Learning, Teaching and Collaboration in an Era of ubiquitous Quarantine},
editor = {Linda Daniela and Anna Visvizi},
booktitle = {Remote Learning in Times of Pandemic - Issues, Implications and Best Practice},
publisher = {Routledge},
address = {Thames, Oxfordshire, England, UK},
year = {2021},
chapter = {3}
}
2021.
Multiple Annotation for Biodiversity: Developing an annotation
framework among biology, linguistics and text technology. Language Resources and Evaluation.
BibTeX
@article{Luecking:et:al:2021,
author = {Andy L{\"u}cking and Christine Driller and Manuel Stoeckel and Giuseppe Abrami
and Adrian Pachzelt and Alexander Mehler},
year = {2021},
journal = {Language Resources and Evaluation},
title = {Multiple Annotation for Biodiversity: Developing an annotation
framework among biology, linguistics and text technology},
editor = {Nancy Ide and Nicoletta Calzolari},
doi = {10.1007/s10579-021-09553-5},
pdf = {https://link.springer.com/content/pdf/10.1007/s10579-021-09553-5.pdf},
keywords = {biofid}
}
2021.
Multi-Type-TD-TSR - Extracting Tables from Document Images using
a Multi-stage Pipeline for Table Detection and Table Structure
Recognition: from OCR to Structured Table Representations. Proceedings of the 44th German Conference on Artificial Intelligence.
BibTeX
@inproceedings{Fischer:et:al:2021,
author = {Fischer, Pascal and Smajic, Alen and Abrami, Giuseppe and Mehler, Alexander},
title = {Multi-Type-TD-TSR - Extracting Tables from Document Images using
a Multi-stage Pipeline for Table Detection and Table Structure
Recognition: from OCR to Structured Table Representations},
booktitle = {Proceedings of the 44th German Conference on Artificial Intelligence},
series = {KI2021},
location = {Berlin, Germany},
year = {2021},
url = {https://www.springerprofessional.de/multi-type-td-tsr-extracting-tables-from-document-images-using-a/19711570},
pdf = {https://arxiv.org/pdf/2105.11021.pdf}
}
June, 2021.
VoxML Annotation Tool Review and Suggestions for Improvement. Proceedings of the Seventeenth Joint ACL - ISO Workshop on Interoperable
Semantic Annotation (ISA-17, Note for special track on visual
information annotation).
BibTeX
@inproceedings{Klement:et:al:2021,
author = {Klement, Mark and Henlein, Alexander and Mehler, Alexander},
title = {VoxML Annotation Tool Review and Suggestions for Improvement},
booktitle = {Proceedings of the Seventeenth Joint ACL - ISO Workshop on Interoperable
Semantic Annotation (ISA-17, Note for special track on visual
information annotation)},
series = {ISA-17},
location = {Groningen, Netherlands},
month = jun,
year = {2021},
pdf = {https://sigsem.uvt.nl/isa17/32_Klement-Paper.pdf}
}
June, 2021.
Unleashing annotations with TextAnnotator: Multimedia, multi-perspective
document views for ubiquitous annotation. Proceedings of the 17th Joint ACL - ISO Workshop on Interoperable
Semantic Annotation, 65–75.
BibTeX
@inproceedings{Abrami:et:al:2021,
author = {Abrami, Giuseppe and Henlein, Alexander and L{\"u}cking, Andy and Kett, Attila
and Adeberg, Pascal and Mehler, Alexander},
title = {Unleashing annotations with {TextAnnotator}: Multimedia, multi-perspective
document views for ubiquitous annotation},
booktitle = {Proceedings of the 17th Joint ACL - ISO Workshop on Interoperable
Semantic Annotation},
series = {ISA-17},
publisher = {Association for Computational Linguistics},
address = {Groningen, The Netherlands (online)},
month = jun,
editor = {Bunt, Harry},
year = {2021},
url = {https://aclanthology.org/2021.isa-1.7},
pages = {65--75},
keywords = {textannotator},
pdf = {https://iwcs2021.github.io/proceedings/isa/pdf/2021.isa-1.7.pdf},
abstract = {We argue that mainly due to technical innovation in the landscape
of annotation tools, a conceptual change in annotation models
and processes is also on the horizon. It is diagnosed that these
changes are bound up with multi-media and multi-perspective facilities
of annotation tools, in particular when considering virtual reality
(VR) and augmented reality (AR) applications, their potential
ubiquitous use, and the exploitation of externally trained natural
language pre-processing methods. Such developments potentially
lead to a dynamic and exploratory heuristic construction of the
annotation process. With TextAnnotator an annotation suite is
introduced which focuses on multi-mediality and multi-perspectivity
with an interoperable set of task-specific annotation modules
(e.g., for word classification, rhetorical structures, dependency
trees, semantic roles, and more) and their linkage to VR and mobile
implementations. The basic architecture and usage of TextAnnotator
is described and related to the above mentioned shifts in the
field.}
}
2021.
Computational linguistic assessment of textbooks and online texts
by means of threshold concepts in economics. Frontiers in Education.
BibTeX
@article{Luecking:Brueckner:Abrami:Uslu:Mehler:2021,
journal = {Frontiers in Education},
doi = {10.3389/feduc.2020.578475},
title = {Computational linguistic assessment of textbooks and online texts
by means of threshold concepts in economics},
author = {L{\"u}cking, Andy and Br{\"u}ckner, Sebastian and Abrami, Giuseppe
and Uslu, Tolga and Mehler, Alexander},
eid = {578475},
url = {https://www.frontiersin.org/articles/10.3389/feduc.2020.578475/},
year = {2021}
}
2020
2020.
PhD Thesis: Multi-document analysis : semantic analysis of large text corpora
beyond topic modeling.
BibTeX
@phdthesis{Uslu:2020,
author = {Tolga Uslu},
title = {Multi-document analysis : semantic analysis of large text corpora
beyond topic modeling},
pages = {204},
year = {2020},
school = {Johann Wolfgang Goethe-Universit{\"a}t Frankfurt am Main},
url = {http://publikationen.ub.uni-frankfurt.de/frontdoor/index/index/docId/56140},
pdf = {http://publikationen.ub.uni-frankfurt.de/files/56140/Dissertation_Tolga_Uslu.pdf}
}
2020.
PhD Thesis: TextImager-VSD : large scale verb sense disambiguation and named
entity recognition in the context of TextImager.
BibTeX
@phdthesis{Hemati:2020,
author = {Wahed Hemati},
title = {TextImager-VSD : large scale verb sense disambiguation and named
entity recognition in the context of TextImager},
pages = {174},
year = {2020},
school = {Johann Wolfgang Goethe-Universit{\"a}t Frankfurt am Main},
url = {http://publikationen.ub.uni-frankfurt.de/frontdoor/index/index/docId/56089},
pdf = {http://publikationen.ub.uni-frankfurt.de/files/56089/dissertation_Wahed_Hemati.pdf}
}
2020.
Multiple Texts as a Limiting Factor in Online Learning: Quantifying
(Dis-)similarities of Knowledge Networks. Frontiers in Education, 5:206.
BibTeX
@article{Mehler:Hemati:Welke:Konca:Uslu:2020,
abstract = {We test the hypothesis that the extent to which one obtains information
on a given topic through Wikipedia depends on the language in
which it is consulted. Controlling the size factor, we investigate
this hypothesis for a number of 25 subject areas. Since Wikipedia
is a central part of the web-based information landscape, this
indicates a language-related, linguistic bias. The article therefore
deals with the question of whether Wikipedia exhibits this kind
of linguistic relativity or not. From the perspective of educational
science, the article develops a computational model of the information
landscape from which multiple texts are drawn as typical input
of web-based reading. For this purpose, it develops a hybrid model
of intra- and intertextual similarity of different parts of the
information landscape and tests this model on the example of 35
languages and corresponding Wikipedias. In the way it measures
the similarities of hypertexts, the article goes beyond existing
approaches by examining their structural and semantic aspects
intra- and intertextually. In this way it builds a bridge between
reading research, educational science, Wikipedia research and
computational linguistics.},
author = {Mehler, Alexander and Hemati, Wahed and Welke, Pascal and Konca, Maxim
and Uslu, Tolga},
doi = {10.3389/feduc.2020.562670},
issn = {2504-284X},
journal = {Frontiers in Education},
pages = {206},
title = {Multiple Texts as a Limiting Factor in Online Learning: Quantifying
(Dis-)similarities of Knowledge Networks},
url = {https://www.frontiersin.org/article/10.3389/feduc.2020.562670},
pdf = {https://www.frontiersin.org/articles/10.3389/feduc.2020.562670/pdf},
volume = {5},
year = {2020}
}
2020.
Computational linguistic assessment of textbook and online learning
media by means of threshold concepts in business education. CoRR, abs/2008.02096.
BibTeX
@article{Luecking:et:al:2020,
author = {Andy L{\"{u}}cking and Sebastian Br{\"{u}}ckner and Giuseppe Abrami
and Tolga Uslu and Alexander Mehler},
title = {Computational linguistic assessment of textbook and online learning
media by means of threshold concepts in business education},
journal = {CoRR},
volume = {abs/2008.02096},
year = {2020},
url = {https://arxiv.org/abs/2008.02096},
archiveprefix = {arXiv},
eprint = {2008.02096},
timestamp = {Fri, 07 Aug 2020 15:07:21 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-2008-02096.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
June, 2020.
BA Thesis: Automatic Topic Modeling in the Context of Digital Libraries:
Mehrsprachige Korpus-basierte Erweiterung von text2ddc - eine
experimentelle Studie.
BibTeX
@bathesis{Baumartz:2020,
author = {Baumartz, Daniel},
title = {{Automatic Topic Modeling in the Context of Digital Libraries:
Mehrsprachige Korpus-basierte Erweiterung von text2ddc - eine
experimentelle Studie}},
year = {2020},
month = jun,
school = {Johann Wolfgang Goethe-Universität, Institute of Computer
Science and Mathematics, Text Technology Lab},
address = {Frankfurt, Germany},
url = {https://publikationen.ub.uni-frankfurt.de/frontdoor/index/index/docId/56381},
pdf = {https://publikationen.ub.uni-frankfurt.de/files/56381/baumartz_bachelorarbeit_2020_pub.pdf}
}
2020.
Fast and Easy Access to Central European Biodiversity Data with BIOfid. Biodiversity Information Science and Standards, 4:e59157.
BibTeX
@article{Driller:et:al:2020,
author = {Christine Driller and Markus Koch and Giuseppe Abrami and Wahed Hemati
and Andy L{\"u}cking and Alexander Mehler and Adrian Pachzelt and Gerwin Kasperek},
title = {Fast and Easy Access to Central European Biodiversity Data with BIOfid},
volume = {4},
year = {2020},
doi = {10.3897/biss.4.59157},
publisher = {Pensoft Publishers},
abstract = {The storage of data in public repositories such as the Global
Biodiversity Information Facility (GBIF) or the National Center
for Biotechnology Information (NCBI) is nowadays stipulated in
the policies of many publishers in order to facilitate data replication
or proliferation. Species occurrence records contained in legacy
printed literature are no exception to this. The extent of their
digital and machine-readable availability, however, is still far
from matching the existing data volume (Thessen and Parr 2014).
But precisely these data are becoming more and more relevant to
the investigation of ongoing loss of biodiversity. In order to
extract species occurrence records at a larger scale from available
publications, one has to apply specialised text mining tools.
However, such tools are in short supply especially for scientific
literature in the German language.The Specialised Information
Service Biodiversity Research*1 BIOfid (Koch et al. 2017) aims
at reducing this desideratum, inter alia, by preparing a searchable
text corpus semantically enriched by a new kind of multi-label
annotation. For this purpose, we feed manual annotations into
automatic, machine-learning annotators. This mixture of automatic
and manual methods is needed, because BIOfid approaches a new
application area with respect to language (mainly German of the
19th century), text type (biological reports), and linguistic
focus (technical and everyday language).We will present current
results of the performance of BIOfid’s semantic search engine
and the application of independent natural language processing
(NLP) tools. Most of these are freely available online, such as
TextImager (Hemati et al. 2016). We will show how TextImager is
tied into the BIOfid pipeline and how it is made scalable (e.g.
extendible by further modules) and usable on different systems
(docker containers).Further, we will provide a short introduction
to generating machine-learning training data using TextAnnotator
(Abrami et al. 2019) for multi-label annotation. Annotation reproducibility
can be assessed by the implementation of inter-annotator agreement
methods (Abrami et al. 2020). Beyond taxon recognition and entity
linking, we place particular emphasis on location and time information.
For this purpose, our annotation tag-set combines general categories
and biology-specific categories (including taxonomic names) with
location and time ontologies. The application of the annotation
categories is regimented by annotation guidelines (Lücking et
al. 2020). Within the next years, our work deliverable will be
a semantically accessible and data-extractable text corpus of
around two million pages. In this way, BIOfid is creating a new
valuable resource that expands our knowledge of biodiversity and
its determinants.},
pages = {e59157},
url = {https://doi.org/10.3897/biss.4.59157},
journal = {Biodiversity Information Science and Standards},
keywords = {biofid}
}
2020.
On Laughter and Forgetting and Reconversing: A neurologically-inspired
model of conversational context. Proceedings of the 24th Workshop on the Semantics and Pragmatics of Dialogue.
BibTeX
@inproceedings{Ginzburg:Luecking:2020:a,
author = {Ginzburg, Jonathan and L{\"u}cking, Andy},
title = {On Laughter and Forgetting and Reconversing: {A} neurologically-inspired
model of conversational context},
booktitle = {Proceedings of the 24th Workshop on the Semantics and Pragmatics of Dialogue},
series = {SemDial/WatchDial},
year = {2020},
location = {Brandeis University, Waltham, New Jersey (Online)},
url = {https://www.semdial.org/anthology/papers/Z/Z20/Z20-3008/},
pdf = {http://semdial.org/anthology/Z20-Ginzburg_semdial_0008.pdf}
}
2020.
Towards the score of communication. Proceedings of the 24th Workshop on the Semantics and Pragmatics of Dialogue.
BibTeX
@inproceedings{Luecking:Ginzburg:2020,
author = {L{\"u}cking, Andy and Ginzburg, Jonathan},
title = {Towards the score of communication},
booktitle = {Proceedings of the 24th Workshop on the Semantics and Pragmatics of Dialogue},
series = {SemDial/WatchDial},
year = {2020},
location = {Brandeis University, Waltham, New Jersey (Online)},
url = {https://www.semdial.org/anthology/papers/Z/Z20/Z20-3016/},
pdf = {http://semdial.org/anthology/Z20-Luecking_semdial_0016.pdf}
}
2020.
TextAnnotator: A web-based annotation suite for texts. Proceedings of the Digital Humanities 2020.
BibTeX
@inproceedings{Abrami:Mehler:Stoeckel:2020,
author = {Abrami, Giuseppe and Mehler, Alexander and Stoeckel, Manuel},
title = {{TextAnnotator}: A web-based annotation suite for texts},
booktitle = {Proceedings of the Digital Humanities 2020},
series = {DH 2020},
location = {Ottawa, Canada},
year = {2020},
url = {https://dh2020.adho.org/wp-content/uploads/2020/07/547_TextAnnotatorAwebbasedannotationsuitefortexts.html},
doi = {10.17613/tenm-4907},
abstract = {The TextAnnotator is a tool for simultaneous and collaborative
annotation of texts with visual annotation support, integration
of knowledge bases and, by pipelining the TextImager, a rich variety
of pre-processing and automatic annotation tools. It includes
a variety of modules for the annotation of texts, which contains
the annotation of argumentative, rhetorical, propositional and
temporal structures as well as a module for named entity linking
and rapid annotation of named entities. Especially the modules
for annotation of temporal, argumentative and propositional structures
are currently unique in web-based annotation tools. The TextAnnotator,
which allows the annotation of texts as a platform, is divided
into a front- and a backend component. The backend is a web service
based on WebSockets, which integrates the UIMA Database Interface
to manage and use texts. Texts are made accessible by using the
ResourceManager and the AuthorityManager, based on user and group
access permissions. Different views of a document can be created
and used depending on the scenario. Once a document has been opened,
access is gained to the annotations stored within annotation views
in which these are organized. Any annotation view can be assigned
with access permissions and by default, each user obtains his
or her own user view for every annotated document. In addition,
with sufficient access permissions, all annotation views can also
be used and curated. This allows the possibility to calculate
an Inter-Annotator-Agreement for a document, which shows an agreement
between the annotators. Annotators without sufficient rights cannot
display this value so that the annotators do not influence each
other. This contribution is intended to reflect the current state
of development of TextAnnotator, demonstrate the possibilities
of an instantaneous Inter-Annotator-Agreement and trigger a discussion
about further functions for the community.},
keywords = {textannotator},
poster = {https://hcommons.org/deposits/download/hc:31816/CONTENT/dh2020_textannotator_poster.pdf}
}
2020.
TextAnnotator: A UIMA Based Tool for the Simultaneous and Collaborative
Annotation of Texts. Proceedings of The 12th Language Resources and Evaluation Conference, 891–900.
BibTeX
@inproceedings{Abrami:Stoeckel:Mehler:2020,
author = {Abrami, Giuseppe and Stoeckel, Manuel and Mehler, Alexander},
title = {TextAnnotator: A UIMA Based Tool for the Simultaneous and Collaborative
Annotation of Texts},
booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference},
year = {2020},
address = {Marseille, France},
publisher = {European Language Resources Association},
pages = {891--900},
isbn = {979-10-95546-34-4},
abstract = {The annotation of texts and other material in the field of digital
humanities and Natural Language Processing (NLP) is a common task
of research projects. At the same time, the annotation of corpora
is certainly the most time- and cost-intensive component in research
projects and often requires a high level of expertise according
to the research interest. However, for the annotation of texts,
a wide range of tools is available, both for automatic and manual
annotation. Since the automatic pre-processing methods are not
error-free and there is an increasing demand for the generation
of training data, also with regard to machine learning, suitable
annotation tools are required. This paper defines criteria of
flexibility and efficiency of complex annotations for the assessment
of existing annotation tools. To extend this list of tools, the
paper describes TextAnnotator, a browser-based, multi-annotation
system, which has been developed to perform platform-independent
multimodal annotations and annotate complex textual structures.
The paper illustrates the current state of development of TextAnnotator
and demonstrates its ability to evaluate annotation quality (inter-annotator
agreement) at runtime. In addition, it will be shown how annotations
of different users can be performed simultaneously and collaboratively
on the same document from different platforms using UIMA as the
basis for annotation.},
url = {https://www.aclweb.org/anthology/2020.lrec-1.112},
keywords = {textannotator},
pdf = {http://www.lrec-conf.org/proceedings/lrec2020/pdf/2020.lrec-1.112.pdf}
}
2020.
Text2SceneVR: Generating Hypertexts with VAnnotatoR as a Pre-processing
Step for Text2Scene Systems. Proceedings of the 31st ACM Conference on Hypertext and Social Media, 177–186.
BibTeX
@inproceedings{Abrami:Henlein:Kett:Mehler:2020,
author = {Abrami, Giuseppe and Henlein, Alexander and Kett, Attila and Mehler, Alexander},
title = {{Text2SceneVR}: Generating Hypertexts with VAnnotatoR as a Pre-processing
Step for Text2Scene Systems},
booktitle = {Proceedings of the 31st ACM Conference on Hypertext and Social Media},
series = {HT ’20},
year = {2020},
location = {Virtual Event, USA},
isbn = {9781450370981},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3372923.3404791},
doi = {10.1145/3372923.3404791},
pages = {177--186},
numpages = {10},
pdf = {https://dl.acm.org/doi/pdf/10.1145/3372923.3404791}
}
May, 2020.
Voting for POS tagging of Latin texts: Using the flair of FLAIR
to better Ensemble Classifiers by Example of Latin. Proceedings of LT4HALA 2020 - 1st Workshop on Language Technologies
for Historical and Ancient Languages, 130–135.
BibTeX
@inproceedings{Stoeckel:et:al:2020,
author = {Stoeckel, Manuel and Henlein, Alexander and Hemati, Wahed and Mehler, Alexander},
title = {{Voting for POS tagging of Latin texts: Using the flair of FLAIR
to better Ensemble Classifiers by Example of Latin}},
booktitle = {Proceedings of LT4HALA 2020 - 1st Workshop on Language Technologies
for Historical and Ancient Languages},
month = may,
year = {2020},
address = {Marseille, France},
publisher = {European Language Resources Association (ELRA)},
pages = {130--135},
abstract = {Despite the great importance of the Latin language in the past,
there are relatively few resources available today to develop
modern NLP tools for this language. Therefore, the EvaLatin Shared
Task for Lemmatization and Part-of-Speech (POS) tagging was published
in the LT4HALA workshop. In our work, we dealt with the second
EvaLatin task, that is, POS tagging. Since most of the available
Latin word embeddings were trained on either few or inaccurate
data, we trained several embeddings on better data in the first
step. Based on these embeddings, we trained several state-of-the-art
taggers and used them as input for an ensemble classifier called
LSTMVoter. We were able to achieve the best results for both the
cross-genre and the cross-time task (90.64\% and 87.00\%) without
using additional annotated data (closed modality). In the meantime,
we further improved the system and achieved even better results
(96.91\% on classical, 90.87\% on cross-genre and 87.35\% on cross-time).},
url = {https://www.aclweb.org/anthology/2020.lt4hala-1.21},
pdf = {http://www.lrec-conf.org/proceedings/lrec2020/workshops/LT4HALA/pdf/2020.lt4hala-1.21.pdf}
}
2020.
The Frankfurt Latin Lexicon. From Morphological Expansion and
Word Embeddings to SemioGraphs. Studi e Saggi Linguistici, 58(1):121–155.
BibTeX
@article{Mehler:et:al:2020b,
  author   = {Mehler, Alexander and Jussen, Bernhard and Geelhaar, Tim and Henlein, Alexander
              and Abrami, Giuseppe and Baumartz, Daniel and Uslu, Tolga and Hemati, Wahed},
  title    = {{The Frankfurt Latin Lexicon. From Morphological Expansion and
              Word Embeddings to SemioGraphs}},
  journal  = {Studi e Saggi Linguistici},
  year     = {2020},
  volume   = {58},
  number   = {1},
  pages    = {121--155},
  doi      = {10.4454/ssl.v58i1.276},
  url      = {https://www.studiesaggilinguistici.it/index.php/ssl/article/view/276},
  pdf      = {https://www.studiesaggilinguistici.it/index.php/ssl/article/download/276/219},
  abstract = {In this article we present the Frankfurt Latin Lexicon (FLL),
              a lexical resource for Medieval Latin that is used both for the
              lemmatization of Latin texts and for the post-editing of lemmatizations.
              We describe recent advances in the development of lemmatizers
              and test them against the Capitularies corpus (comprising Frankish
              royal edicts, mid-6th to mid-9th century), a corpus created as
              a reference for processing Medieval Latin. We also consider the
              post-correction of lemmatizations using a limited crowdsourcing
              process aimed at continuous review and updating of the FLL. Starting
              from the texts resulting from this lemmatization process, we describe
              the extension of the FLL by means of word embeddings, whose interactive
              traversing by means of SemioGraphs completes the digital enhanced
              hermeneutic circle. In this way, the article argues for a more
              comprehensive understanding of lemmatization, encompassing classical
              machine learning as well as intellectual post-corrections and,
              in particular, human computation in the form of interpretation
              processes based on graph representations of the underlying lexical
              resources.}
}
May, 2020.
Transfer of ISOSpace into a 3D Environment for Annotations and Applications. Proceedings of the 16th Joint ACL - ISO Workshop on Interoperable
Semantic Annotation, 32–35.
BibTeX
@inproceedings{Henlein:et:al:2020,
author = {Henlein, Alexander and Abrami, Giuseppe and Kett, Attila and Mehler, Alexander},
title = {Transfer of {ISOSpace} into a {3D} Environment for Annotations and Applications},
booktitle = {Proceedings of the 16th Joint ACL - ISO Workshop on Interoperable
Semantic Annotation},
month = may,
year = {2020},
address = {Marseille},
publisher = {European Language Resources Association},
pages = {32--35},
abstract = {People's visual perception is very pronounced and therefore it
is usually no problem for them to describe the space around them
in words. Conversely, people also have no problems imagining a
concept of a described space. In recent years many efforts have
been made to develop a linguistic concept for spatial and spatial-temporal
relations. However, the systems have not really caught on so far,
which in our opinion is due to the complex models on which they
are based and the lack of available training data and automated
taggers. In this paper we describe a project to support spatial
annotation, which could facilitate annotation by its many functions,
but also enrich it with many more information. This is to be achieved
by an extension by means of a VR environment, with which spatial
relations can be better visualized and connected with real objects.
And we want to use the available data to develop a new state-of-the-art
tagger and thus lay the foundation for future systems such as
improved text understanding for Text2Scene.},
url = {https://www.aclweb.org/anthology/2020.isa-1.4},
pdf = {http://www.lrec-conf.org/proceedings/lrec2020/workshops/ISA16/pdf/2020.isa-1.4.pdf}
}
May, 2020.
Recognizing Sentence-level Logical Document Structures with the
Help of Context-free Grammars. Proceedings of The 12th Language Resources and Evaluation Conference, 5282–5290.
BibTeX
@inproceedings{Hildebrand:Hemati:Mehler:2020,
author = {Hildebrand, Jonathan and Hemati, Wahed and Mehler, Alexander},
title = {Recognizing Sentence-level Logical Document Structures with the
Help of Context-free Grammars},
booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference},
month = may,
year = {2020},
address = {Marseille, France},
publisher = {European Language Resources Association},
pages = {5282--5290},
abstract = {Current sentence boundary detectors split documents into sequentially
ordered sentences by detecting their beginnings and ends. Sentences,
however, are more deeply structured even on this side of constituent
and dependency structure: they can consist of a main sentence
and several subordinate clauses as well as further segments (e.g.
inserts in parentheses); they can even recursively embed whole
sentences and then contain multiple sentence beginnings and ends.
In this paper, we introduce a tool that segments sentences into
tree structures to detect this type of recursive structure. To
this end, we retrain different constituency parsers with the help
of modified training data to transform them into sentence segmenters.
With these segmenters, documents are mapped to sequences of sentence-related
“logical document structures”. The resulting segmenters aim to
improve downstream tasks by providing additional structural information.
In this context, we experiment with German dependency parsing.
We show that for certain sentence categories, which can be determined
automatically, improvements in German dependency parsing can be
achieved using our segmenter for preprocessing. The assumption
suggests that improvements in other languages and tasks can be
achieved.},
url = {https://www.aclweb.org/anthology/2020.lrec-1.650},
pdf = {http://www.lrec-conf.org/proceedings/lrec2020/pdf/2020.lrec-1.650.pdf}
}
May, 2020.
On the Influence of Coreference Resolution on Word Embeddings
in Lexical-semantic Evaluation Tasks. Proceedings of The 12th Language Resources and Evaluation Conference, 27–33.
BibTeX
@inproceedings{Henlein:Mehler:2020,
author = {Henlein, Alexander and Mehler, Alexander},
title = {{On the Influence of Coreference Resolution on Word Embeddings
in Lexical-semantic Evaluation Tasks}},
booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference},
month = may,
year = {2020},
address = {Marseille, France},
publisher = {European Language Resources Association},
pages = {27--33},
abstract = {Coreference resolution (CR) aims to find all spans of a text that
refer to the same entity. The F1-Scores on these task have been
greatly improved by new developed End2End-approaches and transformer
networks. The inclusion of CR as a pre-processing step is expected
to lead to improvements in downstream tasks. The paper examines
this effect with respect to word embeddings. That is, we analyze
the effects of CR on six different embedding methods and evaluate
them in the context of seven lexical-semantic evaluation tasks
and instantiation/hypernymy detection. Especially in the last
tasks we hoped for a significant increase in performance. We show
that all word embedding approaches do not benefit significantly
from pronoun substitution. The measurable improvements are only
marginal (around 0.5\% in most test cases). We explain this result
with the loss of contextual information, reduction of the relative
occurrence of rare words and the lack of pronouns to be replaced.},
url = {https://www.aclweb.org/anthology/2020.lrec-1.4},
pdf = {http://www.lrec-conf.org/proceedings/lrec2020/pdf/2020.lrec-1.4.pdf}
}
2020.
From Topic Networks to Distributed Cognitive Maps: Zipfian Topic
Universes in the Area of Volunteered Geographic Information. Complexity, 4:1–47.
BibTeX
@article{Mehler:Gleim:Gaitsch:Uslu:Hemati:2020,
author = {Alexander Mehler and R{\"{u}}diger Gleim and Regina Gaitsch and Tolga Uslu
and Wahed Hemati},
title = {From Topic Networks to Distributed Cognitive Maps: {Zipfian} Topic
Universes in the Area of Volunteered Geographic Information},
journal = {Complexity},
volume = {4},
doi = {10.1155/2020/4607025},
pages = {1--47},
issuetitle = {Cognitive Network Science: A New Frontier},
year = {2020}
}
2020.
WikNectVR: A Gesture-Based Approach for Interacting in Virtual
Reality Based on WikNect and Gestural Writing. Virtual, Augmented and Mixed Reality. Design and Interaction -
12th International Conference, VAMR 2020, Held as Part of the
22nd HCI International Conference, HCII 2020, Copenhagen,
Denmark, July 19-24, 2020, Proceedings, Part I, 12190:299–312.
BibTeX
@inproceedings{Kuehn:Abrami:Mehler:2020,
author = {Vincent K{\"{u}}hn and Giuseppe Abrami and Alexander Mehler},
editor = {Jessie Y. C. Chen and Gino Fragomeni},
title = {WikNectVR: {A} Gesture-Based Approach for Interacting in Virtual
Reality Based on WikNect and Gestural Writing},
booktitle = {Virtual, Augmented and Mixed Reality. Design and Interaction -
12th International Conference, {VAMR} 2020, Held as Part of the
22nd {HCI} International Conference, {HCII} 2020, Copenhagen,
Denmark, July 19-24, 2020, Proceedings, Part {I}},
series = {Lecture Notes in Computer Science},
volume = {12190},
pages = {299--312},
publisher = {Springer},
year = {2020},
doi = {10.1007/978-3-030-49695-1_20},
url = {https://doi.org/10.1007/978-3-030-49695-1_20}
}
2020.
Educational Technologies in the area of ubiquitous historical
computing in virtual reality. In: New Perspectives on Virtual and Augmented Reality: Finding New
Ways to Teach in a Transformed Learning Environment.
Taylor & Francis.
BibTeX
@inbook{Abrami:et:al:2020,
  author    = {Abrami, Giuseppe and Mehler, Alexander and Spiekermann, Christian
               and Kett, Attila and L{\"o}{\"o}ck, Simon and Schwarz, Lukas},
  editor    = {Daniela, Linda},
  title     = {Educational Technologies in the area of ubiquitous historical
               computing in virtual reality},
  booktitle = {New Perspectives on Virtual and Augmented Reality: Finding New
               Ways to Teach in a Transformed Learning Environment},
  publisher = {Taylor \& Francis},
  year      = {2020},
  isbn      = {978-0-367-43211-9},
  url       = {https://www.routledge.com/New-Perspectives-on-Virtual-and-Augmented-Reality-Finding-New-Ways-to-Teach/Daniela/p/book/9780367432119},
  abstract  = {At ever shorter intervals, new technologies are being developed
               that are opening up more and more areas of application. This regards,
               for example, Virtual Reality (VR) and Augmented Reality (AR) devices.
               In addition to the private sector, the public and education sectors,
               which already make intensive use of these devices, benefit from
               these technologies. However, especially in the field of historical
               education, there are not many frameworks for generating immersive
               virtual environments that can be used flexibly enough. This chapter
               addresses this gap by means of VAnnotatoR. VAnnotatoR is a versatile
               framework for the creation and use of virtual environments that
               serve to model historical processes in historical education. The
               paper describes the building blocks of VAnnotatoR and describes
               applications in historical education.}
}
2020.
Ursachen der Entstehung von ubiquitären Zentrum-Peripheriestrukturen
und ihre Folgen. Soziale Welt – Zeitschrift für sozialwissenschaftliche Forschung und Praxis (SozW), Sonderband 23:265–284.
BibTeX
@article{Stegbauer:Mehler:2020,
author = {Christian Stegbauer and Alexander Mehler},
title = {Ursachen der Entstehung von ubiquit{\"{a}}ren Zentrum-Peripheriestrukturen
und ihre Folgen},
journal = {Soziale Welt -- Zeitschrift f{\"u}r sozialwissenschaftliche Forschung und Praxis (SozW)},
volume = {Sonderband 23},
year = {2020},
pages = {265--284}
}
2019
2019.
Positive Learning in the Internet Age: Developments and Perspectives
in the PLATO Program. In: Frontiers and Advances in Positive Learning in the Age of InformaTiOn (PLATO), 1–5.
Springer International Publishing.
BibTeX
@inbook{Zlatkin-Troitschanskaia:et:al:2019,
author = {Zlatkin-Troitschanskaia, Olga and Bisang, Walter and Mehler, Alexander
and Banerjee, Mita and Roeper, Jochen},
editor = {Zlatkin-Troitschanskaia, Olga},
title = {Positive Learning in the {Internet} Age: Developments and Perspectives
in the {PLATO} Program},
booktitle = {Frontiers and Advances in Positive Learning in the Age of InformaTiOn (PLATO)},
year = {2019},
publisher = {Springer International Publishing},
address = {Cham},
pages = {1--5},
abstract = {The Internet has become the main informational entity, i.e., a
public source of information. The Internet offers many new benefits
and opportunities for human learning, teaching, and research.
However, by providing a vast amount of information from innumerable
sources, it also enables the manipulation of information; there
are countless examples of disseminated misinformation and false
data in mass and social media. Much of the information presented
online is conflicting, preselected, or algorithmically obscure,
often colliding with fundamental humanistic values and posing
moral or ethical problems.},
isbn = {978-3-030-26578-6},
doi = {10.1007/978-3-030-26578-6_1},
url = {https://doi.org/10.1007/978-3-030-26578-6_1}
}
2019.
TextInContext: On the Way to a Framework for Measuring the Context-Sensitive
Complexity of Educationally Relevant Texts—A Combined Cognitive
and Computational Linguistic Approach. In: Frontiers and Advances in Positive Learning in the Age of InformaTiOn (PLATO), 167–195.
Springer International Publishing.
BibTeX
@inbook{Mehler:Ramesh:2019,
  author    = {Mehler, Alexander and Ramesh, Visvanathan},
  editor    = {Zlatkin-Troitschanskaia, Olga},
  title     = {{TextInContext}: On the Way to a Framework for Measuring the Context-Sensitive
               Complexity of Educationally Relevant Texts---A Combined Cognitive
               and Computational Linguistic Approach},
  booktitle = {Frontiers and Advances in Positive Learning in the Age of InformaTiOn (PLATO)},
  publisher = {Springer International Publishing},
  address   = {Cham},
  year      = {2019},
  pages     = {167--195},
  isbn      = {978-3-030-26578-6},
  doi       = {10.1007/978-3-030-26578-6_14},
  url       = {https://doi.org/10.1007/978-3-030-26578-6_14},
  abstract  = {We develop a framework for modeling the context sensitivity of
               text interpretation. As a point of reference, we focus on the
               complexity of educational texts. To open up a broader basis for
               representing phenomena of context sensitivity, we integrate a
               learning theory (i.e., the Cognitive Load Theory) with a theory
               of discourse comprehension (i.e., the Construction Integration
               Model) and a theory of cognitive semantics (i.e., the theory of
               Conceptual Spaces). The aim is to construct measures that view
               text complexity as a relational attribute by analogy to the relational
               concept of meaning in situation semantics. To this end, we reconstruct
               the situation semantic notion of relational meaning from the perspective
               of a computationally informed cognitive semantics. The aim is
               to prepare the development of measurements for predicting learning
               outcomes in the form of positive or negative learning. This prediction
               ideally depends on the underlying learning material, the learner's
               situational context, and knowledge retrieved from his or her long-term
               memory, which he or she uses to arrive at coherent mental representations
               of the underlying texts. Finally, our model refers to machine
               learning as a tool for modeling such memory content. In this way,
               the chapter integrates approaches from different disciplines (linguistic
               semantics, computational linguistics, cognitive science, and data
               science).}
}
2019.
Dialogue semantics: From cognitive structures to positive and
negative learning. Frontiers and Advances in Positive Learning in the Age of InformaTiOn (PLATO), 197–205.
BibTeX
@incollection{Luecking:2019:a,
author = {L{\"u}cking, Andy},
title = {Dialogue semantics: {From} cognitive structures to positive and
negative learning},
year = {2019},
pages = {197--205},
publisher = {Springer Nature Switzerland AG},
address = {Cham, Switzerland},
editor = {Zlatkin-Troitschanskaia, Olga},
booktitle = {Frontiers and Advances in Positive Learning in the Age of InformaTiOn (PLATO)},
doi = {10.1007/978-3-030-26578-6},
url = {https://link.springer.com/chapter/10.1007/978-3-030-26578-6_15}
}
2019.
Not few but all quantifiers can be negated: towards a referentially
transparent semantics of quantified noun phrases. Proceedings of the Amsterdam Colloquium 2019, 269–278.
BibTeX
@inproceedings{Luecking:Ginzburg:2019,
author = {L{\"u}cking, Andy and Ginzburg, Jonathan},
title = {Not few but all quantifiers can be negated: towards a referentially
transparent semantics of quantified noun phrases},
booktitle = {Proceedings of the Amsterdam Colloquium 2019},
series = {AC'19},
location = {University of Amsterdam},
year = {2019},
pages = {269--278},
url = {http://events.illc.uva.nl/AC/AC2019/},
pdf = {http://events.illc.uva.nl/AC/AC2019/uploaded_files/inlineitem/L_cking_and_Ginzburg_Not_few_but_all_quantifiers_ca.pdf}
}
2019.
Gesture. Head-Driven Phrase Structure Grammar: The handbook.
BibTeX
@incollection{Luecking:2019:b,
keywords = {own,bookchapter},
author+an = {1=highlight},
author = {L{\"u}cking, Andy},
year = {2019},
title = {Gesture},
editor = {M{\"u}ller, Stefan and Abeill{\'e}, Anne and Borsley, Robert D.
and Koenig, Jean-Pierre},
booktitle = {{Head-Driven Phrase Structure Grammar}: {The} handbook},
address = {Berlin},
publisher = {Language Science Press},
pdf = {https://hpsg.hu-berlin.de/Projects/HPSG-handbook/PDFs/gesture.pdf},
url = {https://langsci-press.org/catalog/book/259}
}
2019.
Grammar in dialogue. Head-Driven Phrase Structure Grammar: The handbook.
BibTeX
@incollection{Luecking:Ginzburg:Cooper:2019,
keywords = {own,bookchapter},
author+an = {1=highlight},
author = {L{\"u}cking, Andy and Ginzburg, Jonathan and Cooper, Robin},
year = {2019},
title = {Grammar in dialogue},
editor = {M{\"u}ller, Stefan and Abeill{\'e}, Anne and Borsley, Robert D.
and Koenig, Jean-Pierre},
booktitle = {{Head-Driven Phrase Structure Grammar}: {The} handbook},
address = {Berlin},
publisher = {Language Science Press},
pdf = {https://hpsg.hu-berlin.de/Projects/HPSG-handbook/PDFs/dialogue.pdf},
url = {https://langsci-press.org/catalog/book/259}
}
2019.
Deep-EOS: General-Purpose Neural Networks for Sentence Boundary Detection. Proceedings of the 15th Conference on Natural Language Processing (KONVENS).
BibTeX
@inproceedings{Schweter:Ahmed:2019,
  author    = {Stefan Schweter and Sajawel Ahmed},
  title     = {{Deep-EOS: General-Purpose Neural Networks for Sentence Boundary Detection}},
  booktitle = {Proceedings of the 15th Conference on Natural Language Processing (KONVENS)},
  year      = {2019},
  location  = {Erlangen, Germany}
}
November, 2019.
When Specialization Helps: Using Pooled Contextualized Embeddings
to Detect Chemical and Biomedical Entities in Spanish. Proceedings of The 5th Workshop on BioNLP Open Shared Tasks, 11–15.
BibTeX
@inproceedings{Stoeckel:Hemati:Mehler:2019,
title = {When Specialization Helps: Using Pooled Contextualized Embeddings
to Detect Chemical and Biomedical Entities in {S}panish},
author = {Stoeckel, Manuel and Hemati, Wahed and Mehler, Alexander},
booktitle = {Proceedings of The 5th Workshop on BioNLP Open Shared Tasks},
month = nov,
year = {2019},
address = {Hong Kong, China},
publisher = {Association for Computational Linguistics},
url = {https://www.aclweb.org/anthology/D19-5702},
doi = {10.18653/v1/D19-5702},
pages = {11--15},
abstract = {The recognition of pharmacological substances, compounds and proteins
is an essential preliminary work for the recognition of relations
between chemicals and other biomedically relevant units. In this
paper, we describe an approach to Task 1 of the PharmaCoNER Challenge,
which involves the recognition of mentions of chemicals and drugs
in Spanish medical texts. We train a state-of-the-art BiLSTM-CRF
sequence tagger with stacked Pooled Contextualized Embeddings,
word and sub-word embeddings using the open-source framework FLAIR.
We present a new corpus composed of articles and papers from Spanish
health science journals, termed the Spanish Health Corpus, and
use it to train domain-specific embeddings which we incorporate
in our model training. We achieve a result of 89.76{\%} F1-score
using pre-trained embeddings and are able to improve these results
to 90.52{\%} F1-score using specialized embeddings.}
}
2019.
BIOfid Dataset: Publishing a German Gold Standard for Named Entity
Recognition in Historical Biodiversity Literature. Proceedings of the 23rd Conference on Computational Natural Language
Learning (CoNLL), 871–880.
BibTeX
@inproceedings{Ahmed:Stoeckel:Driller:Pachzelt:Mehler:2019,
  author    = {Sajawel Ahmed and Manuel Stoeckel and Christine Driller and Adrian Pachzelt
               and Alexander Mehler},
  title     = {{BIOfid Dataset: Publishing a German Gold Standard for Named Entity
               Recognition in Historical Biodiversity Literature}},
  booktitle = {Proceedings of the 23rd Conference on Computational Natural Language
               Learning (CoNLL)},
  year      = {2019},
  address   = {Hong Kong, China},
  publisher = {Association for Computational Linguistics},
  pages     = {871--880},
  doi       = {10.18653/v1/K19-1081},
  url       = {https://www.aclweb.org/anthology/K19-1081},
  keywords  = {biofid},
  abstract  = {The Specialized Information Service Biodiversity Research (BIOfid)
               has been launched to mobilize valuable biological data from printed
               literature hidden in German libraries for over the past 250 years.
               In this project, we annotate German texts converted by OCR from
               historical scientific literature on the biodiversity of plants,
               birds, moths and butterflies. Our work enables the automatic extraction
               of biological information previously buried in the mass of papers
               and volumes. For this purpose, we generated training data for
               the tasks of Named Entity Recognition (NER) and Taxa Recognition
               (TR) in biological documents. We use this data to train a number
               of leading machine learning tools and create a gold standard for
               TR in biodiversity literature. More specifically, we perform a
               practical analysis of our newly generated BIOfid dataset through
               various downstream-task evaluations and establish a new state
               of the art for TR with 80.23{\%} F-score. In this sense, our paper
               lays the foundations for future work in the field of information
               extraction in biology texts.}
}
October 10–11, 2019.
VAnnotatoR: A framework for the multimodal reconstruction of
historical situations and spaces. Proceedings of the Time Machine Conference.
BibTeX
@inproceedings{Mehler:Abrami:2019,
author = {Mehler, Alexander and Abrami, Giuseppe},
title = {{VAnnotatoR}: A framework for the multimodal reconstruction of
historical situations and spaces},
booktitle = {Proceedings of the Time Machine Conference},
year = {2019},
date = {2019-10-10/2019-10-11},
address = {Dresden, Germany},
poster = {https://www.texttechnologylab.org/wp-content/uploads/2019/09/TimeMachineConference.pdf}
}
2019.
Corpus2Wiki: A MediaWiki-based Tool for Automatically Generating
Wikiditions in Digital Humanities. INF-DH-2019.
BibTeX
@inproceedings{Hunziker:et:al:2019,
author = {Hunziker, Alex and Mammadov, Hasanagha and Hemati, Wahed and Mehler, Alexander},
title = {{Corpus2Wiki}: A MediaWiki-based Tool for Automatically Generating
Wikiditions in Digital Humanities},
booktitle = {INF-DH-2019},
year = {2019},
editor = {Burghardt, Manuel and M{\"u}ller-Birn, Claudia},
publisher = {Gesellschaft f{\"u}r Informatik e.V.},
address = {Bonn}
}
May, 2019.
Distribution is not enough – Going Firther. Proceedings of Natural Language and Computer Science.
BibTeX
@inproceedings{Luecking:Cooper:Larsson:Ginzburg:2019,
author = {L{\"u}cking, Andy and Cooper, Robin and Larsson, Staffan and Ginzburg, Jonathan},
title = {Distribution is not enough -- Going {Firther}},
booktitle = {Proceedings of Natural Language and Computer Science},
maintitle = {The 13th International Conference on Computational
Semantics (IWCS 2019)},
series = {NLCS 6},
location = {Gothenburg, Sweden},
month = may,
year = {2019},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2019/05/Distribution_is_not_enough.pdf}
}
March, 2019.
CRFVoter: gene and protein related object recognition using
a conglomerate of CRF-based tools. Journal of Cheminformatics, 11(1):11.
BibTeX
@article{Hemati:Mehler:2019b,
author = {Hemati, Wahed and Mehler, Alexander},
title = {{{CRFVoter}: gene and protein related object recognition using
a conglomerate of CRF-based tools}},
journal = {Journal of Cheminformatics},
year = {2019},
month = mar,
day = {14},
volume = {11},
number = {1},
pages = {11},
abstract = {Gene and protein related objects are an important class of entities
in biomedical research, whose identification and extraction from
scientific articles is attracting increasing interest. In this
work, we describe an approach to the BioCreative V.5 challenge
regarding the recognition and classification of gene and protein
related objects. For this purpose, we transform the task as posed
by BioCreative V.5 into a sequence labeling problem. We present
a series of sequence labeling systems that we used and adapted
in our experiments for solving this task. Our experiments show
how to optimize the hyperparameters of the classifiers involved.
To this end, we utilize various algorithms for hyperparameter
optimization. Finally, we present CRFVoter, a two-stage application
of Conditional Random Field (CRF) that integrates the optimized
sequence labelers from our study into one ensemble classifier.},
issn = {1758-2946},
doi = {10.1186/s13321-019-0343-x},
url = {https://doi.org/10.1186/s13321-019-0343-x}
}
May, 2019.
TextAnnotator: A flexible framework for semantic annotations. Proceedings of the Fifteenth Joint ACL - ISO Workshop on Interoperable
Semantic Annotation, (ISA-15).
BibTeX
@inproceedings{Abrami:et:al:2019,
author = {Abrami, Giuseppe and Mehler, Alexander and L{\"u}cking, Andy and Rieb, Elias
and Helfrich, Philipp},
title = {{TextAnnotator}: A flexible framework for semantic annotations},
booktitle = {Proceedings of the Fifteenth Joint ACL - ISO Workshop on Interoperable
Semantic Annotation, (ISA-15)},
series = {ISA-15},
location = {Gothenburg, Sweden},
month = may,
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/TextAnnotator_IWCS_Göteborg.pdf},
year = {2019},
keywords = {textannotator},
abstract = {Modern annotation tools should meet at least the following general
requirements: they can handle diverse data and annotation levels
within one tool, and they support the annotation process with
automatic (pre-)processing outcomes as much as possible. We developed
a framework that meets these general requirements and that enables
versatile and browser-based annotations of texts, the TextAnnotator.
It combines NLP methods of pre-processing with methods of flexible
post-processing. In fact, machine learning (ML) requires a lot
of training and test data, but is usually far from achieving perfect
results. Producing high-level annotations for ML and post-correcting
its results are therefore necessary. This is the purpose of TextAnnotator,
which is entirely implemented in ExtJS and provides a range of
interactive visualizations of annotations. In addition, it allows
for flexibly integrating knowledge resources, e.g. in the course
of post-processing named entity recognition. The paper describes
TextAnnotator’s architecture together with three use cases: annotating
temporal structures, argument structures and named entity linking.}
}
2019.
Computing Classifier-based Embeddings with the Help of text2ddc. Proceedings of the 20th International Conference on Computational
Linguistics and Intelligent Text Processing, (CICLing 2019).
BibTeX
@inproceedings{Uslu:Mehler:Baumartz:2019,
  author    = {Uslu, Tolga and Mehler, Alexander and Baumartz, Daniel},
  title     = {{Computing Classifier-based Embeddings with the Help of text2ddc}},
  booktitle = {{Proceedings of the 20th International Conference on Computational
               Linguistics and Intelligent Text Processing, (CICLing 2019)}},
  series    = {{CICLing 2019}},
  location  = {La Rochelle, France},
  year      = {2019}
}
2019.
BigSense: a Word Sense Disambiguator for Big Data. Proceedings of the Digital Humanities 2019, (DH2019).
BibTeX
@inproceedings{Uslu:Mehler:Schulz:Baumartz:2019,
  author    = {Uslu, Tolga and Mehler, Alexander and Schulz, Clemens and Baumartz, Daniel},
  title     = {{{BigSense}: a Word Sense Disambiguator for Big Data}},
  booktitle = {{Proceedings of the Digital Humanities 2019, (DH2019)}},
  series    = {{DH2019}},
  location  = {Utrecht, Netherlands},
  year      = {2019},
  url       = {https://dev.clariah.nl/files/dh2019/boa/0199.html}
}
January, 2019.
LSTMVoter: chemical named entity recognition using a conglomerate
of sequence labeling tools. Journal of Cheminformatics, 11(1):7.
BibTeX
@article{Hemati:Mehler:2019a,
abstract = {Chemical and biomedical named entity recognition (NER) is an essential
preprocessing task in natural language processing. The identification
and extraction of named entities from scientific articles is also
attracting increasing interest in many scientific disciplines.
Locating chemical named entities in the literature is an essential
step in chemical text mining pipelines for identifying chemical
mentions, their properties, and relations as discussed in the
literature. In this work, we describe an approach to the BioCreative
V.5 challenge regarding the recognition and classification of
chemical named entities. For this purpose, we transform the task
of NER into a sequence labeling problem. We present a series of
sequence labeling systems that we used, adapted and optimized
in our experiments for solving this task. To this end, we experiment
with hyperparameter optimization. Finally, we present LSTMVoter,
a two-stage application of recurrent neural networks that integrates
the optimized sequence labelers from our study into a single ensemble
classifier.},
author = {Hemati, Wahed and Mehler, Alexander},
day = {10},
doi = {10.1186/s13321-018-0327-2},
issn = {1758-2946},
journal = {Journal of Cheminformatics},
month = jan,
number = {1},
pages = {7},
title = {{{LSTMVoter}: chemical named entity recognition using a conglomerate
of sequence labeling tools}},
url = {https://doi.org/10.1186/s13321-018-0327-2},
volume = {11},
year = {2019}
}
July, 2019.
Graph-based Format for Modeling Multimodal Annotations in Virtual
Reality by Means of VAnnotatoR. Proceedings of the 21th International Conference on Human-Computer
Interaction, HCII 2019, 351–358.
BibTeX
@inproceedings{Abrami:Mehler:Spiekermann:2019,
author = {Abrami, Giuseppe and Mehler, Alexander and Spiekermann, Christian},
title = {{Graph-based Format for Modeling Multimodal Annotations in Virtual
Reality by Means of VAnnotatoR}},
booktitle = {Proceedings of the 21st International Conference on Human-Computer
Interaction, HCII 2019},
series = {HCII 2019},
location = {Orlando, Florida, USA},
editor = {Stephanidis, Constantine and Antona, Margherita},
month = jul,
publisher = {Springer International Publishing},
address = {Cham},
pages = {351--358},
abstract = {Projects in the field of Natural Language Processing (NLP), the
Digital Humanities (DH) and related disciplines dealing with machine
learning of complex relationships between data objects need annotations
to obtain sufficiently rich training and test sets. The visualization
of such data sets and their underlying Human Computer Interaction
(HCI) are perennial problems of computer science. However, despite
some success stories, the clarity of information presentation
and the flexibility of the annotation process may decrease with
the complexity of the underlying data objects and their relationships.
In order to face this problem, the so-called VAnnotatoR was developed,
as a flexible annotation tool using 3D glasses and augmented reality
devices, which enables annotation and visualization in three-dimensional
virtual environments. In addition, multimodal objects are annotated
and visualized within a graph-based approach.},
isbn = {978-3-030-30712-7},
pdf = {https://link.springer.com/content/pdf/10.1007\%2F978-3-030-30712-7_44.pdf},
year = {2019}
}
2019.
text2ddc meets Literature - Ein Verfahren für die Analyse und
Visualisierung thematischer Makrostrukturen. Proceedings of the 6th Digital Humanities Conference in the German-speaking
Countries, DHd 2019.
BibTeX
@inproceedings{Mehler:Uslu:Gleim:Baumartz:2019,
author = {Mehler, Alexander and Uslu, Tolga and Gleim, R{\"u}diger and Baumartz, Daniel},
title = {{text2ddc meets Literature - Ein Verfahren f{\"u}r die Analyse und
Visualisierung thematischer Makrostrukturen}},
booktitle = {Proceedings of the 6th Digital Humanities Conference in the German-speaking
Countries, DHd 2019},
poster = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/DHD_Poster___text2ddc_meets_Literature_Poster.pdf},
series = {DHd 2019},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/Preprint_DHd2019_text2ddc_meets_Literature.pdf},
location = {Frankfurt, Germany},
year = {2019}
}
2019.
VAnnotatoR: Ein Werkzeug zur Annotation multimodaler Netzwerke
in dreidimensionalen virtuellen Umgebungen. Proceedings of the 6th Digital Humanities Conference in the German-speaking
Countries, DHd 2019.
BibTeX
@inproceedings{Abrami:Spiekermann:Mehler:2019,
  author    = {Abrami, Giuseppe and Spiekermann, Christian and Mehler, Alexander},
  title     = {{VAnnotatoR: Ein Werkzeug zur Annotation multimodaler Netzwerke
               in dreidimensionalen virtuellen Umgebungen}},
  booktitle = {Proceedings of the 6th Digital Humanities Conference in the German-speaking
               Countries, DHd 2019},
  series    = {DHd 2019},
  location  = {Frankfurt, Germany},
  year      = {2019},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/Preprint_VAnnotatoR_DHd2019.pdf},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/DHDVAnnotatoRPoster.pdf}
}
2019.
Der TextImager als Front- und Backend für das verteilte NLP von
Big Digital Humanities Data. Proceedings of the 6th Digital Humanities Conference in the German-speaking
Countries, DHd 2019.
BibTeX
@inproceedings{Hemati:Mehler:Uslu:Abrami:2019,
  author    = {Hemati, Wahed and Mehler, Alexander and Uslu, Tolga and Abrami, Giuseppe},
  title     = {{Der TextImager als Front- und Backend für das verteilte NLP von
               Big Digital Humanities Data}},
  booktitle = {Proceedings of the 6th Digital Humanities Conference in the German-speaking
               Countries, DHd 2019},
  series    = {DHd 2019},
  location  = {Frankfurt, Germany},
  year      = {2019},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/Der-TextImager-als-Fron-und-Backend.pdf},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/DHD19_TextImager.pdf}
}
2019.
A practitioner's view: a survey and comparison of lemmatization
and morphological tagging in German and Latin. Journal of Language Modeling.
BibTeX
@article{Gleim:Eger:Mehler:2019,
  author  = {Gleim, R{\"u}diger and Eger, Steffen and Mehler, Alexander and Uslu, Tolga
             and Hemati, Wahed and L{\"u}cking, Andy and Henlein, Alexander and Kahlsdorf, Sven
             and Hoenen, Armin},
  title   = {A practitioner's view: a survey and comparison of lemmatization
             and morphological tagging in {German} and {Latin}},
  journal = {Journal of Language Modeling},
  year    = {2019},
  doi     = {10.15398/jlm.v7i1.205},
  url     = {http://jlm.ipipan.waw.pl/index.php/JLM/article/view/205},
  pdf     = {https://www.texttechnologylab.org/wp-content/uploads/2019/07/jlm-tagging.pdf}
}
2018
May 7–12, 2018.
Multi Modal Distance - An Approach to Stemma Generation With Weighting. Proceedings of the Eleventh International Conference on Language
Resources and Evaluation (LREC 2018).
BibTeX
@inproceedings{HOENEN18.285,
  author    = {Hoenen, Armin},
  title     = {{Multi Modal Distance - An Approach to Stemma Generation With Weighting}},
  booktitle = {Proceedings of the Eleventh International Conference on Language
               Resources and Evaluation (LREC 2018)},
  year      = {2018},
  month     = may,
  address   = {Miyazaki, Japan},
  editor    = {Nicoletta Calzolari (Conference chair) and Khalid Choukri and Christopher Cieri
               and Thierry Declerck and Sara Goggi and Koiti Hasida and Hitoshi Isahara
               and Bente Maegaard and Joseph Mariani and Hélène Mazo and Asuncion Moreno
               and Jan Odijk and Stelios Piperidis and Takenobu Tokunaga},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {979-10-95546-00-9},
  url       = {http://www.lrec-conf.org/proceedings/lrec2018/pdf/285.pdf},
  language  = {english}
}
May 7–12, 2018.
From Manuscripts to Archetypes through Iterative Clustering. Proceedings of the Eleventh International Conference on Language
Resources and Evaluation (LREC 2018).
BibTeX
@inproceedings{HOENEN18.314,
  author    = {Hoenen, Armin},
  title     = {{From Manuscripts to Archetypes through Iterative Clustering}},
  booktitle = {Proceedings of the Eleventh International Conference on Language
               Resources and Evaluation (LREC 2018)},
  year      = {2018},
  month     = may,
  address   = {Miyazaki, Japan},
  editor    = {Nicoletta Calzolari (Conference chair) and Khalid Choukri and Christopher Cieri
               and Thierry Declerck and Sara Goggi and Koiti Hasida and Hitoshi Isahara
               and Bente Maegaard and Joseph Mariani and Hélène Mazo and Asuncion Moreno
               and Jan Odijk and Stelios Piperidis and Takenobu Tokunaga},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {979-10-95546-00-9},
  url       = {http://www.lrec-conf.org/proceedings/lrec2018/pdf/314.pdf},
  language  = {english}
}
May 7–12, 2018.
Knowing the Author by the Company His Words Keep. Proceedings of the Eleventh International Conference on Language
Resources and Evaluation (LREC 2018).
BibTeX
@inproceedings{HOENEN18.349,
  author    = {Hoenen, Armin and Schenk, Niko},
  title     = {{Knowing the Author by the Company His Words Keep}},
  booktitle = {Proceedings of the Eleventh International Conference on Language
               Resources and Evaluation (LREC 2018)},
  year      = {2018},
  month     = may,
  address   = {Miyazaki, Japan},
  editor    = {Nicoletta Calzolari (Conference chair) and Khalid Choukri and Christopher Cieri
               and Thierry Declerck and Sara Goggi and Koiti Hasida and Hitoshi Isahara
               and Bente Maegaard and Joseph Mariani and Hélène Mazo and Asuncion Moreno
               and Jan Odijk and Stelios Piperidis and Takenobu Tokunaga},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {979-10-95546-00-9},
  url       = {http://www.lrec-conf.org/proceedings/lrec2018/pdf/349.pdf},
  language  = {english}
}
May 7–12, 2018.
Attempts at Visualization of Etymological Information. Proceedings of the Eleventh International Conference on Language
Resources and Evaluation (LREC 2018).
BibTeX
@inproceedings{HOENEN18.9,
  author    = {Hoenen, Armin},
  title     = {Attempts at Visualization of Etymological Information},
  booktitle = {Proceedings of the Eleventh International Conference on Language
               Resources and Evaluation (LREC 2018)},
  year      = {2018},
  month     = may,
  date      = {7-12},
  location  = {Miyazaki, Japan},
  editor    = {Kernerman, Ilan and Krek, Simon},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Paris, France},
  isbn      = {979-10-95546-28-3},
  language  = {english},
  url       = {http://lrec-conf.org/workshops/lrec2018/W33/pdf/book_of_proceedings.pdf}
}
2018.
PhD Thesis: Tools, evaluation and preprocessing for stemmatology.
BibTeX
@phdthesis{Hoenen2018,
  author = {Armin Hoenen},
  title  = {Tools, evaluation and preprocessing for stemmatology},
  school = {Goethe University Frankfurt},
  type   = {Dissertation},
  year   = {2018}
}
2018.
Annotated Timelines and Stacked Area Plots for Visualization in Lexicography. Elexis workshop at EADH 2018.
2018.
Recurrence Analysis Function, a Dynamic Heatmap for the Visualization
of Verse Text and Beyond. In: Visualisierung sprachlicher Daten: Visual Linguistics – Praxis – Tools.
Heidelberg University Press.
BibTeX
@inbook{Hoenen:2018,
  author    = {Hoenen, Armin},
  title     = {Recurrence Analysis Function, a Dynamic Heatmap for the Visualization
               of Verse Text and Beyond},
  booktitle = {Visualisierung sprachlicher Daten: Visual Linguistics – Praxis – Tools},
  editor    = {Bubenhofer, Noah and Kupietz, Marc},
  publisher = {Heidelberg University Press},
  address   = {Heidelberg},
  year      = {2018},
  url       = {https://heiup.uni-heidelberg.de/reader/download/345/345-69-80909-2-10-20180411.pdf},
  abstract  = {The Recurrence Analysis Function (ReAF) is a cross-linguistic
               visualization tool for (historical) verse text, especially handwritten
               epics. It can also provide a general visualization of various
               aspects of prose text. It aims to enable intuitive understanding
               through explorative data analysis of historical, especially bardic-oral
               texts.1 The assumption behind this is that bardic/born-oral and
               non-bardic/born-written texts differ drastically in the way they
               employ repetition. The ReAF in its first implementation, as presented
               here, is a language-independent tool that permits the visual exploration
               of such structures. Firstly, general aspects and formal characteristics
               of oral verse text are characterized, before the main technical
               details and some additional applications of the ReAF are explained
               and illustrated.}
}
November, 2018.
On the limit value of compactness of some graph classes. PLOS ONE, 13(11):1–8.
BibTeX
@article{Lokot:Mehler:Abramov:2018,
  author    = {Lokot, Tatiana and Mehler, Alexander and Abramov, Olga},
  title     = {On the limit value of compactness of some graph classes},
  journal   = {PLOS ONE},
  publisher = {Public Library of Science},
  volume    = {13},
  number    = {11},
  pages     = {1--8},
  month     = nov,
  year      = {2018},
  doi       = {10.1371/journal.pone.0207536},
  url       = {https://doi.org/10.1371/journal.pone.0207536},
  abstract  = {In this paper, we study the limit of compactness which is a graph
               index originally introduced for measuring structural characteristics
               of hypermedia. Applying compactness to large scale small-world
               graphs (Mehler, 2008) observed its limit behaviour to be equal
               1. The striking question concerning this finding was whether this
               limit behaviour resulted from the specifics of small-world graphs
               or was simply an artefact. In this paper, we determine the necessary
               and sufficient conditions for any sequence of connected graphs
               resulting in a limit value of CB = 1 which can be generalized
               with some consideration for the case of disconnected graph classes
               (Theorem 3). This result can be applied to many well-known classes
               of connected graphs. Here, we illustrate it by considering four
               examples. In fact, our proof-theoretical approach allows for quickly
               obtaining the limit value of compactness for many graph classes
               sparing computational costs.}
}
2018.
Corpus2Wiki: A MediaWiki based Annotation & Visualisation Tool
for the Digital Humanities. INF-DH-2018.
BibTeX
@inproceedings{Rutherford:et:al:2018,
  author    = {Rutherford, Eleanor and Hemati, Wahed and Mehler, Alexander},
  title     = {{Corpus2Wiki}: A MediaWiki based Annotation \& Visualisation Tool
               for the Digital Humanities},
  booktitle = {INF-DH-2018},
  editor    = {Burghardt, Manuel and Müller-Birn, Claudia},
  publisher = {Gesellschaft für Informatik e.V.},
  address   = {Bonn},
  year      = {2018}
}
2018.
TextAnnotator: A Browser-based Framework for Annotating Textual
Data in Digital Humanities. Proceedings of the Digital Humanities Austria 2018.
BibTeX
@inproceedings{Abrami:et:al:2018,
  author    = {Giuseppe Abrami and Alexander Mehler and Philipp Helfrich and Elias Rieb},
  title     = {{TextAnnotator}: A Browser-based Framework for Annotating Textual
               Data in Digital Humanities},
  booktitle = {Proceedings of the Digital Humanities Austria 2018},
  location  = {Salzburg, Austria},
  year      = {2018},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/TA__A_Browser_based_Framework_for_Annotating_Textual_Data_in_Digital_Humanities.pdf}
}
2018.
Resource-Size matters: Improving Neural Named Entity Recognition
with Optimized Large Corpora. Proceedings of the 17th IEEE International Conference on Machine
Learning and Applications (ICMLA).
BibTeX
@inproceedings{Ahmed:Mehler:2018,
  author    = {Sajawel Ahmed and Alexander Mehler},
  title     = {{Resource-Size matters: Improving Neural Named Entity Recognition
               with Optimized Large Corpora}},
  booktitle = {Proceedings of the 17th IEEE International Conference on Machine
               Learning and Applications (ICMLA)},
  location  = {Orlando, Florida, USA},
  year      = {2018},
  pdf       = {https://arxiv.org/pdf/1807.10675.pdf},
  abstract  = {This study improves the performance of neural named entity recognition
               by a margin of up to 11\% in terms of F-score on the example of
               a low-resource language like German, thereby outperforming existing
               baselines and establishing a new state-of-the-art on each single
               open-source dataset (CoNLL 2003, GermEval 2014 and Tübingen Treebank
               2018). Rather than designing deeper and wider hybrid neural architectures,
               we gather all available resources and perform a detailed optimization
               and grammar-dependent morphological processing consisting of lemmatization
               and part-of-speech tagging prior to exposing the raw data to any
               training process. We test our approach in a threefold monolingual
               experimental setup of a) single, b) joint, and c) optimized training
               and shed light on the dependency of downstream-tasks on the size
               of corpora used to compute word embeddings.}
}
2018.
BioFID, a platform to enhance accessibility of biodiversity data. Proceedings of the 10th International Conference on Ecological Informatics.
BibTeX
@inproceedings{Weiland:et:al:2018,
  author    = {Claus Weiland and Christine Driller and Markus Koch and Marco Schmidt
               and Giuseppe Abrami and Sajawel Ahmed and Alexander Mehler and Adrian Pachzelt
               and Gerwin Kasperek and Angela Hausinger and Thomas Hörnschemeyer},
  title     = {{BioFID}, a platform to enhance accessibility of biodiversity data},
  booktitle = {Proceedings of the 10th International Conference on Ecological Informatics},
  location  = {Jena, Germany},
  year      = {2018},
  url       = {https://www.researchgate.net/profile/Marco_Schmidt3/publication/327940813_BIOfid_a_Platform_to_Enhance_Accessibility_of_Biodiversity_Data/links/5bae3e3e92851ca9ed2cd60f/BIOfid-a-Platform-to-Enhance-Accessibility-of-Biodiversity-Data.pdf?origin=publication_detail}
}
2018.
Resources2City Explorer: A System for Generating Interactive
Walkable Virtual Cities out of File Systems. Proceedings of the 31st ACM User Interface Software and Technology Symposium.
BibTeX
@inproceedings{Kett:et:al:2018,
  author    = {Attila Kett and Giuseppe Abrami and Alexander Mehler and Christian Spiekermann},
  title     = {{Resources2City Explorer}: A System for Generating Interactive
               Walkable Virtual Cities out of File Systems},
  booktitle = {Proceedings of the 31st ACM User Interface Software and Technology Symposium},
  location  = {Berlin, Germany},
  year      = {2018},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2018/10/UIST2018Final.pdf},
  abstract  = {We present Resources2City Explorer (R2CE), a tool for representing
               file systems as interactive, walkable virtual cities. R2CE visualizes
               file systems based on concepts of spatial, 3D information processing.
               For this purpose, it extends the range of functions of conventional
               file browsers considerably. Visual elements in a city generated
               by R2CE represent (relations of) objects of the underlying file
               system. The paper describes the functional spectrum of R2CE and
               illustrates it by visualizing a sample of 940 files.}
}
2018.
Witness-loaded and Witness-free Demonstratives. Atypical Demonstratives.
BibTeX
@incollection{Luecking:2018:a,
  author    = {Andy L{\"u}cking},
  title     = {Witness-loaded and Witness-free Demonstratives},
  booktitle = {Atypical Demonstratives},
  editor    = {Marco Coniglio and Andrew Murphy and Eva Schlachter and Tonjes Veenstra},
  publisher = {De Gruyter},
  year      = {2018},
  isbn      = {978-3-11-056029-9},
  url       = {https://www.degruyter.com/view/product/495228},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2020/05/Luecking-witness-loading-rg.pdf}
}
2018.
`Most people but not Bill': integrating sets, individuals and
negation into a cognitively plausible account of noun phrase interpretation. Proceedings of Cognitive Structures: Linguistic, Philosophical
and Psychological Perspectives.
BibTeX
@inproceedings{Luecking:Ginzburg:2018,
  author    = {L{\"u}cking, Andy and Ginzburg, Jonathan},
  title     = {`Most people but not {Bill}': integrating sets, individuals and
               negation into a cognitively plausible account of noun phrase interpretation},
  booktitle = {Proceedings of Cognitive Structures: Linguistic, Philosophical
               and Psychological Perspectives},
  series    = {CoSt'18},
  year      = {2018}
}
2018.
PolyViz: a Visualization System for a Special Kind of Multipartite Graphs. Proceedings of the IEEE VIS 2018.
BibTeX
@inproceedings{Uslu:Mehler:2018,
  author    = {Tolga Uslu and Alexander Mehler},
  title     = {{PolyViz}: a Visualization System for a Special Kind of Multipartite Graphs},
  booktitle = {Proceedings of the IEEE VIS 2018},
  series    = {IEEE VIS 2018},
  location  = {Berlin, Germany},
  year      = {2018},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/07/polyviz-visualization-system.pdf}
}
2018.
LTV: Labeled Topic Vector. Proceedings of COLING 2018, the 27th International Conference
on Computational Linguistics: System Demonstrations, August 20-26.
BibTeX
@inproceedings{Baumartz:Uslu:Mehler:2018,
  author    = {Daniel Baumartz and Tolga Uslu and Alexander Mehler},
  title     = {{LTV}: Labeled Topic Vector},
  booktitle = {Proceedings of {COLING 2018}, the 27th International Conference
               on Computational Linguistics: System Demonstrations, August 20-26},
  publisher = {The COLING 2018 Organizing Committee},
  address   = {Santa Fe, New Mexico, USA},
  year      = {2018},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/06/coling2018.pdf},
  abstract  = {In this paper, we present LTV, a website and an API that generate
               labeled topic classifications based on the Dewey Decimal Classification
               (DDC), an international standard for topic classification in libraries.
               We introduce nnDDC, a largely language-independent neural network-based
               classifier for DDC-related topic classification, which we optimized
               using a wide range of linguistic features to achieve an F-score
               of 87.4\%. To show that our approach is language-independent,
               we evaluate nnDDC using up to 40 different languages. We derive
               a topic model based on nnDDC, which generates probability distributions
               over semantic units for any input on sense-, word- and text-level.
               Unlike related approaches, however, these probabilities are estimated
               by means of nnDDC so that each dimension of the resulting vector
               representation is uniquely labeled by a DDC class. In this way,
               we introduce a neural network-based Classifier-Induced Semantic
               Space (nnCISS).}
}
2018.
Workflow and Current Achievements of BIOfid, an Information Service
Mobilizing Biodiversity Data from Literature Sources. Biodiversity Information Science and Standards, 2:e25876.
BibTeX
@article{Driller:et:al:2018,
  author = {Christine Driller and Markus Koch and Marco Schmidt and Claus Weiland
  and Thomas Hörnschemeyer and Thomas Hickler and Giuseppe Abrami and Sajawel Ahmed
  and Rüdiger Gleim and Wahed Hemati and Tolga Uslu and Alexander Mehler
  and Adrian Pachzelt and Jashar Rexhepi and Thomas Risse and Janina Schuster
  and Gerwin Kasperek and Angela Hausinger},
  title = {Workflow and Current Achievements of BIOfid, an Information Service
  Mobilizing Biodiversity Data from Literature Sources},
  journal = {Biodiversity Information Science and Standards},
  volume = {2},
  pages = {e25876},
  year = {2018},
  publisher = {Pensoft Publishers},
  doi = {10.3897/biss.2.25876},
  url = {https://doi.org/10.3897/biss.2.25876},
  keywords = {biofid},
  abstract = {BIOfid is a specialized information service currently being developed
  to mobilize biodiversity data dormant in printed historical and
  modern literature and to offer a platform for open access journals
  on the science of biodiversity. Our team of librarians, computer
  scientists and biologists produce high-quality text digitizations,
  develop new text-mining tools and generate detailed ontologies
  enabling semantic text analysis and semantic search by means of
  user-specific queries. In a pilot project we focus on German publications
  on the distribution and ecology of vascular plants, birds, moths
  and butterflies extending back to the Linnaeus period about 250
  years ago. The three organism groups have been selected according
  to current demands of the relevant research community in Germany.
  The text corpus defined for this purpose comprises over 400 volumes
  with more than 100,000 pages to be digitized and will be complemented
  by journals from other digitization projects, copyright-free and
  project-related literature. With TextImager (Natural Language
  Processing & Text Visualization) and TextAnnotator (Discourse
  Semantic Annotation) we have already extended and launched tools
  that focus on the text-analytical section of our project. Furthermore,
  taxonomic and anatomical ontologies elaborated by us for the taxa
  prioritized by the project’s target group - German institutions
  and scientists active in biodiversity research - are constantly
  improved and expanded to maximize scientific data output. Our
  poster describes the general workflow of our project ranging from
  literature acquisition via software development, to data availability
  on the BIOfid web portal (http://biofid.de/), and the implementation
  into existing platforms which serve to promote global accessibility
  of biodiversity data.}
}
2018.
VAnnotatoR: A Framework for Generating Multimodal Hypertexts. Proceedings of the 29th ACM Conference on Hypertext and Social Media.
BibTeX
@inproceedings{Mehler:Abrami:Spiekermann:Jostock:2018,
  author    = {Mehler, Alexander and Abrami, Giuseppe and Spiekermann, Christian
               and Jostock, Matthias},
  title     = {{VAnnotatoR}: {A} Framework for Generating Multimodal Hypertexts},
  booktitle = {Proceedings of the 29th ACM Conference on Hypertext and Social Media},
  series    = {Proceedings of the 29th ACM Conference on Hypertext and Social Media (HT '18)},
  location  = {Baltimore, Maryland},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {2018},
  pdf       = {http://delivery.acm.org/10.1145/3210000/3209572/p150-mehler.pdf}
}
2018.
Evaluating and Integrating Databases in the Area of NLP. International Quantitative Linguistics Conference (QUALICO 2018).
BibTeX
@inproceedings{Hemati:Mehler:Uslu:Baumartz:Abrami:2018,
  author    = {Wahed Hemati and Alexander Mehler and Tolga Uslu and Daniel Baumartz
               and Giuseppe Abrami},
  title     = {Evaluating and Integrating Databases in the Area of {NLP}},
  booktitle = {International Quantitative Linguistics Conference (QUALICO 2018)},
  location  = {Wroclaw, Poland},
  year      = {2018},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/04/Hemat-Mehler-Uslu-Baumartz-Abrami-Qualico-2018.pdf},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2018/10/qualico2018_databases_poster_hemati_mehler_uslu_baumartz_abrami.pdf}
}
2018.
World of the Khwe Bushmen: Accessing Khwe Cultural Heritage data
by means of a digital ontology based on OWLnotator. Proceedings of the Digital Humanities 2018.
BibTeX
@inproceedings{Abrami:Boden:Gleiss:2018,
  author    = {Abrami, Giuseppe and Boden, Gertrud and Glei\ss{}, Lisa},
  title     = {{World of the Khwe Bushmen: Accessing Khwe Cultural Heritage data
               by means of a digital ontology based on OWLnotator}},
  booktitle = {Proceedings of the Digital Humanities 2018},
  series    = {DH2018},
  location  = {Mexico City, Mexico},
  year      = {2018}
}
2018.
VienNA: Auf dem Weg zu einer Infrastruktur für die verteilte
interaktive evolutionäre Verarbeitung natürlicher Sprache. Forschungsinfrastrukturen und digitale Informationssysteme in
der germanistischen Sprachwissenschaft, 6.
BibTeX
@incollection{Mehler:Hemati:Gleim:Baumartz:2018,
  author    = {Alexander Mehler and Wahed Hemati and Rüdiger Gleim and Daniel Baumartz},
  title     = {{VienNA: }{Auf dem Weg zu einer Infrastruktur für die verteilte
               interaktive evolutionäre Verarbeitung natürlicher Sprache}},
  booktitle = {Forschungsinfrastrukturen und digitale Informationssysteme in
               der germanistischen Sprachwissenschaft},
  editor    = {Henning Lobin and Roman Schneider and Andreas Witt},
  volume    = {6},
  publisher = {De Gruyter},
  address   = {Berlin},
  year      = {2018}
}
2018.
A Multidimensional Model of Syntactic Dependency Trees for Authorship
Attribution. Quantitative analysis of dependency structures.
BibTeX
@incollection{Mehler:Hemati:Uslu:Luecking:2018,
author = {Alexander Mehler and Wahed Hemati and Tolga Uslu and Andy Lücking},
title = {A Multidimensional Model of Syntactic Dependency Trees for Authorship
Attribution},
booktitle = {Quantitative analysis of dependency structures},
publisher = {De Gruyter},
editor = {Jingyang Jiang and Haitao Liu},
address = {Berlin/New York},
abstract = {In this chapter we introduce a multidimensional model
of syntactic dependency trees. Our ultimate goal is to generate
fingerprints of such trees to predict the author of the underlying
sentences. The chapter makes a first attempt to create such fingerprints
for sentence categorization via the detour of text categorization.
We show that at text level, aggregated dependency structures actually
provide information about authorship. At the same time, we show
that this does not hold for topic detection. We evaluate our model
using a quarter of a million sentences collected in two corpora:
the first is sampled from literary texts, the second from Wikipedia
articles. As a second finding of our approach, we show that quantitative
models of dependency structure do not yet allow for detecting
syntactic alignment in written communication. We conclude that
this is mainly due to effects of lexical alignment on syntactic
alignment.},
keywords = {Dependency structure, Authorship attribution, Text
categorization, Syntactic Alignment},
year = {2018}
}
2018.
LitViz: Visualizing Literary Data by Means of text2voronoi. Proceedings of the Digital Humanities 2018.
BibTeX
@inproceedings{Uslu:Mehler:Meyer:2018,
  author    = {Tolga Uslu and Alexander Mehler and Dirk Meyer},
  title     = {{{LitViz}: Visualizing Literary Data by Means of text2voronoi}},
  booktitle = {Proceedings of the Digital Humanities 2018},
  series    = {DH2018},
  location  = {Mexico City, Mexico},
  year      = {2018},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/LitViz.pdf}
}
2018.
VAnnotatoR: a Gesture-driven Annotation Framework for Linguistic
and Multimodal Annotation. Proceedings of the Annotation, Recognition and Evaluation of Actions
(AREA 2018) Workshop.
BibTeX
@inproceedings{Spiekerman:Abrami:Mehler:2018,
  author    = {Christian Spiekermann and Giuseppe Abrami and Alexander Mehler},
  title     = {{VAnnotatoR}: a Gesture-driven Annotation Framework for Linguistic
               and Multimodal Annotation},
  booktitle = {Proceedings of the Annotation, Recognition and Evaluation of Actions
               (AREA 2018) Workshop},
  series    = {AREA},
  location  = {Miyazaki, Japan},
  year      = {2018},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/VAnnotatoR.pdf}
}
2018.
Automatic Classification in Memory Clinic Patients and in Depressive Patients. Proceedings of Resources and ProcessIng of linguistic, para-linguistic
and extra-linguistic Data from people with various forms of cognitive/psychiatric
impairments (RaPID-2).
BibTeX
@inproceedings{Uslu:et:al:2018:a,
  author    = {Tolga Uslu and Lisa Miebach and Steffen Wolfsgruber and Michael Wagner
               and Klaus Fließbach and Rüdiger Gleim and Wahed Hemati and Alexander Henlein
               and Alexander Mehler},
  title     = {{Automatic Classification in Memory Clinic Patients and in Depressive Patients}},
  booktitle = {Proceedings of Resources and ProcessIng of linguistic, para-linguistic
               and extra-linguistic Data from people with various forms of cognitive/psychiatric
               impairments (RaPID-2)},
  series    = {RaPID},
  location  = {Miyazaki, Japan},
  year      = {2018}
}
2018.
On the Self-similarity of Wikipedia Talks: a Combined Discourse-analytical
and Quantitative Approach. Glottometrics, 40:1–44.
BibTeX
@article{Mehler:Gleim:Luecking:Uslu:Stegbauer:2018,
  author  = {Alexander Mehler and Rüdiger Gleim and Andy Lücking and Tolga Uslu
             and Christian Stegbauer},
  title   = {On the Self-similarity of {Wikipedia} Talks: a Combined Discourse-analytical
             and Quantitative Approach},
  journal = {Glottometrics},
  volume  = {40},
  pages   = {1--44},
  year    = {2018},
  pdf     = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/Glottometrics-Mehler.pdf}
}
2018.
Towards a DDC-based Topic Network Model of Wikipedia. Proceedings of 2nd International Workshop on Modeling, Analysis,
and Management of Social Networks and their Applications (SOCNET
2018), February 28, 2018.
BibTeX
@inproceedings{Uslu:Mehler:Niekler:Baumartz:2018,
  author    = {Tolga Uslu and Alexander Mehler and Andreas Niekler and Daniel Baumartz},
  title     = {Towards a {DDC}-based Topic Network Model of Wikipedia},
  booktitle = {Proceedings of 2nd International Workshop on Modeling, Analysis,
               and Management of Social Networks and their Applications (SOCNET
               2018), February 28, 2018},
  year      = {2018},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/TowardsDDC.pdf}
}
2018.
fastSense: An Efficient Word Sense Disambiguation Classifier. Proceedings of the 11th edition of the Language Resources and
Evaluation Conference, May 7 - 12.
BibTeX
@inproceedings{Uslu:et:al:2018,
  author    = {Tolga Uslu and Alexander Mehler and Daniel Baumartz and Alexander Henlein
               and Wahed Hemati},
  title     = {fastSense: An Efficient Word Sense Disambiguation Classifier},
  booktitle = {Proceedings of the 11th edition of the Language Resources and
               Evaluation Conference, May 7 - 12},
  series    = {LREC 2018},
  address   = {Miyazaki, Japan},
  year      = {2018},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/fastSense.pdf}
}
2018.
WikiDragon: A Java Framework For Diachronic Content And Network
Analysis Of MediaWikis. Proceedings of the 11th edition of the Language Resources and
Evaluation Conference, May 7 - 12.
BibTeX
@inproceedings{Gleim:Mehler:Song:2018,
  author    = {R{\"u}diger Gleim and Alexander Mehler and Sung Y. Song},
  title     = {WikiDragon: A Java Framework For Diachronic Content And Network
               Analysis Of MediaWikis},
  booktitle = {Proceedings of the 11th edition of the Language Resources and
               Evaluation Conference, May 7 - 12},
  series    = {LREC 2018},
  address   = {Miyazaki, Japan},
  year      = {2018},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/WikiDragon.pdf}
}
2018.
TreeAnnotator: Versatile Visual Annotation of Hierarchical Text Relations. Proceedings of the 11th edition of the Language Resources and
Evaluation Conference, May 7 - 12.
BibTeX
@inproceedings{Helfrich:et:al:2018,
  author    = {Philipp Helfrich and Elias Rieb and Giuseppe Abrami and Andy L{\"u}cking
               and Alexander Mehler},
  title     = {TreeAnnotator: Versatile Visual Annotation of Hierarchical Text Relations},
  booktitle = {Proceedings of the 11th edition of the Language Resources and
               Evaluation Conference, May 7 - 12},
  series    = {LREC 2018},
  address   = {Miyazaki, Japan},
  year      = {2018},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/TreeAnnotator.pdf}
}
2018.
A UIMA Database Interface for Managing NLP-related Text Annotations. Proceedings of the 11th edition of the Language Resources and
Evaluation Conference, May 7 - 12.
BibTeX
@inproceedings{Abrami:Mehler:2018,
  author    = {Giuseppe Abrami and Alexander Mehler},
  title     = {A UIMA Database Interface for Managing NLP-related Text Annotations},
  booktitle = {Proceedings of the 11th edition of the Language Resources and
               Evaluation Conference, May 7 - 12},
  series    = {LREC 2018},
  address   = {Miyazaki, Japan},
  year      = {2018},
  keywords  = {UIMA},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/UIMA-DI.pdf}
}
2018.
Ferdinand de Saussure. 1916. Cours de linguistique générale.
Payot, Lausanne/Paris. In: Schlüsselwerke der Netzwerkforschung.
Springer VS.
BibTeX
@inbook{Mehler:Stegbauer:Frank-Job:2018,
  author    = {Alexander Mehler and Christian Stegbauer and Barbara Frank-Job},
  title     = {{Ferdinand de Saussure. 1916. Cours de linguistique générale.
               Payot, Lausanne/Paris}},
  booktitle = {Schlüsselwerke der Netzwerkforschung},
  editor    = {Christian Stegbauer and Boris Holzer},
  publisher = {Springer VS},
  address   = {Wiesbaden},
  year      = {2018},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2017/11/Saussure2.pdf}
}
2018.
Integrating Computational Linguistic Analysis of Multilingual
Learning Data and Educational Measurement Approaches to Explore
Learning in Higher Education. In: Positive Learning in the Age of Information: A Blessing or a Curse?, 145–193.
Springer Fachmedien Wiesbaden.
BibTeX
@inbook{Mehler:et:al:2018,
abstract = {This chapter develops a computational linguistic model for analyzing
and comparing multilingual data as well as its application to
a large body of standardized assessment data from higher education.
The approach employs both an automatic and a manual annotation
of the data on several linguistic layers (including parts of speech,
text structure and content). Quantitative features of the textual
data are explored that are related to both the students' (domain-specific
knowledge) test results and their level of academic experience.
The respective analysis involves statistics of distance correlation,
text categorization with respect to text types (questions and
response options) as well as languages (English and German), and
network analysis to assess dependencies between features. The
correlation between correct test results of students and linguistic
features of the verbal presentations of tests indicate to what
extent language influences higher education test performance.
It has also been found that this influence relates to specialized
language. Thus, this integrative modeling approach contributes
a test basis for a large-scale analysis of learning data and points
to a number of subsequent, more detailed research questions.},
address = {Wiesbaden},
author = {Mehler, Alexander and Zlatkin-Troitschanskaia, Olga and Hemati, Wahed
and Molerov, Dimitri and L{\"u}cking, Andy and Schmidt, Susanne},
booktitle = {Positive Learning in the Age of Information: A Blessing or a Curse?},
doi = {10.1007/978-3-658-19567-0_10},
editor = {Zlatkin-Troitschanskaia, Olga and Wittum, Gabriel and Dengel, Andreas},
isbn = {978-3-658-19567-0},
pages = {145--193},
publisher = {Springer Fachmedien Wiesbaden},
title = {Integrating Computational Linguistic Analysis of Multilingual
Learning Data and Educational Measurement Approaches to Explore
Learning in Higher Education},
year = {2018}
}
March, 2018.
Natural Language Processing and Text Mining for BIOfid.
BibTeX
@misc{Abrami:et:al:2018b,
author = {Abrami, Giuseppe and Ahmed, Sajawel and Gleim, R{\"u}diger and Hemati, Wahed
and Mehler, Alexander and Uslu, Tolga},
title = {{Natural Language Processing and Text Mining for BIOfid}},
howpublished = {Presentation at the 1st Meeting of the Scientific Advisory Board of the BIOfid Project},
address = {Goethe-University, Frankfurt am Main, Germany},
year = {2018},
month = mar,
day = {08}
}
2017
2017.
Modelle sozialer Netzwerke und Natural Language Processing: eine
methodologische Randnotiz. Soziologie, 46(1):43–47.
BibTeX
@article{Mehler:Luecking:2017,
author = {Alexander Mehler and Andy L{\"u}cking},
title = {Modelle sozialer Netzwerke und Natural Language Processing: eine
methodologische Randnotiz},
journal = {Soziologie},
volume = {46},
number = {1},
pages = {43--47},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/Soziologe-NetzwerkeundNLP.pdf},
year = {2017}
}
2017.
CRFVoter: Chemical Entity Mention, Gene and Protein Related
Object recognition using a conglomerate of CRF based tools. BioCreative V.5. Proceedings.
BibTeX
@inproceedings{Hemati:Mehler:Uslu:2017,
  author    = {Wahed Hemati and Alexander Mehler and Tolga Uslu},
  title     = {{CRFVoter}: Chemical Entity Mention, Gene and Protein Related
               Object recognition using a conglomerate of CRF based tools},
  booktitle = {BioCreative V.5. Proceedings},
  year      = {2017},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/CRFVoter.pdf}
}
2017.
TextImager as an interface to BeCalm. BioCreative V.5. Proceedings.
BibTeX
@inproceedings{Hemati:Uslu:Mehler:2017,
  author    = {Wahed Hemati and Tolga Uslu and Alexander Mehler},
  title     = {{TextImager} as an interface to {BeCalm}},
  booktitle = {BioCreative V.5. Proceedings},
  year      = {2017},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/TextImager_BeCalm.pdf}
}
2017.
Stolperwege: An App for a Digital Public History of the Holocaust. Proceedings of the 28th ACM Conference on Hypertext and Social Media, 319–320.
BibTeX
@inproceedings{Mehler:et:al:2017:a,
author = {Alexander Mehler and Giuseppe Abrami and Steffen Bruendel and Lisa Felder
and Thomas Ostertag and Christian Spiekermann},
title = {{Stolperwege:} An App for a Digital Public History of the {Holocaust}},
booktitle = {Proceedings of the 28th ACM Conference on Hypertext and Social Media},
series = {HT '17},
pages = {319--320},
address = {New York, NY, USA},
publisher = {ACM},
abstract = {We present the Stolperwege app, a web-based framework for ubiquitous
modeling of historical processes. Starting from the art project
Stolpersteine of Gunter Demnig, it allows for virtually connecting
these stumbling blocks with information about the biographies
of victims of Nazism. According to the practice of public history,
the aim of Stolperwege is to deepen public knowledge of the Holocaust
in the context of our everyday environment. Stolperwege uses an
information model that allows for modeling social networks of
agents starting from information about portions of their life.
The paper exemplifies how Stolperwege is informationally enriched
by means of historical maps and 3D animations of (historical)
buildings.},
acmid = {3078748},
doi = {10.1145/3078714.3078748},
isbn = {978-1-4503-4708-2},
keywords = {3d, geocaching, geotagging, historical maps,
historical processes, public history of the holocaust,
ubiquitous computing},
location = {Prague, Czech Republic},
numpages = {2},
poster = {https://www.texttechnologylab.org/wp-content/uploads/2017/07/poster_ht2017.pdf},
year = {2017}
}
2017.
Skalenfreie online soziale Lexika am Beispiel von Wiktionary. Proceedings of 53rd Annual Conference of the Institut für Deutsche
Sprache (IDS), March 14-16, Mannheim, Germany.
In German. Title translates into: Scale-free
online-social Lexika by Example of Wiktionary.
BibTeX
@inproceedings{Mehler:Gleim:Hemati:Uslu:2017,
internal-note = {NOTE(review): this entry stores raw UTF-8 (ü in author/booktitle, umlauts in the abstract) while neighbouring entries use TeX escapes -- confirm the file is processed with a UTF-8-aware tool (biber) before normalising},
author = {Alexander Mehler and Rüdiger Gleim and Wahed Hemati and Tolga Uslu},
title = {{Skalenfreie online soziale Lexika am Beispiel von Wiktionary}},
booktitle = {Proceedings of 53rd Annual Conference of the Institut für Deutsche
Sprache (IDS), March 14-16, Mannheim, Germany},
editor = {Stefan Engelberg and Henning Lobin and Kathrin Steyer and Sascha Wolfer},
address = {Berlin},
publisher = {De Gruyter},
note = {In German. Title translates into: Scale-free
online-social Lexika by Example of Wiktionary},
abstract = {In English: The paper deals with characteristics of the structural,
thematic and participatory dynamics of collaboratively generated
lexical networks. This is done by example of Wiktionary. Starting
from a network-theoretical model in terms of so-called multi-layer
networks, we describe Wiktionary as a scale-free lexicon. Systems
of this sort are characterized by the fact that their content-related
dynamics is determined by the underlying dynamics of collaborating
authors. This happens in a way that social structure imprints
on content structure. According to this conception, the unequal
distribution of the activities of authors results in a correspondingly
unequal distribution of the information units documented within
the lexicon. The paper focuses on foundations for describing such
systems starting from a parameter space which requires to deal
with Wiktionary as an issue in big data analysis. In German: Der
Beitrag thematisiert Eigenschaften der strukturellen, thematischen
und partizipativen Dynamik kollaborativ erzeugter lexikalischer
Netzwerke am Beispiel von Wiktionary. Ausgehend von einem netzwerktheoretischen
Modell in Form so genannter Mehrebenennetzwerke wird Wiktionary
als ein skalenfreies Lexikon beschrieben. Systeme dieser Art zeichnen
sich dadurch aus, dass ihre inhaltliche Dynamik durch die zugrundeliegende
Kollaborationsdynamik bestimmt wird, und zwar so, dass sich die
soziale Struktur der entsprechenden inhaltlichen Struktur aufprägt.
Dieser Auffassung gemäß führt die Ungleichverteilung der Aktivitäten
von Lexikonproduzenten zu einer analogen Ungleichverteilung der
im Lexikon dokumentierten Informationseinheiten. Der Beitrag thematisiert
Grundlagen zur Beschreibung solcher Systeme ausgehend von einem
Parameterraum, welcher die netzwerkanalytische Betrachtung von
Wiktionary als Big-Data-Problem darstellt.},
year = {2017}
}
2017.
How Many Stemmata with Root Degree k? Proceedings of the 15th Meeting on the Mathematics of Language, 11–21.
BibTeX
@inproceedings{Hoenen:Eger:Gehrke:2017,
  author    = {Hoenen, Armin and Eger, Steffen and Gehrke, Ralf},
  title     = {{How Many Stemmata with Root Degree k?}},
  booktitle = {Proceedings of the 15th Meeting on the Mathematics of Language},
  pages     = {11--21},
  publisher = {Association for Computational Linguistics},
  location  = {London, UK},
  year      = {2017},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/How_Many_Stemmata_with_Root_Degree_k.pdf},
  url       = {http://aclweb.org/anthology/W17-3402}
}
2017.
Using Word Embeddings for Computing Distances Between Texts and
for Authorship Attribution. International Conference on Applications of Natural Language to
Information Systems, 274–277.
BibTeX
@inproceedings{Hoenen:2017:b,
  author       = {Hoenen, Armin},
  title        = {{Using Word Embeddings for Computing Distances Between Texts and
                   for Authorship Attribution}},
  booktitle    = {International Conference on Applications of Natural Language to
                  Information Systems},
  pages        = {274--277},
  organization = {Springer},
  year         = {2017},
  url          = {https://link.springer.com/chapter/10.1007/978-3-319-59569-6_33}
}
2017.
TextImager as a Generic Interface to R. Software Demonstrations of the 15th Conference of the European
Chapter of the Association for Computational Linguistics (EACL
2017).
BibTeX
@inproceedings{Uslu:Hemati:Mehler:Baumartz:2017,
  author    = {Tolga Uslu and Wahed Hemati and Alexander Mehler and Daniel Baumartz},
  title     = {{TextImager} as a Generic Interface to {R}},
  booktitle = {Software Demonstrations of the 15th Conference of the European
               Chapter of the Association for Computational Linguistics (EACL 2017)},
  location  = {Valencia, Spain},
  year      = {2017},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/TextImager.pdf}
}
2017.
Beyond the tree – a theoretical model of contamination and a
software to generate multilingual stemmata. Book of Abstracts of the annual conference of the AIUCD 2017, Sapienza, Rome.
BibTeX
@incollection{Hoenen:2017,
  author    = {Hoenen, Armin},
  title     = {{Beyond the tree – a theoretical model of contamination and a
                software to generate multilingual stemmata}},
  booktitle = {{Book of Abstracts of the annual conference of the AIUCD 2017, Sapienza, Rome}},
  publisher = {AIUCD},
  year      = {2017},
  url       = {http://aiucd2017.aiucd.it/wp-content/uploads/2017/01/book-of-abstract-AIUCD-2017.pdf}
}
2017.
Indexicals as Weak Descriptors. Proceedings of the 12th International Conference on Computational Semantics.
BibTeX
@inproceedings{Luecking:2017:c,
  author    = {L\"{u}cking, Andy},
  title     = {Indexicals as Weak Descriptors},
  booktitle = {Proceedings of the 12th International Conference on Computational Semantics},
  series    = {IWCS 2017},
  address   = {Montpellier (France)},
  year      = {2017},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/descriptive-indexicals_rev.pdf}
}
2016
2016.
Language classification from bilingual word embedding graphs. Proceedings of COLING 2016.
BibTeX
@inproceedings{Eger:Hoenen:Mehler:2016,
  author    = {Steffen Eger and Armin Hoenen and Alexander Mehler},
  title     = {Language classification from bilingual word embedding graphs},
  booktitle = {Proceedings of COLING 2016},
  publisher = {ACL},
  location  = {Osaka},
  year      = {2016},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2016/10/eger_hoenen_mehler_COLING2016.pdf}
}
2016.
TextImager: a Distributed UIMA-based System for NLP. Proceedings of the COLING 2016 System Demonstrations.
BibTeX
@inproceedings{Hemati:Uslu:Mehler:2016,
internal-note = {NOTE(review): organization names the Federated Conference on Computer Science and Information Systems, but booktitle/location are the COLING 2016 demonstrations in Osaka -- looks like a copy-paste from another entry; confirm and correct or drop the field},
author = {Wahed Hemati and Tolga Uslu and Alexander Mehler},
title = {TextImager: a Distributed UIMA-based System for NLP},
booktitle = {Proceedings of the COLING 2016 System Demonstrations},
organization = {Federated Conference on Computer Science and
Information Systems},
location = {Osaka, Japan},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/TextImager2016.pdf},
year = {2016}
}
2016.
Modeling Co-Verbal Gesture Perception in Type Theory with Records. Proceedings of the 2016 Federated Conference on Computer Science
and Information Systems, 8:383–392.
Best Paper Award.
BibTeX
@inproceedings{Luecking:2016:b,
author = {L{\"u}cking, Andy},
title = {Modeling Co-Verbal Gesture Perception in Type Theory with Records},
booktitle = {Proceedings of the 2016 Federated Conference on Computer Science
and Information Systems},
editor = {M. Ganzha and L. Maciaszek and M. Paprzycki},
volume = {8},
series = {Annals of Computer Science and Information Systems},
pages = {383--392},
address = {Gdansk, Poland},
publisher = {IEEE},
note = {Best Paper Award},
doi = {10.15439/2016F83},
pdf = {http://annals-csis.org/Volume_8/pliks/83.pdf},
url = {http://annals-csis.org/Volume_8/drp/83.html},
year = {2016}
}
2016.
Text2voronoi: An Image-driven Approach to Differential Diagnosis. Proceedings of the 5th Workshop on Vision and Language (VL'16)
hosted by the 54th Annual Meeting of the Association for Computational
Linguistics (ACL), Berlin.
BibTeX
@inproceedings{Mehler:Uslu:Hemati:2016,
  author    = {Alexander Mehler and Tolga Uslu and Wahed Hemati},
  title     = {Text2voronoi: An Image-driven Approach to Differential Diagnosis},
  booktitle = {Proceedings of the 5th Workshop on Vision and Language (VL'16)
               hosted by the 54th Annual Meeting of the Association for Computational
               Linguistics (ACL), Berlin},
  year      = {2016},
  pdf       = {https://aclweb.org/anthology/W/W16/W16-3212.pdf}
}
2016.
On the linearity of semantic change: Investigating meaning variation
via dynamic graph models. Proceedings of ACL 2016.
BibTeX
@inproceedings{Eger:Mehler:2016,
  author    = {Steffen Eger and Alexander Mehler},
  title     = {On the linearity of semantic change: {I}nvestigating meaning variation
               via dynamic graph models},
  booktitle = {Proceedings of ACL 2016},
  location  = {Berlin},
  year      = {2016},
  pdf       = {https://www.aclweb.org/anthology/P/P16/P16-2009.pdf}
}
2016.
A Comparison of Four Character-Level String-to-String Translation
Models for (OCR) Spelling Error Correction. The Prague Bulletin of Mathematical Linguistics, 105:77–99.
BibTeX
@article{Eger:vorDerBrueck:Mehler:2016,
author = {Eger, Steffen and vor der Br{\"u}ck, Tim and Mehler, Alexander},
title = {A Comparison of Four Character-Level String-to-String Translation
Models for (OCR) Spelling Error Correction},
journal = {The Prague Bulletin of Mathematical Linguistics},
volume = {105},
pages = {77--99},
doi = {10.1515/pralin-2016-0004},
pdf = {https://ufal.mff.cuni.cz/pbml/105/art-eger-vor-der-brueck.pdf},
year = {2016}
}
2016.
Silva Portentosissima – Computer-Assisted Reflections on Bifurcativity
in Stemmas. Digital Humanities 2016: Conference Abstracts. Jagiellonian University
& Pedagogical University, 557–560.
BibTeX
@inproceedings{Hoenen:2016DH,
author = {Hoenen, Armin},
title = {{Silva Portentosissima -- Computer-Assisted Reflections on Bifurcativity
in Stemmas}},
booktitle = {Digital Humanities 2016: Conference Abstracts. Jagiellonian University
\& Pedagogical University},
series = {DH 2016},
pages = {557--560},
abstract = {In 1928, the philologue Joseph B{\'e}dier explored contemporary stemmas
and found them to contain a suspiciously large amount of bifurcations.
In this paper, the argument is investigated that, with a large
amount of lost manuscripts, the amount of bifurcations in the
true stemmas would naturally be high because the probability for
siblings to survive becomes very low is assessed via a computer
simulation.},
location = {Krak{\'o}w},
url = {http://dh2016.adho.org/abstracts/311},
year = {2016}
}
2016.
Wikidition: Towards A Multi-layer Network Model of Intertextuality. Proceedings of DH 2016, 12-16 July.
BibTeX
@inproceedings{Mehler:Wagner:Gleim:2016,
internal-note = {NOTE(review): location uses raw UTF-8 (Kraków) while sibling entries use TeX escapes -- confirm the bibliography is processed with a UTF-8-aware tool before normalising},
author = {Mehler, Alexander and Wagner, Benno and Gleim, R\"{u}diger},
title = {Wikidition: Towards A Multi-layer Network Model of Intertextuality},
booktitle = {Proceedings of DH 2016, 12-16 July},
series = {DH 2016},
abstract = {The paper presents Wikidition, a novel text mining tool for generating
online editions of text corpora. It explores lexical, sentential
and textual relations to span multi-layer networks (linkification)
that allow for browsing syntagmatic and paradigmatic relations
among the constituents of its input texts. In this way, relations
of text reuse can be explored together with lexical relations
within the same literary memory information system. Beyond that,
Wikidition contains a module for automatic lexiconisation to extract
author specific vocabularies. Based on linkification and lexiconisation,
Wikidition does not only allow for traversing input corpora on
different (lexical, sentential and textual) levels. Rather, its
readers can also study the vocabulary of authors on several levels
of resolution including superlemmas, lemmas, syntactic words and
wordforms. We exemplify Wikidition by a range of literary texts
and evaluate it by means of the apparatus of quantitative network
analysis.},
location = {Kraków},
url = {http://dh2016.adho.org/abstracts/250},
year = {2016}
}
2016.
TLT-CRF: A Lexicon-supported Morphological Tagger for Latin
Based on Conditional Random Fields. Proceedings of the 10th International Conference on Language Resources
and Evaluation.
BibTeX
@inproceedings{vorderBrueck:Mehler:2016,
author = {vor der Br{\"u}ck, Tim and Mehler, Alexander},
title = {{TLT-CRF}: A Lexicon-supported Morphological Tagger for {Latin}
Based on Conditional Random Fields},
booktitle = {Proceedings of the 10th International Conference on Language Resources
and Evaluation},
series = {LREC 2016},
location = {Portoro\v{z} (Slovenia)},
pdf = {http://www.texttechnologylab.org/wp-content/uploads/2016/04/lrec2016_tagger.pdf},
year = {2016}
}
2016.
Lemmatization and Morphological Tagging in German and Latin:
A comparison and a survey of the state-of-the-art. Proceedings of the 10th International Conference on Language Resources
and Evaluation.
BibTeX
@inproceedings{Eger:Mehler:Gleim:2016,
  author    = {Eger, Steffen and Gleim, R\"{u}diger and Mehler, Alexander},
  title     = {Lemmatization and Morphological Tagging in {German} and {Latin}:
               A comparison and a survey of the state-of-the-art},
  booktitle = {Proceedings of the 10th International Conference on Language Resources
               and Evaluation},
  series    = {LREC 2016},
  location  = {Portoro\v{z} (Slovenia)},
  year      = {2016},
  pdf       = {http://www.texttechnologylab.org/wp-content/uploads/2016/04/lrec_eger_gleim_mehler.pdf}
}
2016.
Finding Recurrent Features of Image Schema Gestures: the FIGURE corpus. Proceedings of the 10th International Conference on Language Resources
and Evaluation.
BibTeX
@inproceedings{Luecking:Mehler:Walther:Mauri:Kurfuerst:2016,
  author    = {L\"{u}cking, Andy and Mehler, Alexander and Walther, D\'{e}sir\'{e}e
               and Mauri, Marcel and Kurf\"{u}rst, Dennis},
  title     = {Finding Recurrent Features of Image Schema Gestures: the {FIGURE} corpus},
  booktitle = {Proceedings of the 10th International Conference on Language Resources
               and Evaluation},
  series    = {LREC 2016},
  location  = {Portoro\v{z} (Slovenia)},
  year      = {2016},
  pdf       = {http://www.texttechnologylab.org/wp-content/uploads/2016/04/lrec2016-gesture-study-final-version-short.pdf}
}
2016.
TGermaCorp – A (Digital) Humanities Resource for (Computational) Linguistics. Proceedings of the 10th International Conference on Language Resources
and Evaluation.
BibTeX
@inproceedings{Luecking:Hoenen:Mehler:2016,
  author    = {L\"{u}cking, Andy and Hoenen, Armin and Mehler, Alexander},
  title     = {{TGermaCorp} -- A (Digital) Humanities Resource for (Computational) Linguistics},
  booktitle = {Proceedings of the 10th International Conference on Language Resources
               and Evaluation},
  series    = {LREC 2016},
  islrn     = {536-382-801-278-5},
  location  = {Portoro\v{z} (Slovenia)},
  year      = {2016},
  pdf       = {http://www.texttechnologylab.org/wp-content/uploads/2016/04/lrec2016-ttgermacorp-final.pdf}
}
2016.
Transbiblionome Daten in der Literaturwissenschaft. Texttechnologische
Erschließung und digitale Visualisierung intertextueller Beziehungen
digitaler Korpora. DHd 2016.
BibTeX
@inproceedings{Wagner:Mehler:Biber:2016,
  author    = {Wagner, Benno and Mehler, Alexander and Biber, Hanno},
  title     = {{Transbiblionome Daten in der Literaturwissenschaft. Texttechnologische
                Erschließung und digitale Visualisierung intertextueller Beziehungen
                digitaler Korpora}},
  booktitle = {DHd 2016},
  year      = {2016},
  url       = {http://www.dhd2016.de/abstracts/sektionen-005.html#index.xml-body.1_div.4}
}
2016.
Wikidition: Automatic Lexiconization and Linkification of Text Corpora. Information Technology, 58:70–79.
BibTeX
@article{Mehler:et:al:2016,
author = {Alexander Mehler and R{\"u}diger Gleim and Tim vor der Br{\"u}ck and Wahed Hemati
and Tolga Uslu and Steffen Eger},
title = {Wikidition: Automatic Lexiconization and Linkification of Text Corpora},
journal = {Information Technology},
volume = {58},
pages = {70--79},
abstract = {We introduce a new text technology, called Wikidition, which automatically
generates large scale editions of corpora of natural language
texts. Wikidition combines a wide range of text mining tools for
automatically linking lexical, sentential and textual units. This
includes the extraction of corpus-specific lexica down to the
level of syntactic words and their grammatical categories. To
this end, we introduce a novel measure of text reuse and exemplify
Wikidition by means of the capitularies, that is, a corpus of
Medieval Latin texts.},
doi = {10.1515/itit-2015-0035},
year = {2016}
}
2016.
Wikipedia Titles As Noun Tag Predictors. Proceedings of the 10th International Conference on Language Resources
and Evaluation.
BibTeX
@inproceedings{Hoenen:2016x,
  author    = {Hoenen, Armin},
  title     = {{Wikipedia Titles As Noun Tag Predictors}},
  booktitle = {Proceedings of the 10th International Conference on Language Resources
               and Evaluation},
  series    = {LREC 2016},
  location  = {Portoro\v{z} (Slovenia)},
  year      = {2016},
  pdf       = {http://www.lrec-conf.org/proceedings/lrec2016/pdf/18_Paper.pdf}
}
2016.
Das erste dynamische Stemma, Pionier des digitalen Zeitalters? Accepted in the Proceedings of the Jahrestagung der Digital Humanities
im deutschsprachigen Raum.
BibTeX
@inproceedings{Hoenen:2016y,
author = {Hoenen, Armin},
title = {Das erste dynamische Stemma, Pionier des digitalen Zeitalters?},
booktitle = {Proceedings of the Jahrestagung der Digital Humanities
im deutschsprachigen Raum},
note = {Accepted},
url = {http://www.dhd2016.de/abstracts/posters-060.html},
year = {2016}
}
2016.
Corpora and Resources for (Historical) Low Resource Languages. 31(2).
JLCL.
BibTeX
@collection{GSCL:JLCL:2016:2,
  editor    = {Armin Hoenen and Alexander Mehler and Jost Gippert},
  title     = {{Corpora and Resources for (Historical) Low Resource Languages}},
  volume    = {31},
  number    = {2},
  publisher = {JLCL},
  issn      = {2190-6858},
  bibsource = {GSCL, http://www.gscl.info/},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2017/10/Titelblatt-Heft2-2016.png},
  pdf       = {http://www.jlcl.org/2016_Heft2/Heft2-2016.pdf},
  year      = {2016}
}
2016.
Editorial. JLCL, 31(2):iii–iv.
2016.
Gepi: An Epigraphic Corpus for Old Georgian and a Tool Sketch
for Aiding Reconstruction. JLCL, 31(2):25–38.
BibTeX
@article{Hoenen:Samushia:2016,
  author  = {Armin Hoenen and Lela Samushia},
  title   = {{Gepi: An Epigraphic Corpus for Old Georgian and a Tool Sketch
              for Aiding Reconstruction}},
  journal = {JLCL},
  volume  = {31},
  number  = {2},
  pages   = {25--38},
  year    = {2016}
}
2015
2015.
A New LMF Schema Application by Example of an Austrian Lexicon
Applied to the Historical Corpus of the Writer Hugo von Hofmannsthal. Historical Corpora.
BibTeX
@inproceedings{Hoenen:Mader:2015,
  author    = {Hoenen, Armin and Mader, Franziska},
  title     = {A New LMF Schema Application by Example of an Austrian Lexicon
               Applied to the Historical Corpus of the Writer Hugo von Hofmannsthal},
  booktitle = {Historical Corpora},
  year      = {2015},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/HoenenMader2013-a-new-lmf-schema-application.pdf},
  website   = {http://www.narr-shop.de/historical-corpora.html}
}
2015.
Text Mining: From Ontology Learning to Automated Text Processing
Applications. Festschrift in Honor of Gerhard Heyer. Theory and Applications of Natural Language Processing.
Springer.
BibTeX
@book{Biemann:Mehler:2015,
  editor    = {Biemann, Chris and Mehler, Alexander},
  title     = {{Text Mining: From Ontology Learning to Automated Text Processing
                Applications. Festschrift in Honor of Gerhard Heyer}},
  publisher = {Springer},
  series    = {Theory and Applications of Natural Language Processing},
  address   = {Heidelberg},
  year      = {2015},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/TextMiningsmall.jpg}
}
2015.
PhD Thesis: Multilingual text classification using information-theoretic features.
BibTeX
@phdthesis{Islam:2015,
author = {Mohammad Zahurul Islam},
title = {Multilingual text classification using information-theoretic features},
school = {Goethe University Frankfurt},
internal-note = {NOTE(review): school was missing (required for phdthesis); inferred from the Frankfurt university repository URL in pdf -- confirm},
pages = {189},
year = {2015},
pdf = {http://publikationen.ub.uni-frankfurt.de/files/38157/thesis.pdf},
abstract = {The number of multilingual texts in the World Wide Web (WWW) is
increasing dramatically and a multilingual economic zone like
the European Union (EU) requires the availability of multilingual
Natural Language Processing (NLP) tools. Due to a rapid development
of NLP tools, many lexical, syntactic, semantic and other linguistic
features have been used in different NLP applications. However,
there are some situations where these features can not be used
due the application type or unavailability of NLP resources for
some of the languages. That is why an application that is intended
to handle multilingual texts must have features that are not dependent
on a particular language and specific linguistic tools. In this
thesis, we will focus on two such applications: text readability
and source and translation classification. In this thesis, we
provide 18 features that are not only suitable for both applications,
but are also language and linguistic tools independent. In order
to build a readability classifier, we use texts from three different
languages: English, German and Bangla. Our proposed features achieve
a classification accuracy that is comparable with a classifier
using 40 linguistic features. The readability classifier achieves
a classification F-score of 74.21\% on the English Wikipedia corpus,
an F-score of 75.47\% on the English textbook corpus, an F-score
of 86.46\% on the Bangla textbook corpus and an F-score of 86.26\%
on the German GEO/GEOLino corpus. We used more than two million
sentence pairs from 21 European languages in order to build the
source and translation classifier. The classifier using the same
eighteen features achieves a classification accuracy of 86.63\%.
We also used the same features to build a classifier that classifies
translated texts based on their origin. The classifier achieves
classification accuracy of 75\% for texts from 10 European languages.
In this thesis, we also provide four different corpora, three
for text readability analysis and one for corpus based translation
studies.}
}
2015.
A Parallel Corpus of the Old Georgian Gospel Manuscripts and
their Stemmatology. The Georgian Journal for Language Logic Computation, IV:176–185.
BibTeX
@article{Dundua:Hoenen:Samushia:2015,
author = {Dundua, Natia and Hoenen, Armin and Samushia, Lela},
title = {{A Parallel Corpus of the Old Georgian Gospel Manuscripts and
their Stemmatology}},
journal = {The Georgian Journal for Language Logic Computation},
volume = {IV},
pages = {176--185},
publisher = {CLLS, Tbilisi State University and Kurt G{\"o}del
Society},
year = {2015}
}
2015.
Complex Decomposition of the Negative Distance Kernel. IEEE International Conference on Machine Learning and Applications.
BibTeX
@inproceedings{vor:der:Bruck:Eger:Mehler:2015,
  author    = {vor der Br{\"u}ck, Tim and Eger, Steffen and Mehler, Alexander},
  title     = {Complex Decomposition of the Negative Distance Kernel},
  booktitle = {IEEE International Conference on Machine Learning and Applications},
  location  = {Miami, Florida, USA},
  year      = {2015}
}
2015.
Do we need bigram alignment models? On the effect of alignment
quality on transduction accuracy in G2P. Proceedings of EMNLP.
2015.
Deriving a primal form for the quadratic power kernel. Proceedings of the 38th German Conference on Artificial Intelligence (KI).
BibTeX
@inproceedings{vorDerBrueck:Eger:2015,
  author    = {vor der Brück, Tim and Eger, Steffen},
  title     = {Deriving a primal form for the quadratic power kernel},
  booktitle = {Proceedings of the 38th German Conference on Artificial Intelligence ({KI})},
  year      = {2015}
}
2015.
Improving G2P from Wiktionary and other (web) resources. Proceedings of Interspeech.
2015.
Lexicon-assisted tagging and lemmatization in Latin: A comparison
of six taggers and two lemmatization methods. Proceedings of the 9th Workshop on Language Technology for Cultural
Heritage, Social Sciences, and Humanities (LaTeCH 2015).
BibTeX
@inproceedings{Eger:vor:der:Brueck:Mehler:2015,
  author    = {Eger, Steffen and vor der Brück, Tim and Mehler, Alexander},
  title     = {Lexicon-assisted tagging and lemmatization in {Latin}: A comparison
               of six taggers and two lemmatization methods},
  booktitle = {Proceedings of the 9th Workshop on Language Technology for Cultural
               Heritage, Social Sciences, and Humanities ({LaTeCH 2015})},
  address   = {Beijing, China},
  year      = {2015},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Lexicon-assisted_tagging.pdf}
}
2015.
Towards a Theoretical Framework for Analyzing Complex Linguistic Networks. Understanding Complex Systems.
Springer.
BibTeX
@book{Mehler:Luecking:Banisch:Blanchard:Frank-Job:2015,
editor = {Mehler, Alexander and Lücking, Andy and Banisch, Sven and Blanchard, Philippe
and Frank-Job, Barbara},
title = {Towards a Theoretical Framework for Analyzing Complex Linguistic Networks},
publisher = {Springer},
series = {Understanding Complex Systems},
address = {Berlin and New York},
image = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/UCS_17-2-tmp.png},
isbn = {978-3-662-47237-8},
year = {2015}
}
2015.
Linguistic Networks – An Online Platform for Deriving Collocation
Networks from Natural Language Texts. Towards a Theoretical Framework for Analyzing Complex Linguistic Networks.
BibTeX
@incollection{Mehler:Gleim:2015:a,
  author    = {Mehler, Alexander and Gleim, Rüdiger},
  title     = {Linguistic Networks -- An Online Platform for Deriving Collocation
               Networks from Natural Language Texts},
  booktitle = {Towards a Theoretical Framework for Analyzing Complex Linguistic Networks},
  editor    = {Mehler, Alexander and Lücking, Andy and Banisch, Sven and Blanchard, Philippe
               and Frank-Job, Barbara},
  publisher = {Springer},
  series    = {Understanding Complex Systems},
  year      = {2015}
}
2015.
Multiple Many-To-Many Sequence Alignment For Combining String-Valued
Variables: A G2P Experiment. ACL.
BibTeX
@inproceedings{Eger:2015_ACL,
  author    = {Eger, Steffen},
  title     = {Multiple Many-To-Many Sequence Alignment For Combining String-Valued
               Variables: A G2P Experiment},
  booktitle = {ACL},
  publisher = {Association for Computational Linguistics},
  year      = {2015}
}
2015.
Designing and comparing G2P-type lemmatizers for a morphology-rich language.
BibTeX
@inproceedings{Eger:2015_SFCM,
author = {Eger, Steffen},
title = {Designing and comparing G2P-type lemmatizers for a morphology-rich language},
booktitle = {Fourth International Workshop on Systems and
Frameworks for Computational Morphology},
year = {2015}
}
June, 2015.
Towards Semantic Language Classification: Inducing and Clustering
Semantic Association Networks from Europarl. Proceedings of the Fourth Joint Conference on Lexical and Computational
Semantics, 127–136.
BibTeX
@inproceedings{Eger:Schenk:Mehler:2015,
author = {Eger, Steffen and Schenk, Niko and Mehler, Alexander},
title = {Towards Semantic Language Classification: Inducing and Clustering
Semantic Association Networks from Europarl},
booktitle = {Proceedings of the Fourth Joint Conference on Lexical and Computational
Semantics},
pages = {127--136},
publisher = {Association for Computational Linguistics},
month = jun,
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/starsem2015-corrected-version.pdf},
url = {http://www.aclweb.org/anthology/S15-1014},
year = {2015}
}
2015.
Identities for Partial Bell Polynomials Derived from Identities
for Weighted Integer Compositions.. Aequationes Mathematicae.
2015.
Some Elementary Congruences for the Number of Weighted Integer Compositions.. Journal of Integer Sequences (electronic only), 18(4).
BibTeX
@article{Eger:2015a,
author = {Eger, Steffen},
title = {Some Elementary Congruences for the Number of Weighted Integer Compositions},
journal = {Journal of Integer Sequences (electronic only)},
volume = {18},
number = {4},
pdf = {https://cs.uwaterloo.ca/journals/JIS/VOL18/Eger/eger11.pdf},
publisher = {School of Computer Science, University of Waterloo,
Waterloo, ON},
year = {2015}
}
2015.
Pointing and Reference Reconsidered. Journal of Pragmatics, 77:56–79.
BibTeX
@article{Luecking:Pfeiffer:Rieser:2015,
author = {Lücking, Andy and Pfeiffer, Thies and Rieser, Hannes},
title = {Pointing and Reference Reconsidered},
journal = {Journal of Pragmatics},
volume = {77},
pages = {56--79},
abstract = {Current semantic theory on indexical expressions claims that demonstratively
used indexicals such as this lack a referent-determining meaning
but instead rely on an accompanying demonstration act like a pointing
gesture. While this view allows to set up a sound logic of demonstratives,
the direct-referential role assigned to pointing gestures has
never been scrutinized thoroughly in semantics or pragmatics.
We investigate the semantics and pragmatics of co-verbal pointing
from a foundational perspective combining experiments, statistical
investigation, computer simulation and theoretical modeling techniques
in a novel manner. We evaluate various referential hypotheses
with a corpus of object identification games set up in experiments
in which body movement tracking techniques have been extensively
used to generate precise pointing measurements. Statistical investigation
and computer simulations show that especially distal areas in
the pointing domain falsify the semantic direct-referential hypotheses
concerning pointing gestures. As an alternative, we propose that
reference involving pointing rests on a default inference which
we specify using the empirical data. These results raise numerous
problems for classical semantics–pragmatics interfaces: we argue
for pre-semantic pragmatics in order to account for inferential
reference in addition to classical post-semantic Gricean pragmatics.},
doi = {10.1016/j.pragma.2014.12.013},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Luecking_Pfeiffer_Rieser_Pointing_and_Reference_Reconsiderd.pdf},
website = {http://www.sciencedirect.com/science/article/pii/S037821661500003X},
year = {2015}
}
2015.
Towards a Network Model of the Coreness of Texts: An Experiment
in Classifying Latin Texts using the TTLab Latin Tagger. Text Mining: From Ontology Learning to Automated text Processing Applications, 87–112.
BibTeX
@incollection{Mehler:Brueck:Gleim:Geelhaar:2015,
author = {Mehler, Alexander and vor der Brück, Tim and Gleim, Rüdiger and Geelhaar, Tim},
title = {Towards a Network Model of the Coreness of Texts: An Experiment
in Classifying Latin Texts using the TTLab Latin Tagger},
booktitle = {Text Mining: From Ontology Learning to Automated text Processing Applications},
publisher = {Springer},
editor = {Biemann, Chris and Mehler, Alexander},
series = {Theory and Applications of Natural Language Processing},
pages = {87--112},
address = {Berlin/New York},
abstract = {The analysis of longitudinal corpora of historical texts requires
the integrated development of tools for automatically preprocessing
these texts and for building representation models of their genre-
and register-related dynamics. In this chapter we present such
a joint endeavor that ranges from resource formation via preprocessing
to network-based text representation and classification. We start
with presenting the so-called TTLab Latin Tagger (TLT) that preprocesses
texts of classical and medieval Latin. Its lexical resource in
the form of the Frankfurt Latin Lexicon (FLL) is also briefly
introduced. As a first test case for showing the expressiveness
of these resources, we perform a tripartite classification task
of authorship attribution, genre detection and a combination thereof.
To this end, we introduce a novel text representation model that
explores the core structure (the so-called coreness) of lexical
network representations of texts. Our experiment shows the expressiveness
of this representation format and mediately of our Latin preprocessor.},
website = {http://link.springer.com/chapter/10.1007/978-3-319-12655-5_5},
year = {2015}
}
2015.
Das artifizielle Manuskriptkorpus TASCFE. Accepted in the Proceedings of the Jahrestagung der Digital Humanities
im deutschsprachigen Raum.
BibTeX
@inproceedings{Hoenen:2015,
author = {Hoenen, Armin},
title = {Das artifizielle Manuskriptkorpus TASCFE},
booktitle = {Proceedings of the Jahrestagung der Digital Humanities
im deutschsprachigen Raum},
note = {Accepted},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Hoenen_tascfeDH2015.pdf},
year = {2015}
}
2015.
TTLab Preprocessor – Eine generische Web-Anwendung für die Vorverarbeitung
von Texten und deren Evaluation. Accepted in the Proceedings of the Jahrestagung der Digital Humanities
im deutschsprachigen Raum.
BibTeX
@inproceedings{Gleim:Mehler:2015,
author = {Gleim, Rüdiger and Mehler, Alexander},
title = {TTLab Preprocessor – Eine generische Web-Anwendung für die Vorverarbeitung
von Texten und deren Evaluation},
booktitle = {Proceedings of the Jahrestagung der Digital Humanities
im deutschsprachigen Raum},
note = {Accepted},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Gleim_Mehler_PrePro_DHGraz2015.pdf},
year = {2015}
}
2015.
Ontologiegestützte geisteswissenschaftliche Annotationen mit dem OWLnotator. Proceedings of the Jahrestagung der Digital Humanities im deutschsprachigen Raum.
BibTeX
@inproceedings{Abrami:Mehler:Zeunert:2015:a,
author = {Abrami, Giuseppe and Mehler, Alexander and Zeunert, Susanne},
title = {Ontologiegestützte geisteswissenschaftliche Annotationen mit dem OWLnotator},
booktitle = {Proceedings of the Jahrestagung der Digital Humanities im deutschsprachigen Raum},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Abrami_Mehler_Zeunert_DHd_2015_abstract.pdf},
year = {2015}
}
2015.
Fusing Text and Image Data with the Help of the OWLnotator. Human Interface and the Management of Information. Information
and Knowledge Design, 9172:261–272.
BibTeX
@incollection{Abrami:Mehler:Pravida:2015:b,
author = {Abrami, Giuseppe and Mehler, Alexander and Pravida, Dietmar},
title = {Fusing Text and Image Data with the Help of the OWLnotator},
booktitle = {Human Interface and the Management of Information. Information
and Knowledge Design},
publisher = {Springer International Publishing},
editor = {Yamamoto, Sakae},
volume = {9172},
series = {Lecture Notes in Computer Science},
pages = {261--272},
doi = {10.1007/978-3-319-20612-7_25},
isbn = {978-3-319-20611-0},
language = {English},
website = {http://dx.doi.org/10.1007/978-3-319-20612-7_25},
year = {2015}
}
2015.
Lachmannian Archetype Reconstruction for Ancient Manuscript Corpora. Proceedings of the 2015 Conference of the North American Chapter
of the Association for Computational Linguistics: Human Language
Technologies (NAACL HLT).
Citation: Trovato is published in 2014 not in 2009..
BibTeX
@inproceedings{Hoenen:2015a,
  author    = {Hoenen, Armin},
  title     = {Lachmannian Archetype Reconstruction for Ancient Manuscript Corpora},
  booktitle = {Proceedings of the 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL HLT)},
  year      = {2015},
  note      = {Citation: Trovato is published in 2014 not in 2009.},
  abstract  = {Two goals are targeted by computer philology for ancient manuscript corpora: firstly, making an edition, that is roughly speaking one text version representing the whole corpus, which contains variety induced through copy errors and other processes and secondly, producing a stemma. A stemma is a graph-based visualization of the copy history with manuscripts as nodes and copy events as edges. Its root, the so-called archetype is the supposed original text or urtext from which all subsequent copies are made. Our main contribution is to present one of the first computational approaches to automatic archetype reconstruction and to introduce the first text-based evaluation for automatically produced archetypes. We compare a philologically generated archetype with one generated by bio-informatic software.},
  website   = {http://www.aclweb.org/anthology/N15-1127}
}
2015.
Simulating Misreading. Proceedings of the 20TH INTERNATIONAL CONFERENCE ON APPLICATIONS
OF NATURAL LANGUAGE TO INFORMATION SYSTEMS (NLDB).
BibTeX
@inproceedings{Hoenen:2015b,
author = {Hoenen, Armin},
title = {Simulating Misreading},
booktitle = {Proceedings of the 20th International Conference on Applications
of Natural Language to Information Systems (NLDB)},
abstract = {Physical misreading (as opposed to interpretational misreading)
is an unnoticed substitution in silent reading. Especially for
legally important documents or instruction manuals, this can lead
to serious consequences. We present a prototype of an automatic
highlighter targeting words which can most easily be misread in
a given text using a dynamic orthographic neighbour concept. We
propose measures of fit of a misread token based on Natural Language
Processing and detect a list of short most easily misread tokens
in the English language. We design a highlighting scheme for avoidance
of misreading.},
website = {http://link.springer.com/chapter/10.1007/978-3-319-19581-0_34},
year = {2015}
}
2015.
Managing and Annotating Historical Multimodal Corpora with the
eHumanities Desktop - An outline of the current state of the LOEWE
project Illustrations of Goethe's Faust. Historical Corpora, 353–363.
BibTeX
@inproceedings{Abrami:Freiberg:Warner:2015,
author = {Abrami, Giuseppe and Freiberg, Michael and Warner, Paul},
title = {Managing and Annotating Historical Multimodal Corpora with the
eHumanities Desktop - An outline of the current state of the LOEWE
project Illustrations of Goethe's Faust},
booktitle = {Historical Corpora},
pages = {353--363},
abstract = {Text corpora are structured sets of text segments that can be
annotated or interrelated. Expanding on this, we can define a
database of images as an iconographic multimodal corpus with annotated
images and the relations between images as well as between images
and texts. The Goethe-Museum in Frankfurt holds a significant
collection of art work and texts relating to Goethe’s Faust from
the early 19th century until the present. In this project we create
a database containing digitized items from this collection, and
extend a tool, the ImageDB in the eHumanities Desktop, to annotate
and provide relations between resources. This article gives an
overview of the project and provides some technical details. Furthermore
we show newly implemented features, explain the challenge of creating
an ontology on multimodal corpora and give a forecast for future
work.},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/AbramiFreibergWarner_HC_2012.pdf},
website = {http://www.narr-shop.de/historical-corpora.html},
year = {2015}
}
2014
2014.
Stemmatology, an interdisciplinary endeavour. Book of Abstracts zum DHd Workshop Informatik und die Digital Humanities.
BibTeX
@incollection{Hoenen:2014plz,
author = {Hoenen, Armin},
title = {Stemmatology, an interdisciplinary endeavour},
booktitle = {Book of Abstracts zum {DHd} Workshop Informatik und die Digital Humanities},
publisher = {DHd},
url = {http://dhd-wp.hab.de/files/book_of_abstracts.pdf},
year = {2014}
}
2014.
Language as a whole – A new framework for linguistic knowledge
integration: Comment on "Approaching human language with complex
networks" by Cong and Liu. Physics of Life Reviews, 11(4):628–629.
BibTeX
@article{Chen:2014:a,
author = {Chen, Xinying},
title = {Language as a whole -- A new framework for linguistic knowledge
integration: Comment on "Approaching human language with complex
networks" by {Cong} and {Liu}},
journal = {Physics of Life Reviews},
volume = {11},
number = {4},
pages = {628--629},
doi = {10.1016/j.plrev.2014.07.011},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Language-as-a-whole-Chen.pdf},
url = {http://www.sciencedirect.com/science/article/pii/S1571064514001249},
year = {2014}
}
2014.
Review: Evolutionary Linguistics in the Past Two Decades – EVOLANG10:
the 10th International Conference on Language Evolution. Journal of Chinese Linguistics, 42(2):499–530.
BibTeX
@article{Gong:Lam:Chen:Zhang:2014,
author = {Gong, Tao and Lam, Yau Wai and Chen, Xinying and Zhang, Menghan},
title = {Review: Evolutionary Linguistics in the Past Two Decades -- EVOLANG10:
the 10th International Conference on Language Evolution},
journal = {Journal of Chinese Linguistics},
volume = {42},
number = {2},
pages = {499--530},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/JCL-EvolangReview.pdf},
year = {2014}
}
December, 2014.
Rubrik: Neues aus dem Netz. Kunstchronik, 12:623.
BibTeX
@article{Abrami:Mehler:Pravida:Zeunert:2014,
author = {Abrami, Giuseppe and Mehler, Alexander and Pravida, Dietmar and Zeunert, Susanne},
title = {Rubrik: Neues aus dem Netz},
journal = {Kunstchronik},
volume = {12},
pages = {623},
address = {München},
month = dec,
publisher = {Zentralinstitut für Kunstgeschichte},
website = {http://www.zikg.eu/publikationen/laufende-publikationen/kunstchronik},
year = {2014}
}
2014.
A proof of the Mann-Shanks primality criterion conjecture for
extended binomial coefficients. Integers: The Electronic Journal of Combinatorial
Number Theory, 14.
BibTeX
@article{Eger:2014:a,
  author   = {Eger, Steffen},
  title    = {A proof of the Mann-Shanks primality criterion conjecture for extended binomial coefficients},
  journal  = {Integers: The Electronic Journal of Combinatorial Number Theory},
  volume   = {14},
  year     = {2014},
  abstract = {We show that the Mann-Shanks primality criterion holds for weighted extended binomial coefficients (which count the number of weighted integer compositions), not only for the ordinary binomial coefficients.},
  pdf      = {http://www.emis.de/journals/INTEGERS/papers/o60/o60.pdf},
  website  = {http://www.emis.de/journals/INTEGERS/vol14.html}
}
2014.
Stirling's approximation for central extended binomial coefficients.. The American Mathematical Monthly, 121(4):344–349.
BibTeX
@article{Eger:2014:b,
author = {Eger, Steffen},
title = {Stirling's approximation for central extended binomial coefficients},
journal = {The American Mathematical Monthly},
volume = {121},
number = {4},
pages = {344--349},
abstract = {We derive asymptotic formulas for central extended binomial coefficients,
which are generalizations of binomial coefficients, using the
distribution of the sum of independent discrete uniform random
variables with the Central Limit Theorem and a local limit variant.},
website = {http://www.jstor.org/stable/10.4169/amer.math.monthly.121.04.344},
year = {2014}
}
2014.
On the Expressiveness, Validity and Reproducibility of Models
of Language Evolution. Comment on 'Modelling language evolution:
Examples and predictions' by Tao Gong, Shuai Lan, and Menghan
Zhang. Physics of Life Review.
BibTeX
@article{Mehler:2014,
author = {Mehler, Alexander},
title = {On the Expressiveness, Validity and Reproducibility of Models
of Language Evolution. Comment on 'Modelling language evolution:
Examples and predictions' by Tao Gong, Shuai Lan, and Menghan
Zhang},
journal = {Physics of Life Reviews},
pdf = {http://www.sciencedirect.com/science/article/pii/S1571064514000529/pdfft?md5=6a2cbbfc083d7bc3adfd26d431cc55d8&pid=1-s2.0-S1571064514000529-main.pdf},
website = {https://www.researchgate.net/publication/261290946_On_the_expressiveness_validity_and_reproducibility_of_models_of_language_evolution_Comment_on_Modelling_language_evolution_Examples_and_predictions_by_Tao_Gong_Shuai_Lan_and_Menghan_Zhang},
year = {2014}
}
2014.
Computational Humanities - bridging the gap between Computer Science
and Digital Humanities (Dagstuhl Seminar 14301). Dagstuhl Reports, 4(7):80–111.
BibTeX
@article{Biemann:Crane:Fellbaum:Mehler:2014,
author = {Biemann, Chris and Crane, Gregory R. and Fellbaum, Christiane D.
and Mehler, Alexander},
title = {Computational Humanities - bridging the gap between Computer Science
and Digital Humanities (Dagstuhl Seminar 14301)},
journal = {Dagstuhl Reports},
volume = {4},
number = {7},
pages = {80--111},
abstract = {Research in the field of Digital Humanities, also known as Humanities
Computing, has seen a steady increase over the past years. Situated
at the intersection of computing science and the humanities, present
efforts focus on making resources such as texts, images, musical
pieces and other semiotic artifacts digitally available, searchable
and analysable. To this end, computational tools enabling textual
search, visual analytics, data mining, statistics and natural
language processing are harnessed to support the humanities researcher.
The processing of large data sets with appropriate software opens
up novel and fruitful approaches to questions in the traditional
humanities. This report summarizes the Dagstuhl seminar 14301
on “Computational Humanities – bridging the gap between Computer
Science and Digital Humanities”},
issn = {2192-5283},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/dagrep_v004_i007_p080_s14301.pdf},
publisher = {Schloss Dagstuhl--Leibniz-Zentrum für Informatik},
year = {2014}
}
2014.
Readability Classification of Bangla Texts. 15th International Conference on Intelligent Text Processing and
Computational Linguistics (cicLing), Kathmandu, Nepal.
BibTeX
@inproceedings{Islam:Rahman:Mehler:2014,
  author    = {Islam, Md. Zahurul and Rahman, Md. Rashedur and Mehler, Alexander},
  title     = {Readability Classification of Bangla Texts},
  booktitle = {15th International Conference on Intelligent Text Processing and Computational Linguistics (cicLing), Kathmandu, Nepal},
  year      = {2014},
  abstract  = {Readability classification is an important application of Natural Language Processing. It aims at judging the quality of documents and to assist writers to identify possible problems. This paper presents a readability classifier for Bangla textbooks using information-theoretic and lexical features. All together 18 features are explored to achieve an F-score of 86.46}
}
}
2014.
Comparing Hand Gesture Vocabularies for HCI. Proceedings of HCI International 2014, 22 - 27 June 2014, Heraklion, Greece.
BibTeX
@incollection{Mehler:vor:der:Brueck:Luecking:2014,
author = {Mehler, Alexander and vor der Brück, Tim and Lücking, Andy},
title = {Comparing Hand Gesture Vocabularies for HCI},
booktitle = {Proceedings of HCI International 2014, 22 - 27 June 2014, Heraklion, Greece},
publisher = {Springer},
address = {Berlin/New York},
abstract = {HCI systems are often equipped with gestural interfaces drawing
on a predefined set of admitted gestures. We provide an assessment
of the fitness of such gesture vocabularies in terms of their
learnability and naturalness. This is done by example of rivaling
gesture vocabularies of the museum information system WikiNect.
In this way, we do not only provide a procedure for evaluating
gesture vocabularies, but additionally contribute to design criteria
to be followed by the gestures.},
keywords = {wikinect},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Comparing-Gesture-Vocabularies-1_1.pdf},
website = {http://link.springer.com/chapter/10.1007/978-3-319-07230-2_8#page-1},
year = {2014}
}
2014.
WikiNect: Image Schemata as a Basis of Gestural Writing for
Kinetic Museum Wikis. Universal Access in the Information Society, 1–17.
BibTeX
@article{Mehler:Luecking:Abrami:2014,
author = {Mehler, Alexander and Lücking, Andy and Abrami, Giuseppe},
title = {{WikiNect}: Image Schemata as a Basis of Gestural Writing for
Kinetic Museum Wikis},
journal = {Universal Access in the Information Society},
pages = {1--17},
abstract = {This paper provides a theoretical assessment of gestures in the
context of authoring image-related hypertexts by example of the
museum information system WikiNect. To this end, a first implementation
of gestural writing based on image schemata is provided (Lakoff
in Women, fire, and dangerous things: what categories reveal about
the mind. University of Chicago Press, Chicago, 1987). Gestural
writing is defined as a sort of coding in which propositions are
only expressed by means of gestures. In this respect, it is shown
that image schemata allow for bridging between natural language
predicates and gestural manifestations. Further, it is demonstrated
that gestural writing primarily focuses on the perceptual level
of image descriptions (Hollink et al. in Int J Hum Comput Stud
61(5):601–626, 2004). By exploring the metaphorical potential
of image schemata, it is finally illustrated how to extend the
expressiveness of gestural writing in order to reach the conceptual
level of image descriptions. In this context, the paper paves
the way for implementing museum information systems like WikiNect
as systems of kinetic hypertext authoring based on full-fledged
gestural writing.},
doi = {10.1007/s10209-014-0386-8},
issn = {1615-5289},
keywords = {wikinect},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/art_10.1007_s10209-014-0386-8.pdf},
website = {http://dx.doi.org/10.1007/s10209-014-0386-8},
year = {2014}
}
2014.
ColLex.EN: Automatically Generating and Evaluating a Full-form
Lexicon for English. Proceedings of LREC 2014.
BibTeX
@inproceedings{vor:der:Brueck:Mehler:Islam:2014,
author = {vor der Brück, Tim and Mehler, Alexander and Islam, Md. Zahurul},
title = {ColLex.EN: Automatically Generating and Evaluating a Full-form
Lexicon for English},
booktitle = {Proceedings of LREC 2014},
address = {Reykjavik, Iceland},
abstract = {Currently, a large number of different lexica is available for
English. However, substantial and freely available fullform lexica
with a high number of named entities are rather rare even in the
case of this lingua franca. Existing lexica are often limited
in several respects as explained in Section 2. What is missing
so far is a freely available substantial machine-readable lexical
resource of English that contains a high number of word forms
and a large collection of named entities. In this paper, we describe
a procedure to generate such a resource by example of English.
This lexicon, henceforth called ColLex.EN (for Collecting Lexica
for English ), will be made freely available to the public 1.
In this paper, we describe how ColLex.EN was collected from existing
lexical resources and specify the statistical procedures that
we developed to extend and adjust it. No manual modifications
were done on the generated word forms and lemmas. Our fully automatic
procedure has the advantage that whenever new versions of the
source lexica are available, a new version of ColLex.EN can be
automatically generated with low effort.},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/vdbrueck_mehler_islam_collex_lrec.pdf},
website = {http://aclanthology.info/papers/collex-en-automatically-generating-and-evaluating-a-full-form-lexicon-for-english},
year = {2014}
}
2014.
Simulation of Scribal Letter Substitution. Analysis of Ancient and Medieval Texts and Manuscripts: Digital Approaches.
BibTeX
@inproceedings{Hoenen:2014,
author = {Hoenen, Armin},
title = {Simulation of Scribal Letter Substitution},
booktitle = {Analysis of Ancient and Medieval Texts and Manuscripts: Digital Approaches},
editor = {Andrews, T. L. and Macé, C.},
owner = {hoenen},
website = {http://www.brepols.net/Pages/ShowProduct.aspx?prod_id=IS-9782503552682-1},
year = {2014}
}
2013
2013.
Semantic typologies by means of network analysis of bilingual dictionaries. Approaches to Measuring Linguistic Differences, 447–474.
BibTeX
@incollection{Sejane:Eger:2013,
author = {Sejane, Ineta and Eger, Steffen},
title = {Semantic typologies by means of network analysis of bilingual dictionaries},
booktitle = {Approaches to Measuring Linguistic Differences},
publisher = {De Gruyter},
editor = {Borin, Lars and Saxena, Anju},
pages = {447--474},
bibtexkey = {eger-sejane_network-typologies2013},
doi = {10.1515/9783110305258.447},
inlg = {English [eng]},
src = {degruyter},
srctrickle = {degruyter#/books/9783110305258/9783110305258.447/9783110305258.447.xml},
url = {http://www.degruyter.com/view/books/9783110305258/9783110305258.447/9783110305258.447.xml},
year = {2013}
}
}
2013.
Sequence Segmentation by Enumeration: An Exploration.. Prague Bull. Math. Linguistics, 100:113–131.
BibTeX
@article{Eger:2013:a,
author = {Eger, Steffen},
title = {Sequence Segmentation by Enumeration: An Exploration},
journal = {Prague Bull. Math. Linguistics},
volume = {100},
pages = {113--131},
abstract = {We investigate exhaustive enumeration and subsequent language
model evaluation (E\&E approach) as an alternative to solving
the sequence segmentation problem. We show that, under certain
conditions (on string lengths and regarding a possibility to accurately
estimate the number of segments), which are satisfied for important
NLP applications, such as phonological segmentation, syllabification,
and morphological segmentation, the E\&E approach is feasible
and promises superior results than the standard sequence labeling
approach to sequence segmentation.},
pdf = {http://ufal.mff.cuni.cz/pbml/100/art-eger.pdf},
year = {2013}
}
2013.
A Contribution to the Theory of Word Length Distribution Based
on a Stochastic Word Length Distribution Model.. Journal of Quantitative Linguistics, 20(3):252–265.
BibTeX
@article{Eger:2013:b,
author = {Eger, Steffen},
title = {A Contribution to the Theory of Word Length Distribution Based
on a Stochastic Word Length Distribution Model},
journal = {Journal of Quantitative Linguistics},
volume = {20},
number = {3},
pages = {252--265},
abstract = {We derive a stochastic word length distribution model based on
the concept of compound distributions and show its relationships
with and implications for Wimmer et al. ’s (1994) synergetic word
length distribution model.},
year = {2013}
}
2013.
Sequence alignment with arbitrary steps and further generalizations,
with applications to alignments in linguistics.. Information Sciences, 237:287–304.
BibTeX
@article{Eger:2013:c,
author = {Eger, Steffen},
title = {Sequence alignment with arbitrary steps and further generalizations,
with applications to alignments in linguistics},
journal = {Information Sciences},
volume = {237},
pages = {287--304},
abstract = {We provide simple generalizations of the classical Needleman–Wunsch
algorithm for aligning two sequences. First, we let both sequences
be defined over arbitrary, potentially different alphabets. Secondly,
we consider similarity functions between elements of both sequences
with ranges in a semiring. Thirdly, instead of considering only
‘match’, ‘mismatch’ and ‘skip’ operations, we allow arbitrary
non-negative alignment ‘steps’ S. Next, we present novel combinatorial
formulas for the number of monotone alignments between two sequences
for selected steps S. Finally, we illustrate sample applications
in natural language processing that require larger steps than
available in the original Needleman–Wunsch sequence alignment
procedure such that our generalizations can be fruitfully adopted.},
website = {http://www.sciencedirect.com/science/article/pii/S0020025513001485},
year = {2013}
}
2013.
Restricted weighted integer compositions and extended binomial coefficients.. Journal of Integer Sequences (electronic only), 16(1).
BibTeX
@article{Eger:2013:d,
author = {Eger, Steffen},
title = {Restricted weighted integer compositions and extended binomial coefficients},
journal = {Journal of Integer Sequences (electronic only)},
volume = {16},
number = {1},
abstract = {We prove a simple relationship between extended binomial coefficients
— natural extensions of the well-known binomial coefficients —
and weighted restricted integer compositions. Moreover, we give a very useful
interpretation of extended binomial coefficients as representing
distributions of sums of independent discrete random variables.
We apply our results, e.g., to determine the distribution of the
sum of k logarithmically distributed random variables, and to
determining the distribution, specifying all moments, of the random
variable whose values are part-products of random restricted integer
compositions. Based on our findings and using the central limit
theorem, we also give generalized Stirling formulae for central
extended binomial coefficients. We enlarge the list of known properties
of extended binomial coefficients.},
issn = {1530-7638},
pdf = {https://cs.uwaterloo.ca/journals/JIS/VOL16/Eger/eger6.pdf},
publisher = {School of Computer Science, University of Waterloo,
Waterloo, ON},
website = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.397.3745},
year = {2013}
}
2013.
Webkorpora in Computerlinguistik und Sprachforschung.
Journal for Language Technology and Computational
Linguistics (JLCL), 28(2).
JLCL.
BibTeX
@book{Schneider:Storrer:Mehler:2013,
author = {Mehler, Alexander and Schneider, Roman and Storrer, Angelika},
editor = {Schneider, Roman and Storrer, Angelika and Mehler, Alexander},
title = {Webkorpora in Computerlinguistik und Sprachforschung},
publisher = {JLCL},
volume = {28},
number = {2},
series = {Journal for Language Technology and Computational
Linguistics (JLCL)},
image = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/Webkorpora-300-20.png},
issn = {2190-6858},
pagetotal = {107},
pdf = {http://www.jlcl.org/2013_Heft2/H2013-2.pdf},
year = {2013}
}
November, 2013.
WikiNect - A Kinetic Artwork Wiki for Exhibition Visitors.
BibTeX
@misc{Mehler:Luecking:vor:der:Brueck:2013:a,
author = {Mehler, Alexander and Lücking, Andy and vor der Brück, Tim and Abrami, Giuseppe},
title = {WikiNect - A Kinetic Artwork Wiki for Exhibition Visitors},
howpublished = {Poster Presentation at the Scientific Computing and
Cultural Heritage 2013 Conference, Heidelberg},
keywords = {wikinect},
month = nov,
poster = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/SCCHPoster2013.pdf},
url = {http://scch2013.wordpress.com/},
year = {2013}
}
May, 2013.
Theoretische Bausteine für einen semiotischen Ansatz zum Einsatz
von Gestik in der Aphasietherapie.
BibTeX
@misc{Luecking:2013:c,
author = {Lücking, Andy},
title = {Theoretische Bausteine für einen semiotischen Ansatz zum Einsatz
von Gestik in der Aphasietherapie},
howpublished = {Talk at the BKL workshop 2013, Bochum},
month = may,
url = {http://www.bkl-ev.de/bkl_workshop/archiv/workshop13_programm.php},
year = {2013}
}
October, 2013.
Eclectic Semantics for Non-Verbal Signs.
BibTeX
@misc{Luecking:2013:d,
author = {Lücking, Andy},
title = {Eclectic Semantics for Non-Verbal Signs},
howpublished = {Talk at the Conference on Investigating semantics:
Empirical and philosophical approaches, Bochum},
month = oct,
url = {http://www.ruhr-uni-bochum.de/phil-lang/investigating/index.html},
year = {2013}
}
December, 2013.
Multimodal Propositions? From Semiotic to Semantic Considerations
in the Case of Gestural Deictics. Poster Abstracts of the Proceedings of the 17th Workshop on the
Semantics and Pragmatics of Dialogue, 221–223.
BibTeX
@inproceedings{Luecking:2013:e,
author = {Lücking, Andy},
title = {Multimodal Propositions? From Semiotic to Semantic Considerations
in the Case of Gestural Deictics},
booktitle = {Poster Abstracts of the Proceedings of the 17th Workshop on the
Semantics and Pragmatics of Dialogue},
editor = {Fernandez, Raquel and Isard, Amy},
series = {SemDial 2013},
pages = {221--223},
address = {Amsterdam},
month = dec,
poster = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/dialdam2013.pdf},
year = {2013}
}
2013.
Source and Translation Classification using Most Frequent Words. Proceedings of the 6th International Joint Conference on Natural
Language Processing (IJCNLP).
BibTeX
@inproceedings{Islam:Hoenen:2013,
author = {Islam, Md. Zahurul and Hoenen, Armin},
title = {Source and Translation Classification using Most Frequent Words},
booktitle = {Proceedings of the 6th International Joint Conference on Natural
Language Processing (IJCNLP)},
abstract = {Recently, translation scholars have made some general claims about
translation properties. Some of these are source language independent
while others are not. Koppel and Ordan (2011) performed empirical
studies to validate both types of properties using English source
texts and other texts translated into English. Obviously, corpora
of this sort, which focus on a single language, are not adequate
for claiming universality of translation properties. In this
paper, we are validating both types of translation properties
using original and translated texts from six European languages.},
pdf = {http://www.aclweb.org/anthology/I/I13/I13-1185.pdf},
website = {http://aclanthology.info/papers/source-and-translation-classification-using-most-frequent-words},
year = {2013}
}
2013.
On Three Notions of Grounding of Artificial Dialog Companions. Science, Technology & Innovation Studies, 10(1):31–36.
BibTeX
@article{Luecking:Mehler:2013:a,
author = {Lücking, Andy and Mehler, Alexander},
title = {On Three Notions of Grounding of Artificial Dialog Companions},
journal = {Science, Technology \& Innovation Studies},
volume = {10},
number = {1},
pages = {31--36},
abstract = {We provide a new, theoretically motivated evaluation grid for
assessing the conversational achievements of Artificial Dialog
Companions (ADCs). The grid is spanned along three grounding problems.
Firstly, it is argued that symbol grounding in general has to
be intrinsic. Current approaches in this context, however, are
limited to a certain kind of expression that can be grounded in
this way. Secondly, we identify three requirements for conversational
grounding, the process leading to mutual understanding. Finally,
we sketch a test case for symbol grounding in the form of the
philosophical grounding problem that involves the use of modal
language. Together, the three grounding problems provide a grid
that allows us to assess ADCs’ dialogical performances and to
pinpoint future developments on these grounds.},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/STI-final-badge.pdf},
website = {http://www.sti-studies.de/ojs/index.php/sti/article/view/143},
year = {2013}
}
2013.
Die Dynamik sozialer und sprachlicher Netzwerke: Konzepte, Methoden
und empirische Untersuchungen an Beispielen des WWW.
Springer VS.
BibTeX
@book{FrankJob:Mehler:Sutter:2013,
editor = {Frank-Job, Barbara and Mehler, Alexander and Sutter, Tilmann},
title = {Die Dynamik sozialer und sprachlicher Netzwerke: Konzepte, Methoden
und empirische Untersuchungen an Beispielen des WWW},
publisher = {Springer VS},
address = {Wiesbaden},
abstract = {In diesem Band pr{\"a}sentieren Medien- und Informationswissenschaftler,
Netzwerkforscher aus Informatik, Texttechnologie und Physik, Soziologen
und Linguisten interdisziplin{\"a}r Aspekte der Erforschung komplexer
Mehrebenen-Netzwerke. Im Zentrum ihres Interesses stehen Untersuchungen
zum Zusammenhang zwischen sozialen und sprachlichen Netzwerken
und ihrer Dynamiken, aufgezeigt an empirischen Beispielen aus
dem Bereich des Web 2.0, aber auch an historischen Dokumentenkorpora
sowie an Rezeptions-Netzwerken aus Kunst- und Literaturwissenschaft.},
image = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/DieDynamikSozialerUndSprachlicherNetzwerke.jpg},
pagetotal = {240},
year = {2013}
}
2013.
Interfacing Speech and Co-Verbal Gesture: Exemplification. Proceedings of the 35th Annual Conference of the German Linguistic Society, 284–286.
BibTeX
@inproceedings{Luecking:2013:b,
author = {Lücking, Andy},
title = {Interfacing Speech and Co-Verbal Gesture: Exemplification},
booktitle = {Proceedings of the 35th Annual Conference of the German Linguistic Society},
series = {DGfS 2013},
pages = {284--286},
address = {Potsdam, Germany},
year = {2013}
}
2013.
Ikonische Gesten. Grundzüge einer linguistischen Theorie.
De Gruyter.
Zugl. Diss. Univ. Bielefeld (2011).
BibTeX
@book{Luecking:2013,
  author    = {Lücking, Andy},
  title     = {Ikonische Gesten. Grundzüge einer linguistischen Theorie},
  publisher = {De Gruyter},
  address   = {Berlin and Boston},
  year      = {2013},
  note      = {Zugl. Diss. Univ. Bielefeld (2011)},
  abstract  = {Nicht-verbale Zeichen, insbesondere sprachbegleitende Gesten,
spielen eine herausragende Rolle in der menschlichen Kommunikation.
Um eine Analyse von Gestik innerhalb derjenigen Disziplinen, die
sich mit der Erforschung und Modellierung von Dialogen besch{\"a}ftigen,
zu ermöglichen, bedarf es einer entsprechenden linguistischen
Rahmentheorie. „Ikonische Gesten“ bietet einen ersten zeichen-
und wahrnehmungstheoretisch motivierten Rahmen an, in dem eine
grammatische Analyse der Integration von Sprache und Gestik möglich
ist. Ausgehend von einem Abriss semiotischer Zug{\"a}nge zu ikonischen
Zeichen wird der vorherrschende {\"A}hnlichkeitsansatz unter Rückgriff
auf Wahrnehmungstheorien zugunsten eines Exemplifikationsansatzes
verworfen. Exemplifikation wird im Rahmen einer unifikationsbasierten
Grammatik umgesetzt. Dort werden u.a. multimodale Wohlgeformtheit,
Synchronie und multimodale Subkategorisierung als neue Gegenst{\"a}nde
linguistischer Forschung eingeführt und im Rahmen einer integrativen
Analyse von Sprache und Gestik modelliert.},
  image     = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/ikonischeGesten.jpg}
}
2013.
Automatic Readability Classification of Crowd-Sourced Data based
on Linguistic and Information-Theoretic Features. 14th International Conference on Intelligent Text Processing and
Computational Linguistics.
BibTeX
@inproceedings{Islam:Mehler:2013:a,
  author    = {Islam, Md. Zahurul and Mehler, Alexander},
  title     = {Automatic Readability Classification of Crowd-Sourced Data based
on Linguistic and Information-Theoretic Features},
  booktitle = {14th International Conference on Intelligent Text Processing and
Computational Linguistics},
  year      = {2013},
  abstract  = {This paper presents a classifier of text readability based on
information-theoretic features. The classifier was developed based
on a linguistic approach to readability that explores lexical,
syntactic and semantic features. For this evaluation we extracted
a corpus of 645 articles from Wikipedia together with their quality
judgments. We show that information-theoretic features perform
as well as their linguistic counterparts even if we explore several
linguistic levels at once.},
  owner     = {zahurul},
  timestamp = {2013.01.22},
  pdf       = {http://www.cys.cic.ipn.mx/ojs/index.php/CyS/article/download/1516/1497},
  website   = {http://www.redalyc.org/articulo.oa?id=61527437002}
}
2013.
English to Bangla Name Transliteration System (Abstract). The 23rd Meeting of Computational Linguistics in the Netherlands (CLIN 2013).
BibTeX
@inproceedings{Islam:Rahman:2013,
author = {Islam, Md. Zahurul and Rahman, Rashedur},
title = {English to Bangla Name Transliteration System (Abstract)},
booktitle = {The 23rd Meeting of Computational Linguistics in the Netherlands (CLIN 2013)},
abstract = {Machine translation systems always struggle transliterating names
and unknown words during the translation process. It becomes more
problematic when the source and the target language use different
scripts for writing. To handle this problem, transliteration systems
are becoming popular as additional modules of the MT systems.
In this abstract, we are presenting an English to Bangla name
transliteration system that outperforms Google’s transliteration
system. The transliteration system is the same as the phrase based
statistical machine translation system, but it works on character
level rather than on phrase level. The performance of a statistical
system is directly correlated with the size of the training corpus.
In this work, 2200 names are extracted from the Wikipedia cross
lingual links and from Geonames. Also 3694 names are manually
transliterated and added to the data. 4716 names are used for
training, 590 for tuning and 588 names are used for testing. If
we consider only the candidate transliterations, the system gives
64.28\% accuracy. The performance increases to more than 90\%,
if we consider only the top 5 transliterations. To compare with
the Google’s English to Bangla transliteration system, a list
of 100 names are randomly selected from the test data and translated
by both systems. Our system gives 63\% accuracy where the Google’s
transliteration system does not transliterate a single name correctly.
We have found significant improvement in terms of BLEU and TER
score when we add the transliteration module with an English to
Bangla machine translation system.},
owner = {zahurul},
timestamp = {2013.01.22},
website = {https://www.academia.edu/3955036/English_to_Bangla_Name_Transliteration_System},
year = {2013}
}
2013.
Zur Struktur und Dynamik der kollaborativen Plagiatsdokumentation
am Beispiel des GuttenPlag Wiki: eine Vorstudie. Die Dynamik sozialer und sprachlicher Netzwerke. Konzepte, Methoden
und empirische Untersuchungen am Beispiel des WWW.
BibTeX
@incollection{Mehler:Stegbauer:Gleim:2013,
author = {Mehler, Alexander and Stegbauer, Christian and Gleim, Rüdiger},
title = {Zur Struktur und Dynamik der kollaborativen Plagiatsdokumentation
am Beispiel des GuttenPlag Wiki: eine Vorstudie},
booktitle = {Die Dynamik sozialer und sprachlicher Netzwerke. Konzepte, Methoden
und empirische Untersuchungen am Beispiel des WWW},
publisher = {VS Verlag},
editor = {Frank-Job, Barbara and Mehler, Alexander and Sutter, Tilmann},
address = {Wiesbaden},
year = {2013}
}
2013.
Data-based Analysis of Speech and Gesture: The Bielefeld Speech
and Gesture Alignment Corpus (SaGA) and its Applications. Journal of Multimodal User Interfaces, 7(1-2):5–18.
BibTeX
@article{Luecking:Bergmann:Hahn:Kopp:Rieser:2012,
author = {Lücking, Andy and Bergmann, Kirsten and Hahn, Florian and Kopp, Stefan
and Rieser, Hannes},
title = {Data-based Analysis of Speech and Gesture: The Bielefeld Speech
and Gesture Alignment Corpus (SaGA) and its Applications},
journal = {Journal of Multimodal User Interfaces},
volume = {7},
number = {1-2},
pages = {5--18},
abstract = {Communicating face-to-face, interlocutors frequently produce multimodal
meaning packages consisting of speech and accompanying gestures.
We discuss a systematically annotated speech and gesture corpus
consisting of 25 route-and-landmark-description dialogues, the
Bielefeld Speech and Gesture Alignment corpus (SaGA), collected
in experimental face-to-face settings. We first describe the primary
and secondary data of the corpus and its reliability assessment.
Then we go into some of the projects carried out using SaGA demonstrating
the wide range of its usability: on the empirical side, there
is work on gesture typology, individual and contextual parameters
influencing gesture production and gestures’ functions for dialogue
structure. Speech-gesture interfaces have been established extending
unification-based grammars. In addition, the development of a
computational model of speech-gesture alignment and its implementation
constitutes a research line we focus on.},
doi = {10.1007/s12193-012-0106-8},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/MMUI-SaGA-revision2.pdf},
website = {http://www.springerlink.com/content/a547448u86h3116x/?MUD=MP},
year = {2013}
}
2013.
Using Complex Network Analysis in the Cognitive Sciences. Proceedings of the 35th Annual Meeting of the Cognitive Science
Society, CogSci 2013, Berlin, Germany, July 31 - August 3, 2013.
BibTeX
@inproceedings{Beckage:et:al:2013,
author = {Beckage, Nicole and Vitevitch, Michael S. and Mehler, Alexander and Colunga, Eliana},
title = {Using Complex Network Analysis in the Cognitive Sciences},
booktitle = {Proceedings of the 35th Annual Meeting of the Cognitive Science
Society, CogSci 2013, Berlin, Germany, July 31 - August 3, 2013},
editor = {Knauff, Markus and Pauen, Michael and Sebanz, Natalie and Wachsmuth, Ipke},
publisher = {cognitivesciencesociety.org},
year = {2013}
}
2012
2012.
Handbook of Technical Communication.
De Gruyter Mouton.
BibTeX
@book{Mehler:Romary:2012,
editor = {Mehler, Alexander and Romary, Laurent},
title = {Handbook of Technical Communication},
publisher = {De Gruyter Mouton},
series = {Handbooks of Applied Linguistics},
volume = {8},
address = {Berlin},
image = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/HandbookTechnicalCommunication.jpg},
pagetotal = {839},
year = {2012}
}
2012.
PhD Thesis: Network theory applied to linguistics: new advances in language
classification and typology.
BibTeX
@phdthesis{Abramov:2012,
  author   = {Abramov, Olga},
  title    = {Network theory applied to linguistics: new advances in language
classification and typology},
  school   = {Bielefeld University, Germany},
  year     = {2012},
  abstract = {This thesis bridges between two scientific fields -- linguistics
and computer science -- in terms of Linguistic Networks. From
the linguistic point of view we examine whether languages can
be distinguished when looking at network topology of different
linguistic networks. We deal with up to 17 languages and ask how
far the methods of network theory reveal the peculiarities of
single languages. We present and apply network models from different
levels of linguistic representation: syntactic, phonological and
morphological. The network models presented here allow to integrate
various linguistic features at once, which enables a more abstract,
holistic view at the particular language. From the point of view
of computer science we elaborate the instrumentarium of network
theory applying it to a new field. We study the expressiveness
of different network features and their ability to characterize
language structure. We evaluate the interplay of these features
and their goodness in the task of classifying languages genealogically.
Among others we compare network features related to: average degree,
average geodesic distance, clustering, entropy-based indices,
assortativity, centrality, compactness etc. We also propose some
new indices that can serve as additional characteristics of networks.
The results obtained show that network models succeed in classifying
related languages, and allow to study language structure in general.
The mathematical analysis of the particular network indices brings
new insights into the nature of these indices and their potential
when applied to different networks.},
  pdf      = {https://pub.uni-bielefeld.de/download/2538828/2542368},
  website  = {http://pub.uni-bielefeld.de/publication/2538828}
}
2012.
Measuring Repetitiveness in Texts, a Preliminary Investigation. Sprache und Datenverarbeitung. International Journal
for Language Data Processing, 36(2):93–104.
BibTeX
@article{Hoenen:2012:a,
author = {Hoenen, Armin},
title = {Measuring Repetitiveness in Texts, a Preliminary Investigation},
journal = {Sprache und Datenverarbeitung. International Journal
for Language Data Processing},
volume = {36},
number = {2},
pages = {93--104},
abstract = {In this paper, a model is presented for the automatic measurement
that can systematically describe the usage and function of the
phenomenon of repetition in written text. The motivating hypothesis
for this study is that the more repetitive a text is, the easier
it is to memorize. Therefore, an automated measurement index can
provide feedback to writers and for those who design texts that
are often memorized including songs, holy texts, theatrical plays,
and advertising slogans. The potential benefits of this kind of
systematic feedback are numerous, the main one being that content
creators would be able to employ a standard threshold of memorizability.
This study explores multiple ways of implementing and calculating
repetitiveness across levels of analysis (such as paragraph-level
or sub-word level) genres (such as songs, holy texts, and other
genres) and languages, integrating these into a model for
the automatic measurement of repetitiveness. The Avestan language
and some of its idiosyncratic features are explored in order to
illuminate how the proposed index is applied in the ranking of
texts according to their repetitiveness.},
website = {http://www.linse.uni-due.de/jahrgang-36-2012/articles/measuring-repetitiveness-in-texts-a-preliminary-investigation.html},
year = {2012}
}
2012.
The Combinatorics of String Alignments: Reconsidering the Problem. Journal of Quantitative Linguistics, 19(1):32–53.
BibTeX
@article{Eger:2012:a,
author = {Eger, Steffen},
title = {The Combinatorics of String Alignments: Reconsidering the Problem},
journal = {Journal of Quantitative Linguistics},
volume = {19},
number = {1},
pages = {32--53},
abstract = {In recent work, Covington discusses the number of alignments of
two strings. Thereby, Covington defines an alignment as “a way
of pairing up elements of two strings, optionally skipping some
but preserving the order”. This definition has drawbacks as it
excludes many relevant situations. In this work, we specify the
notion of an alignment so that many linguistically interesting
situations are covered. To this end, we define an alignment in
an abstract manner as a set of pairs and then define three properties
on such sets. Secondly, we specify the numbers of possibilities
of aligning two strings in each case.},
website = {http://www.tandfonline.com/doi/full/10.1080/09296174.2011.638792#tabModule},
year = {2012}
}
2012.
S-Restricted Monotone Alignments: Algorithm, Search Space, and Applications. Proceedings of COLING 2012, 781–798.
BibTeX
@inproceedings{Eger:2012:b,
author = {Eger, Steffen},
title = {S-Restricted Monotone Alignments: Algorithm, Search Space, and Applications},
booktitle = {Proceedings of COLING 2012},
pages = {781--798},
address = {Mumbai, India},
publisher = {The COLING 2012 Organizing Committee},
abstract = {We present a simple and straightforward alignment algorithm for
monotone many-to-many alignments in grapheme-to-phoneme conversion
and related fields such as morphology, and discuss a few noteworthy
extensions. Moreover, we specify combinatorial formulas for monotone
many-to-many alignments and decoding in G2P which indicate that
exhaustive enumeration is generally possible, so that some limitations
of our approach can easily be overcome. Finally, we present a
decoding scheme, within the monotone many-to-many alignment paradigm,
that relates the decoding problem to restricted integer compositions
and that is, putatively, superior to alternatives suggested in
the literature},
pdf = {http://aclweb.org/anthology/C/C12/C12-1048.pdf},
website = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.370.5941},
year = {2012}
}
2012.
Lexical semantic typologies from bilingual corpora - A framework. SEM 2012: The First Joint Conference on Lexical and Computational
Semantics – Volume 1: Proceedings of the main conference and
the shared task, and Volume 2: Proceedings of the Sixth International
Workshop on Semantic Evaluation (SemEval 2012), 90–94.
BibTeX
@inproceedings{Eger:2012:c,
author = {Eger, Steffen},
title = {Lexical semantic typologies from bilingual corpora - A framework},
booktitle = {SEM 2012: The First Joint Conference on Lexical and Computational
Semantics -- Volume 1: Proceedings of the main conference and
the shared task, and Volume 2: Proceedings of the Sixth International
Workshop on Semantic Evaluation (SemEval 2012)},
pages = {90--94},
address = {Montreal, Canada},
publisher = {Association for Computational Linguistics},
abstract = {We present a framework, based on Sejane and Eger (2012), for inducing
lexical semantic typologies for groups of languages. Our framework
rests on lexical semantic association networks derived from encoding,
via bilingual corpora, each language in a common reference language,
the tertium comparationis, so that distances between languages
can easily be determined.},
pdf = {http://www.aclweb.org/anthology/S12-1015},
website = {http://dl.acm.org/citation.cfm?id=2387653},
year = {2012}
}
July, 2012.
Latent Barriers in Wiki-based Collaborative Writing. Proceedings of the Wikipedia Academy: Research and Free Knowledge.
June 29 - July 1 2012.
BibTeX
@inproceedings{Mehler:Stegbauer:Gleim:2012:b,
author = {Mehler, Alexander and Stegbauer, Christian and Gleim, Rüdiger},
title = {Latent Barriers in Wiki-based Collaborative Writing},
booktitle = {Proceedings of the Wikipedia Academy: Research and Free Knowledge.
June 29 - July 1 2012},
address = {Berlin},
month = jul,
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/12_Paper_Alexander_Mehler_Christian_Stegbauer_Ruediger_Gleim.pdf},
year = {2012}
}
2012.
Altüberlieferte Sprachen als Gegenstand der Texttechnologie –
Ancient Languages as the Object of Text Technology.
27(2).
JLCL.
BibTeX
@book{Hoenen:Jügel:2012,
editor = {Hoenen, Armin and Jügel, Thomas},
title = {Altüberlieferte Sprachen als Gegenstand der Texttechnologie --
Ancient Languages as the Object of Text Technology},
publisher = {JLCL},
volume = {27},
number = {2},
series = {Journal for Language Technology and Computational
Linguistics (JLCL)},
abstract = {‘Avestan’ is the name of the ritual language of Zoroastrianism,
which was the state religion of the Iranian empire in Achaemenid,
Arsacid and Sasanid times, covering a time span of more than 1200
years. [1] It is named after the ‘Avesta’, i.e., the collection
of holy scriptures that form the basis of the religion which was
allegedly founded by Zarathushtra, also known as Zoroaster, by
about the beginning of the first millennium B.C. Together with
Vedic Sanskrit, Avestan represents one of the most archaic witnesses
of the Indo-Iranian branch of the Indo-European languages, which
makes it especially interesting for historical-comparative linguistics.
This is why the texts of the Avesta were among the first objects
of electronic corpus building that were undertaken in the framework
of Indo-European studies, leading to the establishment of the
TITUS database (‘Thesaurus indogermanischer Text- und Sprachmaterialien’).
[2] Today, the complete Avestan corpus is available, together
with elaborate search functions [3] and an extended version of
the subcorpus of the so-called ‘Yasna’, which covers a great deal
of the attestation of variant readings. [4] Right from the beginning
of their computational work concerning the Avesta, the compilers
[5] had to cope with the fact that the texts contained in it have
been transmitted in a special script written from right to left,
which was also used for printing them in the scholarly editions
used until today. [6] It goes without saying that there was no
way in the middle of the 1980s to encode the Avestan scriptures
exactly as they are found in the manuscripts. Instead, we had
to rely upon transcriptional devices that were dictated by the
restrictions of character encoding as provided by the computer
systems used. As the problems we had to face in this respect and
the solutions we could apply are typical for the development of
computational work on ancient languages, it seems worthwhile to
sketch them out here.},
image = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/AltueberlieferteSprachen-300-20.png},
issn = {2190-6858},
pdf = {http://www.jlcl.org/2012_Heft2/H2012-2.pdf},
year = {2012}
}
2012.
Wissensakquisition mithilfe maschineller Lernverfahren auf tiefen
semantischen Repräsentationen.
Springer.
BibTeX
@book{vor:der:Brueck:2012:a,
  author    = {vor der Brück, Tim},
  title     = {Wissensakquisition mithilfe maschineller Lernverfahren auf tiefen
semantischen Repr{\"a}sentationen},
  publisher = {Springer},
  address   = {Heidelberg, Germany},
  school    = {FernUniversit{\"a}t in Hagen},
  year      = {2012},
  abstract  = {Eine gro{\ss}e Wissensbasis ist eine Voraussetzung für eine Vielzahl
von Anwendungen im Bereich der automatischen Sprachverarbeitung,
wie Frage-Antwort- oder Information-Retrieval-Systeme. Ein Mensch
hat sich das erforderliche Wissen, um Informationen zu suchen
oder Fragen zu beantworten, im Laufe seines Lebens angeeignet.
Einem Computer muss dieses Wissen explizit mitgeteilt werden.
Tim vor der Brück beschreibt einen Ansatz, wie ein Computer dieses
Wissen {\"a}hnlich wie ein Mensch durch die Lektüre von Texten
erwerben kann. Dabei kommen Methoden der Logik und des maschinellen
Lernens zum Einsatz.}
}
2012.
Synonymy Extraction from Semantic Networks Using String and Graph Kernel Methods. Proceedings of the 20th European Conference on Artificial Intelligence (ECAI), 822–827.
BibTeX
@inproceedings{vor:der:Brueck:Wang:2012,
  author    = {vor der Brück, Tim and Wang, Yu-Fang},
  title     = {Synonymy Extraction from Semantic Networks Using String and Graph Kernel Methods},
  booktitle = {Proceedings of the 20th European Conference on Artificial Intelligence (ECAI)},
  pages     = {822--827},
  address   = {Montpellier, France},
  year      = {2012},
  abstract  = {Synonyms are a highly relevant information source for natural
language processing. Automatic synonym extraction methods have
in common that they are either applied on the surface representation
of the text or on a syntactical structure derived from it. In
this paper, however, we present a semantic synonym extraction
approach that operates directly on semantic networks (SNs), which
were derived from text by a deep syntactico-semantic analysis.
Synonymy hypotheses are extracted from the SNs by graph matching.
These hypotheses are then validated by a support vector machine
(SVM) employing a combined graph and string kernel. Our method
was compared to several other approaches and the evaluation has
shown that our results are considerably superior},
  pdf       = {http://www.vdb1.de/papers/ECAI_535.pdf},
  website   = {http://ebooks.iospress.nl/publication/7076}
}
2012.
Hyponym Extraction Employing a Weighted Graph Kernel. Statistical and Machine Learning Approaches for Network Analysis.
BibTeX
@incollection{vor:der:Brueck:2012:b,
author = {vor der Brück, Tim},
title = {Hyponym Extraction Employing a Weighted Graph Kernel},
booktitle = {Statistical and Machine Learning Approaches for Network Analysis},
publisher = {Wiley},
editor = {Dehmer, Matthias and Basak, Subhash C.},
address = {Hoboken, New Jersey},
year = {2012}
}
2012.
Text Readability Classification of Textbooks of a Low-Resource Language. Proceedings of the 26th Pacific Asia Conference on Language, Information,
and Computation (PACLIC 26).
BibTeX
@inproceedings{Islam:Mehler:Rahman:2012,
author = {Islam, Md. Zahurul and Mehler, Alexander and Rahman, Rashedur},
title = {Text Readability Classification of Textbooks of a Low-Resource Language},
booktitle = {Proceedings of the 26th Pacific Asia Conference on Language, Information,
and Computation (PACLIC 26)},
abstract = {There are many languages considered to be low-density languages,
either because the population speaking the language is not very
large, or because insufficient digitized text material is available
in the language even though millions of people speak the language.
Bangla is one of the latter ones. Readability classification is
an important Natural Language Processing (NLP) application that
can be used to judge the quality of documents and assist writers
to locate possible problems. This paper presents a readability
classifier of Bangla textbook documents based on information-theoretic
and lexical features. The features proposed in this paper result
in an F-score that is 50\% higher than that for traditional readability
formulas.},
owner = {zahurul},
pdf = {http://www.aclweb.org/anthology/Y12-1059},
timestamp = {2012.08.14},
website = {http://www.researchgate.net/publication/256648250_Text_Readability_Classification_of_Textbooks_of_a_Low-Resource_Language},
year = {2012}
}
2012.
Introduction: Framing Technical Communication. Handbook of Technical Communication, 8:1–26.
BibTeX
@incollection{Mehler:Romary:Gibbon:2012,
author = {Mehler, Alexander and Romary, Laurent and Gibbon, Dafydd},
title = {Introduction: Framing Technical Communication},
booktitle = {Handbook of Technical Communication},
publisher = {De Gruyter Mouton},
editor = {Mehler, Alexander and Romary, Laurent and Gibbon, Dafydd},
volume = {8},
series = {Handbooks of Applied Linguistics},
pages = {1--26},
address = {Berlin and Boston},
year = {2012}
}
2012.
Pathways of Alignment between Gesture and Speech: Assessing Information
Transmission in Multimodal Ensembles. Proceedings of the International Workshop on Formal and Computational
Approaches to Multimodal Communication under the auspices of ESSLLI
2012, Opole, Poland, 6-10 August.
BibTeX
@inproceedings{Mehler:Luecking:2012:d,
author = {Mehler, Alexander and Lücking, Andy},
title = {Pathways of Alignment between Gesture and Speech: Assessing Information
Transmission in Multimodal Ensembles},
booktitle = {Proceedings of the International Workshop on Formal and Computational
Approaches to Multimodal Communication under the auspices of ESSLLI
2012, Opole, Poland, 6-10 August},
editor = {Giorgolo, Gianluca and Alahverdzhieva, Katya},
abstract = {We present an empirical account of multimodal ensembles based
on Hjelmslev’s notion of selection. This is done to get measurable
evidence for the existence of speech-and-gesture ensembles. Utilizing
information theory, we show that there is an information transmission
that makes a gestures’ representation technique predictable when
merely knowing its lexical affiliate – in line with the notion
of the primacy of language. Thus, there is evidence for a one-way
coupling – going from words to gestures – that leads to speech-and-gesture
alignment and underlies the constitution of multimodal ensembles.},
keywords = {wikinect},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2016/06/Mehler_Luecking_FoCoMC2012-2.pdf},
website = {http://www.researchgate.net/publication/268368670_Pathways_of_Alignment_between_Gesture_and_Speech_Assessing_Information_Transmission_in_Multimodal_Ensembles},
year = {2012}
}
2012.
Towards a Conceptual, Unification-based Speech-Gesture Interface. Proceedings of the International Workshop on Formal and Computational
Approaches to Multimodal Communication under the auspices of ESSLLI
2012, Opole, Poland, 6-10 August.
BibTeX
@inproceedings{Luecking:2012,
author = {Lücking, Andy},
title = {Towards a Conceptual, Unification-based Speech-Gesture Interface},
booktitle = {Proceedings of the International Workshop on Formal and Computational
Approaches to Multimodal Communication under the auspices of ESSLLI
2012, Opole, Poland, 6-10 August},
editor = {Giorgolo, Gianluca and Alahverdzhieva, Katya},
abstract = {A framework for grounding the semantics of co-verbal iconic gestures
is presented. A resemblance account to iconicity is discarded
in favor of an exemplification approach. It is sketched how exemplification
can be captured within a unification-based grammar that provides
a conceptual interface. Gestures modeled as vector sequences are
the exemplificational base. Some hypotheses that follow from the
general account are pointed at and remaining challenges are discussed.},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/FoCoMoC2012-1.pdf},
year = {2012}
}
2012.
WikiNect: Towards a Gestural Writing System for Kinetic Museum Wikis. Proceedings of the International Workshop On User Experience in
e-Learning and Augmented Technologies in Education (UXeLATE 2012)
in Conjunction with ACM Multimedia 2012, 29 October- 2 November,
Nara, Japan, 7–12.
BibTeX
@inproceedings{Mehler:Luecking:2012:c,
author = {Mehler, Alexander and L{\"u}cking, Andy},
title = {WikiNect: Towards a Gestural Writing System for Kinetic Museum Wikis},
booktitle = {Proceedings of the International Workshop On User Experience in
e-Learning and Augmented Technologies in Education (UXeLATE 2012)
in Conjunction with ACM Multimedia 2012, 29 October -- 2 November,
Nara, Japan},
pages = {7--12},
abstract = {We introduce WikiNect as a kinetic museum information system that
allows museum visitors to give on-site feedback about exhibitions.
To this end, WikiNect integrates three approaches to Human-Computer
Interaction (HCI): games with a purpose, wiki-based collaborative
writing and kinetic text-technologies. Our aim is to develop kinetic
technologies as a new paradigm of HCI. They dispense with classical
interfaces (e.g., keyboards) in that they build on non-contact
modes of communication like gestures or facial expressions as
input displays. In this paper, we introduce the notion of gestural
writing as a kinetic text-technology that underlies WikiNect to
enable museum visitors to communicate their feedback. The basic
idea is to explore sequences of gestures that share the semantic
expressivity of verbally manifested speech acts. Our task is to
identify such gestures that are learnable on-site in the usage
scenario of WikiNect. This is done by referring to so-called transient
gestures as part of multimodal ensembles, which are candidate
gestures of the desired functionality.},
keywords = {wikinect},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/UXeLATE2012-copyright.pdf},
website = {http://www.researchgate.net/publication/262319200_WikiNect_towards_a_gestural_writing_system_for_kinetic_museum_wikis},
year = {2012}
}
2012.
SOA implementation of the eHumanities Desktop. Proceedings of the Workshop on Service-oriented Architectures
(SOAs) for the Humanities: Solutions and Impacts, Digital Humanities
2012, Hamburg, Germany.
BibTeX
@inproceedings{Gleim:Mehler:Ernst:2012,
author = {Gleim, R{\"u}diger and Mehler, Alexander and Ernst, Alexandra},
title = {SOA implementation of the eHumanities Desktop},
booktitle = {Proceedings of the Workshop on Service-oriented Architectures
(SOAs) for the Humanities: Solutions and Impacts, Digital Humanities
2012, Hamburg, Germany},
abstract = {The eHumanities Desktop is a system which allows users to upload,
organize and share resources using a web interface. Furthermore
resources can be processed, annotated and analyzed in various
ways. Registered users can organize themselves in groups and collaboratively
work on their data. The eHumanities Desktop is platform independent
and runs in a web browser. This paper presents the system focusing
on its service orientation and process management.},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/dhc2012.pdf},
year = {2012}
}
2012.
On the Self-similarity of Intertextual Structures in Wikipedia. Proceedings of the HotSocial '12: The First ACM International
Workshop on Hot Topics on Interdisciplinary Social Networks Research, 65–68.
BibTeX
@inproceedings{Mehler:Stegbauer:2012,
author = {Mehler, Alexander and Stegbauer, Christian},
title = {On the Self-similarity of Intertextual Structures in Wikipedia},
booktitle = {Proceedings of the HotSocial '12: The First ACM International
Workshop on Hot Topics on Interdisciplinary Social Networks Research},
editor = {Xiaoming Fu and Peter Gloor and Jie Tang},
pages = {65--68},
address = {Beijing, China},
pdf = {http://wan.poly.edu/KDD2012/forms/workshop/HotSocial12/doc/p64_mehler.pdf},
website = {http://dl.acm.org/citation.cfm?id=2392633&bnc=1},
year = {2012}
}
2012.
Inducing Linguistic Networks from Historical Corpora: Towards
a New Method in Historical Semantics. Proceedings of the Conference on New Methods in Historical Corpora, 3:257–274.
BibTeX
@incollection{Mehler:Schwandt:Gleim:Ernst:2012,
author = {Mehler, Alexander and Schwandt, Silke and Gleim, R{\"u}diger and Ernst, Alexandra},
title = {Inducing Linguistic Networks from Historical Corpora: Towards
a New Method in Historical Semantics},
booktitle = {Proceedings of the Conference on New Methods in Historical Corpora},
publisher = {Narr},
editor = {Paul Bennett and Martin Durrell and Silke Scheible and Richard J. Whitt},
volume = {3},
series = {Corpus linguistics and Interdisciplinary perspectives
on language (CLIP)},
pages = {257--274},
address = {T{\"u}bingen},
year = {2012}
}
2012.
Assessing Agreement on Segmentations by Means of Staccato, the
Segmentation Agreement Calculator according to Thomann. Gesture and Sign Language in Human-Computer Interaction and Embodied
Communication, 7206:129–138.
BibTeX
@incollection{Luecking:Ptock:Bergmann:2012,
author = {L{\"u}cking, Andy and Ptock, Sebastian and Bergmann, Kirsten},
title = {Assessing Agreement on Segmentations by Means of Staccato, the
Segmentation Agreement Calculator according to Thomann},
booktitle = {Gesture and Sign Language in Human-Computer Interaction and Embodied
Communication},
publisher = {Springer},
editor = {Eleni Efthimiou and Georgios Kouroupetroglou and Stavroula-Evita Fotina},
volume = {7206},
series = {Lecture Notes in Artificial Intelligence},
pages = {129--138},
address = {Berlin and Heidelberg},
abstract = {Staccato, the Segmentation Agreement Calculator According to Thomann,
is a software tool for assessing the degree of agreement of
multiple segmentations of some time-related data (e.g., gesture
phases or sign language constituents). The software implements
an assessment procedure developed by Bruno Thomann and will be
made publicly available. The article discusses the rationale of
the agreement assessment procedure and points at future extensions
of Staccato.},
booksubtitle = {9th International Gesture Workshop, GW 2011, Athens,
Greece, May 2011, Revised Selected Papers},
website = {http://link.springer.com/chapter/10.1007/978-3-642-34182-3_12},
year = {2012}
}
2012.
Assessing Cognitive Alignment in Different Types of Dialog by
means of a Network Model. Neural Networks, 32:159–164.
BibTeX
@article{Mehler:Luecking:Menke:2012,
author = {Mehler, Alexander and L{\"u}cking, Andy and Menke, Peter},
title = {Assessing Cognitive Alignment in Different Types of Dialog by
means of a Network Model},
journal = {Neural Networks},
volume = {32},
pages = {159--164},
abstract = {We present a network model of dialog lexica, called TiTAN (Two-layer
Time-Aligned Network) series. TiTAN series capture the formation
and structure of dialog lexica in terms of serialized graph representations.
The dynamic update of TiTAN series is driven by the dialog-inherent
timing of turn-taking. The model provides a link between neural,
connectionist underpinnings of dialog lexica on the one hand and
observable symbolic behavior on the other. On the neural side,
priming and spreading activation are modeled in terms of TiTAN
networking. On the symbolic side, TiTAN series account for cognitive
alignment in terms of the structural coupling of the linguistic
representations of dialog partners. This structural stance allows
us to apply TiTAN in machine learning of data of dialogical alignment.
In previous studies, it has been shown that aligned dialogs can
be distinguished from non-aligned ones by means of TiTAN -based
modeling. Now, we simultaneously apply this model to two types
of dialog: task-oriented, experimentally controlled dialogs on
the one hand and more spontaneous, direction giving dialogs on
the other. We ask whether it is possible to separate aligned dialogs
from non-aligned ones in a type-crossing way. Starting from a
recent experiment (Mehler, L{\"u}cking, \& Menke, 2011a), we show
that such a type-crossing classification is indeed possible. This
hints at a structural fingerprint left by alignment in networks
of linguistic items that are routinely co-activated during conversation.},
doi = {10.1016/j.neunet.2012.02.013},
website = {http://www.sciencedirect.com/science/article/pii/S0893608012000421},
year = {2012}
}
2012.
Customization of the Europarl Corpus for Translation Studies. Proceedings of the 8th International Conference on Language Resources
and Evaluation (LREC).
BibTeX
@inproceedings{Islam:Mehler:2012:a,
author = {Islam, Md. Zahurul and Mehler, Alexander},
title = {Customization of the Europarl Corpus for Translation Studies},
year = {2012},
booktitle = {Proceedings of the 8th International Conference on Language Resources
and Evaluation (LREC)},
pdf = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/729_Paper.pdf},
owner = {zahurul},
timestamp = {2012.02.02},
abstract = {Currently, the area of translation studies lacks corpora by which
translation scholars can validate their theoretical claims, for
example, regarding the scope of the characteristics of the translation
relation. In this paper, we describe a customized resource in
the area of translation studies that mainly addresses research
on the properties of the translation relation. Our experimental
results show that the Type-Token-Ratio (TTR) is not a universally
valid indicator of the simplification of translation.}
}
2012.
Framing Multimodal Technical Communication. With Focal Points
in Speech-Gesture-Integration and Gaze Recognition. Handbook of Technical Communication, 8:591–644.
BibTeX
@incollection{Luecking:Pfeiffer:2012,
author = {L{\"u}cking, Andy and Pfeiffer, Thies},
title = {Framing Multimodal Technical Communication. With Focal Points
in Speech-Gesture-Integration and Gaze Recognition},
booktitle = {Handbook of Technical Communication},
publisher = {De Gruyter Mouton},
editor = {Alexander Mehler and Laurent Romary and Dafydd Gibbon},
volume = {8},
series = {Handbooks of Applied Linguistics},
chapter = {18},
pages = {591--644},
website = {http://www.degruyter.com/view/books/9783110224948/9783110224948.591/9783110224948.591.xml},
year = {2012}
}
2012.
Barrier-free Communication. Handbook of Technical Communication, 8:645–706.
BibTeX
@incollection{Kubina:Abramov:Luecking:2012,
author = {Kubina, Petra and Abramov, Olga and L{\"u}cking, Andy},
title = {Barrier-free Communication},
booktitle = {Handbook of Technical Communication},
publisher = {De Gruyter Mouton},
editor = {Alexander Mehler and Laurent Romary},
volume = {8},
series = {Handbooks of Applied Linguistics},
chapter = {19},
pages = {645--706},
address = {Berlin and Boston},
editora = {Dafydd Gibbon},
editoratype = {collaborator},
website = {http://www.degruyter.com/view/books/9783110224948/9783110224948.645/9783110224948.645.xml},
year = {2012}
}
2012.
What's the Scope of the Naming Game? Constraints on Semantic Categorization. Proceedings of the 9th International Conference on the Evolution of Language, 196–203.
BibTeX
@inproceedings{Luecking:Mehler:2012,
author = {L{\"u}cking, Andy and Mehler, Alexander},
title = {What's the Scope of the Naming Game? Constraints on Semantic Categorization},
booktitle = {Proceedings of the 9th International Conference on the Evolution of Language},
pages = {196--203},
address = {Kyoto, Japan},
abstract = {The Naming Game (NG) has become a vivid research paradigm for
simulation studies on language evolution and the establishment
of naming conventions. Recently, NGs were used for reconstructing
the creation of linguistic categories, most notably for color
terms. We recap the functional principle of NGs and the latter
Categorization Games (CGs) and evaluate them in the light of semantic
data of linguistic categorization outside the domain of colors.
This comparison reveals two specifics of the CG paradigm: Firstly,
the emerging categories draw basically on the predefined topology
of the learning domain. Secondly, the kind of categories that
can be learnt in CGs is bound to context-independent intersective
categories. This suggests that the NG and the CG focus on a special
aspect of natural language categorization, which disregards context-sensitive
categories used in a non-compositional manner.},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Evolang2012-AL_AM.pdf},
url = {http://kyoto.evolang.org/},
website = {https://www.researchgate.net/publication/267858061_WHAT'S_THE_SCOPE_OF_THE_NAMING_GAME_CONSTRAINTS_ON_SEMANTIC_CATEGORIZATION},
year = {2012}
}
2012.
A Three-step Model of Language Detection in Multilingual Ancient Texts. Proceedings of Workshop on Annotation of Corpora for Research in the Humanities.
BibTeX
@inproceedings{Sukhareva:Islam:Hoenen:Mehler:2012,
author = {Sukhareva, Maria and Islam, Md. Zahurul and Hoenen, Armin and Mehler, Alexander},
title = {A Three-step Model of Language Detection in Multilingual Ancient Texts},
year = {2012},
booktitle = {Proceedings of Workshop on Annotation of Corpora for Research in the Humanities},
address = {Heidelberg, Germany},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/sukhareva_islam_hoenen_mehler_2011.pdf},
website = {https://www.academia.edu/2236625/A_Three-step_Model_of_Language_Detection_in_Multilingual_Ancient_Texts},
abstract = {Ancient corpora contain various multilingual patterns. This imposes
numerous problems on their manual annotation and automatic processing.
We introduce a lexicon building system, called Lexicon Expander,
that has an integrated language detection module, Language Detection
(LD) Toolkit. The Lexicon Expander post-processes the output of
the LD Toolkit which leads to the improvement of f-score and accuracy
values. Furthermore, the functionality of the Lexicon Expander
also includes manual editing of lexical entries and automatic
morphological expansion by means of a morphological grammar.}
}
2011
2011.
A Model of Complexity Levels of Meaning Constitution in Simulation
Models of Language Evolution. International Journal of Signs and Semiotic Systems, 1(1):18–38.
BibTeX
@article{Luecking:Mehler:2011,
author = {L{\"u}cking, Andy and Mehler, Alexander},
title = {A Model of Complexity Levels of Meaning Constitution in Simulation
Models of Language Evolution},
journal = {International Journal of Signs and Semiotic Systems},
volume = {1},
number = {1},
pages = {18--38},
abstract = {Currently, some simulative accounts exist within dynamic or evolutionary
frameworks that are concerned with the development of linguistic
categories within a population of language users. Although these
studies mostly emphasize that their models are abstract, the paradigm
categorization domain is preferably that of colors. In this paper,
the authors argue that color adjectives are special predicates
in both linguistic and metaphysical terms: semantically, they
are intersective predicates, metaphysically, color properties
can be empirically reduced onto purely physical properties. The
restriction of categorization simulations to the color paradigm
systematically leads to ignoring two ubiquitous features of natural
language predicates, namely relativity and context-dependency.
Therefore, the models for simulation models of linguistic categories
are not able to capture the formation of categories like perspective-dependent
predicates `left' and `right', subsective predicates like `small'
and `big', or predicates that make reference to abstract objects
like `I prefer this kind of situation'. The authors develop a
three-dimensional grid of ascending complexity that is partitioned
according to the semiotic triangle. They also develop a conceptual
model in the form of a decision grid by means of which the complexity
level of simulation models of linguistic categorization can be
assessed in linguistic terms.},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/luecking_mehler_article_IJSSS.pdf},
year = {2011}
}
2011.
Geography of Social Ontologies: Testing a Variant of the Sapir-Whorf
Hypothesis in the Context of Wikipedia. Computer Speech and Language, 25(3):716–740.
BibTeX
@article{Mehler:Abramov:Diewald:2011:a,
author = {Mehler, Alexander and Abramov, Olga and Diewald, Nils},
title = {Geography of Social Ontologies: Testing a Variant of the Sapir-Whorf
Hypothesis in the Context of Wikipedia},
journal = {Computer Speech and Language},
volume = {25},
number = {3},
pages = {716--740},
abstract = {In this article, we test a variant of the Sapir-Whorf Hypothesis
in the area of complex network theory. This is done by analyzing
social ontologies as a new resource for automatic language classification.
Our method is to solely explore structural features of social
ontologies in order to predict family resemblances of languages
used by the corresponding communities to build these ontologies.
This approach is based on a reformulation of the Sapir-Whorf Hypothesis
in terms of distributed cognition. Starting from a corpus of 160
Wikipedia-based social ontologies, we test our variant of the
Sapir-Whorf Hypothesis by several experiments, and find out that
we outperform the corresponding baselines. All in all, the article
develops an approach to classify linguistic networks of tens of
thousands of vertices by exploring a small range of mathematically
well-established topological indices.},
doi = {10.1016/j.csl.2010.05.006},
website = {http://www.sciencedirect.com/science/article/pii/S0885230810000434},
year = {2011}
}
2011.
Social Ontologies as Generalized Nearly Acyclic Directed Graphs:
A Quantitative Graph Model of Social Ontologies by Example of
Wikipedia. Towards an Information Theory of Complex Networks: Statistical
Methods and Applications, 259–319.
BibTeX
@incollection{Mehler:2011:c,
author = {Mehler, Alexander},
title = {Social Ontologies as Generalized Nearly Acyclic Directed Graphs:
A Quantitative Graph Model of Social Ontologies by Example of
Wikipedia},
booktitle = {Towards an Information Theory of Complex Networks: Statistical
Methods and Applications},
publisher = {Birkh{\"a}user},
editor = {Dehmer, Matthias and Emmert-Streib, Frank and Mehler, Alexander},
pages = {259--319},
address = {Boston/Basel},
year = {2011}
}
May, 2011.
Staccato: Segmentation Agreement Calculator. Gesture in Embodied Communication and Human-Computer Interaction.
Proceedings of the 9th International Gesture Workshop, 50–53.
BibTeX
@inproceedings{Luecking:Ptock:Bergmann:2011,
author = {L{\"u}cking, Andy and Ptock, Sebastian and Bergmann, Kirsten},
title = {Staccato: Segmentation Agreement Calculator},
booktitle = {Gesture in Embodied Communication and Human-Computer Interaction.
Proceedings of the 9th International Gesture Workshop},
editor = {Eleni Efthimiou and Georgios Kouroupetroglou},
series = {GW 2011},
pages = {50--53},
address = {Athens, Greece},
publisher = {National and Kapodistrian University of Athens},
month = may,
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/LueckingEA_final.pdf},
year = {2011}
}
September, 2011.
A Graph Model of Alignment in Multilog. Proceedings of IEEE Africon 2011.
BibTeX
@inproceedings{Mehler:Luecking:2011,
author = {Mehler, Alexander and L{\"u}cking, Andy},
title = {A Graph Model of Alignment in Multilog},
booktitle = {Proceedings of IEEE Africon 2011},
series = {IEEE Africon},
address = {Zambia},
organization = {IEEE},
month = sep,
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/africon2011-paper-Alexander_Mehler_Andy_Luecking.pdf},
website = {https://www.researchgate.net/publication/267941012_A_Graph_Model_of_Alignment_in_Multilog},
year = {2011}
}
2011.
Positionssensitive Dekomposition von Potenzgesetzen am Beispiel
von Wikipedia-basierten Kollaborationsnetzwerken. Proceedings of the 4th Workshop Digital Social Networks at INFORMATIK
2011: Informatik schafft Communities, Oct 4-7, 2011, Berlin.
BibTeX
@inproceedings{Stegbauer:Mehler:2011,
author = {Stegbauer, Christian and Mehler, Alexander},
title = {Positionssensitive Dekomposition von Potenzgesetzen am Beispiel
von Wikipedia-basierten Kollaborationsnetzwerken},
year = {2011},
booktitle = {Proceedings of the 4th Workshop Digital Social Networks at INFORMATIK
2011: Informatik schafft Communities, Oct 4-7, 2011, Berlin},
specialnote = {Best Paper Award},
specialnotewebsite = {http://www.digitale-soziale-netze.de/gi-workshop/index.php?site=review2011},
pdf = {http://www.user.tu-berlin.de/komm/CD/paper/090423.pdf}
}
2011.
Building a DDC-annotated Corpus from OAI Metadata. Journal of Digital Information, 12(2).
BibTeX
@article{Loesch:Waltinger:Horstmann:Mehler:2011,
author = {L{\"o}sch, Mathias and Waltinger, Ulli and Horstmann, Wolfram and Mehler, Alexander},
title = {Building a DDC-annotated Corpus from OAI Metadata},
journal = {Journal of Digital Information},
volume = {12},
number = {2},
internal-note = {NOTE(review): the abstract below describes a text-readability
system, not DDC corpus building -- it appears to belong to a
different paper; verify against the published article},
abstract = {Checking for readability or simplicity of texts is important for
many institutional and individual users. Formulas for approximately
measuring text readability have a long tradition. Usually, they
exploit surface-oriented indicators like sentence length, word
length, word frequency, etc. However, in many cases, this information
is not adequate to realistically approximate the cognitive difficulties
a person can have to understand a text. Therefore we use deep
syntactic and semantic indicators in addition. The syntactic information
is represented by a dependency tree, the semantic information
by a semantic network. Both representations are automatically
generated by a deep syntactico-semantic analysis. A global readability
score is determined by applying a nearest neighbor algorithm on
3,000 ratings of 300 test persons. The evaluation showed that
the deep syntactic and semantic indicators lead to promising results
comparable to the best surface-based indicators. The combination
of deep and shallow indicators leads to an improvement over shallow
indicators alone. Finally, a graphical user interface was developed
which highlights difficult passages, depending on the individual
indicator values, and displays a global readability score.},
bibsource = {DBLP, http://dblp.uni-trier.de},
pdf = {https://journals.tdl.org/jodi/index.php/jodi/article/download/1765/1767},
website = {http://journals.tdl.org/jodi/article/view/1765},
year = {2011}
}
2011.
An Online Platform for Visualizing Time Series in Linguistic Networks. Proceedings of the Demonstrations Session of the 2011 IEEE / WIC
/ ACM International Conferences on Web Intelligence and Intelligent
Agent Technology, 22 - 27 August 2011, Lyon, France.
BibTeX
@inproceedings{Lux:Laussmann:Mehler:Menssen:2011,
author = {Lux, Markus and Lau{\ss}mann, Jan and Mehler, Alexander and Men{\ss}en, Christian},
title = {An Online Platform for Visualizing Time Series in Linguistic Networks},
year = {2011},
booktitle = {Proceedings of the Demonstrations Session of the 2011 IEEE / WIC
/ ACM International Conferences on Web Intelligence and Intelligent
Agent Technology, 22 - 27 August 2011, Lyon, France},
website = {http://dl.acm.org/citation.cfm?id=2052396},
poster = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/wi-iat-poster-2011.pdf}
}
2011.
Evolution of Romance Language in Written Communication: Network
Analysis of Late Latin and Early Romance Corpora. Leonardo, 44(3).
BibTeX
@article{Mehler:Diewald:Waltinger:et:al:2010,
author = {Mehler, Alexander and Diewald, Nils and Waltinger, Ulli and Gleim, R{\"u}diger
and Esch, Dietmar and Job, Barbara and K{\"u}chelmann, Thomas and Abramov, Olga
and Blanchard, Philippe},
title = {Evolution of Romance Language in Written Communication: Network
Analysis of Late Latin and Early Romance Corpora},
journal = {Leonardo},
volume = {44},
number = {3},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_diewald_waltinger_gleim_esch_job_kuechelmann_pustylnikov_blanchard_2010.pdf},
publisher = {MIT Press},
year = {2011}
}
2011.
From Neural Activation to Symbolic Alignment: A Network-Based
Approach to the Formation of Dialogue Lexica. Proceedings of the International Joint Conference on Neural Networks
(IJCNN 2011), San Jose, California, July 31 – August 5.
BibTeX
@inproceedings{Mehler:Luecking:Menke:2011,
author = {Mehler, Alexander and L{\"u}cking, Andy and Menke, Peter},
title = {From Neural Activation to Symbolic Alignment: A Network-Based
Approach to the Formation of Dialogue Lexica},
booktitle = {Proceedings of the International Joint Conference on Neural Networks
(IJCNN 2011), San Jose, California, July 31 -- August 5},
doi = {10.1109/IJCNN.2011.6033266},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/neural-align-final.pdf},
website = {http://dx.doi.org/10.1109/IJCNN.2011.6033266},
year = {2011}
}
2011.
The Bielefeld Jigsaw Map Game (JMG) Corpus. Abstracts of the Corpus Linguistics Conference 2011.
BibTeX
@inproceedings{Luecking:Abramov:Mehler:Menke:2011,
author = {L{\"u}cking, Andy and Abramov, Olga and Mehler, Alexander and Menke, Peter},
title = {The Bielefeld Jigsaw Map Game (JMG) Corpus},
booktitle = {Abstracts of the Corpus Linguistics Conference 2011},
series = {CL2011},
address = {Birmingham},
pdf = {http://www.birmingham.ac.uk/documents/college-artslaw/corpus/conference-archives/2011/Paper-137.pdf},
website = {http://www.birmingham.ac.uk/research/activity/corpus/publications/conference-archives/2011-birmingham.aspx},
year = {2011}
}
2011.
Modeling, Building and Maintaining Lexica for Corpus Linguistic
Studies by Example of Late Latin. Corpus Linguistics 2011, 20-22 July, Birmingham.
BibTeX
@inproceedings{Gleim:Hoenen:Diewald:Mehler:Ernst:2011,
author = {Gleim, R{\"u}diger and Hoenen, Armin and Diewald, Nils and Mehler, Alexander
and Ernst, Alexandra},
title = {Modeling, Building and Maintaining Lexica for Corpus Linguistic
Studies by Example of Late Latin},
booktitle = {Corpus Linguistics 2011, 20-22 July, Birmingham},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Paper-48.pdf},
year = {2011}
}
2011.
From experiments to corpora: The Ariadne Corpus Management System. Corpus Linguistics 2011, 20-22 July, Birmingham.
BibTeX
@inproceedings{Menke:Mehler:2011,
author = {Menke, Peter and Mehler, Alexander},
title = {From experiments to corpora: The Ariadne Corpus Management System},
year = {2011},
booktitle = {Corpus Linguistics 2011, 20-22 July, Birmingham},
website = {https://www.researchgate.net/publication/260186214_From_Experiments_to_Corpora_The_Ariadne_Corpus_Management_System}
}
2011.
Towards an Information Theory of Complex Networks: Statistical
Methods and Applications.
Birkhäuser.
BibTeX
@book{Dehmer:EmmertStreib:Mehler:2009:a,
editor = {Dehmer, Matthias and Emmert-Streib, Frank and Mehler, Alexander},
title = {Towards an Information Theory of Complex Networks: Statistical
Methods and Applications},
year = {2011},
publisher = {Birkh{\"a}user},
address = {Boston/Basel},
pagetotal = {395},
image = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/InformationTheoryComplexNetworks.jpg},
website = {http://link.springer.com/book/10.1007/978-0-8176-4904-3/page/1}
}
2011.
Assessing Lexical Alignment in Spontaneous Direction Dialogue
Data by Means of a Lexicon Network Model. Proceedings of 12th International Conference on Intelligent Text
Processing and Computational Linguistics (CICLing), February 20–26,
Tokyo, 368–379.
BibTeX
@inproceedings{Mehler:Luecking:Menke:2011:a,
author = {Mehler, Alexander and L{\"u}cking, Andy and Menke, Peter},
title = {Assessing Lexical Alignment in Spontaneous Direction Dialogue
Data by Means of a Lexicon Network Model},
booktitle = {Proceedings of 12th International Conference on Intelligent Text
Processing and Computational Linguistics (CICLing), February 20--26,
Tokyo},
series = {CICLing'11},
pages = {368--379},
address = {Berlin/New York},
publisher = {Springer},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/titan-cicling-camera-ready.pdf},
website = {http://www.springerlink.com/content/g7p2250025u20010/},
year = {2011}
}
2011.
Learning Methods for Graph Models of Document Structure. Modeling, Learning and Processing of Text Technological Data Structures.
BibTeX
@incollection{Geibel:Mehler:Kuehnberger:2011:a,
author = {Geibel, Peter and Mehler, Alexander and K{\"u}hnberger, Kai-Uwe},
title = {Learning Methods for Graph Models of Document Structure},
booktitle = {Modeling, Learning and Processing of Text Technological Data Structures},
publisher = {Springer},
editor = {Mehler, Alexander and K{\"u}hnberger, Kai-Uwe and Lobin, Henning and L{\"u}ngen, Harald
and Storrer, Angelika and Witt, Andreas},
series = {Studies in Computational Intelligence},
address = {Berlin/New York},
website = {http://www.springerlink.com/content/p095331472h76v56/},
year = {2011}
}
2011.
Integrating Content and Structure Learning: A Model of Hypertext
Zoning and Sounding. Modeling, Learning and Processing of Text Technological Data Structures.
BibTeX
@incollection{Mehler:Waltinger:2011:a,
author = {Mehler, Alexander and Waltinger, Ulli},
title = {Integrating Content and Structure Learning: A Model of Hypertext
Zoning and Sounding},
booktitle = {Modeling, Learning and Processing of Text Technological Data Structures},
publisher = {Springer},
editor = {Mehler, Alexander and K{\"u}hnberger, Kai-Uwe and Lobin, Henning and L{\"u}ngen, Harald
and Storrer, Angelika and Witt, Andreas},
series = {Studies in Computational Intelligence},
address = {Berlin/New York},
website = {http://rd.springer.com/chapter/10.1007/978-3-642-22613-7_15},
year = {2011}
}
2011.
Automatic Language Classification by Means of Syntactic Dependency Networks. Journal of Quantitative Linguistics, 18(4):291–336.
BibTeX
@article{Abramov:Mehler:2011:a,
author = {Abramov, Olga and Mehler, Alexander},
title = {Automatic Language Classification by Means of Syntactic Dependency Networks},
journal = {Journal of Quantitative Linguistics},
volume = {18},
number = {4},
pages = {291--336},
abstract = {This article presents an approach to automatic language classification
by means of linguistic networks. Networks of 11 languages were
constructed from dependency treebanks, and the topology of these
networks serves as input to the classification algorithm. The
results match the genealogical similarities of these languages.
In addition, we test two alternative approaches to automatic language
classification -- one based on n-grams and the other on quantitative
typological indices. All three methods show good results in identifying
genealogical groups. Beyond genetic similarities, network features
(and feature combinations) offer a new source of typological information
about languages. This information can contribute to a better understanding
of the interplay of single linguistic phenomena observed in language.},
website = {http://www.researchgate.net/publication/220469321_Automatic_Language_Classification_by_means_of_Syntactic_Dependency_Networks},
year = {2011}
}
2011.
Modeling, Learning and Processing of Text Technological Data Structures.
Studies in Computational Intelligence.
Springer.
BibTeX
@book{Mehler:Kuehnberger:Lobin:Luengen:Storrer:Witt:2011,
author = {Mehler, Alexander and K{\"u}hnberger, Kai-Uwe and Lobin, Henning and L{\"u}ngen, Harald
and Storrer, Angelika and Witt, Andreas},
editor = {Mehler, Alexander and K{\"u}hnberger, Kai-Uwe and Lobin, Henning and L{\"u}ngen, Harald
and Storrer, Angelika and Witt, Andreas},
title = {Modeling, Learning and Processing of Text Technological Data Structures},
publisher = {Springer},
series = {Studies in Computational Intelligence},
address = {Berlin/New York},
image = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/modelingLearningAndProcessing-medium.jpg},
pagetotal = {400},
website = {/books/texttechnologybook/},
year = {2011}
}
2011.
On Social Semantics in Information Retrieval.
Südwestdeutscher Verlag für Hochschulschriften.
Zugl. Diss Univ. Bielefeld (2010).
BibTeX
@book{Waltinger:2011,
author = {Waltinger, Ulli},
title = {On Social Semantics in Information Retrieval},
publisher = {S{\"u}dwestdeutscher Verlag f{\"u}r Hochschulschriften},
address = {Saarbr{\"u}cken},
note = {Zugl. Diss Univ. Bielefeld (2010)},
abstract = {In this thesis we analyze the performance of social semantics
in textual information retrieval. By means of collaboratively
constructed knowledge derived from web-based social networks,
inducing both common-sense and domain-specific knowledge as constructed
by a multitude of users, we will establish an improvement in performance
of selected tasks within different areas of information retrieval.
This work connects the concepts and the methods of social networks
and the semantic web to support the analysis of a social semantic
web that combines human intelligence with machine learning and
natural language processing. In this context, social networks,
as instances of the social web, are capable in delivering social
network data and document collections on a tremendous scale, inducing
thematic dynamics that cannot be achieved by traditional expert
resources. The question of an automatic conversion, annotation
and processing, however, is central to the debate of the benefits
of the social semantic web. Which kind of technologies and methods
are available, adequate and contribute to the processing of this
rapidly rising flood of information and at the same time being
capable of using the wealth of information in this large, but
more importantly decentralized internet. The present work researches
the performance of social semantic-induced categorization by means
of different document models. We will shed light on the question,
to which level social networks and social ontologies contribute
to selected areas within the information retrieval area, such
as automatically determining term and text associations, identifying
topics, text and web genre categorization, and also the domain
of sentiment analysis. We will show in extensive evaluations,
comparing the classical apparatus of text categorization -- Vector
Space Model, Latent Semantic Analysis and Support Vector Machine
-- that significant improvements can be obtained by considering
the collaborative knowledge derived from the social web.},
pdf = {https://pub.uni-bielefeld.de/download/2302025/2302028},
website = {http://www.ulliwaltinger.de/on-social-semantics-in-information-retrieval/},
year = {2011}
}
Sept., 2011.
Conscious learning semiotics systems to assist human persons (CLS2H). AFRICON, 2011, 1–7.
BibTeX
@inproceedings{Doebenhenisch:Abrami:Pfaff:Struwe:2011,
author = {Doeben-Henisch, Gerd and Abrami, Giuseppe and Pfaff, Marcus and Struwe, Marvin},
title = {Conscious learning semiotics systems to assist human persons ({CLS2H})},
booktitle = {AFRICON, 2011},
pages = {1--7},
abstract = {Challenged by the growing societal demand for Ambient Assistive
Living (AAL) technologies, we are dedicated to develop intelligent
technical devices which are able to communicate with human persons
in a truly human-like manner. The core of the project is a simulation
environment which enables the development of conscious learning
semiotic agents which will be able to assist human persons in
their daily life. We are reporting first results and future perspectives.},
doi = {10.1109/AFRCON.2011.6072043},
issn = {2153-0025},
keywords = {ambient assistive living;conscious learning semiotic
agents;conscious learning semiotics systems;human
persons;intelligent technical devices;simulation
environment;learning (artificial
intelligence);multi-agent systems;},
month = sep,
pdf = {http://www.doeben-henisch.de/gdhnp/csg/africon2011.pdf},
website = {http://www.researchgate.net/publication/261451874_Conscious_Learning_Semiotics_Systems_to_Assist_Human_Persons_(CLS(2)H)},
year = {2011}
}
2011.
Hierarchical Classification of OAI Metadata Using the DDC Taxonomy. Advanced Language Technologies for Digital Libraries (ALT4DL), 29–40.
BibTeX
@incollection{Waltinger:Mehler:Loesch:Horstmann:2011,
author = {Waltinger, Ulli and Mehler, Alexander and L{\"o}sch, Mathias and Horstmann, Wolfram},
title = {Hierarchical Classification of {OAI} Metadata Using the {DDC} Taxonomy},
booktitle = {Advanced Language Technologies for Digital Libraries (ALT4DL)},
publisher = {Springer},
editor = {Bernardi, Raffaella and Chambers, Sally and Gottfried, Bjoern
and Segond, Frederique and Zaihrayeu, Ilya},
series = {LNCS},
pages = {29--40},
address = {Berlin},
abstract = {In the area of digital library services, the access to subject-specific
metadata of scholarly publications is of utmost interest. One
of the most prevalent approaches for metadata exchange is the
XML-based Open Archive Initiative (OAI) Protocol for Metadata
Harvesting (OAI-PMH). However, due to its loose requirements regarding
metadata content there is no strict standard for consistent subject
indexing specified, which is furthermore needed in the digital
library domain. This contribution addresses the problem of automatic
enhancement of OAI metadata by means of the most widely used universal
classification schemes in libraries---the Dewey Decimal Classification
(DDC). To be more specific, we automatically classify scientific
documents according to the DDC taxonomy within three levels using
a machine learning-based classifier that relies solely on OAI
metadata records as the document representation. The results show
an asymmetric distribution of documents across the hierarchical
structure of the DDC taxonomy and issues of data sparseness. However,
the performance of the classifier shows promising results on all
three levels of the DDC.},
website = {http://www.springerlink.com/content/x20257512g818377/},
year = {2011}
}
2011.
Der eHumanities Desktop als Werkzeug in der historischen Semantik:
Funktionsspektrum und Einsatzszenarien. Journal for Language Technology and Computational
Linguistics (JLCL), 26(1):97–117.
BibTeX
@article{Mehler:Schwandt:Gleim:Jussen:2011,
author = {Mehler, Alexander and Schwandt, Silke and Gleim, R{\"u}diger and Jussen, Bernhard},
title = {Der eHumanities Desktop als Werkzeug in der historischen Semantik:
Funktionsspektrum und Einsatzszenarien},
journal = {Journal for Language Technology and Computational
Linguistics (JLCL)},
volume = {26},
number = {1},
pages = {97--117},
abstract = {Die Digital Humanities bzw. die Computational Humanities entwickeln
sich zu eigenst{\"a}ndigen Disziplinen an der Nahtstelle von Geisteswissenschaft
und Informatik. Diese Entwicklung betrifft zunehmend auch die
Lehre im Bereich der geisteswissenschaftlichen Fachinformatik.
In diesem Beitrag thematisieren wir den eHumanities Desktop als
ein Werkzeug f{\"u}r diesen Bereich der Lehre. Dabei geht es genauer
um einen Br{\"u}ckenschlag zwischen Geschichtswissenschaft und Informatik:
Am Beispiel der historischen Semantik stellen wir drei Lehrszenarien
vor, in denen der eHumanities Desktop in der geschichtswissenschaftlichen
Lehre zum Einsatz kommt. Der Beitrag schliesst mit einer Anforderungsanalyse
an zuk{\"u}nftige Entwicklungen in diesem Bereich.},
pdf = {http://media.dwds.de/jlcl/2011_Heft1/8.pdf},
year = {2011}
}
2011.
Qualitative Spatial Knowledge Acquisition Based on the Connection Relation. Proceedings of the 3rd International Conference on Advanced Cognitive
Technologies and Applications (COGNITIVE), 70–75.
BibTeX
@inproceedings{Dong:vor:der:Brueck:2011,
author = {Dong, Tiansi and vor der Br{\"u}ck, Tim},
title = {Qualitative Spatial Knowledge Acquisition Based on the Connection Relation},
booktitle = {Proceedings of the 3rd International Conference on Advanced Cognitive
Technologies and Applications (COGNITIVE)},
editor = {Bossomaier, Terry and Lorenz, Pascal},
pages = {70--75},
address = {Rome, Italy},
abstract = {Research in cognitive psychology shows that the connection relation
is the primitive spatial relation. This paper proposes a novel
spatial knowledge representation of indoor environments based
on the connection relation, and demonstrates how deictic orientation
relations can be acquired from a map, which is constructed purely
on connection relations between extended objects. Without loss
of generality, we restrict indoor environments to be constructed
by a set of rectangles, each representing either a room or a corridor.
The term fiat cell is coined to represent a subjective partition
along a corridor. Spatial knowledge includes rectangles, sides
information of rectangles, connection relations among rectangles,
and fiat cells of rectangles. Efficient algorithms are given for
identifying one shortest path between two locations, transforming
paths into fiat paths, and acquiring deictic orientations.},
pdf = {http://www.thinkmind.org/download.php?articleid=cognitive_2011_3_40_40123},
website = {http://www.thinkmind.org/index.php?view=article&articleid=cognitive_2011_3_40_40123},
year = {2011}
}
2011.
Multilingualism in Ancient Texts: Language Detection by Example
of Old High German and Old Saxon. GSCL conference on Multilingual Resources and Multilingual Applications
(GSCL 2011), 28-30 September, Hamburg, Germany.
BibTeX
@inproceedings{Zahurul:Mittmann:Mehler:2011,
author = {Islam, Md. Zahurul and Mittmann, Roland and Mehler, Alexander},
title = {Multilingualism in Ancient Texts: Language Detection by Example
of Old High German and Old Saxon},
booktitle = {GSCL conference on Multilingual Resources and Multilingual Applications
(GSCL 2011), 28--30 September, Hamburg, Germany},
abstract = {In this paper, we present an approach to language detection in
streams of multilingual ancient texts. We introduce a supervised
classifier that detects, amongst others, Old High German (OHG)
and Old Saxon (OS). We evaluate our model by means of three experiments
that show that language detection is possible even for dead languages.
Finally, we present an experiment in unsupervised language detection
as a tertium comparationis for our supervised classifier.},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Multilingualism_in_Ancient_Texts_Language_Detection_by_Example_of_Old_High_German_and_Old_Saxon.pdf},
timestamp = {2011.08.25},
year = {2011}
}
2011.
Multilingual Resources and Multilingual Applications: Proceedings
of the German Society for Computational Linguistics 2011, 207–210.
BibTeX
@inproceedings{Ries:Luecking:2011,
author = {Ries, Veronika and L{\"u}cking, Andy},
booktitle = {Multilingual Resources and Multilingual Applications: Proceedings
of the German Society for Computational Linguistics 2011},
year = {2011},
pages = {207--210},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Ries_Luecking.pdf},
poster = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/SoSaBiEC-poster.pdf},
internal-note = {NOTE(review): required title field is missing for this @inproceedings entry -- TODO: add the paper title (not recoverable from this file)}
}
2010
2010.
Minimum Spanning Markovian Trees: Introducing Context-Sensitivity
into the Generation of Spanning Trees. Structural Analysis of Complex Networks, 381–401.
BibTeX
@incollection{Mehler:2010:a,
author = {Mehler, Alexander},
title = {Minimum Spanning {Markovian} Trees: Introducing Context-Sensitivity
into the Generation of Spanning Trees},
booktitle = {Structural Analysis of Complex Networks},
publisher = {Birkh{\"a}user Publishing},
editor = {Dehmer, Matthias},
pages = {381--401},
address = {Basel},
abstract = {This chapter introduces a novel class of graphs: Minimum Spanning
Markovian Trees (MSMTs). The idea behind MSMTs is to provide spanning
trees that minimize the costs of edge traversals in a Markovian
manner, that is, in terms of the path starting with the root of
the tree and ending at the vertex under consideration. In a second
part, the chapter generalizes this class of spanning trees in
order to allow for damped Markovian effects in the course of spanning.
These two effects, (1) the sensitivity to the contexts generated
by consecutive edges and (2) the decreasing impact of more antecedent
(or 'weakly remembered') vertices, are well known in cognitive
modeling [6, 10, 21, 23]. In this sense, the chapter can also
be read as an effort to introduce a graph model to support the
simulation of cognitive systems. Note that MSMTs are not to be
confused with branching Markov chains or Markov trees [20] as
we focus on generating spanning trees from given weighted undirected
networks.},
website = {https://www.researchgate.net/publication/226700676_Minimum_Spanning_Markovian_Trees_Introducing_Context-Sensitivity_into_the_Generation_of_Spanning_Trees},
year = {2010}
}
2010.
Computational Linguistics for Mere Mortals – Powerful but Easy-to-use
Linguistic Processing for Scientists in the Humanities. Proceedings of LREC 2010.
BibTeX
@inproceedings{Gleim:Mehler:2010:b,
author = {Gleim, R{\"u}diger and Mehler, Alexander},
title = {Computational Linguistics for Mere Mortals -- Powerful but Easy-to-use
Linguistic Processing for Scientists in the Humanities},
booktitle = {Proceedings of LREC 2010},
address = {Malta},
publisher = {ELDA},
abstract = {Delivering linguistic resources and easy-to-use methods to a broad
public in the humanities is a challenging task. On the one hand
users rightly demand easy to use interfaces but on the other hand
want to have access to the full flexibility and power of the functions
being offered. Even though a growing number of excellent systems
exist which offer convenient means to use linguistic resources
and methods, they usually focus on a specific domain, as for example
corpus exploration or text categorization. Architectures which
address a broad scope of applications are still rare. This article
introduces the eHumanities Desktop, an online system for corpus
management, processing and analysis which aims at bridging the
gap between powerful command line tools and intuitive user interfaces.},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/gleim_mehler_2010.pdf},
year = {2010}
}
2010.
A Network Model of Interpersonal Alignment. Entropy, 12(6):1440–1483.
BibTeX
@article{Mehler:Weiss:Luecking:2010:a,
author = {Mehler, Alexander and L{\"u}cking, Andy and Wei{\ss}, Petra},
title = {A Network Model of Interpersonal Alignment},
journal = {Entropy},
volume = {12},
number = {6},
pages = {1440--1483},
abstract = {In dyadic communication, both interlocutors adapt to each other
linguistically, that is, they align interpersonally. In this article,
we develop a framework for modeling interpersonal alignment in
terms of the structural similarity of the interlocutors' dialog
lexica. This is done by means of so-called two-layer time-aligned
network series, that is, a time-adjusted graph model. The graph
model is partitioned into two layers, so that the interlocutors'
lexica are captured as subgraphs of an encompassing dialog graph.
Each constituent network of the series is updated utterance-wise.
Thus, both the inherent bipartition of dyadic conversations and
their gradual development are modeled. The notion of alignment
is then operationalized within a quantitative model of structure
formation based on the mutual information of the subgraphs that
represent the interlocutor's dialog lexica. By adapting and further
developing several models of complex network theory, we show that
dialog lexica evolve as a novel class of graphs that have not
been considered before in the area of complex (linguistic) networks.
Additionally, we show that our framework allows for classifying
dialogs according to their alignment status. To the best of our
knowledge, this is the first approach to measuring alignment in
communication that explores the similarities of graph-like cognitive
representations.},
doi = {10.3390/e12061440},
pdf = {http://www.mdpi.com/1099-4300/12/6/1440/pdf},
website = {http://www.mdpi.com/1099-4300/12/6/1440/},
year = {2010}
}
2010.
Genres on the Web: Computational Models and Empirical Studies.
Springer.
BibTeX
@book{Mehler:Sharoff:Santini:2010:a,
author = {Mehler, Alexander and Sharoff, Serge and Santini, Marina},
editor = {Mehler, Alexander and Sharoff, Serge and Santini, Marina},
title = {Genres on the Web: Computational Models and Empirical Studies},
publisher = {Springer},
address = {Dordrecht},
abstract = {The volume 'Genres on the Web' has been designed for a wide audience,
from the expert to the novice. It is a required book for scholars,
researchers and students who want to become acquainted with the
latest theoretical, empirical and computational advances in the
expanding field of web genre research. The study of web genre
is an overarching and interdisciplinary novel area of research
that spans from corpus linguistics, computational linguistics,
NLP, and text-technology, to web mining, webometrics, social network
analysis and information studies. This book gives readers a thorough
grounding in the latest research on web genres and emerging document
types. The book covers a wide range of web-genre focussed subjects,
such as: -The identification of the sources of web genres -Automatic
web genre identification -The presentation of structure-oriented
models -Empirical case studies One of the driving forces behind
genre research is the idea of a genre-sensitive information system,
which incorporates genre cues complementing the current keyword-based
search and retrieval applications.},
image = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/GenresOnTheWeb.jpg},
pagetotal = {376},
review = {http://www.springerlink.com/content/ym07440380524721/},
website = {http://www.springer.com/computer/ai/book/978-90-481-9177-2},
year = {2010}
}
2010.
Medienwandel als Wandel von Interaktionsformen – von frühen Medienkulturen
zum Web 2.0.
Verlag für Sozialwissenschaften.
BibTeX
@book{Sutter:Mehler:2010,
author = {Sutter, Tilmann and Mehler, Alexander},
editor = {Sutter, Tilmann and Mehler, Alexander},
title = {Medienwandel als Wandel von Interaktionsformen -- von fr{\"u}hen Medienkulturen
zum Web 2.0},
publisher = {Verlag f{\"u}r Sozialwissenschaften},
address = {Wiesbaden},
abstract = {Die Beitr{\"a}ge des Bandes untersuchen den Medienwandel von fr{\"u}hen
europ{\"a}ischen Medienkulturen bis zu aktuellen Formen der Internetkommunikation
unter soziologischer, kulturwissenschaftlicher und linguistischer
Perspektive. Zwar haben sich die Massenmedien von den Beschr{\"a}nkungen
sozialer Interaktionen gel{\"o}st, sie weisen dem Publikum aber eine
distanzierte, blo{\ss} rezipierende Rolle zu. Dagegen er{\"o}ffnen
neue Formen 'interaktiver' Medien gesteigerte M{\"o}glichkeiten der
R{\"u}ckmeldung und der Mitgestaltung f{\"u}r die Nutzer. Der vorliegende
Band fragt nach der Qualit{\"a}t dieses Medienwandels: Werden
Medien tats{\"a}chlich interaktiv? Was bedeutet die Interaktivit{\"a}t
neuer Medien? Werden die durch neue Medien er{\"o}ffneten Beteiligungsm{\"o}glichkeiten
realisiert?},
image = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/Medienwandel.jpg},
pagetotal = {289},
website = {http://www.springer.com/de/book/9783531156422},
year = {2010}
}
2010.
Logical Ontology Validation Using an Automatic Theorem Prover. Proceedings of the 19th European Conference on Artificial Intelligence (ECAI), 491–496.
BibTeX
@inproceedings{vor:der:Brueck:Stenzhorn:2010,
author = {vor der Br{\"u}ck, Tim and Stenzhorn, Holger},
title = {Logical Ontology Validation Using an Automatic Theorem Prover},
booktitle = {Proceedings of the 19th European Conference on Artificial Intelligence (ECAI)},
pages = {491--496},
address = {Lisbon, Portugal},
abstract = {Ontologies are utilized for a wide range of tasks, like information
retrieval/extraction or text generation, and in a multitude of
domains, such as biology, medicine or business and commerce. To
be actually usable in such real-world scenarios, ontologies usually
have to encompass a large number of factual statements. However,
with increasing size, it becomes very difficult to ensure their
complete correctness. This is particularly true in the case when
an ontology is not hand-crafted but constructed (semi)automatically
through text mining, for example. As a consequence, when inference
mechanisms are applied on these ontologies, even minimal inconsistencies
oftentimes lead to serious errors and are hard to trace back
and find. This paper addresses this issue and describes a method
to validate ontologies using an automatic theorem prover and MultiNet
axioms. This logic-based approach allows to detect many inconsistencies,
which are difficult or even impossible to identify through statistical
methods or by manual investigation in reasonable time. To make
this approach accessible for ontology developers, a graphical
user interface is provided that highlights erroneous axioms directly
in the ontology for quicker fixing.},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/ECAI-216.pdf},
year = {2010}
}
2010.
Hypernymy Extraction Using a Semantic Network Representation. International Journal of Computational Linguistics and
Applications, 1(1):105–119.
BibTeX
@article{vor:der:Brueck:2010,
author = {vor der Br{\"u}ck, Tim},
title = {Hypernymy Extraction Using a Semantic Network Representation},
journal = {International Journal of Computational Linguistics and
Applications},
volume = {1},
number = {1},
pages = {105--119},
abstract = {There are several approaches to detect hypernymy relations from
texts by text mining. Usually these approaches are based on supervised
learning and in a first step are extracting several patterns.
These patterns are then applied to previously unseen texts and
used to recognize hypernym/hyponym pairs. Normally these approaches
are only based on a surface representation or a syntactical tree
structure, i.e., constituency or dependency trees derived by a
syntactical parser. In this work, however, we present an approach
that operates directly on a semantic network (SN), which is generated
by a deep syntactico-semantic analysis. Hyponym/hypernym pairs
are then extracted by the application of graph matching. This
algorithm is combined with a shallow approach enriched with semantic
information.},
pdf = {http://www.gelbukh.com/ijcla/2010-1-2/Hypernymy%20Extraction%20Using.pdf},
website = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.358.533},
year = {2010}
}
2010.
Learning Deep Semantic Patterns for Hypernymy Extraction Following
the Minimum Description Length Principle. Proceedings of the 29th International Conference on Lexis and Grammar (LGC), 39–49.
BibTeX
@inproceedings{vor:der:Brueck:2010:a,
author = {vor der Br{\"u}ck, Tim},
title = {Learning Deep Semantic Patterns for Hypernymy Extraction Following
the Minimum Description Length Principle},
booktitle = {Proceedings of the 29th International Conference on Lexis and Grammar (LGC)},
pages = {39--49},
address = {Belgrade, Serbia},
abstract = {Current approaches of hypernymy acquisition are mostly based on
syntactic or surface representations and extract hypernymy relations
between surface word forms and not word readings. In this paper
we present a purely semantic approach for hypernymy extraction
based on semantic networks (SNs). This approach employs a set
of patterns sub0 (a1,a2) <-- premise where the premise part of
a pattern is given by a SN. Furthermore this paper describes how
the patterns can be derived by relational statistical learning
following the Minimum Description Length principle (MDL). The
evaluation demonstrates the usefulness of the learned patterns
and also of the entire hypernymy extraction system.},
year = {2010}
}
2010.
Learning Semantic Network Patterns for Hypernymy Extraction. Proceedings of the 6th Workshop on Ontologies and Lexical Resources (OntoLex), 38–47.
BibTeX
@inproceedings{vor:der:Brueck:2010:b,
author = {vor der Br{\"u}ck, Tim},
title = {Learning Semantic Network Patterns for Hypernymy Extraction},
booktitle = {Proceedings of the 6th Workshop on Ontologies and Lexical Resources (OntoLex)},
pages = {38--47},
address = {Beijing, China},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/ontolex_brueck_13_2010.pdf},
website = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.358.533},
year = {2010}
}
2010.
Detecting Duplicates with Shallow and Parser-based Methods. Proceedings of the 6th International Conference on Natural Language
Processing and Knowledge Engineering (NLPKE), 142–149.
BibTeX
@inproceedings{vor:der:Brueck:Hartrumpf:Eichhorn:2010:a,
author = {Hartrumpf, Sven and vor der Br{\"u}ck, Tim and Eichhorn, Christian},
title = {Detecting Duplicates with Shallow and Parser-based Methods},
booktitle = {Proceedings of the 6th International Conference on Natural Language
Processing and Knowledge Engineering (NLPKE)},
pages = {142--149},
address = {Beijing, China},
abstract = {Identifying duplicate texts is important in many areas like plagiarism
detection, information retrieval, text summarization, and question
answering. Current approaches are mostly surface-oriented (or
use only shallow syntactic representations) and see each text
only as a token list. In this work however, we describe a deep,
semantically oriented method based on semantic networks which
are derived by a syntactico-semantic parser. Semantically identical
or similar semantic networks for each sentence of a given base
text are efficiently retrieved by using a specialized semantic
network index. In order to detect many kinds of paraphrases the
current base semantic network is varied by applying inferences:
lexico-semantic relations, relation axioms, and meaning postulates.
Some important phenomena occurring in difficult-to-detect duplicates
are discussed. The deep approach profits from background knowledge,
whose acquisition from corpora like Wikipedia is explained briefly.
This deep duplicate recognizer is combined with two shallow duplicate
recognizers in order to guarantee high recall for texts which
are not fully parsable. The evaluation shows that the combined
approach preserves recall and increases precision considerably,
in comparison to traditional shallow methods. For the evaluation,
a standard corpus of German plagiarisms was extended by four diverse
components with an emphasis on duplicates (and not just plagiarisms),
e.g., news feed articles from different web sources and two translations
of the same short story.},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/semdupl-ieee.pdf},
website = {http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=5587838&abstractAccess=no&userType=inst},
year = {2010}
}
September, 2010.
Semantic Duplicate Identification with Parsing and Machine Learning. Proceedings of the 13th International Conference on Text, Speech
and Dialogue (TSD 2010), 6231:84–92.
BibTeX
@inproceedings{vor:der:Brueck:Hartrumpf:Eichhorn:2010:b,
author = {Hartrumpf, Sven and vor der Br{\"u}ck, Tim and Eichhorn, Christian},
title = {Semantic Duplicate Identification with Parsing and Machine Learning},
booktitle = {Proceedings of the 13th International Conference on Text, Speech
and Dialogue (TSD 2010)},
editor = {Sojka, Petr and Hor{\'a}k, Ale{\v{s}} and Kope{\v{c}}ek, Ivan and Pala, Karel},
volume = {6231},
series = {Lecture Notes in Artificial Intelligence},
pages = {84--92},
address = {Brno, Czech Republic},
abstract = {Identifying duplicate texts is important in many areas like plagiarism
detection, information retrieval, text summarization, and question
answering. Current approaches are mostly surface-oriented (or
use only shallow syntactic representations) and see each text
only as a token list. In this work however, we describe a deep,
semantically oriented method based on semantic networks which
are derived by a syntacticosemantic parser. Semantically identical
or similar semantic networks for each sentence of a given base
text are efficiently retrieved by using a specialized index. In
order to detect many kinds of paraphrases the semantic networks
of a candidate text are varied by applying inferences: lexico-
semantic relations, relation axioms, and meaning postulates. Important
phenomena occurring in difficult duplicates are discussed. The
deep approach profits from background knowledge, whose acquisition
from corpora is explained briefly. The deep duplicate recognizer
is combined with two shallow duplicate recognizers in order to
guarantee a high recall for texts which are not fully parsable.
The evaluation shows that the combined approach preserves recall
and increases precision considerably in comparison to traditional
shallow methods.},
month = sep,
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/semdupl-paper.pdf},
website = {http://link.springer.com/chapter/10.1007/978-3-642-15760-8_12},
year = {2010}
}
2010.
Retrieving Meronyms from Texts Using An Automated Theorem Prover. Journal for Language Technology and Computational
Linguistics (JLCL), 25(1):57–81.
BibTeX
@article{vor:der:Brueck:Helbig:2010:b,
author = {vor der Br{\"u}ck, Tim and Helbig, Hermann},
title = {Retrieving Meronyms from Texts Using An Automated Theorem Prover},
journal = {Journal for Language Technology and Computational
Linguistics (JLCL)},
volume = {25},
number = {1},
pages = {57--81},
abstract = {In this paper we present a truly semantic-oriented approach for
meronymy relation extraction. It directly operates, instead of
syntactic trees or surface representations, on semantic networks
(SNs). These SNs are derived from texts (in our case, the German
Wikipedia) by a deep linguistic syntactico-semantic analysis.
The extraction of meronym/holonym pairs is carried out by using,
among other components, an automated theorem prover, whose work
is based on a set of logical axioms. The corresponding algorithm
is combined with a shallow approach enriched with semantic information.
Through the employment of logical methods, the recall and precision
of the semantic patterns pertinent to the extracted relations
can be increased considerably.},
pdf = {http://www.jlcl.org/2010_Heft1/tim_vorderbrueck.pdf},
year = {2010}
}
July, 2010.
Introducing the Bielefeld SaGA Corpus.
BibTeX
@misc{Luecking:Bergmann:2010,
author = {L{\"u}cking, Andy and Bergmann, Kirsten},
title = {Introducing the {Bielefeld} {SaGA} Corpus},
howpublished = {Talk given at \textit{Gesture: Evolution, Brain, and
Linguistic Structures.} 4th Conference of the
International Society for Gesture Studies (ISGS).
Europa Universit{\"a}t Viadrina Frankfurt/Oder},
abstract = {People communicate multimodally. Most prominently, they co-produce
speech and gesture. How do they do that? Studying the interplay
of both modalities has to be informed by empirically observed
communication behavior. We present a corpus built of speech and
gesture data gained in a controlled study. We describe 1) the
setting underlying the data; 2) annotation of the data; 3) reliability
evalution methods and results; and 4) applications of the corpus
in the research domain of speech and gesture alignment.},
address = {Europa Universit{\"a}t Viadrina Frankfurt/Oder},
day = {28},
month = jul,
year = {2010}
}
July, 2010.
A Semantic Account for Iconic Gestures. Gesture: Evolution, Brain, and Linguistic Structures, 210.
BibTeX
@inproceedings{Luecking:2010,
author = {L{\"u}cking, Andy},
title = {A Semantic Account for Iconic Gestures},
booktitle = {Gesture: Evolution, Brain, and Linguistic Structures},
pages = {210},
address = {Europa Universit{\"a}t Viadrina Frankfurt/Oder},
organization = {4th Conference of the International Society for
Gesture Studies (ISGS)},
keywords = {own},
month = jul,
pdf = {https://pub.uni-bielefeld.de/download/2318565/2319962},
website = {http://pub.uni-bielefeld.de/publication/2318565},
year = {2010}
}
May, 2010.
The Bielefeld Speech and Gesture Alignment Corpus (SaGA). Multimodal Corpora: Advances in Capturing, Coding and Analyzing Multimodality, 92–98.
BibTeX
@inproceedings{Luecking:et:al:2010,
author = {Lücking, Andy and Bergmann, Kirsten and Hahn, Florian and Kopp, Stefan
and Rieser, Hannes},
title = {The {Bielefeld} Speech and Gesture Alignment Corpus ({SaGA})},
booktitle = {Multimodal Corpora: Advances in Capturing, Coding and Analyzing Multimodality},
pages = {92--98},
address = {Malta},
organization = {7th International Conference for Language Resources
and Evaluation (LREC 2010)},
abstract = {People communicate multimodally. Most prominently, they co-produce
speech and gesture. How do they do that? Studying the interplay
of both modalities has to be informed by empirically observed
communication behavior. We present a corpus built of speech and
gesture data gained in a controlled study. We describe 1) the
setting underlying the data; 2) annotation of the data; 3) reliability
evaluation methods and results; and 4) applications of the corpus
in the research domain of speech and gesture alignment.},
keywords = {own},
month = may,
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/saga-corpus.pdf},
website = {http://pub.uni-bielefeld.de/publication/2001935},
year = {2010}
}
2010.
English to Bangla Phrase – Based Machine Translation. The 14th Annual Conference of The European Association for Machine
Translation. Saint-Raphaël, France, 27-28 May.
BibTeX
@inproceedings{Zahurul:Tiedemann:Eisele:2010,
author = {Islam, Md. Zahurul and Tiedemann, Jörg and Eisele, Andreas},
title = {{English} to {Bangla} Phrase-Based Machine Translation},
booktitle = {The 14th Annual Conference of The European Association for Machine
Translation. Saint-Raphaël, France, 27-28 May},
owner = {zahurul},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/English_to_Bangla_Phrase–Based_Machine_Translation.pdf},
timestamp = {2011.08.02},
year = {2010}
}
May, 2010.
GermanPolarityClues: A Lexical Resource for German Sentiment Analysis. Proceedings of the Seventh conference on International Language
Resources and Evaluation (LREC '10).
BibTeX
@inproceedings{Waltinger:2010:a,
author = {Waltinger, Ulli},
title = {{GermanPolarityClues}: A Lexical Resource for {German} Sentiment Analysis},
booktitle = {Proceedings of the Seventh conference on International Language
Resources and Evaluation (LREC '10)},
editor = {Calzolari, Nicoletta and Choukri, Khalid and Maegaard, Bente
and Mariani, Joseph and Odijk, Jan and Piperidis, Stelios
and Rosner, Mike and Tapias, Daniel},
address = {Valletta, Malta},
publisher = {European Language Resources Association (ELRA)},
isbn = {2-9517408-6-7},
language = {english},
month = may,
pdf = {http://www.ulliwaltinger.de/pdf/91_Paper.pdf},
website = {http://www.ulliwaltinger.de/sentiment/},
year = {2010}
}
2010.
Towards a Simulation Model of Dialogical Alignment. Proceedings of the 8th International Conference on the Evolution
of Language (Evolang8), 14-17 April 2010, Utrecht, 238–245.
BibTeX
@inproceedings{Mehler:Weiss:Menke:Luecking:2010,
author = {Mehler, Alexander and Wei{\ss}, Petra and Menke, Peter and Lücking, Andy},
title = {Towards a Simulation Model of Dialogical Alignment},
booktitle = {Proceedings of the 8th International Conference on the Evolution
of Language (Evolang8), 14-17 April 2010, Utrecht},
pages = {238--245},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Alexander_Mehler_Petra_Weiss_Peter_Menke_Andy_Luecking.pdf},
website = {http://www.let.uu.nl/evolang2010.nl/},
year = {2010}
}
2010.
On the Notion of Genre in Digital Preservation. Automation in Digital Preservation.
BibTeX
@inproceedings{Foscarini:Kim:Lee:Mehler:Oliver:Ross:2010,
author = {Foscarini, Fiorella and Kim, Yunhyong and Lee, Christopher A.
and Mehler, Alexander and Oliver, Gillian and Ross, Seamus},
title = {On the Notion of Genre in Digital Preservation},
booktitle = {Automation in Digital Preservation},
editor = {Chanod, Jean-Pierre and Dobreva, Milena and Rauber, Andreas and Ross, Seamus},
number = {10291},
series = {Dagstuhl Seminar Proceedings},
address = {Dagstuhl, Germany},
publisher = {Schloss Dagstuhl - Leibniz-Zentrum f{\"u}r Informatik,
Germany},
annote = {Keywords: Digital preservation, genre analysis,
context modeling, diplomatics, information retrieval},
issn = {1862-4405},
pdf = {http://drops.dagstuhl.de/opus/volltexte/2010/2763/pdf/10291.MehlerAlexander.Paper.2763.pdf},
website = {http://drops.dagstuhl.de/opus/volltexte/2010/2763},
year = {2010}
}
2010.
Time Series of Linguistic Networks by Example of the Patrologia Latina. Proceedings of INFORMATIK 2010: Service Science, September 27
- October 01, 2010, Leipzig, 2:609–616.
BibTeX
@inproceedings{Mehler:Gleim:Waltinger:Diewald:2010,
author = {Mehler, Alexander and Gleim, Rüdiger and Waltinger, Ulli and Diewald, Nils},
title = {Time Series of Linguistic Networks by Example of the Patrologia Latina},
booktitle = {Proceedings of INFORMATIK 2010: Service Science, September 27
- October 01, 2010, Leipzig},
editor = {F{\"a}hnrich, Klaus-Peter and Franczyk, Bogdan},
volume = {2},
series = {Lecture Notes in Informatics},
pages = {609-616},
publisher = {GI},
pdf = {http://subs.emis.de/LNI/Proceedings/Proceedings176/586.pdf},
year = {2010}
}
2010.
eHumanities Desktop - An Architecture for Flexible Annotation
in Iconographic Research. Proceedings of the 6th International Conference on Web Information
Systems and Technologies (WEBIST '10), April 7-10, 2010, Valencia.
BibTeX
@inproceedings{Gleim:Warner:Mehler:2010,
author = {Gleim, Rüdiger and Warner, Paul and Mehler, Alexander},
title = {{eHumanities} Desktop - An Architecture for Flexible Annotation
in Iconographic Research},
booktitle = {Proceedings of the 6th International Conference on Web Information
Systems and Technologies (WEBIST '10), April 7-10, 2010, Valencia},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/gleim_warner_mehler_2010.pdf},
website = {https://www.researchgate.net/publication/220724277_eHumanities_Desktop_-_An_Architecture_for_Flexible_Annotation_in_Iconographic_Research},
year = {2010}
}
2010.
The Ariadne System: A flexible and extensible framework for the
modeling and storage of experimental data in the humanities. Proceedings of LREC 2010.
BibTeX
@inproceedings{Menke:Mehler:2010,
author = {Menke, Peter and Mehler, Alexander},
title = {The {Ariadne} System: A flexible and extensible framework for the
modeling and storage of experimental data in the humanities},
booktitle = {Proceedings of LREC 2010},
address = {Malta},
publisher = {ELDA},
abstract = {This paper introduces the Ariadne Corpus Management System. First,
the underlying data model is presented which enables users to
represent and process heterogeneous data sets within a single,
consistent framework. Secondly, a set of automatized procedures
is described that offers assistance to researchers in various
data-related use cases. Finally, an approach to easy yet powerful
data retrieval is introduced in form of a specialised querying
language for multimodal data.},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/menke_mehler_2010.pdf},
website = {http://arnetminer.org/publication/the-ariadne-system-a-flexible-and-extensible-framework-for-the-modeling-and-storage-of-experimental-data-in-the-humanities-2839925.html},
year = {2010}
}
2010.
Einleitung: Der aktuelle Medienwandel im Blick einer interdisziplinären
Medienwissenschaft. In: Medienwandel als Wandel von Interaktionsformen, 7–16.
VS Verlag für Sozialwissenschaften.
BibTeX
@inbook{Sutter2010,
author = {Sutter, Tilmann and Mehler, Alexander},
editor = {Sutter, Tilmann and Mehler, Alexander},
title = {Einleitung: Der aktuelle Medienwandel im Blick einer interdisziplin{\"a}ren
Medienwissenschaft},
pages = {7--16},
publisher = {VS Verlag f{\"u}r Sozialwissenschaften},
address = {Wiesbaden},
abstract = {Die Herausforderung, die der Wandel von Kommunikationsmedien f{\"u}r
die Medienwissenschaft darstellt, resultiert nicht nur aus der
ungeheuren Beschleunigung des Medienwandels. Die Herausforderung
stellt sich auch mit der Frage, welches die neuen Formen und Strukturen
sind, die aus dem Wandel der Medien hervorgehen. R{\"u}ckt man
diese Frage in den Fokus der {\"U}berlegungen, kommen erstens
Entwicklungen im Wechsel von Massenmedien zu neuen, „interaktiven“
Medien in den Blick. Dies betrifft den Wandel von den alten Medien
in Form von Einwegkommunikation zu den neuen Medien in Form von
Netzkommunikation. Dieser Wandel wurde in zahlreichen Analysen
als eine Revolution beschrieben: Im Unterschied zur einseitigen,
r{\"u}ckkopplungsarmen Kommunikationsform der Massenmedien sollen
neue, computergest{\"u}tzte Formen der Medienkommunikation „interaktiv“
sein, d.h. gesteigerte R{\"u}ckkopplungs- und Eingriffsm{\"o}glichkeiten
f{\"u}r die Adressaten und Nutzer bieten. Sozialwissenschaftlich
bedeutsam ist dabei die Einsch{\"a}tzung der Qualit{\"a}t und
des Umfangs dieser neuen M{\"o}glichkeiten und Leistungen. Denn
bislang bedeutete Medienwandel im Kern eine zunehmende Ausdifferenzierung
alter und neuer Medien mit je spezifischen Leistungen, d.h. neue
Medien ersetzen die {\"a}lteren nicht, sondern sie erg{\"a}nzen
und erweitern sie. Allerdings wird im Zuge des aktuellen Medienwandels
immer deutlicher, dass die neuen Medien durchaus imstande sind,
die Leistungen massenmedialer Verbreitung von Kommunikation zu
{\"u}bernehmen. Stehen wir also, wie das schon seit l{\"a}ngerem
k{\"u}hn vorhergesagt wird, vor der Etablierung eines Universalmediums,
das in der Lage ist, die Formen und Funktionen anderer Medien
zu {\"u}bernehmen?},
booktitle = {Medienwandel als Wandel von Interaktionsformen},
doi = {10.1007/978-3-531-92292-8_1},
isbn = {978-3-531-92292-8},
url = {https://doi.org/10.1007/978-3-531-92292-8_1},
year = {2010}
}
2010.
Computing Semantic Similarity from Bilingual Dictionaries. Proceedings of the 10th International Conference on the Statistical
Analysis of Textual Data (JADT-2010), 1217–1225.
BibTeX
@inproceedings{Eger:Sejane:2010,
author = {Eger, Steffen and Sejane, Ineta},
title = {Computing Semantic Similarity from Bilingual Dictionaries},
booktitle = {Proceedings of the 10th International Conference on the Statistical
Analysis of Textual Data (JADT-2010)},
pages = {1217--1225},
address = {Rome, Italy},
publisher = {JADT-2010},
pdf = {http://www.ledonline.it/ledonline/JADT-2010/allegati/JADT-2010-1217-1226_167-Eger.pdf},
year = {2010}
}
2010.
Validating Meronymy Hypotheses with Support Vector Machines and Graph Kernels. Proceedings of the Ninth International Conference on Machine Learning
and Applications (ICMLA), 243–250.
BibTeX
@inproceedings{vor:der:Brueck:Helbig:2010:a,
  author    = {vor der Brück, Tim and Helbig, Hermann},
  title     = {Validating Meronymy Hypotheses with Support Vector Machines and Graph Kernels},
  booktitle = {Proceedings of the Ninth International Conference on Machine Learning
               and Applications (ICMLA)},
  pages     = {243--250},
  address   = {Washington, D.C.},
  publisher = {IEEE Press},
  year      = {2010},
  abstract  = {There is a substantial body of work on the extraction of relations
               from texts, most of which is based on pattern matching or on applying
               tree kernel functions to syntactic structures. Whereas pattern
               application is usually more efficient, tree kernels can be superior
               when assessed by the F-measure. In this paper, we introduce a
               hybrid approach to extracting meronymy relations, which is based
               on both patterns and kernel functions. In a first step, meronymy
               relation hypotheses are extracted from a text corpus by applying
               patterns. In a second step these relation hypotheses are validated
               by using several shallow features and a graph kernel approach.
               In contrast to other meronymy extraction and validation methods
               which are based on surface or syntactic representations we use
               a purely semantic approach based on semantic networks. This involves
               analyzing each sentence of the Wikipedia corpus by a deep syntactico-semantic
               parser and converting it into a semantic network. Meronymy relation
               hypotheses are extracted from the semantic networks by means of
               an automated theorem prover, which employs a set of logical axioms
               and patterns in the form of semantic networks. The meronymy candidates
               are then validated by means of a graph kernel approach based on
               common walks. The evaluation shows that this method achieves considerably
               higher accuracy, recall, and F-measure than a method using purely
               shallow validation.},
  website   = {http://www.computer.org/csdl/proceedings/icmla/2010/4300/00/4300a243-abs.html}
}
2009
2009.
Riding the Rough Waves of Genre on the Web: Concepts and Research Questions. Genres on the Web: Computational Models and Empirical Studies, 3–32.
BibTeX
@incollection{Santini:Mehler:Sharoff:2009,
author = {Santini, Marina and Mehler, Alexander and Sharoff, Serge},
title = {Riding the Rough Waves of Genre on the {Web}: Concepts and Research Questions},
booktitle = {Genres on the Web: Computational Models and Empirical Studies},
publisher = {Springer},
editor = {Mehler, Alexander and Sharoff, Serge and Santini, Marina},
pages = {3--32},
address = {Berlin/New York},
abstract = {This chapter outlines the state of the art of empirical and computational
webgenre research. First, it highlights why the concept of genre
is profitable for a range of disciplines. At the same time, it
lists a number of recent interpretations that can inform and influence
present and future genre research. Last but not least, it breaks
down a series of open issues that relate to the modelling of the
concept of webgenre in empirical and computational studies.},
year = {2009}
}
2009.
eHumanities Desktop – eine webbasierte Arbeitsumgebung für die
geisteswissenschaftliche Fachinformatik. Proceedings of the Symposium "Sprachtechnologie und eHumanities",
26.–27. Februar, Duisburg-Essen University.
BibTeX
@inproceedings{Mehler:Gleim:Waltinger:Ernst:Esch:Feith:2009,
author = {Mehler, Alexander and Gleim, Rüdiger and Waltinger, Ulli and Ernst, Alexandra
and Esch, Dietmar and Feith, Tobias},
title = {{eHumanities} Desktop – eine webbasierte Arbeitsumgebung für die
geisteswissenschaftliche Fachinformatik},
booktitle = {Proceedings of the Symposium "Sprachtechnologie und eHumanities",
26.–27. Februar, Duisburg-Essen University},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_gleim_waltinger_ernst_esch_feith_2009.pdf},
website = {http://duepublico.uni-duisburg-essen.de/servlets/DocumentServlet?id=37041},
year = {2009}
}
2009.
Bausteine eines Literary Memory Information System (LiMeS) am
Beispiel der Kafka-Forschung. Proceedings of the Symposium "Sprachtechnologie und eHumanities",
26.–27. Februar, Duisburg-Essen University.
BibTeX
@inproceedings{Wagner:Mehler:Wolff:Dotzler:2009,
author = {Wagner, Benno and Mehler, Alexander and Wolff, Christian and Dotzler, Bernhard},
title = {Bausteine eines Literary Memory Information System ({LiMeS}) am
Beispiel der {Kafka}-Forschung},
booktitle = {Proceedings of the Symposium "Sprachtechnologie und eHumanities",
26.–27. Februar, Duisburg-Essen University},
abstract = {In dem Paper beschreiben wir Bausteine eines Literary Memory Information
System (LiMeS), das die literaturwissenschaftliche Erforschung
von so genannten Matrixtexten – das sind Prim{\"a}rtexte eines
bestimmten literarischen Gesamtwerks – unter dem Blickwinkel gro{\ss}er
Mengen so genannter Echotexte (Topia 1984; Wagner/Reinhard 2007)
– das sind Subtexte im Sinne eines literaturwissenschaftlichen
Intertextualit{\"a}tsbegriffs – erm{\"o}glicht. Den Ausgangspunkt
dieses computerphilologischen Informationssystems bildet ein Text-Mining-Modell
basierend auf dem Intertextualit{\"a}tsbegriff in Verbindung mit
dem Begriff des Semantic Web (Mehler, 2004b, 2005a, b, Wolff 2005).
Wir zeigen, inwiefern dieses Modell {\"u}ber bestehende Informationssystemarchitekturen
hinausgeht und schlie{\ss}en einen Br{\"u}ckenschlag zur derzeitigen
Entwicklung von Arbeitsumgebungen in der geisteswissenschaftlichen
Fachinformatik in Form eines eHumanities Desktop.},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/wagner_mehler_wolff_dotzler_2009.pdf},
website = {http://epub.uni-regensburg.de/6795/},
year = {2009}
}
2009.
A Two-Level Approach to Web Genre Classification. Proceedings of the 5th International Conference on Web Information
Systems and Technologies (WEBIST '09), March 23-26, 2009, Lisboa.
BibTeX
@inproceedings{Waltinger:Mehler:Wegner:2009,
  author    = {Waltinger, Ulli and Mehler, Alexander and Wegner, Armin},
  title     = {A Two-Level Approach to Web Genre Classification},
  booktitle = {Proceedings of the 5th International Conference on Web Information
               Systems and Technologies (WEBIST '09), March 23-26, 2009, Lisboa},
  year      = {2009},
  abstract  = {This paper presents an approach of two-level categorization of
               web pages. In contrast to related approaches the model additionally
               explores and categorizes functionally and thematically demarcated
               segments of the hypertext types to be categorized. By classifying
               these segments conclusions can be drawn about the type of the
               corresponding compound web document.},
  pdf       = {http://www.ulliwaltinger.de/pdf/Webist_2009_TwoLevel_Genre_Classification_WaltingerMehlerWegner.pdf}
}
2009.
Structure Formation in the Web. A Graph-Theoretical Model of Hypertext Types. Linguistic Modeling of Information and Markup Languages. Contributions
to Language Technology.
BibTeX
@incollection{Mehler:2009:b,
  author    = {Mehler, Alexander},
  title     = {Structure Formation in the Web. A Graph-Theoretical Model of Hypertext Types},
  booktitle = {Linguistic Modeling of Information and Markup Languages. Contributions
               to Language Technology},
  editor    = {Witt, Andreas and Metzing, Dieter},
  series    = {Text, Speech and Language Technology},
  publisher = {Springer},
  address   = {Dordrecht},
  year      = {2009},
  abstract  = {In this chapter we develop a representation model of web document
               networks. Based on the notion of uncertain web document structures,
               the model is defined as a template which grasps nested manifestation
               levels of hypertext types. Further, we specify the model on the
               conceptual, formal and physical level and exemplify it by reconstructing
               competing web document models.},
  website   = {http://www.springerlink.com/content/t27782w8j2125112/}
}
2009.
eHumanities Desktop – An extensible Online System for Corpus Management
and Analysis. 5th Corpus Linguistics Conference, University of Liverpool.
BibTeX
@inproceedings{Gleim:Mehler:Waltinger:Menke:2009,
author = {Gleim, Rüdiger and Mehler, Alexander and Waltinger, Ulli and Menke, Peter},
title = {{eHumanities} Desktop – An extensible Online System for Corpus Management
and Analysis},
booktitle = {5th Corpus Linguistics Conference, University of Liverpool},
abstract = {This paper presents the eHumanities Desktop - an online system
for corpus management and analysis in support of computing in
the humanities. Design issues and the overall architecture are
described, as well as an outline of the applications offered by
the system.},
pdf = {http://www.ulliwaltinger.de/pdf/eHumanitiesDesktop-AnExtensibleOnlineSystem-CL2009.pdf},
website = {http://www.ulliwaltinger.de/ehumanities-desktop-an-extensible-online-system-for-corpus-management-and-analysis/},
year = {2009}
}
2009.
A Structural Model of Semiotic Alignment: The Classification of
Multimodal Ensembles as a Novel Machine Learning Task. Proceedings of IEEE Africon 2009, September 23-25, Nairobi, Kenya.
BibTeX
@inproceedings{Mehler:Luecking:2009,
  author    = {Mehler, Alexander and Lücking, Andy},
  title     = {A Structural Model of Semiotic Alignment: The Classification of
               Multimodal Ensembles as a Novel Machine Learning Task},
  booktitle = {Proceedings of IEEE Africon 2009, September 23-25, Nairobi, Kenya},
  publisher = {IEEE},
  year      = {2009},
  abstract  = {In addition to the well-known linguistic alignment processes in
               dyadic communication – e.g., phonetic, syntactic, semantic alignment
               – we provide evidence for a genuine multimodal alignment process,
               namely semiotic alignment. Communicative elements from different
               modalities 'routinize into' cross-modal 'super-signs', which we
               call multimodal ensembles. Computational models of human communication
               are in need of expressive models of multimodal ensembles. In this
               paper, we exemplify semiotic alignment by means of empirical examples
               of the building of multimodal ensembles. We then propose a graph
               model of multimodal dialogue that is expressive enough to capture
               multimodal ensembles. In line with this model, we define a novel
               task in machine learning with the aim of training classifiers
               that can detect semiotic alignment in dialogue. This model is
               in support of approaches which need to gain insights into realistic
               human-machine communication.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_luecking_2009.pdf},
  website   = {http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?reload=true&arnumber=5308098}
}
2009.
Generalized Shortest Paths Trees: A Novel Graph Class Applied
to Semiotic Networks. Analysis of Complex Networks: From Biology to Linguistics, 175–220.
BibTeX
@incollection{Mehler:2009:c,
author = {Mehler, Alexander},
title = {Generalized Shortest Paths Trees: A Novel Graph Class Applied
to Semiotic Networks},
booktitle = {Analysis of Complex Networks: From Biology to Linguistics},
publisher = {Wiley-VCH},
editor = {Dehmer, Matthias and Emmert-Streib, Frank},
pages = {175--220},
address = {Weinheim},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_2009_b.pdf},
website = {https://www.researchgate.net/publication/255666602_1_Generalised_Shortest_Paths_Trees_A_Novel_Graph_Class_Applied_to_Semiotic_Networks},
year = {2009}
}
2009.
A Readability Checker Based on Deep Semantic Indicators. Human Language Technology. Challenges of the Information Society, 5603:232–244.
BibTeX
@incollection{vor:der:Brueck:Hartrumpf:2009,
author = {vor der Brück, Tim and Hartrumpf, Sven},
title = {A Readability Checker Based on Deep Semantic Indicators},
booktitle = {Human Language Technology. Challenges of the Information Society},
publisher = {Springer},
editor = {Vetulani, Zygmunt and Uszkoreit, Hans},
volume = {5603},
series = {Lecture Notes in Computer Science (LNCS)},
pages = {232--244},
address = {Berlin, Germany},
abstract = {One major reason that readability checkers are still far away
from judging the understandability of texts consists in the fact
that no semantic information is used. Syntactic, lexical, or morphological
information can only give limited access for estimating the cognitive
difficulties for a human being to comprehend a text. In this paper
however, we present a readability checker which uses semantic
information in addition. This information is represented as semantic
networks and is derived by a deep syntactico-semantic analysis.
We investigate in which situations a semantic readability indicator
can lead to superior results in comparison with ordinary surface
indicators like sentence length. Finally, we compute the weights
of our semantic indicators in the readability function based on
the user ratings collected in an online evaluation.},
website = {http://rd.springer.com/chapter/10.1007/978-3-642-04235-5_20},
year = {2009}
}
2009.
Hypernymy Extraction Based on Shallow and Deep Patterns. From Form To Meaning: Processing Texts Automatically, Proceedings
of the Biennial GSCL Conference 2009, 41–52.
BibTeX
@inproceedings{vor:der:Brueck:2009:b,
author = {vor der Brück, Tim},
title = {Hypernymy Extraction Based on Shallow and Deep Patterns},
booktitle = {From Form To Meaning: Processing Texts Automatically, Proceedings
of the Biennial GSCL Conference 2009},
editor = {Chiarcos, Christian and {Eckart de Castilho}, Richard},
pages = {41--52},
address = {Potsdam, Germany},
abstract = {There exist various approaches to construct taxonomies by text
mining. Usually these approaches are based on supervised learning
and extract in a first step several patterns. These patterns are
then applied to previously unseen texts and used to recognize
hypernym/hyponym pairs. Normally these approaches are only based
on a surface representation or a syntactic tree structure, i.e.,
a constituency or dependency tree derived by a syntactical parser.
In this work we present an approach which, additionally to shallow
patterns, directly operates on semantic networks which are derived
by a deep linguistic syntacticosemantic analysis. Furthermore,
the shallow approach heavily depends on semantic information,
too. It is shown that recall and precision can be improved considerably
than by relying on shallow patterns alone.},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/gscl09_12_brueck.pdf},
year = {2009}
}
2009.
Cross-lingual Alignment and Completion of Wikipedia Templates. Third International Workshop on Cross Lingual Information Access:
Addressing the Information Need of Multilingual Societies (CLIAWS3),
Boulder, Colorado, USA, June 4.
BibTeX
@inproceedings{Bouma:Duarte:Zahurul:2009,
author = {Bouma, Gosse and Duarte, Sergio and Islam, Md. Zahurul},
title = {Cross-lingual Alignment and Completion of {Wikipedia} Templates},
booktitle = {Third International Workshop on Cross Lingual Information Access:
Addressing the Information Need of Multilingual Societies (CLIAWS3),
Boulder, Colorado, USA, June 4},
abstract = {For many languages, the size of Wikipedia is an order of magnitude
smaller than the English Wikipedia. We present a method for cross-lingual
alignment of template and infobox attributes in Wikipedia. The
alignment is used to add and complete templates and infoboxes
in one language with information derived from Wikipedia in another
language. We show that alignment between English and Dutch Wikipedia
is accurate and that the result can be used to expand the number
of template attribute-value pairs in Dutch Wikipedia by 50\%.
Furthermore, the alignment provides valuable information for normalization
of template and attribute names and can be used to detect potential
inconsistencies},
owner = {zahurul},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Cross-lingual_Alignment_and_Completion_of_Wikipedia_Templates.pdf},
timestamp = {2011.08.02},
website = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.148.1418},
year = {2009}
}
2009.
Polarity Reinforcement: Sentiment Polarity Identification By Means
Of Social Semantics. Proceedings of the IEEE Africon 2009, September 23-25, Nairobi, Kenya.
BibTeX
@inproceedings{Waltinger:2009:a,
author = {Waltinger, Ulli},
title = {Polarity Reinforcement: Sentiment Polarity Identification By Means
Of Social Semantics},
booktitle = {Proceedings of the IEEE Africon 2009, September 23-25, Nairobi, Kenya},
internal-note = {Apparent duplicate of entry Waltinger:2009:b -- merge and keep one key},
pdf = {http://www.ulliwaltinger.de/pdf/AfriconIEEE_2009_SentimentPolarity_Waltinger.pdf},
website = {http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=5308104},
year = {2009}
}
2009.
From Social Networks To Distributional Properties: A Comparative
Study On Computing Semantic Relatedness. Proceedings of the 31st Annual Conference of the Cognitive Science Society, 3016–3021.
BibTeX
@inproceedings{Waltinger:Cramer:Wandmacher:2009:a,
author = {Waltinger, Ulli and Cramer, Irene and Wandmacher, Tonio},
title = {From Social Networks To Distributional Properties: A Comparative
Study On Computing Semantic Relatedness},
booktitle = {Proceedings of the 31st Annual Conference of the Cognitive Science Society},
editor = {Taatgen, N. A. and van Rijn, H.},
pages = {3016--3021},
address = {Austin, TX},
publisher = {Cognitive Science Society},
pdf = {http://csjarchive.cogsci.rpi.edu/proceedings/2009/papers/661/paper661.pdf},
year = {2009}
}
2009.
Polarity Reinforcement: Sentiment Polarity Identification By Means
Of Social Semantics. Proceedings of the IEEE Africon 2009, September 23-25, Nairobi, Kenya.
BibTeX
@inproceedings{Waltinger:2009:b,
author = {Waltinger, Ulli},
title = {Polarity Reinforcement: Sentiment Polarity Identification By Means
Of Social Semantics},
booktitle = {Proceedings of the IEEE Africon 2009, September 23-25, Nairobi, Kenya},
internal-note = {Apparent duplicate of entry Waltinger:2009:a, which also carries pdf and website fields -- merge and keep one key},
year = {2009}
}
2009.
From Social Networks To Distributional Properties: A Comparative
Study On Computing Semantic Relatedness. Proceedings of the 31st Annual Conference of the Cognitive Science Society, 3016–3021.
BibTeX
@inproceedings{Waltinger:Cramer:Wandmacher:2009:b,
author = {Waltinger, Ulli and Cramer, Irene and Wandmacher, Tonio},
title = {From Social Networks To Distributional Properties: A Comparative
Study On Computing Semantic Relatedness},
booktitle = {Proceedings of the 31st Annual Conference of the Cognitive Science Society},
editor = {Taatgen, N. A. and van Rijn, H.},
pages = {3016--3021},
address = {Austin, TX},
publisher = {Cognitive Science Society},
internal-note = {Apparent duplicate of entry Waltinger:Cramer:Wandmacher:2009:a -- merge and keep one key},
year = {2009}
}
2009.
Enhancing Document Modeling by Means of Open Topic Models: Crossing
the Frontier of Classification Schemes in Digital Libraries by
Example of the DDC. Library Hi Tech, 27(4):520–539.
BibTeX
@article{Mehler:Waltinger:2009:b,
author = {Mehler, Alexander and Waltinger, Ulli},
title = {Enhancing Document Modeling by Means of Open Topic Models: Crossing
the Frontier of Classification Schemes in Digital Libraries by
Example of the {DDC}},
journal = {Library Hi Tech},
volume = {27},
number = {4},
pages = {520--539},
abstract = {Purpose: We present a topic classification model using the Dewey
Decimal Classification (DDC) as the target scheme. This is done
by exploring metadata as provided by the Open Archives Initiative
(OAI) to derive document snippets as minimal document representations.
The reason is to reduce the effort of document processing in digital
libraries. Further, we perform feature selection and extension
by means of social ontologies and related web-based lexical resources.
This is done to provide reliable topic-related classifications
while circumventing the problem of data sparseness. Finally, we
evaluate our model by means of two language-specific corpora.
This paper bridges digital libraries on the one hand and computational
linguistics on the other. The aim is to make accessible computational
linguistic methods to provide thematic classifications in digital
libraries based on closed topic models as the DDC. Design/methodology/approach:
text classification, text-technology, computational linguistics,
computational semantics, social semantics. Findings: We show that
SVM-based classifiers perform best by exploring certain selections
of OAI document metadata. Research limitations/implications: The
findings show that it is necessary to further develop SVM-based
DDC-classifiers by using larger training sets possibly for more
than two languages in order to get better F-measure values. Practical
implications: We can show that DDC-classifications come into reach
which primarily explore OAI metadata. Originality/value: We provide
algorithmic and formal-mathematical information how to build DDC-classifiers
for digital libraries.},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_waltinger_2009_b.pdf},
website = {http://biecoll.ub.uni-bielefeld.de/frontdoor.php?source_opus=5001&la=de},
year = {2009}
}
2009.
The eHumanities Desktop – An Online System for Corpus Management
and Analysis in Support of Computing in the Humanities. Proceedings of the Demonstrations Session of the 12th Conference
of the European Chapter of the Association for Computational Linguistics
EACL 2009, 30 March – 3 April, Athens.
BibTeX
@inproceedings{Gleim:Waltinger:Ernst:Mehler:Esch:Feith:2009,
author = {Gleim, Rüdiger and Waltinger, Ulli and Ernst, Alexandra and Mehler, Alexander
and Esch, Dietmar and Feith, Tobias},
title = {The {eHumanities} Desktop – An Online System for Corpus Management
and Analysis in Support of Computing in the Humanities},
booktitle = {Proceedings of the Demonstrations Session of the 12th Conference
of the European Chapter of the Association for Computational Linguistics
EACL 2009, 30 March – 3 April, Athens},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/gleim_waltinger_ernst_mehler_esch_feith_2009.pdf},
year = {2009}
}
2009.
Artifizielle Interaktivität. Eine semiotische Betrachtung. Medienwandel als Wandel von Interaktionsformen – von frühen Medienkulturen
zum Web 2.0.
BibTeX
@incollection{Mehler:2009:d,
author = {Mehler, Alexander},
title = {Artifizielle Interaktivit{\"a}t. Eine semiotische Betrachtung},
booktitle = {Medienwandel als Wandel von Interaktionsformen -- von fr{\"u}hen Medienkulturen
zum Web 2.0},
publisher = {VS},
editor = {Sutter, Tilmann and Mehler, Alexander},
address = {Wiesbaden},
year = {2009}
}
2009.
The Feature Difference Coefficient: Classification by Means of
Feature Distributions. Proceedings of the Conference on Text Mining Services (TMS 2009), 159–168.
BibTeX
@inproceedings{Waltinger:Mehler:2009:a,
author = {Waltinger, Ulli and Mehler, Alexander},
title = {The Feature Difference Coefficient: Classification by Means of
Feature Distributions},
booktitle = {Proceedings of the Conference on Text Mining Services (TMS 2009)},
series = {Leipziger Beitr{\"a}ge zur Informatik: Band XIV},
pages = {159--168},
address = {Leipzig},
publisher = {Leipzig University},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/waltinger_mehler_2009_a.pdf},
year = {2009}
}
2009.
Automatic Genre Identification: Issues and Prospects.
Journal for Language Technology and Computational
Linguistics (JLCL), 24(1).
GSCL.
BibTeX
@book{Santini:Rehm:Sharoff:Mehler:2009,
editor = {Santini, Marina and Rehm, Georg and Sharoff, Serge and Mehler, Alexander},
title = {Automatic Genre Identification: Issues and Prospects},
publisher = {GSCL},
volume = {24},
number = {1},
series = {Journal for Language Technology and Computational
Linguistics (JLCL)},
image = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/AutomaticGenreIdentification.png},
pagetotal = {148},
pdf = {http://www.jlcl.org/2009_Heft1/JLCL24(1).pdf},
year = {2009}
}
2009.
Social Semantics And Its Evaluation By Means of Closed Topic Models:
An SVM-Classification Approach Using Semantic Feature Replacement
By Topic Generalization. Proceedings of the Biennial GSCL Conference 2009, September 30
– October 2, Universität Potsdam.
BibTeX
@inproceedings{Waltinger:Mehler:Gleim:2009:a,
author = {Waltinger, Ulli and Mehler, Alexander and Gleim, R{\"u}diger},
title = {Social Semantics And Its Evaluation By Means of Closed Topic Models:
An SVM-Classification Approach Using Semantic Feature Replacement
By Topic Generalization},
booktitle = {Proceedings of the Biennial GSCL Conference 2009, September 30
-- October 2, Universit{\"a}t Potsdam},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/GSCL_2009_WaltingerMehlerGleim_camera_ready.pdf},
year = {2009}
}
2009.
Social Semantics and Its Evaluation By Means Of Semantic Relatedness
And Open Topic Models. IEEE/WIC/ACM International Conference on Web Intelligence, September
15–18, Milano.
BibTeX
@inproceedings{Waltinger:Mehler:2009:c,
author = {Waltinger, Ulli and Mehler, Alexander},
title = {Social Semantics and Its Evaluation By Means Of Semantic Relatedness
And Open Topic Models},
booktitle = {IEEE/WIC/ACM International Conference on Web Intelligence, September
15--18, Milano},
abstract = {This paper presents an approach using social semantics for the
task of topic labelling by means of Open Topic Models. Our approach
utilizes a social ontology to create an alignment of documents
within a social network. Comprised category information is used
to compute a topic generalization. We propose a feature-frequency-based
method for measuring semantic relatedness which is needed in order
to reduce the number of document features for the task of topic
labelling. This method is evaluated against multiple human judgement
experiments comprising two languages and three different resources.
Overall the results show that social ontologies provide a rich
source of terminological knowledge. The performance of the semantic
relatedness measure with correlation values of up to .77 are quite
promising. Results on the topic labelling experiment show, with
an accuracy of up to .79, that our approach can be a valuable
method for various NLP applications.},
website = {http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=5284920&abstractAccess=no&userType=inst},
year = {2009}
}
2009.
Approximation of the Parameters of a Readability Formula by Robust Regression. Machine Learning and Data Mining in Pattern recognition: Poster
Proceedings of the International Conference on Machine Learning
and Data Mining (MLDM), 115–125.
BibTeX
@inproceedings{vor:der:Brueck:2009,
author = {vor der Br{\"u}ck, Tim},
title = {Approximation of the Parameters of a Readability Formula by Robust Regression},
booktitle = {Machine Learning and Data Mining in Pattern Recognition: Poster
Proceedings of the International Conference on Machine Learning
and Data Mining (MLDM)},
pages = {115--125},
address = {Leipzig, Germany},
abstract = {Most readability formulas calculate a global readability score
by combining several indicator values by a linear combination.
Typical indicators are Average sentence length, Average number
of syllables per word, etc. Usually the parameters of the linear
combination are determined by a linear OLS (ordinary least square
estimation) minimizing the sum of the squared residuals in comparison
with human ratings for a given set of texts. The usage of OLS
leads to several drawbacks. First, the parameters are not constraint
in any way and are therefore not intuitive and difficult to interpret.
Second, if the number of parameters become large, the effect of
overfitting easily occurs. Finally, OLS is quite sensitive to
outliers. Therefore, an alternative method is presented which
avoids these drawbacks and is based on robust regression.},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mldm_2009_brueck_142.pdf},
year = {2009}
}
2008
2008.
Sustainability of Text-Technological Resources. Proceedings of the Post LREC-2008 Workshop: Sustainability of
Language Resources and Tools for Natural Language Processing Marrakech,
Morocco.
BibTeX
@inproceedings{Stuehrenberg:Beisswenger:Kuehnberger:Mehler:Luengen:Metzing:Moennich:2008,
author = {St{\"u}hrenberg, Maik and Bei{\ss}wenger, Michael and K{\"u}hnberger, Kai-Uwe
and Mehler, Alexander and L{\"u}ngen, Harald and Metzing, Dieter and M{\"o}nnich, Uwe},
title = {Sustainability of Text-Technological Resources},
booktitle = {Proceedings of the Post LREC-2008 Workshop: Sustainability of
Language Resources and Tools for Natural Language Processing Marrakech,
Morocco},
abstract = {We consider that there are obvious relationships between research
on sustainability of language and linguistic resources on the
one hand and work undertaken in the Research Unit 'Text-Technological
Modelling of Information' on the other. Currently the main focus
in sustainability research is concerned with archiving methods
of textual resources, i.e. methods for sustainability of primary
and secondary data; these aspects are addressed in our work as
well. However, we believe that there are additional certain aspects
of sustainability on which new light is shed on by procedures,
algorithms and dynamic processes undertaken in our Research Unit},
pdf = {http://www.michael-beisswenger.de/pub/lrec-sustainability.pdf},
year = {2008}
}
2008.
Sprachliche Netzwerke. Netzwerkanalyse und Netzwerktheorie, 413–427.
BibTeX
@incollection{Mehler:Job:Blanchard:Eikmeyer:2008,
author = {Mehler, Alexander and Job, Barbara and Blanchard, Philippe and Eikmeyer, Hans-J{\"u}rgen},
title = {Sprachliche Netzwerke},
booktitle = {Netzwerkanalyse und Netzwerktheorie},
publisher = {VS},
editor = {Stegbauer, Christian},
pages = {413--427},
address = {Wiesbaden},
abstract = {In diesem Kapitel beschreiben wir so genannte sprachliche Netzwerke.
Dabei handelt es sich um Netzwerke sprachlicher Einheiten, die
in Zusammenhang mit ihrer Einbettung in das Netzwerk jener Sprachgemeinschaft
analysiert werden, welche diese Einheiten und deren Vernetzung
hervorgebracht hat. Wir er{\"o}rtern ein Dreistufenmodell zur Analyse
solcher Netzwerke und exemplifizieren dieses Modell anhand mehrerer
Spezialwikis. Ein Hauptaugenmerk des Kapitels liegt dabei auf
einem Mehrebenennetzwerkmodell, und zwar in Abkehr von den unipartiten
Graphmodellen der Theorie komplexer Netzwerke.},
year = {2008}
}
2008.
A Unified Database of Dependency Treebanks. Integrating, Quantifying
and Evaluating Dependency Data. Proceedings of the 6th Language Resources and Evaluation Conference
(LREC 2008), Marrakech (Morocco).
BibTeX
@inproceedings{Pustylnikov:Mehler:Gleim:2008,
author = {Abramov, Olga and Mehler, Alexander and Gleim, R{\"u}diger},
title = {A Unified Database of Dependency Treebanks. Integrating, Quantifying
and Evaluating Dependency Data},
booktitle = {Proceedings of the 6th Language Resources and Evaluation Conference
(LREC 2008), Marrakech (Morocco)},
abstract = {This paper describes a database of 11 dependency treebanks which
were unified by means of a two-dimensional graph format. The format
was evaluated with respect to storage-complexity on the one hand,
and efficiency of data access on the other hand. An example of
how the treebanks can be integrated within a unique interface
is given by means of the DTDB interface.},
pdf = {http://wwwhomes.uni-bielefeld.de/opustylnikov/pustylnikov/pdfs/LREC08_full.pdf},
year = {2008}
}
2008.
Structural Similarities of Complex Networks: A Computational Model
by Example of Wiki Graphs. Applied Artificial Intelligence, 22(7&8):619–683.
BibTeX
@article{Mehler:2008:a,
author = {Mehler, Alexander},
title = {Structural Similarities of Complex Networks: A Computational Model
by Example of Wiki Graphs},
journal = {Applied Artificial Intelligence},
volume = {22},
number = {7\&8},
pages = {619--683},
abstract = {This article elaborates a framework for representing and classifying
large complex networks by example of wiki graphs. By means of
this framework we reliably measure the similarity of document,
agent, and word networks by solely regarding their topology. In
doing so, the article departs from classical approaches to complex
network theory which focuses on topological characteristics in
order to check their small world property. This does not only
include characteristics that have been studied in complex network
theory, but also some of those which were invented in social network
analysis and hypertext theory. We show that network classifications
come into reach which go beyond the hypertext structures traditionally
analyzed in web mining. The reason is that we focus on networks
as a whole as units to be classified---above the level of websites
and their constitutive pages. As a consequence, we bridge classical
approaches to text and web mining on the one hand and complex
network theory on the other hand. Last but not least, this approach
also provides a framework for quantifying the linguistic notion
of intertextuality.},
doi = {10.1080/08839510802164085},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2016/10/mehler_2008_Structural_Similarities_of_Complex_Networks.pdf},
website = {https://www.researchgate.net/publication/200772675_Structural_similarities_of_complex_networks_A_computational_model_by_example_of_wiki_graphs},
year = {2008}
}
2008.
Lexical-Semantic Resources in Automated Discourse Analysis.
Journal for Language Technology and Computational
Linguistics (JLCL), 23(2).
GSCL.
BibTeX
@book{Luengen:Mehler:Storrer:2008:a,
editor = {L{\"u}ngen, Harald and Mehler, Alexander and Storrer, Angelika},
title = {Lexical-Semantic Resources in Automated Discourse Analysis},
publisher = {GSCL},
volume = {23},
number = {2},
series = {Journal for Language Technology and Computational
Linguistics (JLCL)},
image = {https://www.texttechnologylab.org/wp-content/uploads/2015/09/LexicalSemanticResources-300-20.png},
pagetotal = {111},
pdf = {http://www.jlcl.org/2008_Heft2/JLCL23(2).pdf},
website = {https://www.researchgate.net/publication/228956889_Lexical-Semantic_Resources_in_Automated_Discourse_Analysis},
year = {2008}
}
2008.
Large Text Networks as an Object of Corpus Linguistic Studies. Corpus Linguistics. An International Handbook of the Science of
Language and Society, 328–382.
BibTeX
@incollection{Mehler:2008:b,
author = {Mehler, Alexander},
title = {Large Text Networks as an Object of Corpus Linguistic Studies},
booktitle = {Corpus Linguistics. An International Handbook of the Science of
Language and Society},
publisher = {De Gruyter},
editor = {L{\"u}deling, Anke and Kyt{\"o}, Merja},
pages = {328--382},
address = {Berlin/New York},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_2007_a.pdf},
year = {2008}
}
July, 2008.
A Dynamic Approach for Automatic Error Detection in Generation Grammars. Proceedings of the 18th European Conference on Artificial Intelligence (ECAI).
BibTeX
@inproceedings{vor:der:Brueck:Stenzhorn:2008,
author = {vor der Br{\"u}ck, Tim and Stenzhorn, Holger},
title = {A Dynamic Approach for Automatic Error Detection in Generation Grammars},
booktitle = {Proceedings of the 18th European Conference on Artificial Intelligence (ECAI)},
address = {Patras, Greece},
abstract = {In any real world application scenario, natural language generation
(NLG) systems have to employ grammars consisting of tremendous
amounts of rules. Detecting and fixing errors in such grammars
is therefore a highly tedious task. In this work we present a
data mining algorithm which deduces incorrect grammar rules by
abductive reasoning out of positive and negative training examples.
More specifcally, the constituency trees belonging to successful
generation processes and the incomplete trees of failed ones are
analyzed. From this a quality score is derived for each grammar
rule by analyzing the occurrences of the rules in the trees and
by spotting the exact error locations in the incomplete trees.
In prior work on automatic error detection v.d.Br{\"u}ck et al. [5]
proposed a static error detection algorithm for generation grammars.
The approach of Cussens et al. creates missing grammar rules for
parsing using abduction [1]. Zeller introduced a dynamic approach
in the related area of detecting errors in computer programs [6].},
isbn = {978-1-58603-891-5},
month = jul,
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/xtragen_egram.pdf},
year = {2008}
}
October, 2008.
A Readability Checker with Supervised Learning using Deep Syntactic
and Semantic Indicators. Proceedings of the 11th International Multiconference: Information
Society - IS 2008 - Language Technologies, 92–97.
BibTeX
@inproceedings{vor:der:Brueck:Hartrumpf:Helbig:2008:a,
author = {vor der Br{\"u}ck, Tim and Hartrumpf, Sven and Helbig, Hermann},
title = {A Readability Checker with Supervised Learning using Deep Syntactic
and Semantic Indicators},
booktitle = {Proceedings of the 11th International Multiconference: Information
Society - IS 2008 - Language Technologies},
editor = {Erjavec, Toma{\v{z}} and Gros, Jerneja {\v{Z}}ganec},
pages = {92--97},
address = {Ljubljana, Slovenia},
abstract = {Checking for readability or simplicity of texts is important for
many institutional and individual users. Formulas for approximately
measuring text readability have a long tradition. Usually, they
exploit surfaceoriented indicators like sentence length, word
length, word frequency, etc. However, in many cases, this information
is not adequate to realistically approximate the cognitive difficulties
a person can have to understand a text. Therefore we use deep
syntactic and semantic indicators in addition. The syntactic information
is represented by a dependency tree, the semantic information
by a semantic network. Both representations are automatically
generated by a deep syntactico-semantic analysis. A global readability
score is determined by applying a nearest neighbor algorithm on
3,000 ratings of 300 test persons. The evaluation showed that
the deep syntactic and semantic indicators lead to promising results
comparable to the best surface-based indicators. The combination
of deep and shallow indicators leads to an improvement over shallow
indicators alone. Finally, a graphical user interface was developed
which highlights difficult passages, depending on the individual
indicator values, and displays a global readability score. Povzetek:
Strojno u{\v{c}}enje z odvisnostnimi drevesi je uporabljeno za ugotavljanje
berljivosti besedil. 1},
isbn = {978-961-264-006-4},
month = oct,
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/brueck_hartrumpf_helbig08.pdf},
url = {http://pi7.fernuni-hagen.de/brueck/papers/brueck_hartrumpf_helbig08.pdf},
website = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.160.5878},
year = {2008}
}
2008.
A Readability Checker with Supervised Learning using Deep Indicators. Informatica, 32(4):429–435.
BibTeX
@article{vor:der:Brueck:Hartrumpf:Helbig:2008:b,
author = {vor der Br{\"u}ck, Tim and Hartrumpf, Sven and Helbig, Hermann},
title = {A Readability Checker with Supervised Learning using Deep Indicators},
journal = {Informatica},
volume = {32},
number = {4},
pages = {429--435},
abstract = {Checking for readability or simplicity of texts is important for
many institutional and individual users. Formulas for approximately
measuring text readability have a long tradition. Usually, they
exploit surface-oriented indicators like sentence length, word
length, word frequency, etc. However, in many cases, this information
is not adequate to realistically approximate the cognitive difficulties
a person can have to understand a text. Therefore we use deep
syntactic and semantic indicators in addition. The syntactic information
is represented by a dependency tree, the semantic information
by a semantic network. Both representations are automatically
generated by a deep syntactico-semantic analysis. A global readability
score is determined by applying a nearest neighbor algorithm on
3,000 ratings of 300 test persons. The evaluation showed that
the deep syntactic and semantic indicators lead to promising results
comparable to the best surface-based indicators. The combination
of deep and shallow indicators leads to an improvement over shallow
indicators alone. Finally, a graphical user interface was developed
which highlights difficult passages, depending on the individual
indicator values, and displays a global readability score.},
website = {http://connection.ebscohost.com/c/articles/36288796/readability-checker-supervised-learning-using-deep-indicators},
year = {2008}
}
2008.
Text classification by means of structural features. What kind
of information about texts is captured by their structure? Proceedings of RUSSIR '08, September 1-5, Taganrog, Russia.
BibTeX
@inproceedings{Pustylnikov:Mehler:2008:c,
  author    = {Pustylnikov, Olga and Mehler, Alexander},
  title     = {Text classification by means of structural features. What kind
               of information about texts is captured by their structure?},
  booktitle = {Proceedings of RUSSIR '08, September 1-5, Taganrog, Russia},
  pdf       = {http://www.www.texttechnologylab.org/data/pdf/mehler_geibel_pustylnikov_2007.pdf},
  year      = {2008}
}
2008.
An Integrated Model of Lexical Chaining: Applications, Resources
and their Format. Proceedings of KONVENS 2008 – Ergänzungsband Textressourcen
und lexikalisches Wissen, 59–70.
BibTeX
@inproceedings{Waltinger:Mehler:Stuehrenberg:2008,
author = {Waltinger, Ulli and Mehler, Alexander and St{\"u}hrenberg, Maik},
title = {An Integrated Model of Lexical Chaining: Applications, Resources
and their Format},
booktitle = {Proceedings of KONVENS 2008 -- Erg{\"a}nzungsband Textressourcen
und lexikalisches Wissen},
editor = {Storrer, Angelika and Geyken, Alexander and Siebert, Alexander
and W{\"u}rzner, Kay-Michael},
pages = {59--70},
pdf = {http://www.ulliwaltinger.de/pdf/Konvens_2008_Integrated_Model_of_Lexical_Chaining_WaltingerMehlerStuehrenberg.pdf},
year = {2008}
}
2008.
A Model of the Distribution of the Distances of Alike Elements
in Dialogical Communication. Proceedings of the International Conference on Information Theory
and Statistical Learning (ITSL '08), July 14-15, 2008, Las Vegas, 45–50.
BibTeX
@inproceedings{Mehler:2008:c,
author = {Mehler, Alexander},
title = {A Model of the Distribution of the Distances of Alike Elements
in Dialogical Communication},
booktitle = {Proceedings of the International Conference on Information Theory
and Statistical Learning (ITSL '08), July 14-15, 2008, Las Vegas},
pages = {45--50},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_2008_c.pdf},
year = {2008}
}
2008.
Towards Automatic Content Tagging: Enhanced Web Services in Digital
Libraries Using Lexical Chaining. 4th Int. Conf. on Web Information Systems and Technologies (WEBIST
'08), 4-7 May, Funchal, Portugal, 231–236.
BibTeX
@inproceedings{Waltinger:Mehler:Heyer:2008,
author = {Waltinger, Ulli and Mehler, Alexander and Heyer, Gerhard},
title = {Towards Automatic Content Tagging: Enhanced Web Services in Digital
Libraries Using Lexical Chaining},
booktitle = {4th Int. Conf. on Web Information Systems and Technologies (WEBIST
'08), 4-7 May, Funchal, Portugal},
editor = {Cordeiro, Jos{\'e} and Filipe, Joaquim and Hammoudi, Slimane},
pages = {231--236},
address = {Barcelona},
publisher = {INSTICC Press},
pdf = {http://www.ulliwaltinger.de/pdf/Webist_2008_Towards_Automatic_Content_Tagging_WaltingerMehlerHeyer.pdf},
url = {http://dblp.uni-trier.de/db/conf/webist/webist2008-2.html#WaltingerMH08},
website = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.463.3097},
year = {2008}
}
2008.
A Short Note on Social-Semiotic Networks from the Point of View
of Quantitative Semantics. Proceedings of the Dagstuhl Seminar on Social Web Communities,
September 21-26, Dagstuhl.
BibTeX
@inproceedings{Mehler:2008:f,
  author    = {Mehler, Alexander},
  title     = {A Short Note on Social-Semiotic Networks from the Point of View
               of Quantitative Semantics},
  booktitle = {Proceedings of the Dagstuhl Seminar on Social Web Communities,
               September 21-26, Dagstuhl},
  editor    = {Alani, Harith and Staab, Steffen and Stumme, Gerd},
  pdf       = {http://drops.dagstuhl.de/opus/volltexte/2008/1788/pdf/08391.MehlerAlexander.ExtAbstract.1788.pdf},
  year      = {2008}
}
2008.
WikiDB: Building Interoperable Wiki-Based Knowledge Resources
for Semantic Databases. Sprache und Datenverarbeitung. International Journal
for Language Data Processing, 32(1):47–70.
BibTeX
@article{Mehler:Gleim:Ernst:Waltinger:2008,
author = {Mehler, Alexander and Gleim, R{\"u}diger and Ernst, Alexandra and Waltinger, Ulli},
title = {WikiDB: Building Interoperable Wiki-Based Knowledge Resources
for Semantic Databases},
journal = {Sprache und Datenverarbeitung. International Journal
for Language Data Processing},
volume = {32},
number = {1},
pages = {47--70},
abstract = {This article describes an API for exploring the logical document
and the logical network structure of wikis. It introduces an algorithm
for the semantic preprocessing, filtering and typing of these
building blocks. Further, this article models the process of wiki
generation based on a unified format of syntactic, semantic and
pragmatic representations. This three-level approach to make accessible
syntactic, semantic and pragmatic aspects of wiki-based structure
formation is complemented by a corresponding database model --
called WikiDB -- and an API operating thereon. Finally, the article
provides an empirical study of using the three-fold representation
format in conjunction with WikiDB.},
pdf = {http://www.ulliwaltinger.de/pdf/Konvens_2008_WikiDB_Building_Semantic_Databases_MehlerGleimErnstWaltinger.pdf},
year = {2008}
}
2008.
Who is it? Context sensitive named entity and instance recognition
by means of Wikipedia. Proceedings of the 2008 IEEE/WIC/ACM International Conference
on Web Intelligence (WI-2008), 381–384.
BibTeX
@inproceedings{Waltinger:Mehler:2008:a,
author = {Waltinger, Ulli and Mehler, Alexander},
title = {Who is it? Context sensitive named entity and instance recognition
by means of {Wikipedia}},
booktitle = {Proceedings of the 2008 IEEE/WIC/ACM International Conference
on Web Intelligence (WI-2008)},
pages = {381--384},
publisher = {IEEE Computer Society},
pdf = {http://www.ulliwaltinger.de/pdf/WI_2008_Context_Sensitive_Instance_Recognition_WaltingerMehler.pdf},
website = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.324.5881},
year = {2008}
}
June 2–4, 2008.
Taking Fingerprints of Speech-and-Gesture Ensembles: Approaching
Empirical Evidence of Intrapersonal Alignment in Multimodal Communication. LONDIAL 2008: Proceedings of the 12th Workshop on the Semantics
and Pragmatics of Dialogue (SEMDIAL), 157–164.
BibTeX
@inproceedings{Luecking:Mehler:Menke:2008,
author = {L{\"u}cking, Andy and Mehler, Alexander and Menke, Peter},
title = {Taking Fingerprints of Speech-and-Gesture Ensembles: Approaching
Empirical Evidence of Intrapersonal Alignment in Multimodal Communication},
booktitle = {LONDIAL 2008: Proceedings of the 12th Workshop on the Semantics
and Pragmatics of Dialogue (SEMDIAL)},
pages = {157--164},
address = {King's College London},
month = {June 2--4},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/luecking_mehler_menke_2008.pdf},
website = {https://www.researchgate.net/publication/237305375_Taking_Fingerprints_of_Speech-and-Gesture_Ensembles_Approaching_Empirical_Evidence_of_Intrapersonal_Alignment_in_Multimodal_Communication},
year = {2008}
}
2008.
Interaktive Textproduktion in Wiki-basierten Kommunikationssystemen. Kommunikation, Partizipation und Wirkungen im Social Web – Weblogs,
Wikis, Podcasts und Communities aus interdisziplinärer Sicht, 267–300.
BibTeX
@incollection{Mehler:Sutter:2008,
author = {Mehler, Alexander and Sutter, Tilmann},
title = {Interaktive Textproduktion in Wiki-basierten Kommunikationssystemen},
booktitle = {Kommunikation, Partizipation und Wirkungen im Social Web -- Weblogs,
Wikis, Podcasts und Communities aus interdisziplin{\"a}rer Sicht},
publisher = {Herbert von Halem},
editor = {Zerfa{\ss}, Ansgar and Welker, Martin and Schmidt, Jan},
pages = {267--300},
address = {K{\"o}ln},
internal-note = {Abstract removed during review: the previous abstract described
an image-annotation system of the eHumanities Desktop and clearly
belonged to a different publication, not to this article.},
year = {2008}
}
2008.
On the Impact of Community Structure on Self-Organizing Lexical Networks. Proceedings of the 7th Evolution of Language Conference (Evolang
2008), March 11-15, 2008, Barcelona, 227–234.
BibTeX
@inproceedings{Mehler:2008:e,
author = {Mehler, Alexander},
title = {On the Impact of Community Structure on Self-Organizing Lexical Networks},
booktitle = {Proceedings of the 7th Evolution of Language Conference (Evolang
2008), March 11-15, 2008, Barcelona},
editor = {Smith, Andrew D. M. and Smith, Kenny and Cancho, Ramon Ferrer i},
pages = {227--234},
publisher = {World Scientific},
abstract = {This paper presents a simulation model of self-organizing lexical
networks. Its starting point is the notion of an association game
in which the impact of varying community models is studied on
the emergence of lexical networks. The paper reports on experiments
whose results are in accordance with findings in the framework
of the naming game. This is done by means of a multilevel network
model in which the correlation of social and of linguistic networks
is studied},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_2008_b.pdf},
website = {http://stel.ub.edu/evolang2008/evo10.htm},
year = {2008}
}
2008.
Towards a Uniform Representation of Treebanks: Providing Interoperability
for Dependency Tree Data. Proceedings of First International Conference on Global Interoperability
for Language Resources (ICGL 2008), Hong Kong SAR, January 9-11.
BibTeX
@inproceedings{Pustylnikov:Mehler:2008:a,
  author    = {Abramov, Olga and Mehler, Alexander},
  title     = {Towards a Uniform Representation of Treebanks: Providing Interoperability
               for Dependency Tree Data},
  booktitle = {Proceedings of First International Conference on Global Interoperability
               for Language Resources (ICGL 2008), Hong Kong SAR, January 9-11},
  abstract  = {In this paper we present a corpus representation format which
               unifies the representation of a wide range of dependency treebanks
               within a single model. This approach provides interoperability
               and reusability of annotated syntactic data which in turn extends
               its applicability within various research contexts. We demonstrate
               our approach by means of dependency treebanks of 11 languages.
               Further, we perform a comparative quantitative analysis of these
               treebanks in order to demonstrate the interoperability of our
               approach.},
  pdf       = {http://wwwhomes.uni-bielefeld.de/opustylnikov/pustylnikov/pdfs/acl07.1.0.pdf},
  website   = {https://www.researchgate.net/publication/242681771_Towards_a_Uniform_Representation_of_Treebanks_Providing_Interoperability_for_Dependency_Tree_Data},
  year      = {2008}
}
2008.
Towards a Reference Corpus of Web Genres for the Evaluation of
Genre Identification Systems. Proceedings of the 6th Language Resources and Evaluation Conference
(LREC 2008), Marrakech (Morocco).
BibTeX
@inproceedings{Rehm:Santini:Mehler:Braslavski:Gleim:Stubbe:Symonenko:Tavosanis:Vidulin:2008,
author = {Rehm, Georg and Santini, Marina and Mehler, Alexander and Braslavski, Pavel
and Gleim, R{\"u}diger and Stubbe, Andrea and Symonenko, Svetlana and Tavosanis, Mirko
and Vidulin, Vedrana},
title = {Towards a Reference Corpus of Web Genres for the Evaluation of
Genre Identification Systems},
booktitle = {Proceedings of the 6th Language Resources and Evaluation Conference
(LREC 2008), Marrakech (Morocco)},
abstract = {We present initial results from an international and multi-disciplinary
research collaboration that aims at the construction of a reference
corpus of web genres. The primary application scenario for which
we plan to build this resource is the automatic identification
of web genres. Web genres are rather difficult to capture and
to describe in their entirety, but we plan for the finished reference
corpus to contain multi-level tags of the respective genre or
genres a web document or a website instantiates. As the construction
of such a corpus is by no means a trivial task, we discuss several
alternatives that are, for the time being, mostly based on existing
collections. Furthermore, we discuss a shared set of genre categories
and a multi-purpose tool as two additional prerequisites for a
reference corpus of web genres.},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/rehm_santini_mehler_braslavski_gleim_stubbe_symonenko_tavosanis_vidulin_2008.pdf},
website = {http://www.lrec-conf.org/proceedings/lrec2008/summaries/94.html},
year = {2008}
}
2007
2007.
Aisles through the Category Forest – Utilising the Wikipedia Category
System for Corpus Building in Machine Learning. 3rd International Conference on Web Information Systems and Technologies
(WEBIST '07), March 3-6, 2007, Barcelona, 142–149.
BibTeX
@inproceedings{Gleim:Mehler:Dehmer:Abramov:2007,
author = {Gleim, R{\"u}diger and Mehler, Alexander and Dehmer, Matthias and Abramov, Olga},
title = {Aisles through the Category Forest -- Utilising the {Wikipedia} Category
System for Corpus Building in Machine Learning},
booktitle = {3rd International Conference on Web Information Systems and Technologies
(WEBIST '07), March 3-6, 2007, Barcelona},
editor = {Filipe, Joaquim and Cordeiro, Jos{\'e} and Encarna{\c{c}}{\~a}o, Bruno and Pedrosa, Vitor},
pages = {142--149},
address = {Barcelona},
abstract = {The World Wide Web is a continuous challenge to machine learning.
Established approaches have to be enhanced and new methods be
developed in order to tackle the problem of finding and organising
relevant information. It has often been motivated that semantic
classifications of input documents help solving this task. But
while approaches of supervised text categorisation perform quite
well on genres found in written text, newly evolved genres on
the web are much more demanding. In order to successfully develop
approaches to web mining, respective corpora are needed. However,
the composition of genre- or domain-specific web corpora is still
an unsolved problem. It is time consuming to build large corpora
of good quality because web pages typically lack reliable meta
information. Wikipedia along with similar approaches of collaborative
text production offers a way out of this dilemma. We examine how
social tagging, as supported by the MediaWiki software, can be
utilised as a source of corpus building. Further, we describe
a representation format for social ontologies and present the
Wikipedia Category Explorer, a tool which supports categorical
views to browse through the Wikipedia and to construct domain
specific corpora for machine learning.},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2016/10/webist_2007-gleim_mehler_dehmer_pustylnikov.pdf},
year = {2007}
}
2007.
Structural Uncertainty of Hypertext Types. An Empirical Study. Proceedings of the Workshop "Towards Genre-Enabled Search Engines:
The Impact of NLP", September, 30, 2007, in conjunction with RANLP
2007, Borovets, Bulgaria, 13–19.
BibTeX
@inproceedings{Mehler:Gleim:Wegner:2007,
author = {Mehler, Alexander and Gleim, R{\"u}diger and Wegner, Armin},
title = {Structural Uncertainty of Hypertext Types. An Empirical Study},
booktitle = {Proceedings of the Workshop "Towards Genre-Enabled Search Engines:
The Impact of NLP", September, 30, 2007, in conjunction with RANLP
2007, Borovets, Bulgaria},
editor = {Rehm, Georg and Santini, Marina},
pages = {13--19},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/RANLP.pdf},
year = {2007}
}
2007.
Evolving Lexical Networks. A Simulation Model of Terminological Alignment. Proceedings of the Workshop on Language, Games, and Evolution
at the 9th European Summer School in Logic, Language and Information
(ESSLLI 2007), Trinity College, Dublin, 6-17 August, 57–67.
BibTeX
@inproceedings{Mehler:2007:d,
author = {Mehler, Alexander},
title = {Evolving Lexical Networks. A Simulation Model of Terminological Alignment},
booktitle = {Proceedings of the Workshop on Language, Games, and Evolution
at the 9th European Summer School in Logic, Language and Information
(ESSLLI 2007), Trinity College, Dublin, 6-17 August},
editor = {Benz, Anton and Ebert, Christian and van Rooij, Robert},
pages = {57--67},
abstract = {In this paper we describe a simulation model of terminological
alignment in a multiagent community. It is based on the notion
of an association game which is used instead of the classical
notion of a naming game (Steels, 1996). The simulation model integrates
a small world-like agent community which restricts agent communication.
We hypothesize that this restriction is decisive when it comes
to simulate terminological alignment based on lexical priming.
The paper presents preliminary experimental results in support
of this hypothesis.},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_2007_d.pdf},
year = {2007}
}
2007.
Much Ado About Text Content. Learning Text Types Solely by Structural
Differentiae. Proceedings of OTT '06 – Ontologies in Text Technology: Approaches
to Extract Semantic Knowledge from Structured Information, 63–71.
BibTeX
@inproceedings{Mehler:Geibel:Gleim:Herold:Jain:Pustylnikov:2007,
author = {Mehler, Alexander and Geibel, Peter and Gleim, R{\"u}diger and Herold, Sebastian
and Jain, Brijnesh-Johannes and Abramov, Olga},
title = {Much Ado About Text Content. Learning Text Types Solely by Structural
Differentiae},
booktitle = {Proceedings of OTT '06 -- Ontologies in Text Technology: Approaches
to Extract Semantic Knowledge from Structured Information},
editor = {M{\"o}nnich, Uwe and K{\"u}hnberger, Kai-Uwe},
series = {Publications of the Institute of Cognitive Science
(PICS)},
pages = {63--71},
address = {Osnabr{\"u}ck},
abstract = {In this paper, we deal with classifying texts into classes which
denote text types whose textual instances serve more or less homogeneous
functions. Other than mainstream approaches to text classification,
which rely on the vector space model [30] or some of its descendants
[2] and, thus, on content-related lexical features, we solely
refer to structural differentiae, that is, to patterns of text
structure as determinants of class membership. Further, we suppose
that text types span a type hierarchy based on the type-subtype
relation [31]. Thus, although we admit that class membership is
fuzzy so that overlapping classes are inevitable, we suppose a
non-overlapping type system structured into a rooted tree -- whether
solely based on functional or additional on, e.g., content- or
mediabased criteria [1]. What regards criteria of goodness of
classification, we perform a classical supervised categorization
experiment [30] based on cross-validation as a method of model
selection [11]. That is, we perform a categorization experiment
in which for all training and test cases class membership is known
ex ante. In summary, we perform a supervised experiment of text
classification in order to learn functionally grounded text types
where membership to these types is solely based on structural
criteria.},
pdf = {http://ikw.uni-osnabrueck.de/~ott06/ott06-abstracts/Mehler_Geibel_abstract.pdf},
year = {2007}
}
2007.
Graph-theoretical Characterizations of Generalized Trees. Proceedings of the 2007 International Conference on Machine Learning:
Models, Technologies & Applications (MLMTA '07), June 25-28,
2007, Las Vegas, 113–117.
BibTeX
@inproceedings{Dehmer:Mehler:Emmert-Streib:2007:a,
author = {Dehmer, Matthias and Mehler, Alexander and Emmert-Streib, Frank},
title = {Graph-theoretical Characterizations of Generalized Trees},
booktitle = {Proceedings of the 2007 International Conference on Machine Learning:
Models, Technologies \& Applications (MLMTA '07), June 25-28,
2007, Las Vegas},
pages = {113--117},
website = {https://www.researchgate.net/publication/221188591_Graph-theoretical_Characterizations_of_Generalized_Trees},
year = {2007}
}
2007.
Representing and Maintaining Large Corpora. Proceedings of the Corpus Linguistics 2007 Conference, Birmingham (UK).
BibTeX
@inproceedings{Gleim:Mehler:Eikmeyer:2007:a,
author = {Gleim, R{\"u}diger and Mehler, Alexander and Eikmeyer, Hans-J{\"u}rgen},
title = {Representing and Maintaining Large Corpora},
booktitle = {Proceedings of the Corpus Linguistics 2007 Conference, Birmingham (UK)},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/gleim_mehler_eikmeyer_2007_a.pdf},
year = {2007}
}
2007.
Classification of Documents Based on the Structure of Their DOM Trees. Proceedings of ICONIP 2007 (14th International Conference on Neural
Information Processing), 779–788.
BibTeX
@inproceedings{Geibel:Pustylnikov:Mehler:Gust:Kuehnberger:2007,
author = {Geibel, Peter and Abramov, Olga and Mehler, Alexander and Gust, Helmar
and K{\"u}hnberger, Kai-Uwe},
title = {Classification of Documents Based on the Structure of Their DOM Trees},
booktitle = {Proceedings of ICONIP 2007 (14th International Conference on Neural
Information Processing)},
series = {Lecture Notes in Computer Science 4985},
pages = {779--788},
publisher = {Springer},
abstract = {In this paper, we discuss kernels that can be applied for the
classification of XML documents based on their DOM trees. DOM
trees are ordered trees in which every node might be labeled by
a vector of attributes including its XML tag and the textual content.
We describe five new kernels suitable for such structures: a kernel
based on predefined structural features, a tree kernel derived
from the well-known parse tree kernel, the set tree kernel that
allows permutations of children, the string tree kernel being
an extension of the so-called partial tree kernel, and the soft
tree kernel as a more efficient alternative. We evaluate the kernels
experimentally on a corpus containing the DOM trees of newspaper
articles and on the well-known SUSANNE corpus.},
website = {http://www.springerlink.com/content/x414002113425742/},
year = {2007}
}
2007.
A Corpus Management System for Historical Semantics. Sprache und Datenverarbeitung. International Journal
for Language Data Processing, 31(1-2):81–89.
BibTeX
@article{Jussen:Mehler:Ernst:2007,
author = {Jussen, Bernhard and Mehler, Alexander and Ernst, Alexandra},
title = {A Corpus Management System for Historical Semantics},
journal = {Sprache und Datenverarbeitung. International Journal
for Language Data Processing},
volume = {31},
number = {1-2},
pages = {81--89},
abstract = {Der Beitrag beschreibt ein Korpusmanagementsystem f{\"u}r die historische
Semantik. Die Grundlage hierf{\"u}r bildet ein Bedeutungsbegriff,
der -- methodologisch gesprochen -- auf der Analyse diachroner Korpora
beruht. Das Ziel der Analyse dieser Korpora besteht darin, Bedeutungswandel
als eine Bezugsgr{\"o}{\ss}e f{\"u}r den Wandel sozialer Systeme zu untersuchen.
Das vorgestellte Korpusmanagementsystem unterst{\"u}tzt diese Art
der korpusbasierten historischen Semantik.},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/jussen_mehler_ernst_2007.pdf},
year = {2007}
}
2007.
Machine Learning in a Semiotic Perspective. Aspects of Automatic Text Analysis, 1–29.
BibTeX
@incollection{Mehler:Koehler:2007:b,
author = {Mehler, Alexander and K{\"o}hler, Reinhard},
title = {Machine Learning in a Semiotic Perspective},
booktitle = {Aspects of Automatic Text Analysis},
publisher = {Springer},
editor = {Mehler, Alexander and K{\"o}hler, Reinhard},
series = {Studies in Fuzziness and Soft Computing},
pages = {1--29},
address = {Berlin/New York},
abstract = {Gegenstand des folgenden Aufsatzes ist der konnotative Aspekt
der Bedeutungen von Texten. Den Ausgangspunkt der {\"U}berlegungen
zur Konnotation des Textes bildet die Auffassung, wonach Wort-
und Textbedeutungskonstitution Ergebnis eines zirkul{\"a}ren Prozesses
sind, der f{\"u}r die Emergenz einer Hierarchie ineinander geschachtelter
Spracheinheiten verantwortlich zeichnet. Der Proze{\ss} der Zeichenartikulation
erfolgt entlang dieser Ebenen und erzeugt durch Verbindung von
(konnotativer) Inhalts- und Ausdrucksseite auf Textebene das Textzeichen.
Im Gegensatz zu einer strikten Interpretation des Fregeschen Kompositionalit{\"a}tsprinzips,
derzufolge die Bedeutungen sprachlicher Einheiten als fixierte,
kontextfreie Gr{\"o}{\ss}en vorauszusetzen sind, behandelt der vorliegende
Ansatz bereits die lexikalische Bedeutung als Gr{\"o}{\ss}e, die in
Abh{\"a}ngigkeit von ihrem Kontext variieren kann. Aus semiotischer
Perspektive ist es vor allem der Gestaltcharakter, welcher die
konnotative Textbedeutung einer Anwendung des FregePrinzips entzieht.
Anders ausgedr{\"u}ckt: Die konnotative Bedeutung eines Textes ist
keineswegs in eine Struktur 'atomarer' Repr{\"a}sentationen zerlegbar.
Die hierarchische Organisation von Texten erweist sich insofern
als komplex, als ihre Bedeutungen aus einem zirkul{\"a}ren Proze{\ss}
resultieren, der best{\"a}tigend und/oder ver{\"a}ndernd auf die
Bedeutungen der Textkonstituenten einwirkt. Diese Zirkularit{\"a}t
bedingt, da{\ss} Texte nicht nur als Orte der Manifestation von
Wortbedeutungsstrukturen anzusehen sind, sondern zugleich als
Ausgangspunkte f{\"u}r die Modifikation und Emergenz solcher Strukturen
dienen. Im folgenden wird unter Rekurs auf den Kopenhagener Strukturalismus
ein Modell der konnotativen Bedeutung von Texten entwickelt, das
sich unter anderem an dem glossematischen Begriff der Konstante
orientiert. Die Formalisierung des Modells erfolgt mit Hilfe des
Konzeptes der unscharfen Menge. Zu diesem Zweck werden die unscharfen
Verwendungsregularit{\"a}ten von W{\"o}rtern auf der Basis eines zweistufigen
Verfahrens analysiert, welches die syntagmatischen und paradigmatischen
Regularit{\"a}ten des Wortgebrauches ber{\"u}cksichtigt. Die Rolle
der Satzebene innerhalb des Prozesses der konnotativen Textbedeutungskonstitution
wird angedeutet. Abschlie{\ss}end erfolgt eine Exemplifizierung
des Algorithmus anhand der automatischen Analyse eines Textcorpus.},
website = {http://rd.springer.com/chapter/10.1007/978-3-540-37522-7_1},
year = {2007}
}
2007.
A Formal Text Representation Model Based on Lexical Chaining. Proceedings of the KI 2007 Workshop on Learning from Non-Vectorial
Data (LNVD 2007) September 10, Osnabrück, 17–26.
BibTeX
@inproceedings{Mehler:Waltinger:Wegner:2007:a,
author = {Mehler, Alexander and Waltinger, Ulli and Wegner, Armin},
title = {A Formal Text Representation Model Based on Lexical Chaining},
booktitle = {Proceedings of the KI 2007 Workshop on Learning from Non-Vectorial
Data (LNVD 2007) September 10, Osnabr{\"u}ck},
editor = {Geibel, Peter and Jain, Brijnesh J.},
pages = {17--26},
address = {Osnabr{\"u}ck},
publisher = {Universit{\"a}t Osnabr{\"u}ck},
abstract = {This paper presents a formal text representation model as an alternative
to the vector space model. It combines a tree-like model with
graph-inducing lexical relations. The paper aims at formalizing
two yet unrelated approaches, i.e. lexical chaining [3] and quantitative
structure analysis [9], in order to combine content and structure
modeling.},
pdf = {http://www.ulliwaltinger.de/pdf/LNVD07MehlerWaltingerWegner.pdf},
year = {2007}
}
October, 2007.
A Semantically Oriented Readability Checker for German. Proceedings of the 3rd Language & Technology Conference, 270–274.
BibTeX
@inproceedings{vor:der:Brueck:Hartrumpf:2007,
author = {vor der Br{\"u}ck, Tim and Hartrumpf, Sven},
title = {A Semantically Oriented Readability Checker for German},
booktitle = {Proceedings of the 3rd Language \& Technology Conference},
publisher = {Wydawnictwo Pozna{\'n}skie},
editor = {Vetulani, Zygmunt},
pages = {270--274},
address = {Pozna{\'n}, Poland},
abstract = {One major reason that readability checkers are still far away
from judging the understandability of texts consists in the fact
that no semantic information is used. Syntactic, lexical, or morphological
information can only give limited access for estimating the cognitive
difficulties for a human being to comprehend a text. In this paper
however, we present a readability checker which uses semantic
information in addition. This information is represented as semantic
networks and is derived by a deep syntactico-semantic analysis.
We investigate in which situations a semantic readability indicator
can lead to superior results in comparison with ordinary surface
indicators like sentence length. Finally, we compute the correlations
and absolute errors for our semantic indicators related to user
ratings collected in an online evaluation.},
isbn = {978-83-7177-407-2},
month = oct,
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/brueck_hartrumpf07_online.pdf},
url = {http://pi7.fernuni-hagen.de/papers/brueck_hartrumpf07_online.pdf},
year = {2007}
}
2007.
Suggesting Error Corrections of Path Expressions and Categories
for Tree-Mapping Grammars. Zeitschrift für Sprachwissenschaft, 26(2).
BibTeX
@article{vor:der:Brueck:Busemann:2007,
author = {vor der Br{\"u}ck, Tim and Busemann, Stephan},
title = {Suggesting Error Corrections of Path Expressions and Categories
for Tree-Mapping Grammars},
journal = {Zeitschrift f{\"u}r Sprachwissenschaft},
volume = {26},
number = {2},
abstract = {Tree mapping grammars are used in natural language generation
(NLG) to map non-linguistic input onto a derivation tree from
which the target text can be trivially read off as the terminal
yield. Such grammars may consist of a large number of rules. Finding
errors is quite tedious and sometimes very time-consuming. Often
the generation fails because the relevant input subtree is not
specified correctly. This work describes a method to detect and
correct wrong assignments of input subtrees to grammar categories
by cross-validating grammar rules with the given input structures.
The method also detects and corrects the usage of a category in
a grammar rule. The result is implemented in a grammar development
workbench and accelerates the grammar writer's work considerably.
The paper suggests the algorithms can be ported to other areas
in which tree mapping is required.},
url = {http://www.reference-global.com/doi/pdfplus/10.1515/ZFS.2007.021},
year = {2007}
}
2007.
Parameter Learning for a Readability Checking Tool. Proceedings of the LWA 2007 (Lernen-Wissen-Adaption), Workshop KDML.
BibTeX
@inproceedings{vor:der:Brueck:Leveling:2007,
author = {vor der Br{\"u}ck, Tim and Leveling, Johannes},
title = {Parameter Learning for a Readability Checking Tool},
booktitle = {Proceedings of the LWA 2007 (Lernen-Wissen-Adaption), Workshop KDML},
publisher = {Gesellschaft f{\"u}r Informatik},
editor = {Hinneburg, Alexander},
address = {Halle/Saale, Germany},
abstract = {This paper describes the application of machine learning methods
to determine parameters for DeLite, a readability checking tool.
DeLite pinpoints text segments that are difficult to understand
and computes for a given text a global readability score, which
is a weighted sum of normalized indicator values. Indicator values
are numeric properties derived from linguistic units in the text,
such as the distance between a verb and its complements or the
number of possible antecedents for a pronoun. Indicators are normalized
by means of a derivation of the Fermi function with two parameters.
DeLite requires individual parameters for this normalization function
and a weight for each indicator to compute the global readability
score. Several experiments to determine these parameters were
conducted, using different machine learning approaches. The training
data consists of more than 300 user ratings of texts from the
municipality domain. The weights for the indicators are learned
using two approaches: i) robust regression with linear optimization
and ii) an approximative iterative linear regression algorithm.
For evaluation, the computed readability scores are compared to
user ratings. The evaluation showed that iterative linear regression
yields a smaller square error than robust regression although
this method is only approximative. Both methods yield results
outperforming a first manual setting, and for both methods, basically
the same set of non-zero weights remain.},
website = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.69.6079},
year = {2007}
}
2007.
Reliability and Validity of Cervical Auscultation. Dysphagia, 22:225–234.
BibTeX
@article{Borr:Luecking:Hierlscher:2007,
author = {Borr, Christiane and Hielscher-Fastabend, Martina and L{\"u}cking, Andy},
title = {Reliability and Validity of Cervical Auscultation},
journal = {Dysphagia},
volume = {22},
pages = {225--234},
abstract = {We conducted a two-part study that contributes to the discussion
about cervical auscultation (CA) as a scientifically justifiable
and medically useful tool to identify patients with a high risk
of aspiration/penetration. We sought to determine (1) acoustic
features that mark a deglutition act as dysphagic; (2) acoustic
changes in healthy older deglutition profiles compared with those
of younger adults; (3) the correctness and concordance of rater
judgments based on CA; and (4) if education in CA improves individual
reliability. The first part of the study focused on a comparison
of the swallow morphology of dysphagic as opposed to healthy subjects
deglutition in terms of structure properties of the pharyngeal
phase of deglutition. We obtained the following results. The duration
of deglutition apnea is significantly higher in the older group
than in the younger one. Comparing the younger group and the dysphagic
group we found significant differences in duration of deglutition
apnea, onset time, and number of gulps. Just one parameter, number
of gulps, distinguishes significantly between the older and the
dysphagic groups. The second part of the study aimed at evaluating
the reliability of CA in detecting dysphagia measured as the concordance
and the correctness of CA experts in classifying swallowing sounds.
The interrater reliability coefficient AC1 resulted in a value
of 0.46, which is to be interpreted as fair agreement. Furthermore,
we found that comparison with radiologically defined aspiration/penetration
for the group of experts (speech and language therapists) yielded
70\% specificity and 94\% sensitivity. We conclude that the swallowing
sounds contain audible cues that should, in principle, permit
reliable classification and view CA as an early warning system
for identifying patients with a high risk of aspiration/penetration;
however, it is not appropriate as a stand-alone tool.},
doi = {10.1007/s00455-007-9078-3},
number = {3},
pdf = {http://www.shkim.eu/cborr/ca5manuscript.pdf},
publisher = {Springer New York},
url = {http://dx.doi.org/10.1007/s00455-007-9078-3},
website = {http://www.springerlink.com/content/c45578u74r38m4v7/},
year = {2007}
}
June, 2007.
Locating Objects by Pointing.
BibTeX
@misc{Kranstedt:et:al:2007,
author = {Kranstedt, Alfred and L{\"u}cking, Andy and Pfeiffer, Thies and Rieser, Hannes
and Staudacher, Marc},
title = {Locating Objects by Pointing},
howpublished = {3rd International Conference of the International
Society for Gesture Studies. Evanston, IL, USA},
keywords = {own},
month = jun,
year = {2007}
}
2007.
Error-tolerant Finite-state Recognizer and String Pattern Similarity
Based Spell-Checker for Bengali. 5th International Conference on Natural Language Processing (ICON)
as a poster, Hyderabad, India, January 2007.
BibTeX
@inproceedings{Asadullah:Zahurul:Khan:2007,
author = {Asadullah, Munshi and Islam, Md. Zahurul and Khan, Mumit},
title = {Error-tolerant Finite-state Recognizer and String Pattern Similarity
Based Spell-Checker for Bengali},
booktitle = {5th International Conference on Natural Language Processing (ICON)
as a poster, Hyderabad, India, January 2007},
abstract = {A crucial figure of merit for a spelling checker is not just whether
it can detect misspelled words, but also in how it ranks the suggestions
for the word. Spelling checker algorithms using edit distance
methods tend to produce a large number of possibilities for misspelled
words. We propose an alternative approach to checking the spelling
of Bangla text that uses a finite state automaton (FSA) to probabilistically
create the suggestion list for a misspelled word. FSA has proven
to be an effective method for problems requiring probabilistic
solution and high error tolerance. We start by using a finite
state representation for all the words in the Bangla dictionary;
the algorithm then uses the state tables to test a string, and
in case of an erroneous string, try to find all possible solutions
by attempting singular and multi-step transitions to consume
one or more characters and using the subsequent characters as
look-ahead; and finally, we use backtracking to add each possible
solution to the suggestion list. The use of finite state representation
for the word implies that the algorithm is much more efficient
in the case of non-inflected forms; in case of nouns, it is
even more significant as Bangla nouns are heavily used in the
non-inflected form. In terms of error detection and correction,
the algorithm uses the statistics of Bangla error pattern and
thus produces a small number of significant suggestions. One
notable limitation is the inability to handle transposition errors
as a single edit distance errors. This is not as significant as
it may seem since the number of transposition errors are not as
common as other errors in Bangla. This paper presents the structure
and the algorithm to implement a Practical Bangla spell-checker,
and discusses the results obtained from the prototype implementation.},
owner = {zahurul},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Error-tolerant_Finite-state_Recognizer_and_String_Pattern_Similarity_Based_Spell-Checker_for_Bengali.pdf},
timestamp = {2011.08.02},
year = {2007}
}
2007.
A Light Weight Stemmer for Bengali and Its Use in Spelling Checker. 1st International Conference on Digital Communications and Computer
Applications (DCCA2007).
BibTeX
@inproceedings{Zahurul:Uddin:Khan:2007,
author = {Islam, Md. Zahurul and Uddin, Md. Nizam and Khan, Mumit},
title = {A Light Weight Stemmer for Bengali and Its Use in Spelling Checker},
booktitle = {1st International Conference on Digital Communications and Computer
Applications (DCCA2007)},
abstract = {Stemming is an operation that splits a word into the constituent
root part and affix without doing complete morphological analysis.
It is used to improve the performance of spelling checkers and
information retrieval applications, where morphological analysis
would be too computationally expensive. For spelling checkers
specifically, using stemming may drastically reduce the dictionary
size, often a bottleneck for mobile and embedded devices. This
paper presents a computationally inexpensive stemming algorithm
for Bengali, which handles suffix removal in a domain independent
way. The evaluation of the proposed algorithm in a Bengali spelling
checker indicates that it can be effectively used in information
retrieval applications in general.},
owner = {zahurul},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/A_Light_Weight_Stemmer_for_Bengali_and_Its_Use_in_Spelling_Checker.pdf},
timestamp = {2011.08.02},
year = {2007}
}
2007.
Bangla Verb Morphology and a Multilingual Computational Morphology
FrameWork for PC-KIMMO. The Proceedings of Workshop on Morpho-Syntactic Analysis by
the School of Asian Applied Natural Language Processing for Language
Diversity and Language Resource Development (ADD), Bangkok, Thailand.
BibTeX
@inproceedings{Zahurul:Khan:2007,
author = {Islam, Md. Zahurul and Khan, Mumit},
title = {Bangla Verb Morphology and a Multilingual Computational Morphology
FrameWork for {PC-KIMMO}},
booktitle = {The Proceedings of Workshop on Morpho-Syntactic Analysis by
the School of Asian Applied Natural Language Processing for Language
Diversity and Language Resource Development (ADD), Bangkok, Thailand},
owner = {zahurul},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Bangla_Verb_Morphology_and_a_Multilingual_Computational_Morphology_FrameWork_for_PC-KIMMO-talk.pdf},
timestamp = {2011.08.02},
year = {2007}
}
2007.
Structural Classifiers of Text Types: Towards a Novel Model of
Text Representation. Journal for Language Technology and Computational
Linguistics (JLCL), 22(2):51–66.
BibTeX
@article{Mehler:Geibel:Pustylnikov:2007,
author = {Mehler, Alexander and Geibel, Peter and Abramov, Olga},
title = {Structural Classifiers of Text Types: Towards a Novel Model of
Text Representation},
journal = {Journal for Language Technology and Computational
Linguistics (JLCL)},
volume = {22},
number = {2},
pages = {51-66},
abstract = {Texts can be distinguished in terms of their content, function,
structure or layout (Brinker, 1992; Bateman et al., 2001; Joachims,
2002; Power et al., 2003). These reference points do not open
necessarily orthogonal perspectives on text classification. As
part of explorative data analysis, text classification aims at
automatically dividing sets of textual objects into classes of
maximum internal homogeneity and external heterogeneity. This
paper deals with classifying texts into text types whose instances
serve more or less homogeneous functions. Other than mainstream
approaches, which rely on the vector space model (Sebastiani,
2002) or some of its descendants (Baeza-Yates and Ribeiro-Neto,
1999) and, thus, on content-related lexical features, we solely
refer to structural differentiae. That is, we explore patterns
of text structure as determinants of class membership. Our starting
point are tree-like text representations which induce feature
vectors and tree kernels. These kernels are utilized in supervised
learning based on cross-validation as a method of model selection
(Hastie et al., 2001) by example of a corpus of press communication.
For a subset of categories we show that classification can be
performed very well by structural differentia only.},
pdf = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/mehler_geibel_pustylnikov_2007.pdf},
website = {http://citeseerx.ist.psu.edu/viewd