
Scientific Assistant
Goethe-Universität Frankfurt am Main
Robert-Mayer-Straße 10
Room 402
D-60325 Frankfurt am Main
D-60054 Frankfurt am Main (use for package delivery)
Postfach / P.O. Box: 154
Phone:
Fax:
Mail:
Office Hour: Tuesday, 8-10 AM
Follow me on ResearchGate
Thesis topic proposals
2025
Description
- TextAnnotator: A UIMA Based Tool for the Simultaneous and Collaborative Annotation of Texts
- Unleashing annotations with TextAnnotator: Multimedia, multi-perspective document views for ubiquitous annotation
- TextAnnotator: A flexible framework for semantic annotations
Corresponding Lab Member:
Description
- Unlocking the Heterogeneous Landscape of Big Data NLP with DUUI
- Efficient, uniform and scalable parallel NLP pre-processing with DUUI: Perspectives and Best Practice for the Digital Humanities
Corresponding Lab Member:
If you have any suggestions of your own relating to this or our other proposed topics, please do not hesitate to contact us.
In addition, we offer a free mailing list, which we use to send regular updates on new qualification and research work as well as other information relating to Texttechnology.
Projects
TextAnnotator

In different disciplines, scholars are supported in their research by the use of digital methods to process increasingly large amounts of data. For the necessary annotation, tools are required which should meet at least the following general requirements: they can handle diverse data and annotation levels within one tool, and they support the annotation process with automatic (pre-)processing outcomes as much as possible. We developed a framework that meets these general requirements and that enables versatile and browser-based annotations of texts, the TextAnnotator. It combines NLP methods of pre-processing with methods of flexible post-processing. In fact, machine learning (ML) requires a lot of training and test data, but is usually far from achieving perfect results. Producing high-level annotations for ML and post-correcting its results are therefore necessary. This is the purpose of TextAnnotator, which is entirely implemented in ExtJS and provides a range of interactive visualizations of annotations. In addition, it allows for flexibly integrating knowledge resources, e.g. in the course of post-processing named entity recognition. The paper describes TextAnnotator’s architecture together with different use cases: annotating temporal structures, argument structures, propositional structures, rhetorical structures and named entity linking.
If you use TextAnnotator, please cite it as follows:
BibTeX
@inproceedings{Abrami:Stoeckel:Mehler:2020,
  author    = {Abrami, Giuseppe and Stoeckel, Manuel and Mehler, Alexander},
  title     = {{TextAnnotator}: A {UIMA} Based Tool for the Simultaneous and Collaborative
    Annotation of Texts},
  booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference},
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  pages     = {891--900},
  isbn      = {979-10-95546-34-4},
  abstract  = {The annotation of texts and other material in the field of digital
    humanities and Natural Language Processing (NLP) is a common task
    of research projects. At the same time, the annotation of corpora
    is certainly the most time- and cost-intensive component in research
    projects and often requires a high level of expertise according
    to the research interest. However, for the annotation of texts,
    a wide range of tools is available, both for automatic and manual
    annotation. Since the automatic pre-processing methods are not
    error-free and there is an increasing demand for the generation
    of training data, also with regard to machine learning, suitable
    annotation tools are required. This paper defines criteria of
    flexibility and efficiency of complex annotations for the assessment
    of existing annotation tools. To extend this list of tools, the
    paper describes TextAnnotator, a browser-based, multi-annotation
    system, which has been developed to perform platform-independent
    multimodal annotations and annotate complex textual structures.
    The paper illustrates the current state of development of TextAnnotator
    and demonstrates its ability to evaluate annotation quality (inter-annotator
    agreement) at runtime. In addition, it will be shown how annotations
    of different users can be performed simultaneously and collaboratively
    on the same document from different platforms using UIMA as the
    basis for annotation.},
  url       = {https://www.aclweb.org/anthology/2020.lrec-1.112},
  keywords  = {textannotator},
  pdf       = {http://www.lrec-conf.org/proceedings/lrec2020/pdf/2020.lrec-1.112.pdf}
}
Publications
BibTeX
@inproceedings{Abrami:et:al:2021,
  author    = {Abrami, Giuseppe and Henlein, Alexander and L{\"u}cking, Andy and Kett, Attila
    and Adeberg, Pascal and Mehler, Alexander},
  title     = {Unleashing annotations with {TextAnnotator}: Multimedia, multi-perspective
    document views for ubiquitous annotation},
  booktitle = {Proceedings of the 17th Joint ACL - ISO Workshop on Interoperable
    Semantic Annotation},
  series    = {ISA-17},
  publisher = {Association for Computational Linguistics},
  address   = {Groningen, The Netherlands (online)},
  month     = jun,
  editor    = {Bunt, Harry},
  year      = {2021},
  url       = {https://aclanthology.org/2021.isa-1.7},
  pages     = {65--75},
  keywords  = {textannotator},
  pdf       = {https://iwcs2021.github.io/proceedings/isa/pdf/2021.isa-1.7.pdf},
  abstract  = {We argue that mainly due to technical innovation in the landscape
    of annotation tools, a conceptual change in annotation models
    and processes is also on the horizon. It is diagnosed that these
    changes are bound up with multi-media and multi-perspective facilities
    of annotation tools, in particular when considering virtual reality
    (VR) and augmented reality (AR) applications, their potential
    ubiquitous use, and the exploitation of externally trained natural
    language pre-processing methods. Such developments potentially
    lead to a dynamic and exploratory heuristic construction of the
    annotation process. With TextAnnotator an annotation suite is
    introduced which focuses on multi-mediality and multi-perspectivity
    with an interoperable set of task-specific annotation modules
    (e.g., for word classification, rhetorical structures, dependency
    trees, semantic roles, and more) and their linkage to VR and mobile
    implementations. The basic architecture and usage of TextAnnotator
    is described and related to the above mentioned shifts in the
    field.}
}
BibTeX
@inproceedings{Abrami:Mehler:Stoeckel:2020,
  author    = {Abrami, Giuseppe and Mehler, Alexander and Stoeckel, Manuel},
  title     = {{TextAnnotator}: A web-based annotation suite for texts},
  booktitle = {Proceedings of the Digital Humanities 2020},
  series    = {DH 2020},
  location  = {Ottawa, Canada},
  year      = {2020},
  url       = {https://dh2020.adho.org/wp-content/uploads/2020/07/547_TextAnnotatorAwebbasedannotationsuitefortexts.html},
  doi       = {10.17613/tenm-4907},
  abstract  = {The TextAnnotator is a tool for simultaneous and collaborative
    annotation of texts with visual annotation support, integration
    of knowledge bases and, by pipelining the TextImager, a rich variety
    of pre-processing and automatic annotation tools. It includes
    a variety of modules for the annotation of texts, which contains
    the annotation of argumentative, rhetorical, propositional and
    temporal structures as well as a module for named entity linking
    and rapid annotation of named entities. Especially the modules
    for annotation of temporal, argumentative and propositional structures
    are currently unique in web-based annotation tools. The TextAnnotator,
    which allows the annotation of texts as a platform, is divided
    into a front- and a backend component. The backend is a web service
    based on WebSockets, which integrates the UIMA Database Interface
    to manage and use texts. Texts are made accessible by using the
    ResourceManager and the AuthorityManager, based on user and group
    access permissions. Different views of a document can be created
    and used depending on the scenario. Once a document has been opened,
    access is gained to the annotations stored within annotation views
    in which these are organized. Any annotation view can be assigned
    with access permissions and by default, each user obtains his
    or her own user view for every annotated document. In addition,
    with sufficient access permissions, all annotation views can also
    be used and curated. This allows the possibility to calculate
    an Inter-Annotator-Agreement for a document, which shows an agreement
    between the annotators. Annotators without sufficient rights cannot
    display this value so that the annotators do not influence each
    other. This contribution is intended to reflect the current state
    of development of TextAnnotator, demonstrate the possibilities
    of an instantaneous Inter-Annotator-Agreement and trigger a discussion
    about further functions for the community.},
  keywords  = {textannotator},
  poster    = {https://hcommons.org/deposits/download/hc:31816/CONTENT/dh2020_textannotator_poster.pdf}
}
BibTeX
@inproceedings{Abrami:Stoeckel:Mehler:2020,
  author    = {Abrami, Giuseppe and Stoeckel, Manuel and Mehler, Alexander},
  title     = {{TextAnnotator}: A {UIMA} Based Tool for the Simultaneous and Collaborative
    Annotation of Texts},
  booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference},
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  pages     = {891--900},
  isbn      = {979-10-95546-34-4},
  abstract  = {The annotation of texts and other material in the field of digital
    humanities and Natural Language Processing (NLP) is a common task
    of research projects. At the same time, the annotation of corpora
    is certainly the most time- and cost-intensive component in research
    projects and often requires a high level of expertise according
    to the research interest. However, for the annotation of texts,
    a wide range of tools is available, both for automatic and manual
    annotation. Since the automatic pre-processing methods are not
    error-free and there is an increasing demand for the generation
    of training data, also with regard to machine learning, suitable
    annotation tools are required. This paper defines criteria of
    flexibility and efficiency of complex annotations for the assessment
    of existing annotation tools. To extend this list of tools, the
    paper describes TextAnnotator, a browser-based, multi-annotation
    system, which has been developed to perform platform-independent
    multimodal annotations and annotate complex textual structures.
    The paper illustrates the current state of development of TextAnnotator
    and demonstrates its ability to evaluate annotation quality (inter-annotator
    agreement) at runtime. In addition, it will be shown how annotations
    of different users can be performed simultaneously and collaboratively
    on the same document from different platforms using UIMA as the
    basis for annotation.},
  url       = {https://www.aclweb.org/anthology/2020.lrec-1.112},
  keywords  = {textannotator},
  pdf       = {http://www.lrec-conf.org/proceedings/lrec2020/pdf/2020.lrec-1.112.pdf}
}
BibTeX
@inproceedings{Abrami:et:al:2019,
  author    = {Abrami, Giuseppe and Mehler, Alexander and L{\"u}cking, Andy and Rieb, Elias
    and Helfrich, Philipp},
  title     = {{TextAnnotator}: A flexible framework for semantic annotations},
  booktitle = {Proceedings of the Fifteenth Joint ACL - ISO Workshop on Interoperable
    Semantic Annotation, (ISA-15)},
  series    = {ISA-15},
  location  = {Gothenburg, Sweden},
  month     = may,
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/TextAnnotator_IWCS_Göteborg.pdf},
  year      = {2019},
  keywords  = {textannotator},
  abstract  = {Modern annotation tools should meet at least the following general
    requirements: they can handle diverse data and annotation levels
    within one tool, and they support the annotation process with
    automatic (pre-)processing outcomes as much as possible. We developed
    a framework that meets these general requirements and that enables
    versatile and browser-based annotations of texts, the TextAnnotator.
    It combines NLP methods of pre-processing with methods of flexible
    post-processing. Infact, machine learning (ML) requires a lot
    of training and test data, but is usually far from achieving perfect
    results. Producing high-level annotations for ML and post-correcting
    its results are therefore necessary. This is the purpose of TextAnnotator,
    which is entirely implemented in ExtJS and provides a range of
    interactive visualizations of annotations. In addition, it allows
    for flexibly integrating knowledge resources, e.g. in the course
    of post-processing named entity recognition. The paper describes
    TextAnnotator’s architecture together with three use cases: annotating
    temporal structures, argument structures and named entity linking.}
}
GerParCor

GerParCor is a genre-specific corpus of (predominantly historical) German-language parliamentary protocols from three centuries and four countries, including state and federal level data. In addition, GerParCor contains conversions of scanned protocols and, in particular, of protocols in Fraktur converted via an OCR process based on Tesseract. All protocols were preprocessed by means of the NLP pipeline of spaCy3 and automatically annotated with metadata regarding their session date. GerParCor is made available in the XMI format of the UIMA project. In this way, GerParCor can be used as a large corpus of historical texts in the field of political communication for various tasks in NLP.
The corpus (LREC 2022) is available via .
The latest corpus (LREC-COLING 2024) is available via .
BibTeX
@inproceedings{Abrami:et:al:2024:a,
  author    = {Abrami, Giuseppe and Bagci, Mevl{\"u}t and Mehler, Alexander},
  title     = {{G}erman Parliamentary Corpus ({G}er{P}ar{C}or) Reloaded},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational
    Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  editor    = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro
    and Sakti, Sakriani and Xue, Nianwen},
  publisher = {ELRA and ICCL},
  address   = {Torino, Italy},
  year      = {2024},
  pages     = {7707--7716},
  url       = {https://aclanthology.org/2024.lrec-main.681},
  pdf       = {https://aclanthology.org/2024.lrec-main.681.pdf},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2024/05/GerParCor_Reloaded_Poster.pdf},
  video     = {https://www.youtube.com/watch?v=5X-w_oXOAYo},
  keywords  = {gerparcor,corpus},
  abstract  = {In 2022, the largest German-speaking corpus of parliamentary protocols
    from three different centuries, on a national and federal level
    from the countries of Germany, Austria, Switzerland and Liechtenstein,
    was collected and published - GerParCor. Through GerParCor, it
    became possible to provide for the first time various parliamentary
    protocols which were not available digitally and, moreover, could
    not be retrieved and processed in a uniform manner. Furthermore,
    GerParCor was additionally preprocessed using NLP methods and
    made available in XMI format. In this paper, GerParCor is significantly
    updated by including all new parliamentary protocols in the corpus,
    as well as adding and preprocessing further parliamentary protocols
    previously not covered, so that a period up to 1797 is now covered.
    Besides the integration of a new, state-of-the-art and appropriate
    NLP preprocessing for the handling of large text corpora, this
    update also provides an overview of the further reuse of GerParCor
    by presenting various provisioning capabilities such as API's,
    among others.}
}
BibTeX
@inproceedings{Abrami:Bagci:Hammerla:Mehler:2022,
  author    = {Abrami, Giuseppe and Bagci, Mevl{\"u}t and Hammerla, Leon and Mehler, Alexander},
  editor    = {Calzolari, Nicoletta and B{\'e}chet, Fr{\'e}d{\'e}ric and Blache, Philippe
    and Choukri, Khalid and Cieri, Christopher and Declerck, Thierry and Goggi, Sara
    and Isahara, Hitoshi and Maegaard, Bente and Mariani, Joseph and Mazo, H{\'e}l{\`e}ne
    and Odijk, Jan and Piperidis, Stelios},
  title     = {{German} Parliamentary Corpus ({GerParCor})},
  booktitle = {Proceedings of the Language Resources and Evaluation Conference},
  year      = {2022},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  pages     = {1900--1906},
  abstract  = {Parliamentary debates represent a large and partly unexploited
    treasure trove of publicly accessible texts. In the German-speaking
    area, there is a certain deficit of uniformly accessible and annotated
    corpora covering all German-speaking parliaments at the national
    and federal level. To address this gap, we introduce the German
    Parliamentary Corpus (GerParCor). GerParCor is a genre-specific
    corpus of (predominantly historical) German-language parliamentary
    protocols from three centuries and four countries, including state
    and federal level data. In addition, GerParCor contains conversions
    of scanned protocols and, in particular, of protocols in Fraktur
    converted via an OCR process based on Tesseract. All protocols
    were preprocessed by means of the NLP pipeline of spaCy3 and automatically
    annotated with metadata regarding their session date. GerParCor
    is made available in the XMI format of the UIMA project. In this
    way, GerParCor can be used as a large corpus of historical texts
    in the field of political communication for various tasks in NLP.},
  url       = {https://aclanthology.org/2022.lrec-1.202},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2022/06/GerParCor_LREC_2022.pdf},
  keywords  = {gerparcor},
  pdf       = {http://www.lrec-conf.org/proceedings/lrec2022/pdf/2022.lrec-1.202.pdf}
}
Docker Unified UIMA Interface (DUUI)

Automatic analysis of large corpora is a complex task, especially in terms of time efficiency. This complexity is increased by the fact that flexible, extensible text analysis requires the continuous integration of ever new tools. Since there are no adequate frameworks for these purposes in the field of NLP, and especially in the context of UIMA, that are not outdated or unusable for security reasons, we present a new approach to address the latter task: Docker Unified UIMA Interface (DUUI), a scalable, flexible, lightweight, and feature-rich framework for automatic distributed analysis of text corpora that leverages Big Data experience and virtualization with Docker. The communication approach of DUUI compared to other state-of-the-art approaches has been evaluated and it shows outstanding behavior in terms of time efficiency, enabling the analysis of big text data.
DUUI is available via ; also some DUUI components as Docker images.
If you use DUUI, please cite it as follows:
BibTeX
@inproceedings{Leonhardt:et:al:2023,
  author    = {Leonhardt, Alexander and Abrami, Giuseppe and Baumartz, Daniel
    and Mehler, Alexander},
  title     = {Unlocking the Heterogeneous Landscape of Big Data {NLP} with {DUUI}},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2023},
  editor    = {Bouamor, Houda and Pino, Juan and Bali, Kalika},
  publisher = {Association for Computational Linguistics},
  address   = {Singapore},
  year      = {2023},
  pages     = {385--399},
  url       = {https://aclanthology.org/2023.findings-emnlp.29},
  pdf       = {https://aclanthology.org/2023.findings-emnlp.29.pdf},
  keywords  = {duui},
  abstract  = {Automatic analysis of large corpora is a complex task, especially
    in terms of time efficiency. This complexity is increased by the
    fact that flexible, extensible text analysis requires the continuous
    integration of ever new tools. Since there are no adequate frameworks
    for these purposes in the field of NLP, and especially in the
    context of UIMA, that are not outdated or unusable for security
    reasons, we present a new approach to address the latter task:
    Docker Unified UIMA Interface (DUUI), a scalable, flexible, lightweight,
    and feature-rich framework for automatic distributed analysis
    of text corpora that leverages Big Data experience and virtualization
    with Docker. We evaluate DUUI{'}s communication approach against
    a state-of-the-art approach and demonstrate its outstanding behavior
    in terms of time efficiency, enabling the analysis of big text
    data.}
}
Publications related to DUUI
BibTeX
@article{Abrami:et:al:2025:a,
  title    = {{Docker Unified UIMA Interface}: New perspectives for {NLP} on big data},
  journal  = {SoftwareX},
  volume   = {29},
  pages    = {102033},
  year     = {2025},
  issn     = {2352-7110},
  doi      = {10.1016/j.softx.2024.102033},
  url      = {https://www.sciencedirect.com/science/article/pii/S2352711024004047},
  author   = {Giuseppe Abrami and Markos Genios and Filip Fitzermann and Daniel Baumartz
    and Alexander Mehler},
  keywords = {duui, Docker, Kubernetes, UIMA, Distributed NLP},
  abstract = {Processing large amounts of natural language text using machine
    learning-based models is becoming important in many disciplines.
    This demand is being met by a variety of approaches, resulting
    in the heterogeneous deployment of separate, partly incompatible,
    not natively scalable applications. To overcome the technological
    bottleneck involved, we have developed Docker Unified UIMA Interface,
    a system for the standardized, parallel, platform-independent,
    distributed and microservices-based solution for processing large
    and extensive text corpora with any NLP method. We present DUUI
    as a framework that enables automated orchestration of GPU-based
    NLP processes beyond the existing Docker Swarm cluster variant,
    and in addition to the adaptation to new runtime environments
    such as Kubernetes. Therefore, a new driver for DUUI is introduced,
    which enables the lightweight orchestration of DUUI processes
    within a Kubernetes environment in a scalable setup. In this way,
    the paper opens up novel text-technological perspectives for existing
    practices in disciplines that deal with the scientific analysis
    of large amounts of data based on NLP.}
}
BibTeX
@inproceedings{Abrami:et:al:2025:b,
  author    = {Abrami, Giuseppe and Baumartz, Daniel and Mehler, Alexander},
  title     = {{DUUI}: A Toolbox for the Construction of a new Kind of Natural
    Language Processing},
  year      = {2025},
  booktitle = {Proceedings of the DHd 2025: Under Construction. Geisteswissenschaften
    und Data Humanities},
  numpages  = {3},
  location  = {Bielefeld, Germany},
  series    = {DHd 2025},
  publisher = {Zenodo},
  keywords  = {duui},
  pages     = {446--448},
  doi       = {10.5281/zenodo.14943128},
  url       = {https://doi.org/10.5281/zenodo.14943128},
  poster    = {https://zenodo.org/records/14944575}
}
BibTeX
@inproceedings{Abrami:Mehler:2024,
  author    = {Abrami, Giuseppe and Mehler, Alexander},
  title     = {Efficient, uniform and scalable parallel {NLP} pre-processing with
    {DUUI}: Perspectives and Best Practice for the Digital Humanities},
  year      = {2024},
  month     = aug,
  editor    = {Karajgikar, Jajwalya and Janco, Andrew and Otis, Jessica},
  booktitle = {Digital Humanities Conference 2024 - Book of Abstracts (DH 2024)},
  location  = {Washington, DC, USA},
  series    = {DH},
  keywords  = {duui},
  publisher = {Zenodo},
  doi       = {10.5281/zenodo.13761079},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2024/12/DH2024_Poster.pdf},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2024/12/DH2024_Abstract.pdf},
  url       = {https://doi.org/10.5281/zenodo.13761079},
  pages     = {15--18},
  numpages  = {4}
}
BibTeX
@inproceedings{Leonhardt:et:al:2023,
  author    = {Leonhardt, Alexander and Abrami, Giuseppe and Baumartz, Daniel
    and Mehler, Alexander},
  title     = {Unlocking the Heterogeneous Landscape of Big Data {NLP} with {DUUI}},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2023},
  editor    = {Bouamor, Houda and Pino, Juan and Bali, Kalika},
  publisher = {Association for Computational Linguistics},
  address   = {Singapore},
  year      = {2023},
  pages     = {385--399},
  url       = {https://aclanthology.org/2023.findings-emnlp.29},
  pdf       = {https://aclanthology.org/2023.findings-emnlp.29.pdf},
  keywords  = {duui},
  abstract  = {Automatic analysis of large corpora is a complex task, especially
    in terms of time efficiency. This complexity is increased by the
    fact that flexible, extensible text analysis requires the continuous
    integration of ever new tools. Since there are no adequate frameworks
    for these purposes in the field of NLP, and especially in the
    context of UIMA, that are not outdated or unusable for security
    reasons, we present a new approach to address the latter task:
    Docker Unified UIMA Interface (DUUI), a scalable, flexible, lightweight,
    and feature-rich framework for automatic distributed analysis
    of text corpora that leverages Big Data experience and virtualization
    with Docker. We evaluate DUUI{'}s communication approach against
    a state-of-the-art approach and demonstrate its outstanding behavior
    in terms of time efficiency, enabling the analysis of big text
    data.}
}
Publications
2025
BibTeX
@article{Bagci:et:al:2025,
  author    = {Bagci, Mevl{\"u}t and Mehler, Alexander and Abrami, Giuseppe and Schrottenbacher, Patrick
    and Spiekermann, Christian and Konca, Maxim and Schreiber, Jakob and Saukel, Kevin
    and Quintino, Marc and Engel, Juliane},
  title     = {Simulation-Based Learning in Virtual Reality: Three Use Cases
    from Social Science and Technological Foundations in Terms of
    {Va.Si.Li-Lab}},
  journal   = {Technology, Knowledge and Learning},
  publisher = {Springer Nature},
  year      = {2025},
  month     = apr,
  day       = {01},
  abstract  = {This article examines the predictability of communication scenarios
    within the context of simulation-based learning in virtual reality
    (VR). The aim is to investigate multimodal patterns of social
    interaction that accompany human communication in conflict situations.
    Understanding these patterns can ultimately enhance educational
    technologies' ability to address problematic learning situations
    and support learners in benefiting from VR-based learning. To
    achieve this, the system must accurately predict the task context.
    A central goal of this article is to shed light on this potential.
    Additionally, our research extends to visual communication beyond
    purely linguistic interactions, aiming to enhance VR immersion
    in communicative practices. To this end, the article examines
    the associations between multimodal information units generated
    by individuals interacting in three distinct learning scenarios:
    work organization, school pedagogy, and social life. Several experiments
    demonstrate that predictability exists when multimodal communication
    is analyzed at the level of eight coarse-grained modalities, including
    speech, head and body movements, and gestures. The interactions
    are observed in VR using Va.Si.Li-Lab, a simulation-based system
    that virtualizes learning scenarios, enabling participants to
    collaboratively manage potentially conflicting tasks through multimodal
    communication (Mehler et al. in: Duffy (ed) Digital human modeling
    and applications in health, safety, ergonomics and risk management,
    Springer Nature Switzerland, Cham, 2023). The article discusses
    the technology underlying Va.Si.Li-Lab, its database, and the
    post-processing of interaction data, including speech data. It
    provides theoretical motivation for the application scenarios
    and presents experimental data to illustrate the system's usefulness.
    Based on these data, the article details experiments on the multimodal
    detection of social scenarios, positioning Va.Si.Li-Lab as a use
    case in simulation-based learning.},
  issn      = {2211-1670},
  doi       = {10.1007/s10758-025-09837-7},
  url       = {https://doi.org/10.1007/s10758-025-09837-7}
}
BibTeX
@inproceedings{Boenisch:et:al:2025,
  author    = {Kevin B{\"o}nisch and Giuseppe Abrami and Alexander Mehler},
  title     = {Towards Unified, Dynamic and Annotation-based Visualisations and
    Exploration of Annotated Big Data Corpora with the Help of Unified
    Corpus Explorer},
  booktitle = {2025 Annual Conference of the North American Chapter of the Association
    for Computational Linguistics -- System Demonstration Track},
  year      = {2025},
  note      = {accepted, Best Demo Award},
  keywords  = {uce,biofid},
  video     = {https://www.youtube.com/watch?v=f3kB9pNPjsk}
}
BibTeX
@article{Abrami:et:al:2025:a,
  title    = {{Docker Unified UIMA Interface}: New perspectives for {NLP} on big data},
  journal  = {SoftwareX},
  volume   = {29},
  pages    = {102033},
  year     = {2025},
  issn     = {2352-7110},
  doi      = {10.1016/j.softx.2024.102033},
  url      = {https://www.sciencedirect.com/science/article/pii/S2352711024004047},
  author   = {Giuseppe Abrami and Markos Genios and Filip Fitzermann and Daniel Baumartz
    and Alexander Mehler},
  keywords = {duui, Docker, Kubernetes, UIMA, Distributed NLP},
  abstract = {Processing large amounts of natural language text using machine
    learning-based models is becoming important in many disciplines.
    This demand is being met by a variety of approaches, resulting
    in the heterogeneous deployment of separate, partly incompatible,
    not natively scalable applications. To overcome the technological
    bottleneck involved, we have developed Docker Unified UIMA Interface,
    a system for the standardized, parallel, platform-independent,
    distributed and microservices-based solution for processing large
    and extensive text corpora with any NLP method. We present DUUI
    as a framework that enables automated orchestration of GPU-based
    NLP processes beyond the existing Docker Swarm cluster variant,
    and in addition to the adaptation to new runtime environments
    such as Kubernetes. Therefore, a new driver for DUUI is introduced,
    which enables the lightweight orchestration of DUUI processes
    within a Kubernetes environment in a scalable setup. In this way,
    the paper opens up novel text-technological perspectives for existing
    practices in disciplines that deal with the scientific analysis
    of large amounts of data based on NLP.}
}
BibTeX
@inproceedings{Abrami:et:al:2025:b,
  author    = {Abrami, Giuseppe and Baumartz, Daniel and Mehler, Alexander},
  title     = {{DUUI}: A Toolbox for the Construction of a new Kind of Natural
    Language Processing},
  year      = {2025},
  booktitle = {Proceedings of the DHd 2025: Under Construction. Geisteswissenschaften
    und Data Humanities},
  numpages  = {3},
  location  = {Bielefeld, Germany},
  series    = {DHd 2025},
  publisher = {Zenodo},
  keywords  = {duui},
  pages     = {446--448},
  doi       = {10.5281/zenodo.14943128},
  url       = {https://doi.org/10.5281/zenodo.14943128},
  poster    = {https://zenodo.org/records/14944575}
}
BibTeX
@article{Schrottenbacher:et:al:2025,
  author    = {Schrottenbacher, Patrick and Mehler, Alexander and Berg, Theresa
    and Hustedt, Jasper and Gagel, Julian and L{\"u}ttig, Timo and Abrami, Giuseppe},
  title     = {Geo-spatial hypertext in virtual reality: mapping and navigating
    global news event spaces},
  journal   = {New Review of Hypermedia and Multimedia},
  volume    = {31},
  number    = {1-2},
  pages     = {76--105},
  year      = {2025},
  publisher = {Taylor \& Francis},
  doi       = {10.1080/13614568.2024.2383601},
  url       = {https://doi.org/10.1080/13614568.2024.2383601},
  eprint    = {https://doi.org/10.1080/13614568.2024.2383601},
  abstract  = {Every day, a myriad of events take place that are documented and
    shared online through news articles from a variety of sources.
    As a result, as users navigate the Web, the volume of data can
    lead to information overload, making it difficult to find specific
    details about an event. We present News in Time and Space (NiTS)
    to address this issue: NiTS is a fully immersive system integrated
    into Va.Si.Li-Lab that organises textual information in a geospatial
    hypertext system in virtual reality. With NiTS, users can visualise,
    filter and interact with information currently based on GDELT
    on a virtual globe providing document networks to analyse global
    events and trends. The article describes NiTS, its event semantics
    and architecture. It evaluates NiTS in comparison to a classic
    search engine website, extended by NiTSs information filtering
    capabilities to make it comparable. Our comparison with this website
    technology, which is directly linked to the user's usage habits,
    shows that NiTS enables comparable information exploration even
    if the users have little or no experience with VR. That is, we
    observe an equivalent search result behaviour, but with the advantage
    that VR allows users to get their results with a higher level
    of usability without distracting them from their tasks. Through
    its integration with Va.Si.Li-Lab, a simulation-based learning
    environment, NiTS can be used in simulations of learning processes
    aimed at studying critical online reasoning, where Va.Si.Li-Lab
    guarantees that this can be done in relation to individual or
    groups of learners.}
}
BibTeX
@article{Boenisch:et:al:2025:b,
  author = {B{\"o}nisch, Kevin and Mehler, Alexander and Babbili, Shaduan
    and Heinrich, Yannick and Stephan, Philipp and Abrami, Giuseppe},
  title = {{Viki LibraRy}: Collaborative Hypertext Browsing and Navigation
    in Virtual Reality},
  journal = {New Review of Hypermedia and Multimedia},
  volume = {31},
  number = {1-2},
  pages = {45--75},
  year = {2025},
  publisher = {Taylor \& Francis},
  doi = {10.1080/13614568.2024.2383581},
  url = {https://doi.org/10.1080/13614568.2024.2383581},
  abstract = {We present Viki LibraRy, a dynamically built library in virtual
    reality (VR) designed to visualize hypertext systems, with an
    emphasis on collaborative interaction and spatial immersion. Viki
    LibraRy goes beyond traditional methods of text distribution by
    providing a platform where users can share, process, and engage
    with textual information. It operates at the interface of VR,
    collaborative learning and spatial data processing to make reading
    tangible and memorable in a spatially mediated way. The article
    describes the building blocks of Viki LibraRy, its underlying
    architecture, and several use cases. It evaluates Viki LibraRy
    in comparison to a conventional web interface for text retrieval
    and reading. The article shows that Viki LibraRy provides users
    with spatial references for structuring their recall, so that
    they can better remember consulted texts and their meta-information
    (e.g. in terms of subject areas and content categories).}
}
2024
BibTeX
@incollection{Mehler:et:al:2024:a,
  author = {Mehler, Alexander and Bagci, Mevl{\"u}t and Schrottenbacher, Patrick
    and Henlein, Alexander and Konca, Maxim and Abrami, Giuseppe and B{\"o}nisch, Kevin
    and Stoeckel, Manuel and Spiekermann, Christian and Engel, Juliane},
  editor = {Zlatkin-Troitschanskaia, Olga and Nagel, Marie-Theres and Klose, Verena
    and Mehler, Alexander},
  title = {Towards New Data Spaces for the Study of Multiple Documents with
    {Va.Si.Li-Lab}: A Conceptual Analysis},
  booktitle = {Students', Graduates' and Young Professionals' Critical Use of
    Online Information: Digital Performance Assessment and Training
    within and across Domains},
  year = {2024},
  publisher = {Springer Nature Switzerland},
  address = {Cham},
  pages = {259--303},
  isbn = {978-3-031-69510-0},
  doi = {10.1007/978-3-031-69510-0_12},
  url = {https://doi.org/10.1007/978-3-031-69510-0_12},
  abstract = {The constitution of multiple documents has so far been studied
    essentially as a process in which a single learner consults a
    number (of segments) of different documents in the context of
    the task at hand in order to construct a mental model for the
    purpose of completing the task. As a result of this research focus,
    the constitution of multiple documents appears predominantly as
    a monomodal, non-interactive process in which mainly textual units
    are studied, supplemented by images, text-image relations and
    comparable artifacts. This approach is reflected in the contextual
    fixity of the research design, in which the learners under study
    search for information using suitably equipped computers. If,
    on the other hand, we consider the openness of multi-agent learning
    situations, this scenario lacks the aspects of interactivity,
    contextual openness and, above all, the multimodality of information
    objects, information processing and information exchange. This
    is where the chapter comes in. It describes Va.Si.Li-Lab as an
    instrument for multimodal measurement for studying and modeling
    multiple documents in the context of interactive learning in a
    multi-agent environment. To this end, the chapter places Va.Si.Li-Lab
    in the spectrum of evolutionary approaches that vary the combination
    of human and machine innovation and selection. It also combines
    the requirements of multimodal representational learning with
    various aspects of contextual plasticity to prepare Va.Si.Li-Lab
    as a system that can be used for experimental research. The chapter
    is conceptual in nature, designing a system of requirements using
    the example of Va.Si.Li-Lab to outline an experimental environment
    in which the study of Critical Online Reasoning (COR) as a group
    process becomes possible. Although the chapter illustrates some
    of these requirements with realistic data from the field of simulation-based
    learning, the focus is still conceptual rather than experimental,
    hypothesis-driven. That is, the chapter is concerned with the
    design of a technology for future research into COR processes.}
}
BibTeX
@inproceedings{Abrami:et:al:2024:b,
  author = {Abrami, Giuseppe and Wontke, Dominik Alexander and Singh, Gurpreet
    and Mehler, Alexander},
  title = {{Va.Si.Li-ES}: {VR}-based Dynamic Event Processing, Environment Change
    and User Feedback in {Va.Si.Li-Lab}},
  booktitle = {Proceedings of the 35th ACM Conference on Hypertext and Social Media},
  series = {HT '24},
  location = {Poznan, Poland},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  year = {2024},
  pages = {357--368},
  numpages = {12},
  isbn = {9798400705953},
  doi = {10.1145/3648188.3675154},
  url = {https://doi.org/10.1145/3648188.3675154},
  keywords = {Collaborative Simulation, Environmental Event System, Hypertext, Ubiq, Va.Si.Li-Lab, Virtual Reality},
  abstract = {Flexibility, adaptability, modularity, and extensibility in the
    context of a collaborative system are critical features for multi-user
    hypertext systems. In addition to facilitating acceptance and
    increasing reusability, these features simplify development cycles
    and enable a larger range of application areas. However, especially
    in virtual 3D hypertext systems, many of the features are only
    partially available or not available at all. To fill this gap,
    we present an approach to virtual hypertext systems for the realization
    of dynamic event systems. Such an event system can be created
    and serialized simultaneously at run time regarding the modification
    of situational, environmental parameters. This includes informing
    users and allowing them to participate in the environmental dynamics
    of the system. We present Va.Si.Li-ES as a module of Va.Si.Li-Lab,
    describe several environmental scenarios that can be adapted,
    and provide use cases in the context of 3D hypertext systems.}
}
BibTeX
@inproceedings{Abrami:Mehler:2024,
  author = {Abrami, Giuseppe and Mehler, Alexander},
  title = {Efficient, uniform and scalable parallel {NLP} pre-processing with
    {DUUI}: Perspectives and Best Practice for the Digital Humanities},
  editor = {Karajgikar, Jajwalya and Janco, Andrew and Otis, Jessica},
  booktitle = {Digital Humanities Conference 2024 - Book of Abstracts (DH 2024)},
  series = {DH},
  location = {Washington, DC, USA},
  publisher = {Zenodo},
  year = {2024},
  month = aug,
  pages = {15--18},
  numpages = {4},
  doi = {10.5281/zenodo.13761079},
  url = {https://doi.org/10.5281/zenodo.13761079},
  pdf = {https://www.texttechnologylab.org/wp-content/uploads/2024/12/DH2024_Abstract.pdf},
  poster = {https://www.texttechnologylab.org/wp-content/uploads/2024/12/DH2024_Poster.pdf},
  keywords = {duui}
}
BibTeX
@inproceedings{Luecking:et:al:2024,
  author = {L{\"u}cking, Andy and Abrami, Giuseppe and Hammerla, Leon and Rahn, Marc
    and Baumartz, Daniel and Eger, Steffen and Mehler, Alexander},
  title = {Dependencies over Times and Tools ({DoTT})},
  editor = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro
    and Sakti, Sakriani and Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational
    Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  publisher = {ELRA and ICCL},
  address = {Torino, Italy},
  year = {2024},
  month = may,
  pages = {4641--4653},
  url = {https://aclanthology.org/2024.lrec-main.415},
  poster = {https://www.texttechnologylab.org/wp-content/uploads/2024/05/LREC_2024_Poster_DoTT.pdf},
  abstract = {Purpose: Based on the examples of English and German, we investigate
    to what extent parsers trained on modern variants of these languages
    can be transferred to older language levels without loss. Methods:
    We developed a treebank called DoTT (https://github.com/texttechnologylab/DoTT)
    which covers, roughly, the time period from 1800 until today,
    in conjunction with the further development of the annotation
    tool DependencyAnnotator. DoTT consists of a collection of diachronic
    corpora enriched with dependency annotations using 3 parsers,
    6 pre-trained language models, 5 newly trained models for German,
    and two tag sets (TIGER and Universal Dependencies). To assess
    how the different parsers perform on texts from different time
    periods, we created a gold standard sample as a benchmark. Results:
    We found that the parsers/models perform quite well on modern
    texts (document-level LAS ranging from 82.89 to 88.54) and slightly
    worse on older texts, as expected (average document-level LAS
    84.60 vs. 86.14), but not significantly. For German texts, the
    (German) TIGER scheme achieved slightly better results than UD.
    Conclusion: Overall, this result speaks for the transferability
    of parsers to past language levels, at least dating back until
    around 1800. This very transferability, it is however argued,
    means that studies of language change in the field of dependency
    syntax can draw on dependency distance but miss out on some grammatical
    phenomena.}
}
BibTeX
@inproceedings{Abrami:et:al:2024:a,
  author = {Abrami, Giuseppe and Bagci, Mevl{\"u}t and Mehler, Alexander},
  title = {{German} Parliamentary Corpus ({GerParCor}) Reloaded},
  editor = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro
    and Sakti, Sakriani and Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational
    Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  publisher = {ELRA and ICCL},
  address = {Torino, Italy},
  year = {2024},
  pages = {7707--7716},
  url = {https://aclanthology.org/2024.lrec-main.681},
  pdf = {https://aclanthology.org/2024.lrec-main.681.pdf},
  poster = {https://www.texttechnologylab.org/wp-content/uploads/2024/05/GerParCor_Reloaded_Poster.pdf},
  video = {https://www.youtube.com/watch?v=5X-w_oXOAYo},
  keywords = {gerparcor,corpus},
  abstract = {In 2022, the largest German-speaking corpus of parliamentary protocols
    from three different centuries, on a national and federal level
    from the countries of Germany, Austria, Switzerland and Liechtenstein,
    was collected and published - GerParCor. Through GerParCor, it
    became possible to provide for the first time various parliamentary
    protocols which were not available digitally and, moreover, could
    not be retrieved and processed in a uniform manner. Furthermore,
    GerParCor was additionally preprocessed using NLP methods and
    made available in XMI format. In this paper, GerParCor is significantly
    updated by including all new parliamentary protocols in the corpus,
    as well as adding and preprocessing further parliamentary protocols
    previously not covered, so that a period up to 1797 is now covered.
    Besides the integration of a new, state-of-the-art and appropriate
    NLP preprocessing for the handling of large text corpora, this
    update also provides an overview of the further reuse of GerParCor
    by presenting various provisioning capabilities such as API's,
    among others.}
}
2023
BibTeX
@inproceedings{Boenisch:et:al:2023,
  author = {B{\"o}nisch, Kevin and Abrami, Giuseppe and Wehnert, Sabine and Mehler, Alexander},
  title = {{Bundestags-Mine}: Natural Language Processing for Extracting
    Key Information from Government Documents},
  booktitle = {Legal Knowledge and Information Systems},
  publisher = {IOS Press},
  year = {2023},
  isbn = {9781643684734},
  issn = {1879-8314},
  doi = {10.3233/FAIA230996},
  url = {https://doi.org/10.3233/FAIA230996}
}
BibTeX
@inproceedings{Leonhardt:et:al:2023,
  author = {Leonhardt, Alexander and Abrami, Giuseppe and Baumartz, Daniel
    and Mehler, Alexander},
  title = {Unlocking the Heterogeneous Landscape of Big Data {NLP} with {DUUI}},
  editor = {Bouamor, Houda and Pino, Juan and Bali, Kalika},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2023},
  publisher = {Association for Computational Linguistics},
  address = {Singapore},
  year = {2023},
  pages = {385--399},
  url = {https://aclanthology.org/2023.findings-emnlp.29},
  pdf = {https://aclanthology.org/2023.findings-emnlp.29.pdf},
  keywords = {duui},
  abstract = {Automatic analysis of large corpora is a complex task, especially
    in terms of time efficiency. This complexity is increased by the
    fact that flexible, extensible text analysis requires the continuous
    integration of ever new tools. Since there are no adequate frameworks
    for these purposes in the field of NLP, and especially in the
    context of UIMA, that are not outdated or unusable for security
    reasons, we present a new approach to address the latter task:
    Docker Unified UIMA Interface (DUUI), a scalable, flexible, lightweight,
    and feature-rich framework for automatic distributed analysis
    of text corpora that leverages Big Data experience and virtualization
    with Docker. We evaluate DUUI{'}s communication approach against
    a state-of-the-art approach and demonstrate its outstanding behavior
    in terms of time efficiency, enabling the analysis of big text
    data.}
}
BibTeX
@inproceedings{Babbili:et:al:2023,
  author = {Babbili, Shaduan and B{\"o}nisch, Kevin and Heinrich, Yannick
    and Stephan, Philipp and Abrami, Giuseppe and Mehler, Alexander},
  title = {{Viki LibraRy}: A Virtual Reality Library for Collaborative Browsing
    and Navigation through Hypertext},
  booktitle = {Proceedings of the 34th ACM Conference on Hypertext and Social Media},
  series = {HT '23},
  location = {Rome, Italy},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  year = {2023},
  articleno = {6},
  numpages = {3},
  isbn = {9798400702327},
  doi = {10.1145/3603163.3609079},
  url = {https://doi.org/10.1145/3603163.3609079},
  pdf = {https://dl.acm.org/doi/pdf/10.1145/3603163.3609079},
  keywords = {virtual reality simulation, virtual reality, virtual hypertext, virtual museum},
  abstract = {We present Viki LibraRy, a virtual-reality-based system for generating
    and exploring online information as a spatial hypertext. It creates
    a virtual library based on Wikipedia in which Rooms are used to
    make data available via a RESTful backend. In these Rooms, users
    can browse through all articles of the corresponding Wikipedia
    category in the form of Books. In addition, users can access different
    Rooms, through virtual portals. Beyond that, the explorations
    can be done alone or collaboratively, using Ubiq.}
}
BibTeX
@inproceedings{Gagel:et:al:2023,
  author = {Gagel, Julian and Hustedt, Jasper and L{\"u}ttig, Timo and Berg, Theresa
    and Abrami, Giuseppe and Mehler, Alexander},
  title = {News in Time and Space: Global Event Exploration in Virtual Reality},
  booktitle = {Proceedings of the 34th ACM Conference on Hypertext and Social Media},
  series = {HT '23},
  location = {Rome, Italy},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  year = {2023},
  articleno = {7},
  numpages = {3},
  isbn = {9798400702327},
  doi = {10.1145/3603163.3609080},
  url = {https://doi.org/10.1145/3603163.3609080},
  pdf = {https://dl.acm.org/doi/pdf/10.1145/3603163.3609080},
  keywords = {virtual hypertext, human data interaction, spatial computing, virtual reality simulation, geographic information systems, virtual reality},
  abstract = {We present News in Time and Space (NiTS), a virtual reality application
    for visualization, filtering and interaction with geo-referenced
    events based on GDELT. It can be used both via VR glasses and
    as a desktop solution for shared use by multiple users with Ubiq.
    The aim of NiTS is to provide overviews of global events and trends
    in order to create a resource for their monitoring and analysis.}
}
BibTeX
@inproceedings{Abrami:et:al:2023,
  author = {Abrami, Giuseppe and Mehler, Alexander and Bagci, Mevl{\"u}t and Schrottenbacher, Patrick
    and Henlein, Alexander and Spiekermann, Christian and Engel, Juliane
    and Schreiber, Jakob},
  title = {{Va.Si.Li-Lab} as a Collaborative Multi-User Annotation Tool in
    Virtual Reality and Its Potential Fields of Application},
  booktitle = {Proceedings of the 34th ACM Conference on Hypertext and Social Media},
  series = {HT '23},
  location = {Rome, Italy},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  year = {2023},
  articleno = {22},
  numpages = {9},
  isbn = {9798400702327},
  doi = {10.1145/3603163.3609076},
  url = {https://doi.org/10.1145/3603163.3609076},
  pdf = {https://dl.acm.org/doi/pdf/10.1145/3603163.3609076},
  keywords = {VaSiLiLab, virtual hypertext, virtual reality, virtual reality simulation, authoring system},
  abstract = {During the last thirty years a variety of hypertext approaches
    and virtual environments -- some virtual hypertext environments
    -- have been developed and discussed. Although the development
    of virtual and augmented reality technologies is rapid and improving,
    and many technologies can be used at affordable conditions, their
    usability for hypertext systems has not yet been explored. At
    the same time, even for virtual three-dimensional virtual and
    augmented environments, there is no generally accepted concept
    that is similar or nearly as elegant as hypertext. This gap will
    have to be filled in the next years and a good concept should
    be developed; in this article we aim to contribute in this direction
    and also introduce a prototype for a possible implementation of
    criteria for virtual hypertext simulations.}
}
BibTeX
@inproceedings{Henlein:et:al:2023b,
  author = {Henlein, Alexander and Kett, Attila and Baumartz, Daniel and Abrami, Giuseppe
    and Mehler, Alexander and Bastian, Johannes and Blecher, Yannic and Budgenhagen, David
    and Christof, Roman and Ewald, Tim-Oliver and Fauerbach, Tim and Masny, Patrick
    and Mende, Julian and Schn{\"u}re, Paul and Viel, Marc},
  title = {Semantic Scene Builder: Towards a Context Sensitive Text-to-3D Scene Framework},
  editor = {Duffy, Vincent G.},
  booktitle = {Digital Human Modeling and Applications in Health, Safety, Ergonomics
    and Risk Management},
  publisher = {Springer Nature Switzerland},
  address = {Cham},
  year = {2023},
  pages = {461--479},
  isbn = {978-3-031-35748-0},
  doi = {10.1007/978-3-031-35748-0_32},
  abstract = {We introduce Semantic Scene Builder (SeSB), a VR-based text-to-3D
    scene framework using SemAF (Semantic Annotation Framework) as
    a scheme for annotating discourse structures. SeSB integrates
    a variety of tools and resources by using SemAF and UIMA as a
    unified data structure to generate 3D scenes from textual descriptions.
    Based on VR, SeSB allows its users to change annotations through
    body movements instead of symbolic manipulations: from annotations
    in texts to corrections in editing steps to adjustments in generated
    scenes, all this is done by grabbing and moving objects. We evaluate
    SeSB in comparison with a state-of-the-art open source text-to-scene
    method (the only one which is publicly available) and find that
    our approach not only performs better, but also allows for modeling
    a greater variety of scenes.}
}
BibTeX
@inproceedings{Mehler:et:al:2023:a,
  author = {Mehler, Alexander and Bagci, Mevl{\"u}t and Henlein, Alexander
    and Abrami, Giuseppe and Spiekermann, Christian and Schrottenbacher, Patrick
    and Konca, Maxim and L{\"u}cking, Andy and Engel, Juliane and Quintino, Marc
    and Schreiber, Jakob and Saukel, Kevin and Zlatkin-Troitschanskaia, Olga},
  title = {A Multimodal Data Model for Simulation-Based Learning with {Va.Si.Li-Lab}},
  editor = {Duffy, Vincent G.},
  booktitle = {Digital Human Modeling and Applications in Health, Safety, Ergonomics
    and Risk Management},
  publisher = {Springer Nature Switzerland},
  address = {Cham},
  year = {2023},
  pages = {539--565},
  isbn = {978-3-031-35741-1},
  doi = {10.1007/978-3-031-35741-1_39},
  abstract = {Simulation-based learning is a method in which learners learn
    to master real-life scenarios and tasks from simulated application
    contexts. It is particularly suitable for the use of VR technologies,
    as these allow immersive experiences of the targeted scenarios.
    VR methods are also relevant for studies on online learning, especially
    in groups, as they provide access to a variety of multimodal learning
    and interaction data. However, VR leads to a trade-off between
    technological conditions of the observability of such data and
    the openness of learner behavior. We present Va.Si.Li-Lab, a VR-Lab
    for Simulation-based Learning developed to address this trade-off.
    Va.Si.Li-Lab uses a graph-theoretical model based on hypergraphs
    to represent the data diversity of multimodal learning and interaction.
    We develop this data model in relation to mono- and multimodal,
    intra- and interpersonal data and interleave it with ISO-Space
    to describe distributed multiple documents from the perspective
    of their interactive generation. The paper adds three use cases
    to motivate the broad applicability of Va.Si.Li-Lab and its data
    model.}
}
2022
BibTeX
@inproceedings{Abrami:Bagci:Hammerla:Mehler:2022,
  author = {Abrami, Giuseppe and Bagci, Mevl{\"u}t and Hammerla, Leon and Mehler, Alexander},
  editor = {Calzolari, Nicoletta and B{\'e}chet, Fr{\'e}d{\'e}ric and Blache, Philippe
    and Choukri, Khalid and Cieri, Christopher and Declerck, Thierry and Goggi, Sara
    and Isahara, Hitoshi and Maegaard, Bente and Mariani, Joseph and Mazo, H{\'e}l{\`e}ne
    and Odijk, Jan and Piperidis, Stelios},
  title = {{German} Parliamentary Corpus ({GerParCor})},
  booktitle = {Proceedings of the Language Resources and Evaluation Conference},
  year = {2022},
  address = {Marseille, France},
  publisher = {European Language Resources Association},
  pages = {1900--1906},
  url = {https://aclanthology.org/2022.lrec-1.202},
  pdf = {http://www.lrec-conf.org/proceedings/lrec2022/pdf/2022.lrec-1.202.pdf},
  poster = {https://www.texttechnologylab.org/wp-content/uploads/2022/06/GerParCor_LREC_2022.pdf},
  keywords = {gerparcor},
  abstract = {Parliamentary debates represent a large and partly unexploited
    treasure trove of publicly accessible texts. In the German-speaking
    area, there is a certain deficit of uniformly accessible and annotated
    corpora covering all German-speaking parliaments at the national
    and federal level. To address this gap, we introduce the German
    Parliamentary Corpus (GerParCor). GerParCor is a genre-specific
    corpus of (predominantly historical) German-language parliamentary
    protocols from three centuries and four countries, including state
    and federal level data. In addition, GerParCor contains conversions
    of scanned protocols and, in particular, of protocols in Fraktur
    converted via an OCR process based on Tesseract. All protocols
    were preprocessed by means of the NLP pipeline of spaCy3 and automatically
    annotated with metadata regarding their session date. GerParCor
    is made available in the XMI format of the UIMA project. In this
    way, GerParCor can be used as a large corpus of historical texts
    in the field of political communication for various tasks in NLP.}
}
BibTeX
@inproceedings{Luecking:Stoeckel:Abrami:Mehler:2022,
  author = {L{\"u}cking, Andy and Stoeckel, Manuel and Abrami, Giuseppe and Mehler, Alexander},
  title = {I still have Time(s): Extending {HeidelTime} for {German} Texts},
  booktitle = {Proceedings of the 13th Language Resources and Evaluation Conference},
  series = {LREC 2022},
  location = {Marseille, France},
  year = {2022},
  pdf = {https://aclanthology.org/2022.lrec-1.505.pdf},
  url = {https://aclanthology.org/2022.lrec-1.505}
}
2021
BibTeX
@incollection{Henlein:et:al:2021,
  author = {Henlein, Alexander and Abrami, Giuseppe and Kett, Attila and Spiekermann, Christian
    and Mehler, Alexander},
  title = {Digital Learning, Teaching and Collaboration in an Era of ubiquitous Quarantine},
  editor = {Daniela, Linda and Visvizi, Anna},
  booktitle = {Remote Learning in Times of Pandemic - Issues, Implications and Best Practice},
  publisher = {Routledge},
  address = {Thames, Oxfordshire, England, UK},
  year = {2021},
  chapter = {3}
}
BibTeX
@article{Luecking:et:al:2021,
  author = {L{\"u}cking, Andy and Driller, Christine and Stoeckel, Manuel and Abrami, Giuseppe
    and Pachzelt, Adrian and Mehler, Alexander},
  title = {Multiple Annotation for Biodiversity: Developing an annotation
    framework among biology, linguistics and text technology},
  journal = {Language Resources and Evaluation},
  editor = {Ide, Nancy and Calzolari, Nicoletta},
  year = {2021},
  doi = {10.1007/s10579-021-09553-5},
  pdf = {https://link.springer.com/content/pdf/10.1007/s10579-021-09553-5.pdf},
  keywords = {biofid}
}
BibTeX
@inproceedings{Fischer:et:al:2021,
  author = {Fischer, Pascal and Smajic, Alen and Abrami, Giuseppe and Mehler, Alexander},
  title = {Multi-Type-TD-TSR - Extracting Tables from Document Images using
    a Multi-stage Pipeline for Table Detection and Table Structure
    Recognition: from OCR to Structured Table Representations},
  booktitle = {Proceedings of the 44th German Conference on Artificial Intelligence},
  series = {KI2021},
  location = {Berlin, Germany},
  year = {2021},
  pdf = {https://arxiv.org/pdf/2105.11021.pdf},
  url = {https://www.springerprofessional.de/multi-type-td-tsr-extracting-tables-from-document-images-using-a/19711570}
}
BibTeX
@inproceedings{Abrami:et:al:2021,
  author = {Abrami, Giuseppe and Henlein, Alexander and L{\"u}cking, Andy and Kett, Attila
    and Adeberg, Pascal and Mehler, Alexander},
  title = {Unleashing annotations with {TextAnnotator}: Multimedia, multi-perspective
    document views for ubiquitous annotation},
  editor = {Bunt, Harry},
  booktitle = {Proceedings of the 17th Joint ACL - ISO Workshop on Interoperable
    Semantic Annotation},
  series = {ISA-17},
  publisher = {Association for Computational Linguistics},
  address = {Groningen, The Netherlands (online)},
  year = {2021},
  month = jun,
  pages = {65--75},
  url = {https://aclanthology.org/2021.isa-1.7},
  pdf = {https://iwcs2021.github.io/proceedings/isa/pdf/2021.isa-1.7.pdf},
  keywords = {textannotator},
  abstract = {We argue that mainly due to technical innovation in the landscape
    of annotation tools, a conceptual change in annotation models
    and processes is also on the horizon. It is diagnosed that these
    changes are bound up with multi-media and multi-perspective facilities
    of annotation tools, in particular when considering virtual reality
    (VR) and augmented reality (AR) applications, their potential
    ubiquitous use, and the exploitation of externally trained natural
    language pre-processing methods. Such developments potentially
    lead to a dynamic and exploratory heuristic construction of the
    annotation process. With TextAnnotator an annotation suite is
    introduced which focuses on multi-mediality and multi-perspectivity
    with an interoperable set of task-specific annotation modules
    (e.g., for word classification, rhetorical structures, dependency
    trees, semantic roles, and more) and their linkage to VR and mobile
    implementations. The basic architecture and usage of TextAnnotator
    is described and related to the above mentioned shifts in the
    field.}
}
BibTeX
@article{Luecking:Brueckner:Abrami:Uslu:Mehler:2021,
  author = {L{\"u}cking, Andy and Br{\"u}ckner, Sebastian and Abrami, Giuseppe
    and Uslu, Tolga and Mehler, Alexander},
  title = {Computational linguistic assessment of textbooks and online texts
    by means of threshold concepts in economics},
  journal = {Frontiers in Education},
  year = {2021},
  eid = {578475},
  doi = {10.3389/feduc.2020.578475},
  url = {https://www.frontiersin.org/articles/10.3389/feduc.2020.578475/}
}
2020
BibTeX
@article{Luecking:et:al:2020,
  author = {Andy L{\"{u}}cking and Sebastian Br{\"{u}}ckner and Giuseppe Abrami
    and Tolga Uslu and Alexander Mehler},
  title = {Computational linguistic assessment of textbook and online learning
    media by means of threshold concepts in business education},
  journal = {CoRR},
  volume = {abs/2008.02096},
  year = {2020},
  archiveprefix = {arXiv},
  eprint = {2008.02096},
  url = {https://arxiv.org/abs/2008.02096},
  timestamp = {Fri, 07 Aug 2020 15:07:21 +0200},
  biburl = {https://dblp.org/rec/journals/corr/abs-2008-02096.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
BibTeX
@article{Driller:et:al:2020,
  author = {Driller, Christine and Koch, Markus and Abrami, Giuseppe and Hemati, Wahed
    and L{\"u}cking, Andy and Mehler, Alexander and Pachzelt, Adrian and Kasperek, Gerwin},
  title = {Fast and Easy Access to {Central European} Biodiversity Data with {BIOfid}},
  journal = {Biodiversity Information Science and Standards},
  volume = {4},
  pages = {e59157},
  year = {2020},
  publisher = {Pensoft Publishers},
  doi = {10.3897/biss.4.59157},
  url = {https://doi.org/10.3897/biss.4.59157},
  keywords = {biofid},
  abstract = {The storage of data in public repositories such as the Global
    Biodiversity Information Facility (GBIF) or the National Center
    for Biotechnology Information (NCBI) is nowadays stipulated in
    the policies of many publishers in order to facilitate data replication
    or proliferation. Species occurrence records contained in legacy
    printed literature are no exception to this. The extent of their
    digital and machine-readable availability, however, is still far
    from matching the existing data volume (Thessen and Parr 2014).
    But precisely these data are becoming more and more relevant to
    the investigation of ongoing loss of biodiversity. In order to
    extract species occurrence records at a larger scale from available
    publications, one has to apply specialised text mining tools.
    However, such tools are in short supply especially for scientific
    literature in the German language. The Specialised Information
    Service Biodiversity Research BIOfid (Koch et al. 2017) aims
    at reducing this desideratum, inter alia, by preparing a searchable
    text corpus semantically enriched by a new kind of multi-label
    annotation. For this purpose, we feed manual annotations into
    automatic, machine-learning annotators. This mixture of automatic
    and manual methods is needed, because BIOfid approaches a new
    application area with respect to language (mainly German of the
    19th century), text type (biological reports), and linguistic
    focus (technical and everyday language). We will present current
    results of the performance of BIOfid's semantic search engine
    and the application of independent natural language processing
    (NLP) tools. Most of these are freely available online, such as
    TextImager (Hemati et al. 2016). We will show how TextImager is
    tied into the BIOfid pipeline and how it is made scalable (e.g.
    extendible by further modules) and usable on different systems
    (docker containers). Further, we will provide a short introduction
    to generating machine-learning training data using TextAnnotator
    (Abrami et al. 2019) for multi-label annotation. Annotation reproducibility
    can be assessed by the implementation of inter-annotator agreement
    methods (Abrami et al. 2020). Beyond taxon recognition and entity
    linking, we place particular emphasis on location and time information.
    For this purpose, our annotation tag-set combines general categories
    and biology-specific categories (including taxonomic names) with
    location and time ontologies. The application of the annotation
    categories is regimented by annotation guidelines (L{\"u}cking et
    al. 2020). Within the next years, our work deliverable will be
    a semantically accessible and data-extractable text corpus of
    around two million pages. In this way, BIOfid is creating a new
    valuable resource that expands our knowledge of biodiversity and
    its determinants.}
}
BibTeX
@inproceedings{Abrami:Mehler:Stoeckel:2020,
  author    = {Abrami, Giuseppe and Mehler, Alexander and Stoeckel, Manuel},
  title     = {{TextAnnotator}: A web-based annotation suite for texts},
  booktitle = {Proceedings of the Digital Humanities 2020},
  series    = {DH 2020},
  location  = {Ottawa, Canada},
  year      = {2020},
  url       = {https://dh2020.adho.org/wp-content/uploads/2020/07/547_TextAnnotatorAwebbasedannotationsuitefortexts.html},
  doi       = {10.17613/tenm-4907},
  abstract  = {The TextAnnotator is a tool for simultaneous and collaborative
    annotation of texts with visual annotation support, integration
    of knowledge bases and, by pipelining the TextImager, a rich variety
    of pre-processing and automatic annotation tools. It includes
    a variety of modules for the annotation of texts, which contains
    the annotation of argumentative, rhetorical, propositional and
    temporal structures as well as a module for named entity linking
    and rapid annotation of named entities. Especially the modules
    for annotation of temporal, argumentative and propositional structures
    are currently unique in web-based annotation tools. The TextAnnotator,
    which allows the annotation of texts as a platform, is divided
    into a front- and a backend component. The backend is a web service
    based on WebSockets, which integrates the UIMA Database Interface
    to manage and use texts. Texts are made accessible by using the
    ResourceManager and the AuthorityManager, based on user and group
    access permissions. Different views of a document can be created
    and used depending on the scenario. Once a document has been opened,
    access is gained to the annotations stored within annotation views
    in which these are organized. Any annotation view can be assigned
    with access permissions and by default, each user obtains his
    or her own user view for every annotated document. In addition,
    with sufficient access permissions, all annotation views can also
    be used and curated. This allows the possibility to calculate
    an Inter-Annotator-Agreement for a document, which shows an agreement
    between the annotators. Annotators without sufficient rights cannot
    display this value so that the annotators do not influence each
    other. This contribution is intended to reflect the current state
    of development of TextAnnotator, demonstrate the possibilities
    of an instantaneous Inter-Annotator-Agreement and trigger a discussion
    about further functions for the community.},
  keywords  = {textannotator},
  poster    = {https://hcommons.org/deposits/download/hc:31816/CONTENT/dh2020_textannotator_poster.pdf}
}
BibTeX
@inproceedings{Abrami:Stoeckel:Mehler:2020,
  author    = {Abrami, Giuseppe and Stoeckel, Manuel and Mehler, Alexander},
  title     = {{TextAnnotator}: A {UIMA} Based Tool for the Simultaneous and Collaborative
    Annotation of Texts},
  booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference},
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  pages     = {891--900},
  isbn      = {979-10-95546-34-4},
  abstract  = {The annotation of texts and other material in the field of digital
    humanities and Natural Language Processing (NLP) is a common task
    of research projects. At the same time, the annotation of corpora
    is certainly the most time- and cost-intensive component in research
    projects and often requires a high level of expertise according
    to the research interest. However, for the annotation of texts,
    a wide range of tools is available, both for automatic and manual
    annotation. Since the automatic pre-processing methods are not
    error-free and there is an increasing demand for the generation
    of training data, also with regard to machine learning, suitable
    annotation tools are required. This paper defines criteria of
    flexibility and efficiency of complex annotations for the assessment
    of existing annotation tools. To extend this list of tools, the
    paper describes TextAnnotator, a browser-based, multi-annotation
    system, which has been developed to perform platform-independent
    multimodal annotations and annotate complex textual structures.
    The paper illustrates the current state of development of TextAnnotator
    and demonstrates its ability to evaluate annotation quality (inter-annotator
    agreement) at runtime. In addition, it will be shown how annotations
    of different users can be performed simultaneously and collaboratively
    on the same document from different platforms using UIMA as the
    basis for annotation.},
  url       = {https://www.aclweb.org/anthology/2020.lrec-1.112},
  keywords  = {textannotator},
  pdf       = {http://www.lrec-conf.org/proceedings/lrec2020/pdf/2020.lrec-1.112.pdf}
}
BibTeX
@inproceedings{Abrami:Henlein:Kett:Mehler:2020,
  author    = {Abrami, Giuseppe and Henlein, Alexander and Kett, Attila and Mehler, Alexander},
  title     = {{Text2SceneVR}: Generating Hypertexts with {VAnnotatoR} as a Pre-processing
    Step for {Text2Scene} Systems},
  booktitle = {Proceedings of the 31st ACM Conference on Hypertext and Social Media},
  series    = {HT '20},
  year      = {2020},
  location  = {Virtual Event, USA},
  isbn      = {9781450370981},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3372923.3404791},
  doi       = {10.1145/3372923.3404791},
  pages     = {177--186},
  numpages  = {10},
  pdf       = {https://dl.acm.org/doi/pdf/10.1145/3372923.3404791}
}
BibTeX
@article{Mehler:et:al:2020b,
  author   = {Mehler, Alexander and Jussen, Bernhard and Geelhaar, Tim and Henlein, Alexander
    and Abrami, Giuseppe and Baumartz, Daniel and Uslu, Tolga and Hemati, Wahed},
  title    = {The {Frankfurt Latin Lexicon}. {From} Morphological Expansion and
    Word Embeddings to {SemioGraphs}},
  journal  = {Studi e Saggi Linguistici},
  doi      = {10.4454/ssl.v58i1.276},
  year     = {2020},
  volume   = {58},
  number   = {1},
  pages    = {121--155},
  abstract = {In this article we present the Frankfurt Latin Lexicon (FLL),
    a lexical resource for Medieval Latin that is used both for the
    lemmatization of Latin texts and for the post-editing of lemmatizations.
    We describe recent advances in the development of lemmatizers
    and test them against the Capitularies corpus (comprising Frankish
    royal edicts, mid-6th to mid-9th century), a corpus created as
    a reference for processing Medieval Latin. We also consider the
    post-correction of lemmatizations using a limited crowdsourcing
    process aimed at continuous review and updating of the FLL. Starting
    from the texts resulting from this lemmatization process, we describe
    the extension of the FLL by means of word embeddings, whose interactive
    traversing by means of SemioGraphs completes the digital enhanced
    hermeneutic circle. In this way, the article argues for a more
    comprehensive understanding of lemmatization, encompassing classical
    machine learning as well as intellectual post-corrections and,
    in particular, human computation in the form of interpretation
    processes based on graph representations of the underlying lexical
    resources.},
  url      = {https://www.studiesaggilinguistici.it/index.php/ssl/article/view/276},
  pdf      = {https://www.studiesaggilinguistici.it/index.php/ssl/article/download/276/219}
}
BibTeX
@inproceedings{Henlein:et:al:2020,
  author    = {Henlein, Alexander and Abrami, Giuseppe and Kett, Attila and Mehler, Alexander},
  title     = {Transfer of {ISOSpace} into a {3D} Environment for Annotations and Applications},
  booktitle = {Proceedings of the 16th Joint ACL - ISO Workshop on Interoperable
    Semantic Annotation},
  month     = may,
  year      = {2020},
  address   = {Marseille},
  publisher = {European Language Resources Association},
  pages     = {32--35},
  abstract  = {People's visual perception is very pronounced and therefore it
    is usually no problem for them to describe the space around them
    in words. Conversely, people also have no problems imagining a
    concept of a described space. In recent years many efforts have
    been made to develop a linguistic concept for spatial and spatial-temporal
    relations. However, the systems have not really caught on so far,
    which in our opinion is due to the complex models on which they
    are based and the lack of available training data and automated
    taggers. In this paper we describe a project to support spatial
    annotation, which could facilitate annotation by its many functions,
    but also enrich it with many more information. This is to be achieved
    by an extension by means of a VR environment, with which spatial
    relations can be better visualized and connected with real objects.
    And we want to use the available data to develop a new state-of-the-art
    tagger and thus lay the foundation for future systems such as
    improved text understanding for Text2Scene.},
  url       = {https://www.aclweb.org/anthology/2020.isa-1.4},
  pdf       = {http://www.lrec-conf.org/proceedings/lrec2020/workshops/ISA16/pdf/2020.isa-1.4.pdf}
}
BibTeX
@inproceedings{Kuehn:Abrami:Mehler:2020,
  author    = {K{\"u}hn, Vincent and Abrami, Giuseppe and Mehler, Alexander},
  editor    = {Chen, Jessie Y. C. and Fragomeni, Gino},
  title     = {{WikNectVR}: {A} Gesture-Based Approach for Interacting in Virtual
    Reality Based on {WikNect} and Gestural Writing},
  booktitle = {Virtual, Augmented and Mixed Reality. Design and Interaction -
    12th International Conference, {VAMR} 2020, Held as Part of the
    22nd {HCI} International Conference, {HCII} 2020, Copenhagen,
    Denmark, July 19-24, 2020, Proceedings, Part {I}},
  series    = {Lecture Notes in Computer Science},
  volume    = {12190},
  pages     = {299--312},
  publisher = {Springer},
  year      = {2020},
  url       = {https://doi.org/10.1007/978-3-030-49695-1_20},
  doi       = {10.1007/978-3-030-49695-1_20},
  timestamp = {Tue, 14 Jul 2020 10:55:57 +0200},
  biburl    = {https://dblp.org/rec/conf/hci/KuhnAM20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
BibTeX
@incollection{Abrami:et:al:2020,
  author    = {Abrami, Giuseppe and Mehler, Alexander and Spiekermann, Christian
    and Kett, Attila and L{\"o}{\"o}ck, Simon and Schwarz, Lukas},
  editor    = {Daniela, Linda},
  title     = {Educational Technologies in the area of ubiquitous historical
    computing in virtual reality},
  booktitle = {New Perspectives on Virtual and Augmented Reality: Finding New
    Ways to Teach in a Transformed Learning Environment},
  year      = {2020},
  publisher = {Taylor \& Francis},
  abstract  = {At ever shorter intervals, new technologies are being developed
    that are opening up more and more areas of application. This regards,
    for example, Virtual Reality (VR) and Augmented Reality (AR) devices.
    In addition to the private sector, the public and education sectors,
    which already make intensive use of these devices, benefit from
    these technologies. However, especially in the field of historical
    education, there are not many frameworks for generating immersive
    virtual environments that can be used flexibly enough. This chapter
    addresses this gap by means of VAnnotatoR. VAnnotatoR is a versatile
    framework for the creation and use of virtual environments that
    serve to model historical processes in historical education. The
    paper describes the building blocks of VAnnotatoR and describes
    applications in historical education.},
  isbn      = {978-0-367-43211-9},
  url       = {https://www.routledge.com/New-Perspectives-on-Virtual-and-Augmented-Reality-Finding-New-Ways-to-Teach/Daniela/p/book/9780367432119}
}
2019
BibTeX
@inproceedings{Mehler:Abrami:2019,
  author    = {Mehler, Alexander and Abrami, Giuseppe},
  title     = {{VAnnotatoR}: A framework for the multimodal reconstruction of
    historical situations and spaces},
  booktitle = {Proceedings of the Time Machine Conference},
  year      = {2019},
  month     = oct,
  eventdate = {2019-10-10/2019-10-11},
  address   = {Dresden, Germany},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2019/09/TimeMachineConference.pdf}
}
BibTeX
@inproceedings{Abrami:et:al:2019,
  author    = {Abrami, Giuseppe and Mehler, Alexander and L{\"u}cking, Andy and Rieb, Elias
    and Helfrich, Philipp},
  title     = {{TextAnnotator}: A flexible framework for semantic annotations},
  booktitle = {Proceedings of the Fifteenth Joint ACL - ISO Workshop on Interoperable
    Semantic Annotation, (ISA-15)},
  series    = {ISA-15},
  location  = {Gothenburg, Sweden},
  month     = may,
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/TextAnnotator_IWCS_Göteborg.pdf},
  year      = {2019},
  keywords  = {textannotator},
  abstract  = {Modern annotation tools should meet at least the following general
    requirements: they can handle diverse data and annotation levels
    within one tool, and they support the annotation process with
    automatic (pre-)processing outcomes as much as possible. We developed
    a framework that meets these general requirements and that enables
    versatile and browser-based annotations of texts, the TextAnnotator.
    It combines NLP methods of pre-processing with methods of flexible
    post-processing. In fact, machine learning (ML) requires a lot
    of training and test data, but is usually far from achieving perfect
    results. Producing high-level annotations for ML and post-correcting
    its results are therefore necessary. This is the purpose of TextAnnotator,
    which is entirely implemented in ExtJS and provides a range of
    interactive visualizations of annotations. In addition, it allows
    for flexibly integrating knowledge resources, e.g. in the course
    of post-processing named entity recognition. The paper describes
    TextAnnotator’s architecture together with three use cases: annotating
    temporal structures, argument structures and named entity linking.}
}
BibTeX
@inproceedings{Abrami:Mehler:Spiekermann:2019,
  author    = {Abrami, Giuseppe and Mehler, Alexander and Spiekermann, Christian},
  title     = {Graph-based Format for Modeling Multimodal Annotations in Virtual
    Reality by Means of {VAnnotatoR}},
  booktitle = {Proceedings of the 21st International Conference on Human-Computer
    Interaction, HCII 2019},
  series    = {HCII 2019},
  location  = {Orlando, Florida, USA},
  editor    = {Stephanidis, Constantine and Antona, Margherita},
  month     = jul,
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {351--358},
  abstract  = {Projects in the field of Natural Language Processing (NLP), the
    Digital Humanities (DH) and related disciplines dealing with machine
    learning of complex relationships between data objects need annotations
    to obtain sufficiently rich training and test sets. The visualization
    of such data sets and their underlying Human Computer Interaction
    (HCI) are perennial problems of computer science. However, despite
    some success stories, the clarity of information presentation
    and the flexibility of the annotation process may decrease with
    the complexity of the underlying data objects and their relationships.
    In order to face this problem, the so-called VAnnotatoR was developed,
    as a flexible annotation tool using 3D glasses and augmented reality
    devices, which enables annotation and visualization in three-dimensional
    virtual environments. In addition, multimodal objects are annotated
    and visualized within a graph-based approach.},
  isbn      = {978-3-030-30712-7},
  doi       = {10.1007/978-3-030-30712-7_44},
  pdf       = {https://link.springer.com/content/pdf/10.1007\%2F978-3-030-30712-7_44.pdf},
  year      = {2019}
}
BibTeX
@inproceedings{Abrami:Spiekermann:Mehler:2019,
  author    = {Abrami, Giuseppe and Spiekermann, Christian and Mehler, Alexander},
  title     = {{VAnnotatoR: Ein Werkzeug zur Annotation multimodaler Netzwerke in dreidimensionalen virtuellen Umgebungen}},
  booktitle = {Proceedings of the 6th Digital Humanities Conference in the German-speaking Countries, DHd 2019},
  series    = {DHd 2019},
  location  = {Frankfurt, Germany},
  year      = {2019},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/Preprint_VAnnotatoR_DHd2019.pdf},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/DHDVAnnotatoRPoster.pdf}
}
BibTeX
@inproceedings{Hemati:Mehler:Uslu:Abrami:2019,
  author    = {Hemati, Wahed and Mehler, Alexander and Uslu, Tolga and Abrami, Giuseppe},
  title     = {{Der TextImager als Front- und Backend für das verteilte NLP von Big Digital Humanities Data}},
  booktitle = {Proceedings of the 6th Digital Humanities Conference in the German-speaking Countries, DHd 2019},
  series    = {DHd 2019},
  location  = {Frankfurt, Germany},
  year      = {2019},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/Der-TextImager-als-Fron-und-Backend.pdf},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/DHD19_TextImager.pdf}
}
2018
BibTeX
@inproceedings{Abrami:et:al:2018,
  author    = {Abrami, Giuseppe and Mehler, Alexander and Helfrich, Philipp and Rieb, Elias},
  title     = {{TextAnnotator}: A Browser-based Framework for Annotating Textual Data in Digital Humanities},
  booktitle = {Proceedings of the Digital Humanities Austria 2018},
  location  = {Salzburg, Austria},
  year      = {2018},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/TA__A_Browser_based_Framework_for_Annotating_Textual_Data_in_Digital_Humanities.pdf}
}
BibTeX
@inproceedings{Weiland:et:al:2018,
  author    = {Weiland, Claus and Driller, Christine and Koch, Markus and Schmidt, Marco
    and Abrami, Giuseppe and Ahmed, Sajawel and Mehler, Alexander and Pachzelt, Adrian
    and Kasperek, Gerwin and Hausinger, Angela and H{\"o}rnschemeyer, Thomas},
  title     = {{BIOfid}, a platform to enhance accessibility of biodiversity data},
  booktitle = {Proceedings of the 10th International Conference on Ecological Informatics},
  year      = {2018},
  url       = {https://www.researchgate.net/profile/Marco_Schmidt3/publication/327940813_BIOfid_a_Platform_to_Enhance_Accessibility_of_Biodiversity_Data/links/5bae3e3e92851ca9ed2cd60f/BIOfid-a-Platform-to-Enhance-Accessibility-of-Biodiversity-Data.pdf?origin=publication_detail},
  location  = {Jena, Germany}
}
BibTeX
@inproceedings{Kett:et:al:2018,
  author    = {Kett, Attila and Abrami, Giuseppe and Mehler, Alexander and Spiekermann, Christian},
  title     = {{Resources2City Explorer}: A System for Generating Interactive Walkable Virtual Cities out of File Systems},
  booktitle = {Proceedings of the 31st ACM User Interface Software and Technology Symposium},
  location  = {Berlin, Germany},
  year      = {2018},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2018/10/UIST2018Final.pdf},
  abstract  = {We present Resources2City Explorer (R2CE), a tool for representing
    file systems as interactive, walkable virtual cities. R2CE visualizes
    file systems based on concepts of spatial, 3D information processing.
    For this purpose, it extends the range of functions of conventional
    file browsers considerably. Visual elements in a city generated
    by R2CE represent (relations of) objects of the underlying file
    system. The paper describes the functional spectrum of R2CE and
    illustrates it by visualizing a sample of 940 files.}
}
BibTeX
@article{Driller:et:al:2018,
  author    = {Driller, Christine and Koch, Markus and Schmidt, Marco and Weiland, Claus
    and H{\"o}rnschemeyer, Thomas and Hickler, Thomas and Abrami, Giuseppe and Ahmed, Sajawel
    and Gleim, R{\"u}diger and Hemati, Wahed and Uslu, Tolga and Mehler, Alexander
    and Pachzelt, Adrian and Rexhepi, Jashar and Risse, Thomas and Schuster, Janina
    and Kasperek, Gerwin and Hausinger, Angela},
  title     = {Workflow and Current Achievements of {BIOfid}, an Information Service
    Mobilizing Biodiversity Data from Literature Sources},
  volume    = {2},
  year      = {2018},
  doi       = {10.3897/biss.2.25876},
  publisher = {Pensoft Publishers},
  abstract  = {BIOfid is a specialized information service currently being developed
    to mobilize biodiversity data dormant in printed historical and
    modern literature and to offer a platform for open access journals
    on the science of biodiversity. Our team of librarians, computer
    scientists and biologists produce high-quality text digitizations,
    develop new text-mining tools and generate detailed ontologies
    enabling semantic text analysis and semantic search by means of
    user-specific queries. In a pilot project we focus on German publications
    on the distribution and ecology of vascular plants, birds, moths
    and butterflies extending back to the Linnaeus period about 250
    years ago. The three organism groups have been selected according
    to current demands of the relevant research community in Germany.
    The text corpus defined for this purpose comprises over 400 volumes
    with more than 100,000 pages to be digitized and will be complemented
    by journals from other digitization projects, copyright-free and
    project-related literature. With TextImager (Natural Language
    Processing \& Text Visualization) and TextAnnotator (Discourse
    Semantic Annotation) we have already extended and launched tools
    that focus on the text-analytical section of our project. Furthermore,
    taxonomic and anatomical ontologies elaborated by us for the taxa
    prioritized by the project’s target group - German institutions
    and scientists active in biodiversity research - are constantly
    improved and expanded to maximize scientific data output. Our
    poster describes the general workflow of our project ranging from
    literature acquisition via software development, to data availability
    on the BIOfid web portal (http://biofid.de/), and the implementation
    into existing platforms which serve to promote global accessibility
    of biodiversity data.},
  pages     = {e25876},
  url       = {https://doi.org/10.3897/biss.2.25876},
  eprint    = {https://doi.org/10.3897/biss.2.25876},
  journal   = {Biodiversity Information Science and Standards},
  keywords  = {biofid}
}
BibTeX
@inproceedings{Mehler:Abrami:Spiekermann:Jostock:2018,
  author    = {Mehler, Alexander and Abrami, Giuseppe and Spiekermann, Christian
    and Jostock, Matthias},
  title     = {{VAnnotatoR}: {A} Framework for Generating Multimodal Hypertexts},
  booktitle = {Proceedings of the 29th ACM Conference on Hypertext and Social Media},
  series    = {HT '18},
  year      = {2018},
  location  = {Baltimore, Maryland},
  publisher = {ACM},
  address   = {New York, NY, USA},
  pdf       = {http://delivery.acm.org/10.1145/3210000/3209572/p150-mehler.pdf}
}
BibTeX
@inproceedings{Hemati:Mehler:Uslu:Baumartz:Abrami:2018,
  author    = {Hemati, Wahed and Mehler, Alexander and Uslu, Tolga and Baumartz, Daniel and Abrami, Giuseppe},
  title     = {Evaluating and Integrating Databases in the Area of {NLP}},
  booktitle = {International Quantitative Linguistics Conference (QUALICO 2018)},
  location  = {Wroclaw, Poland},
  year      = {2018},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/04/Hemat-Mehler-Uslu-Baumartz-Abrami-Qualico-2018.pdf},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2018/10/qualico2018_databases_poster_hemati_mehler_uslu_baumartz_abrami.pdf}
}
BibTeX
@inproceedings{Abrami:Boden:Gleiss:2018,
  author    = {Abrami, Giuseppe and Boden, Gertrud and Glei{\ss}, Lisa},
  title     = {World of the {Khwe} {Bushmen}: Accessing {Khwe} Cultural Heritage data
    by means of a digital ontology based on {OWLnotator}},
  booktitle = {Proceedings of the Digital Humanities 2018},
  series    = {DH2018},
  location  = {Mexico City, Mexico},
  year      = {2018}
}
BibTeX
@inproceedings{Spiekerman:Abrami:Mehler:2018,
  author    = {Spiekermann, Christian and Abrami, Giuseppe and Mehler, Alexander},
  title     = {{VAnnotatoR}: a Gesture-driven Annotation Framework for Linguistic and Multimodal Annotation},
  booktitle = {Proceedings of the Annotation, Recognition and Evaluation of Actions (AREA 2018) Workshop},
  series    = {AREA},
  location  = {Miyazaki, Japan},
  year      = {2018},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/VAnnotatoR.pdf}
}
BibTeX
@inproceedings{Helfrich:et:al:2018,
  author    = {Helfrich, Philipp and Rieb, Elias and Abrami, Giuseppe and L{\"u}cking, Andy
    and Mehler, Alexander},
  title     = {{TreeAnnotator}: Versatile Visual Annotation of Hierarchical Text Relations},
  booktitle = {Proceedings of the 11th edition of the Language Resources and
    Evaluation Conference, May 7 - 12},
  series    = {LREC 2018},
  address   = {Miyazaki, Japan},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/TreeAnnotator.pdf},
  year      = {2018}
}
BibTeX
@inproceedings{Abrami:Mehler:2018,
  address   = {Miyazaki, Japan},
  author    = {Abrami, Giuseppe and Mehler, Alexander},
  booktitle = {Proceedings of the Eleventh International Conference on Language
    Resources and Evaluation ({LREC} 2018)},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Cieri, Christopher
    and Declerck, Thierry and Goggi, Sara and Hasida, Koiti and Isahara, Hitoshi
    and Maegaard, Bente and Mariani, Joseph and Mazo, H{\'e}l{\`e}ne and Moreno, Asuncion
    and Odijk, Jan and Piperidis, Stelios and Tokunaga, Takenobu},
  month     = may,
  series    = {LREC 2018},
  keywords  = {UIMA},
  pdf       = {https://aclanthology.org/L18-1212.pdf},
  publisher = {European Language Resources Association (ELRA)},
  title     = {A {UIMA} Database Interface for Managing {NLP}-related Text Annotations},
  url       = {https://aclanthology.org/L18-1212},
  year      = {2018}
}
BibTeX
@misc{Abrami:et:al:2018b,
  author       = {Abrami, Giuseppe and Ahmed, Sajawel and Gleim, R{\"u}diger and Hemati, Wahed
    and Mehler, Alexander and Uslu, Tolga},
  title        = {Natural Language Processing and Text Mining for {BIOfid}},
  howpublished = {Presentation at the 1st Meeting of the Scientific Advisory Board of the BIOfid Project},
  address      = {Goethe-University, Frankfurt am Main, Germany},
  year         = {2018},
  month        = mar,
  day          = {08}
}
2017
BibTeX
@inproceedings{Mehler:et:al:2017:a,
  author    = {Mehler, Alexander and Abrami, Giuseppe and Bruendel, Steffen and Felder, Lisa
    and Ostertag, Thomas and Spiekermann, Christian},
  title     = {{Stolperwege}: An App for a Digital Public History of the {Holocaust}},
  booktitle = {Proceedings of the 28th ACM Conference on Hypertext and Social Media},
  series    = {HT '17},
  pages     = {319--320},
  address   = {New York, NY, USA},
  publisher = {ACM},
  abstract  = {We present the Stolperwege app, a web-based framework for ubiquitous
    modeling of historical processes. Starting from the art project
    Stolpersteine of Gunter Demnig, it allows for virtually connecting
    these stumbling blocks with information about the biographies
    of victims of Nazism. According to the practice of public history,
    the aim of Stolperwege is to deepen public knowledge of the Holocaust
    in the context of our everyday environment. Stolperwege uses an
    information model that allows for modeling social networks of
    agents starting from information about portions of their life.
    The paper exemplifies how Stolperwege is informationally enriched
    by means of historical maps and 3D animations of (historical)
    buildings.},
  acmid     = {3078748},
  doi       = {10.1145/3078714.3078748},
  isbn      = {978-1-4503-4708-2},
  keywords  = {3d, geocaching, geotagging, historical maps,
    historical processes, public history of the holocaust,
    ubiquitous computing},
  location  = {Prague, Czech Republic},
  numpages  = {2},
  poster    = {https://www.texttechnologylab.org/wp-content/uploads/2017/07/poster_ht2017.pdf},
  url       = {http://doi.acm.org/10.1145/3078714.3078748},
  year      = {2017}
}
2015
BibTeX
@inproceedings{Abrami:Mehler:Zeunert:2015:a,
  author    = {Abrami, Giuseppe and Mehler, Alexander and Zeunert, Susanne},
  title     = {Ontologiegestütze geisteswissenschaftliche {Annotationen} mit dem {OWLnotator}},
  booktitle = {Proceedings of the Jahrestagung der Digital Humanities im deutschsprachigen Raum},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/Abrami_Mehler_Zeunert_DHd_2015_abstract.pdf},
  year      = {2015}
}
BibTeX
@incollection{Abrami:Mehler:Pravida:2015:b,
  author    = {Abrami, Giuseppe and Mehler, Alexander and Pravida, Dietmar},
  title     = {Fusing Text and Image Data with the Help of the {OWLnotator}},
  booktitle = {Human Interface and the Management of Information. Information
    and Knowledge Design},
  publisher = {Springer International Publishing},
  editor    = {Yamamoto, Sakae},
  volume    = {9172},
  series    = {Lecture Notes in Computer Science},
  pages     = {261--272},
  doi       = {10.1007/978-3-319-20612-7_25},
  isbn      = {978-3-319-20611-0},
  language  = {English},
  website   = {http://dx.doi.org/10.1007/978-3-319-20612-7_25},
  year      = {2015}
}
BibTeX
@inproceedings{Abrami:Freiberg:Warner:2015,
  author    = {Abrami, Giuseppe and Freiberg, Michael and Warner, Paul},
  title     = {Managing and Annotating Historical Multimodal Corpora with the
    {eHumanities Desktop} - An outline of the current state of the {LOEWE}
    project {Illustrations of Goethe's Faust}},
  booktitle = {Historical Corpora},
  pages     = {353--363},
  abstract  = {Text corpora are structured sets of text segments that can be
    annotated or interrelated. Expanding on this, we can define a
    database of images as an iconographic multimodal corpus with annotated
    images and the relations between images as well as between images
    and texts. The Goethe-Museum in Frankfurt holds a significant
    collection of art work and texts relating to Goethe’s Faust from
    the early 19th century until the present. In this project we create
    a database containing digitized items from this collection, and
    extend a tool, the ImageDB in the eHumanities Desktop, to annotate
    and provide relations between resources. This article gives an
    overview of the project and provides some technical details. Furthermore
    we show newly implemented features, explain the challenge of creating
    an ontology on multimodal corpora and give a forecast for future
    work.},
  pdf       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/AbramiFreibergWarner_HC_2012.pdf},
  website   = {http://www.narr-shop.de/historical-corpora.html},
  year      = {2015}
}
2014
BibTeX
@article{Abrami:Mehler:Pravida:Zeunert:2014,
  author    = {Abrami, Giuseppe and Mehler, Alexander and Pravida, Dietmar and Zeunert, Susanne},
  title     = {Rubrik: Neues aus dem Netz},
  journal   = {Kunstchronik},
  volume    = {12},
  pages     = {623},
  address   = {München},
  month     = dec,
  publisher = {Zentralinstitut für Kunstgeschichte},
  website   = {http://www.zikg.eu/publikationen/laufende-publikationen/kunstchronik},
  year      = {2014}
}
BibTeX
@article{Mehler:Luecking:Abrami:2014,
  author   = {Mehler, Alexander and L{\"u}cking, Andy and Abrami, Giuseppe},
  title    = {{WikiNect}: Image Schemata as a Basis of Gestural Writing for
    Kinetic Museum Wikis},
  journal  = {Universal Access in the Information Society},
  pages    = {1--17},
  abstract = {This paper provides a theoretical assessment of gestures in the
    context of authoring image-related hypertexts by example of the
    museum information system WikiNect. To this end, a first implementation
    of gestural writing based on image schemata is provided (Lakoff
    in Women, fire, and dangerous things: what categories reveal about
    the mind. University of Chicago Press, Chicago, 1987). Gestural
    writing is defined as a sort of coding in which propositions are
    only expressed by means of gestures. In this respect, it is shown
    that image schemata allow for bridging between natural language
    predicates and gestural manifestations. Further, it is demonstrated
    that gestural writing primarily focuses on the perceptual level
    of image descriptions (Hollink et al. in Int J Hum Comput Stud
    61(5):601–626, 2004). By exploring the metaphorical potential
    of image schemata, it is finally illustrated how to extend the
    expressiveness of gestural writing in order to reach the conceptual
    level of image descriptions. In this context, the paper paves
    the way for implementing museum information systems like WikiNect
    as systems of kinetic hypertext authoring based on full-fledged
    gestural writing.},
  doi      = {10.1007/s10209-014-0386-8},
  issn     = {1615-5289},
  keywords = {wikinect},
  pdf      = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/art_10.1007_s10209-014-0386-8.pdf},
  website  = {http://dx.doi.org/10.1007/s10209-014-0386-8},
  year     = {2014}
}
2013
BibTeX
@misc{Mehler:Luecking:vor:der:Brueck:2013:a,
  author       = {Mehler, Alexander and L{\"u}cking, Andy and vor der Br{\"u}ck, Tim and Abrami, Giuseppe},
  title        = {{WikiNect} - A Kinetic Artwork Wiki for Exhibition Visitors},
  howpublished = {Poster Presentation at the Scientific Computing and
    Cultural Heritage 2013 Conference, Heidelberg},
  keywords     = {wikinect},
  month        = nov,
  poster       = {https://www.texttechnologylab.org/wp-content/uploads/2015/08/SCCHPoster2013.pdf},
  url          = {http://scch2013.wordpress.com/},
  year         = {2013}
}
2011
BibTeX
@inproceedings{Doebenhenisch:Abrami:Pfaff:Struwe:2011,
  author    = {Doeben-Henisch, Gerd and Abrami, Giuseppe and Pfaff, Marcus and Struwe, Marvin},
  title     = {Conscious learning semiotics systems to assist human persons ({CLS2H})},
  booktitle = {AFRICON, 2011},
  pages     = {1--7},
  abstract  = {Challenged by the growing societal demand for Ambient Assistive
    Living (AAL) technologies, we are dedicated to develop intelligent
    technical devices which are able to communicate with human persons
    in a truly human-like manner. The core of the project is a simulation
    environment which enables the development of conscious learning
    semiotic agents which will be able to assist human persons in
    their daily life. We are reporting first results and future perspectives.},
  doi       = {10.1109/AFRCON.2011.6072043},
  issn      = {2153-0025},
  keywords  = {ambient assistive living;conscious learning semiotic
    agents;conscious learning semiotics systems;human
    persons;intelligent technical devices;simulation
    environment;learning (artificial
    intelligence);multi-agent systems;},
  month     = sep,
  pdf       = {http://www.doeben-henisch.de/gdhnp/csg/africon2011.pdf},
  website   = {http://www.researchgate.net/publication/261451874_Conscious_Learning_Semiotics_Systems_to_Assist_Human_Persons_(CLS(2)H)},
  year      = {2011}
}