The following publications were accepted at the International Joint Conference on Natural Language Processing & Asia-Pacific Chapter of the Association for Computational Linguistics (IJCNLP-AACL):
December 2025.
Standardizing Heterogeneous Corpora with DUUR: A Dual Data-
and Process-Oriented Approach to Enhancing NLP Pipeline Integration. Proceedings of the 14th International Joint Conference on Natural
Language Processing and the 4th Conference of the Asia-Pacific
Chapter of the Association for Computational Linguistics, 1410–1425.
BibTeX
@inproceedings{Hammerla:et:al:2025a,
  author        = {Hammerla, Leon and Mehler, Alexander and Abrami, Giuseppe},
  title         = {Standardizing Heterogeneous Corpora with {DUUR}: A Dual Data-
                   and Process-Oriented Approach to Enhancing {NLP} Pipeline Integration},
  editor        = {Inui, Kentaro and Sakti, Sakriani and Wang, Haofen and Wong, Derek F.
                   and Bhattacharyya, Pushpak and Banerjee, Biplab and Ekbal, Asif
                   and Chakraborty, Tanmoy and Singh, Dhirendra Pratap},
  booktitle     = {Proceedings of the 14th International Joint Conference on Natural
                   Language Processing and the 4th Conference of the Asia-Pacific
                   Chapter of the Association for Computational Linguistics},
  month         = dec,
  year          = {2025},
  address       = {Mumbai, India},
  publisher     = {The Asian Federation of Natural Language Processing and The Association for Computational Linguistics},
  url           = {https://aclanthology.org/2025.findings-ijcnlp.87/},
  pages         = {1410--1425},
  isbn          = {979-8-89176-303-6},
  abstract      = {Despite their success, LLMs are too computationally expensive
                   to replace task- or domain-specific NLP systems. However, the
                   variety of corpus formats makes reusing these systems difficult.
                   This underscores the importance of maintaining an interoperable
                   NLP landscape. We address this challenge by pursuing two objectives:
                   standardizing corpus formats and enabling massively parallel corpus
                   processing. We present a unified conversion framework embedded
                   in a massively parallel, microservice-based, programming language-independent
                   NLP architecture designed for modularity and extensibility. It
                   allows for the integration of external NLP conversion tools and
                   supports the addition of new components that meet basic compatibility
                   requirements. To evaluate our dual data- and process-oriented
                   approach to standardization, we (1) benchmark its efficiency in
                   terms of processing speed and memory usage, (2) demonstrate the
                   benefits of standardized corpus formats for NLP downstream tasks,
                   and (3) illustrate the advantages of incorporating custom formats
                   into a corpus format ecosystem.},
  keywords      = {neglab,duui},
  internal-note = {NOTE(review): url points to the Findings volume (2025.findings-ijcnlp.87)
                   while booktitle names the main proceedings -- confirm against the ACL
                   Anthology whether booktitle should be the Findings volume title. Also
                   verify acronym spelling: title says DUUR but keywords say duui.},
}
December 2025.
D-Neg: Syntax-Aware Graph Reasoning for Negation Detection. Proceedings of the 14th International Joint Conference on Natural
Language Processing and the 4th Conference of the Asia-Pacific
Chapter of the Association for Computational Linguistics, 1432–1454.
BibTeX
@inproceedings{Hammerla:et:al:2025b,
  author        = {Hammerla, Leon and Lücking, Andy and Reinert, Carolin and Mehler, Alexander},
  title         = {{D-Neg}: Syntax-Aware Graph Reasoning for Negation Detection},
  editor        = {Inui, Kentaro and Sakti, Sakriani and Wang, Haofen and Wong, Derek F.
                   and Bhattacharyya, Pushpak and Banerjee, Biplab and Ekbal, Asif
                   and Chakraborty, Tanmoy and Singh, Dhirendra Pratap},
  booktitle     = {Proceedings of the 14th International Joint Conference on Natural
                   Language Processing and the 4th Conference of the Asia-Pacific
                   Chapter of the Association for Computational Linguistics},
  month         = dec,
  year          = {2025},
  address       = {Mumbai, India},
  publisher     = {The Asian Federation of Natural Language Processing and The Association for Computational Linguistics},
  url           = {https://aclanthology.org/2025.findings-ijcnlp.89/},
  pages         = {1432--1454},
  isbn          = {979-8-89176-303-6},
  abstract      = {Despite the communicative importance of negation, its detection
                   remains challenging. Previous approaches perform poorly in out-of-domain
                   scenarios, and progress outside of English has been slow due to
                   a lack of resources and robust models. To address this gap, we
                   present D-Neg: a syntax-aware graph reasoning model based on a
                   transformer that incorporates syntactic embeddings by attention-gating.
                   D-Neg uses graph attention to represent syntactic structures,
                   emulating the effectiveness of rule-based dependency approaches
                   for negation detection. We train D-Neg using 7 English resources
                   and their translations into 10 languages, all aligned at the annotation
                   level. We conduct an evaluation of all these datasets in in-domain
                   and out-of-domain settings. Our work represents a significant
                   advance in negation detection, enabling more effective cross-lingual
                   research.},
  keywords      = {neglab},
  internal-note = {NOTE(review): url points to the Findings volume (2025.findings-ijcnlp.89)
                   while booktitle names the main proceedings -- confirm against the ACL
                   Anthology whether booktitle should be the Findings volume title. Author
                   name Lücking is UTF-8; switch to L{\"u}cking if classic BibTeX (not Biber)
                   is the target.},
}
