
Total: 7
2021 (1)
-
G. Abrami, A. Henlein, A. Lücking, A. Kett, P. Adeberg, and A. Mehler, “Unleashing annotations with TextAnnotator: Multimedia, multi-perspective document views for ubiquitous annotation,” in Proceedings of the Seventeenth Joint ACL – ISO Workshop on Interoperable Semantic Annotation (ISA-17), 2021.
@InProceedings{Abrami:et:al:2021, Author = {Abrami, Giuseppe and Henlein, Alexander and Lücking, Andy and Kett, Attila and Adeberg, Pascal and Mehler, Alexander}, Title = {Unleashing annotations with {TextAnnotator}: Multimedia, multi-perspective document views for ubiquitous annotation}, BookTitle = {Proceedings of the Seventeenth Joint ACL - ISO Workshop on Interoperable Semantic Annotation (ISA-17)}, Series = {ISA-17}, location = {Groningen, Netherlands}, month = {June}, year = {2021}, pdf = {https://iwcs2021.github.io/proceedings/isa/pdf/2021.isa-1.7.pdf} }
2020 (2)
-
G. Abrami, A. Mehler, and M. Stoeckel, “TextAnnotator: A web-based annotation suite for texts,” in Proceedings of the Digital Humanities 2020, 2020.
Abstract: The TextAnnotator is a tool for simultaneous and collaborative annotation of texts with visual annotation support, integration of knowledge bases and, by pipelining the TextImager, a rich variety of pre-processing and automatic annotation tools. It includes a variety of modules for the annotation of texts, which contains the annotation of argumentative, rhetorical, propositional and temporal structures as well as a module for named entity linking and rapid annotation of named entities. Especially the modules for annotation of temporal, argumentative and propositional structures are currently unique in web-based annotation tools. The TextAnnotator, which allows the annotation of texts as a platform, is divided into a front- and a backend component. The backend is a web service based on WebSockets, which integrates the UIMA Database Interface to manage and use texts. Texts are made accessible by using the ResourceManager and the AuthorityManager, based on user and group access permissions. Different views of a document can be created and used depending on the scenario. Once a document has been opened, access is gained to the annotations stored within annotation views in which these are organized. Any annotation view can be assigned with access permissions and by default, each user obtains his or her own user view for every annotated document. In addition, with sufficient access permissions, all annotation views can also be used and curated. This allows the possibility to calculate an Inter-Annotator-Agreement for a document, which shows an agreement between the annotators. Annotators without sufficient rights cannot display this value so that the annotators do not influence each other. This contribution is intended to reflect the current state of development of TextAnnotator, demonstrate the possibilities of an instantaneous Inter-Annotator-Agreement and trigger a discussion about further functions for the community.
@InProceedings{Abrami:Mehler:Stoeckel:2020, author = {Abrami, Giuseppe and Mehler, Alexander and Stoeckel, Manuel}, title = {{TextAnnotator}: A web-based annotation suite for texts}, booktitle = {Proceedings of the Digital Humanities 2020}, series = {DH 2020}, location = {Ottawa, Canada}, year = {2020}, url = {https://dh2020.adho.org/wp-content/uploads/2020/07/547_TextAnnotatorAwebbasedannotationsuitefortexts.html}, doi = {http://dx.doi.org/10.17613/tenm-4907}, abstract = {The TextAnnotator is a tool for simultaneous and collaborative annotation of texts with visual annotation support, integration of knowledge bases and, by pipelining the TextImager, a rich variety of pre-processing and automatic annotation tools. It includes a variety of modules for the annotation of texts, which contains the annotation of argumentative, rhetorical, propositional and temporal structures as well as a module for named entity linking and rapid annotation of named entities. Especially the modules for annotation of temporal, argumentative and propositional structures are currently unique in web-based annotation tools. The TextAnnotator, which allows the annotation of texts as a platform, is divided into a front- and a backend component. The backend is a web service based on WebSockets, which integrates the UIMA Database Interface to manage and use texts. Texts are made accessible by using the ResourceManager and the AuthorityManager, based on user and group access permissions. Different views of a document can be created and used depending on the scenario. Once a document has been opened, access is gained to the annotations stored within annotation views in which these are organized. Any annotation view can be assigned with access permissions and by default, each user obtains his or her own user view for every annotated document. In addition, with sufficient access permissions, all annotation views can also be used and curated. This allows the possibility to calculate an Inter-Annotator-Agreement for a document, which shows an agreement between the annotators. Annotators without sufficient rights cannot display this value so that the annotators do not influence each other. This contribution is intended to reflect the current state of development of TextAnnotator, demonstrate the possibilities of an instantaneous Inter-Annotator-Agreement and trigger a discussion about further functions for the community.}, poster = {https://hcommons.org/deposits/download/hc:31816/CONTENT/dh2020_textannotator_poster.pdf} }
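Note: the abstract above mentions per-user annotation views and an inter-annotator agreement computed at runtime. As a rough, hypothetical illustration of what such a comparison involves (not TextAnnotator's actual data model or code), the following Python sketch treats two users' views as token-level label sequences and computes observed agreement and Cohen's kappa:

```python
from collections import Counter

def cohens_kappa(view_a, view_b):
    """Cohen's kappa for two equally long sequences of token labels."""
    assert len(view_a) == len(view_b), "both views must label the same tokens"
    n = len(view_a)
    observed = sum(a == b for a, b in zip(view_a, view_b)) / n
    # Chance agreement, estimated from each annotator's label distribution.
    freq_a, freq_b = Counter(view_a), Counter(view_b)
    expected = sum(freq_a[label] * freq_b[label] for label in freq_a) / (n * n)
    return (observed - expected) / (1 - expected)

# Two hypothetical user views over the same six tokens.
view_user1 = ["O", "PER", "PER", "O", "LOC", "O"]
view_user2 = ["O", "PER", "O",   "O", "LOC", "O"]
print(round(cohens_kappa(view_user1, view_user2), 2))  # 0.71
```

In TextAnnotator itself the agreement is computed over the stored annotation views and, as the abstract notes, is only shown to annotators with sufficient rights.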
-
G. Abrami, M. Stoeckel, and A. Mehler, “TextAnnotator: A UIMA Based Tool for the Simultaneous and Collaborative Annotation of Texts,” in Proceedings of The 12th Language Resources and Evaluation Conference, Marseille, France, 2020, pp. 891-900.
Abstract: The annotation of texts and other material in the field of digital humanities and Natural Language Processing (NLP) is a common task of research projects. At the same time, the annotation of corpora is certainly the most time- and cost-intensive component in research projects and often requires a high level of expertise according to the research interest. However, for the annotation of texts, a wide range of tools is available, both for automatic and manual annotation. Since the automatic pre-processing methods are not error-free and there is an increasing demand for the generation of training data, also with regard to machine learning, suitable annotation tools are required. This paper defines criteria of flexibility and efficiency of complex annotations for the assessment of existing annotation tools. To extend this list of tools, the paper describes TextAnnotator, a browser-based, multi-annotation system, which has been developed to perform platform-independent multimodal annotations and annotate complex textual structures. The paper illustrates the current state of development of TextAnnotator and demonstrates its ability to evaluate annotation quality (inter-annotator agreement) at runtime. In addition, it will be shown how annotations of different users can be performed simultaneously and collaboratively on the same document from different platforms using UIMA as the basis for annotation.
@InProceedings{Abrami:Stoeckel:Mehler:2020, author = {Abrami, Giuseppe and Stoeckel, Manuel and Mehler, Alexander}, title = {TextAnnotator: A UIMA Based Tool for the Simultaneous and Collaborative Annotation of Texts}, booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference}, month = {May}, year = {2020}, address = {Marseille, France}, publisher = {European Language Resources Association}, pages = {891--900}, ISBN = "979-10-95546-34-4", abstract = {The annotation of texts and other material in the field of digital humanities and Natural Language Processing (NLP) is a common task of research projects. At the same time, the annotation of corpora is certainly the most time- and cost-intensive component in research projects and often requires a high level of expertise according to the research interest. However, for the annotation of texts, a wide range of tools is available, both for automatic and manual annotation. Since the automatic pre-processing methods are not error-free and there is an increasing demand for the generation of training data, also with regard to machine learning, suitable annotation tools are required. This paper defines criteria of flexibility and efficiency of complex annotations for the assessment of existing annotation tools. To extend this list of tools, the paper describes TextAnnotator, a browser-based, multi-annotation system, which has been developed to perform platform-independent multimodal annotations and annotate complex textual structures. The paper illustrates the current state of development of TextAnnotator and demonstrates its ability to evaluate annotation quality (inter-annotator agreement) at runtime. In addition, it will be shown how annotations of different users can be performed simultaneously and collaboratively on the same document from different platforms using UIMA as the basis for annotation.}, url = {https://www.aclweb.org/anthology/2020.lrec-1.112}, pdf = {http://www.lrec-conf.org/proceedings/lrec2020/pdf/2020.lrec-1.112.pdf} }
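Note: "using UIMA as the basis for annotation" means that annotations are kept stand-off, i.e. as typed regions anchored to character offsets of the document text rather than as inline markup. The following Python sketch is a deliberately simplified, hypothetical CAS-like container illustrating that idea; it is not UIMA's or TextAnnotator's actual API:

```python
from dataclasses import dataclass, field

@dataclass
class Span:
    """A stand-off annotation: a typed region of the document text."""
    type_name: str   # e.g. "NamedEntity" or "Sentence"
    begin: int       # character offset, inclusive
    end: int         # character offset, exclusive
    features: dict = field(default_factory=dict)

@dataclass
class Document:
    """Simplified CAS-like container: the text plus named annotation views."""
    text: str
    views: dict = field(default_factory=dict)  # view name -> list of Spans

    def add(self, view, span):
        self.views.setdefault(view, []).append(span)

    def covered_text(self, span):
        return self.text[span.begin:span.end]

doc = Document("Goethe was born in Frankfurt.")
doc.add("user-1", Span("NamedEntity", 0, 6, {"value": "PER"}))
doc.add("user-1", Span("NamedEntity", 19, 28, {"value": "LOC"}))
print([doc.covered_text(s) for s in doc.views["user-1"]])  # ['Goethe', 'Frankfurt']
```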
2019 (1)
-
G. Abrami, A. Mehler, A. Lücking, E. Rieb, and P. Helfrich, “TextAnnotator: A flexible framework for semantic annotations,” in Proceedings of the Fifteenth Joint ACL – ISO Workshop on Interoperable Semantic Annotation, (ISA-15), 2019.
Abstract: Modern annotation tools should meet at least the following general requirements: they can handle diverse data and annotation levels within one tool, and they support the annotation process with automatic (pre-)processing outcomes as much as possible. We developed a framework that meets these general requirements and that enables versatile and browser-based annotations of texts, the TextAnnotator. It combines NLP methods of pre-processing with methods of flexible post-processing. In fact, machine learning (ML) requires a lot of training and test data, but is usually far from achieving perfect results. Producing high-level annotations for ML and post-correcting its results are therefore necessary. This is the purpose of TextAnnotator, which is entirely implemented in ExtJS and provides a range of interactive visualizations of annotations. In addition, it allows for flexibly integrating knowledge resources, e.g. in the course of post-processing named entity recognition. The paper describes TextAnnotator’s architecture together with three use cases: annotating temporal structures, argument structures and named entity linking.
@InProceedings{Abrami:et:al:2019, Author = {Abrami, Giuseppe and Mehler, Alexander and Lücking, Andy and Rieb, Elias and Helfrich, Philipp}, Title = {{TextAnnotator}: A flexible framework for semantic annotations}, BookTitle = {Proceedings of the Fifteenth Joint ACL - ISO Workshop on Interoperable Semantic Annotation, (ISA-15)}, Series = {ISA-15}, location = {Gothenburg, Sweden}, month = {May}, pdf = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/TextAnnotator_IWCS_Göteborg.pdf}, year = 2019, abstract ="Modern annotation tools should meet at least the following general requirements: they can handle diverse data and annotation levels within one tool, and they support the annotation process with automatic (pre-)processing outcomes as much as possible. We developed a framework that meets these general requirements and that enables versatile and browser-based annotations of texts, the TextAnnotator. It combines NLP methods of pre-processing with methods of flexible post-processing. Infact, machine learning (ML) requires a lot of training and test data, but is usually far from achieving perfect results. Producing high-level annotations for ML and post-correcting its results are therefore necessary. This is the purpose of TextAnnotator, which is entirely implemented in ExtJS and provides a range of interactive visualizations of annotations. In addition, it allows for flexibly integrating knowledge resources, e.g. in the course of post-processing named entity recognition. The paper describes TextAnnotator’s architecture together with three use cases: annotating temporal structures, argument structures and named entity linking." }
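Note: one of the use cases above is post-correcting automatic pre-processing, e.g. checking named-entity annotations against an integrated knowledge resource. A minimal Python sketch of that idea, with a hand-made gazetteer standing in for a real knowledge base (identifiers, field names and linking logic are illustrative assumptions, not TextAnnotator code):

```python
# Tiny gazetteer standing in for an integrated knowledge resource;
# keys are surface forms, values are (assumed) knowledge-base identifiers.
GAZETTEER = {
    "goethe": "Q5879",
    "frankfurt": "Q1794",
}

def post_correct(entities):
    """Attach a knowledge-base link to each predicted entity, or flag it
    for manual review if its surface form is unknown."""
    corrected = []
    for ent in entities:
        key = ent["text"].lower()
        corrected.append(dict(ent, kb_id=GAZETTEER.get(key),
                              needs_review=key not in GAZETTEER))
    return corrected

predicted = [{"text": "Goethe", "label": "PER"},
             {"text": "Frankfrt", "label": "LOC"}]  # OCR-style misspelling
for ent in post_correct(predicted):
    print(ent)
# The misspelled "Frankfrt" receives no kb_id and is flagged for review.
```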
2018 (3)
-
G. Abrami, A. Mehler, P. Helfrich, and E. Rieb, “TextAnnotator: A Browser-based Framework for Annotating Textual Data in Digital Humanities,” in Proceedings of the Digital Humanities Austria 2018, 2018.
@InProceedings{Abrami:et:al:2018, Author = {Giuseppe Abrami and Alexander Mehler and Philipp Helfrich and Elias Rieb}, Title = {{TextAnnotator}: A Browser-based Framework for Annotating Textual Data in Digital Humanities}, BookTitle = {Proceedings of the Digital Humanities Austria 2018}, pdf = {https://www.texttechnologylab.org/wp-content/uploads/2019/04/TA__A_Browser_based_Framework_for_Annotating_Textual_Data_in_Digital_Humanities.pdf}, location = {Salzburg, Austria}, year = 2018 }
-
P. Helfrich, E. Rieb, G. Abrami, A. Lücking, and A. Mehler, “TreeAnnotator: Versatile Visual Annotation of Hierarchical Text Relations,” in Proceedings of the 11th edition of the Language Resources and Evaluation Conference, May 7 – 12, Miyazaki, Japan, 2018.
@InProceedings{Helfrich:et:al:2018, Author = {Philipp Helfrich and Elias Rieb and Giuseppe Abrami and Andy L{\"u}cking and Alexander Mehler}, Title = {TreeAnnotator: Versatile Visual Annotation of Hierarchical Text Relations}, BookTitle = {Proceedings of the 11th edition of the Language Resources and Evaluation Conference, May 7 - 12}, Series = {LREC 2018}, Address = {Miyazaki, Japan}, pdf = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/TreeAnnotator.pdf}, year = 2018 }
-
G. Abrami and A. Mehler, “A UIMA Database Interface for Managing NLP-related Text Annotations,” in Proceedings of the 11th edition of the Language Resources and Evaluation Conference, May 7 – 12, Miyazaki, Japan, 2018.
@InProceedings{Abrami:Mehler:2018, Author = {Giuseppe Abrami and Alexander Mehler}, Title = {A UIMA Database Interface for Managing NLP-related Text Annotations}, BookTitle = {Proceedings of the 11th edition of the Language Resources and Evaluation Conference, May 7 - 12}, Series = {LREC 2018}, Address = {Miyazaki, Japan}, pdf = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/UIMA-DI.pdf}, year = 2018 }
Total: 4
2021 (1)
-
G. Abrami, A. Henlein, A. Lücking, A. Kett, P. Adeberg, and A. Mehler, “Unleashing annotations with TextAnnotator: Multimedia, multi-perspective document views for ubiquitous annotation,” in Proceedings of the Seventeenth Joint ACL – ISO Workshop on Interoperable Semantic Annotation (ISA-17), 2021.
@InProceedings{Abrami:et:al:2021, Author = {Abrami, Giuseppe and Henlein, Alexander and Lücking, Andy and Kett, Attila and Adeberg, Pascal and Mehler, Alexander}, Title = {Unleashing annotations with {TextAnnotator}: Multimedia, multi-perspective document views for ubiquitous annotation}, BookTitle = {Proceedings of the Seventeenth Joint ACL - ISO Workshop on Interoperable Semantic Annotation (ISA-17)}, Series = {ISA-17}, location = {Groningen, Netherlands}, month = {June}, year = {2021}, pdf = {https://iwcs2021.github.io/proceedings/isa/pdf/2021.isa-1.7.pdf} }
2020 (2)
-
C. Driller, M. Koch, G. Abrami, W. Hemati, A. Lücking, A. Mehler, A. Pachzelt, and G. Kasperek, “Fast and Easy Access to Central European Biodiversity Data with BIOfid,” Biodiversity Information Science and Standards, vol. 4, p. e59157, 2020.
Abstract: The storage of data in public repositories such as the Global Biodiversity Information Facility (GBIF) or the National Center for Biotechnology Information (NCBI) is nowadays stipulated in the policies of many publishers in order to facilitate data replication or proliferation. Species occurrence records contained in legacy printed literature are no exception to this. The extent of their digital and machine-readable availability, however, is still far from matching the existing data volume (Thessen and Parr 2014). But precisely these data are becoming more and more relevant to the investigation of ongoing loss of biodiversity. In order to extract species occurrence records at a larger scale from available publications, one has to apply specialised text mining tools. However, such tools are in short supply especially for scientific literature in the German language. The Specialised Information Service Biodiversity Research BIOfid (Koch et al. 2017) aims at reducing this desideratum, inter alia, by preparing a searchable text corpus semantically enriched by a new kind of multi-label annotation. For this purpose, we feed manual annotations into automatic, machine-learning annotators. This mixture of automatic and manual methods is needed, because BIOfid approaches a new application area with respect to language (mainly German of the 19th century), text type (biological reports), and linguistic focus (technical and everyday language). We will present current results of the performance of BIOfid’s semantic search engine and the application of independent natural language processing (NLP) tools. Most of these are freely available online, such as TextImager (Hemati et al. 2016). We will show how TextImager is tied into the BIOfid pipeline and how it is made scalable (e.g. extendible by further modules) and usable on different systems (docker containers). Further, we will provide a short introduction to generating machine-learning training data using TextAnnotator (Abrami et al. 2019) for multi-label annotation. Annotation reproducibility can be assessed by the implementation of inter-annotator agreement methods (Abrami et al. 2020). Beyond taxon recognition and entity linking, we place particular emphasis on location and time information. For this purpose, our annotation tag-set combines general categories and biology-specific categories (including taxonomic names) with location and time ontologies. The application of the annotation categories is regimented by annotation guidelines (Lücking et al. 2020). Within the next years, our work deliverable will be a semantically accessible and data-extractable text corpus of around two million pages. In this way, BIOfid is creating a new valuable resource that expands our knowledge of biodiversity and its determinants.
@article{Driller:et:al:2020, author = {Christine Driller and Markus Koch and Giuseppe Abrami and Wahed Hemati and Andy Lücking and Alexander Mehler and Adrian Pachzelt and Gerwin Kasperek}, title = {Fast and Easy Access to Central European Biodiversity Data with BIOfid}, volume = {4}, number = {}, year = {2020}, doi = {10.3897/biss.4.59157}, publisher = {Pensoft Publishers}, abstract = {The storage of data in public repositories such as the Global Biodiversity Information Facility (GBIF) or the National Center for Biotechnology Information (NCBI) is nowadays stipulated in the policies of many publishers in order to facilitate data replication or proliferation. Species occurrence records contained in legacy printed literature are no exception to this. The extent of their digital and machine-readable availability, however, is still far from matching the existing data volume (Thessen and Parr 2014). But precisely these data are becoming more and more relevant to the investigation of ongoing loss of biodiversity. In order to extract species occurrence records at a larger scale from available publications, one has to apply specialised text mining tools. However, such tools are in short supply especially for scientific literature in the German language.The Specialised Information Service Biodiversity Research*1 BIOfid (Koch et al. 2017) aims at reducing this desideratum, inter alia, by preparing a searchable text corpus semantically enriched by a new kind of multi-label annotation. For this purpose, we feed manual annotations into automatic, machine-learning annotators. This mixture of automatic and manual methods is needed, because BIOfid approaches a new application area with respect to language (mainly German of the 19th century), text type (biological reports), and linguistic focus (technical and everyday language).We will present current results of the performance of BIOfid’s semantic search engine and the application of independent natural language processing (NLP) tools. Most of these are freely available online, such as TextImager (Hemati et al. 2016). We will show how TextImager is tied into the BIOfid pipeline and how it is made scalable (e.g. extendible by further modules) and usable on different systems (docker containers).Further, we will provide a short introduction to generating machine-learning training data using TextAnnotator (Abrami et al. 2019) for multi-label annotation. Annotation reproducibility can be assessed by the implementation of inter-annotator agreement methods (Abrami et al. 2020). Beyond taxon recognition and entity linking, we place particular emphasis on location and time information. For this purpose, our annotation tag-set combines general categories and biology-specific categories (including taxonomic names) with location and time ontologies. The application of the annotation categories is regimented by annotation guidelines (Lücking et al. 2020). Within the next years, our work deliverable will be a semantically accessible and data-extractable text corpus of around two million pages. In this way, BIOfid is creating a new valuable resource that expands our knowledge of biodiversity and its determinants.}, issn = {}, pages = {e59157}, URL = {https://doi.org/10.3897/biss.4.59157}, eprint = {https://doi.org/10.3897/biss.4.59157}, journal = {Biodiversity Information Science and Standards} }
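Note: the abstract above describes multi-label annotation, where a single text span may carry labels from several category systems at once (general, biology-specific, location and time), and the use of such annotations as machine-learning training data. The Python sketch below shows one plausible way to flatten multi-label spans into per-tag training examples; the tag-set and record layout are invented for illustration and are not BIOfid's actual schema:

```python
# Hypothetical multi-label span annotations (offsets and tags are made up).
annotations = [
    {"begin": 0,  "end": 15, "text": "Parus caeruleus", "labels": ["Taxon", "Bird"]},
    {"begin": 30, "end": 36, "text": "Taunus",          "labels": ["Location"]},
]

TAGSET = ["Taxon", "Bird", "Location", "Time"]

def to_training_examples(annotations, tagset):
    """Turn each multi-label span into one binary example per (span, tag) pair."""
    examples = []
    for ann in annotations:
        for tag in tagset:
            examples.append({"text": ann["text"],
                             "tag": tag,
                             "gold": tag in ann["labels"]})
    return examples

for example in to_training_examples(annotations, TAGSET):
    print(example)
```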
-
G. Abrami, M. Stoeckel, and A. Mehler, “TextAnnotator: A UIMA Based Tool for the Simultaneous and Collaborative Annotation of Texts,” in Proceedings of The 12th Language Resources and Evaluation Conference, Marseille, France, 2020, pp. 891-900.
Abstract: The annotation of texts and other material in the field of digital humanities and Natural Language Processing (NLP) is a common task of research projects. At the same time, the annotation of corpora is certainly the most time- and cost-intensive component in research projects and often requires a high level of expertise according to the research interest. However, for the annotation of texts, a wide range of tools is available, both for automatic and manual annotation. Since the automatic pre-processing methods are not error-free and there is an increasing demand for the generation of training data, also with regard to machine learning, suitable annotation tools are required. This paper defines criteria of flexibility and efficiency of complex annotations for the assessment of existing annotation tools. To extend this list of tools, the paper describes TextAnnotator, a browser-based, multi-annotation system, which has been developed to perform platform-independent multimodal annotations and annotate complex textual structures. The paper illustrates the current state of development of TextAnnotator and demonstrates its ability to evaluate annotation quality (inter-annotator agreement) at runtime. In addition, it will be shown how annotations of different users can be performed simultaneously and collaboratively on the same document from different platforms using UIMA as the basis for annotation.
@InProceedings{Abrami:Stoeckel:Mehler:2020, author = {Abrami, Giuseppe and Stoeckel, Manuel and Mehler, Alexander}, title = {TextAnnotator: A UIMA Based Tool for the Simultaneous and Collaborative Annotation of Texts}, booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference}, month = {May}, year = {2020}, address = {Marseille, France}, publisher = {European Language Resources Association}, pages = {891--900}, ISBN = "979-10-95546-34-4", abstract = {The annotation of texts and other material in the field of digital humanities and Natural Language Processing (NLP) is a common task of research projects. At the same time, the annotation of corpora is certainly the most time- and cost-intensive component in research projects and often requires a high level of expertise according to the research interest. However, for the annotation of texts, a wide range of tools is available, both for automatic and manual annotation. Since the automatic pre-processing methods are not error-free and there is an increasing demand for the generation of training data, also with regard to machine learning, suitable annotation tools are required. This paper defines criteria of flexibility and efficiency of complex annotations for the assessment of existing annotation tools. To extend this list of tools, the paper describes TextAnnotator, a browser-based, multi-annotation system, which has been developed to perform platform-independent multimodal annotations and annotate complex textual structures. The paper illustrates the current state of development of TextAnnotator and demonstrates its ability to evaluate annotation quality (inter-annotator agreement) at runtime. In addition, it will be shown how annotations of different users can be performed simultaneously and collaboratively on the same document from different platforms using UIMA as the basis for annotation.}, url = {https://www.aclweb.org/anthology/2020.lrec-1.112}, pdf = {http://www.lrec-conf.org/proceedings/lrec2020/pdf/2020.lrec-1.112.pdf} }
2018 (1)
-
P. Helfrich, E. Rieb, G. Abrami, A. Lücking, and A. Mehler, “TreeAnnotator: Versatile Visual Annotation of Hierarchical Text Relations,” in Proceedings of the 11th edition of the Language Resources and Evaluation Conference, May 7 – 12, Miyazaki, Japan, 2018.
@InProceedings{Helfrich:et:al:2018, Author = {Philipp Helfrich and Elias Rieb and Giuseppe Abrami and Andy L{\"u}cking and Alexander Mehler}, Title = {TreeAnnotator: Versatile Visual Annotation of Hierarchical Text Relations}, BookTitle = {Proceedings of the 11th edition of the Language Resources and Evaluation Conference, May 7 - 12}, Series = {LREC 2018}, Address = {Miyazaki, Japan}, pdf = {https://www.texttechnologylab.org/wp-content/uploads/2018/03/TreeAnnotator.pdf}, year = 2018 }