Sajawel Ahmed

Sajawel Ahmed

Staff member

 

 

Contact | Publications

Total: 6

2019 (2)

  • S. Schweter and S. Ahmed, “Deep-EOS: General-Purpose Neural Networks for Sentence Boundary Detection,” in Proceedings of the 15th Conference on Natural Language Processing (KONVENS), 2019.
    [BibTeX]

    @inproceedings{Schweter:Ahmed:2019,
      title     = {{Deep-EOS: General-Purpose Neural Networks for Sentence Boundary Detection}},
      author    = {Stefan Schweter and Sajawel Ahmed},
      year      = {2019},
      booktitle = {Proceedings of the 15th Conference on Natural Language Processing (KONVENS)},
      location  = {Erlangen, Germany}
    }
  • S. Ahmed, M. Stoeckel, C. Driller, A. Pachzelt, and A. Mehler, “BIOfid Dataset: Publishing a German Gold Standard for Named Entity Recognition in Historical Biodiversity Literature,” in Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL), 2019. accepted
    [BibTeX]

    @InProceedings{Ahmed:Stoeckel:Driller:Pachzelt:Mehler:2019,
     author = {Sajawel Ahmed and Manuel Stoeckel and Christine Driller and Adrian Pachzelt and Alexander Mehler},
     title = {{BIOfid Dataset: Publishing a German Gold Standard for Named Entity Recognition in Historical Biodiversity Literature}},
     booktitle = {Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL)},
     publisher = {Association for Computational Linguistics},
     location = {Hong Kong, China},
     year = 2019,
     note = {accepted}
    }

2018 (4)

  • [PDF] S. Ahmed and A. Mehler, “Resource-Size matters: Improving Neural Named Entity Recognition with Optimized Large Corpora,” in Proceedings of the 17th IEEE International Conference on Machine Learning and Applications (ICMLA), 2018.
    [Abstract] [BibTeX]

    This study improves the performance of neural named entity recognition by a margin of up to 11% in terms of F-score on the example of a low-resource language like German, thereby outperforming existing baselines and establishing a new state-of-the-art on each single open-source dataset (CoNLL 2003, GermEval 2014 and Tübingen Treebank 2018). Rather than designing deeper and wider hybrid neural architectures, we gather all available resources and perform a detailed optimization and grammar-dependent morphological processing consisting of lemmatization and part-of-speech tagging prior to exposing the raw data to any training process. We test our approach in a threefold monolingual experimental setup of a) single, b) joint, and c) optimized training and shed light on the dependency of downstream-tasks on the size of corpora used to compute word embeddings.
    @InProceedings{Ahmed:Mehler:2018,
    author = {Sajawel Ahmed and Alexander Mehler},
    title = {{Resource-Size matters: Improving Neural Named Entity Recognition with Optimized Large Corpora}},
    abstract = {This study improves the performance of neural named entity recognition by a margin of up to 11\% in terms of F-score on the example of a low-resource language like German, thereby outperforming existing baselines and establishing a new state-of-the-art on each single open-source dataset (CoNLL 2003, GermEval 2014 and Tübingen Treebank 2018). Rather than designing deeper and wider hybrid neural architectures, we gather all available resources and perform a detailed optimization and grammar-dependent morphological processing consisting of lemmatization and part-of-speech tagging prior to exposing the raw data to any training process. We test our approach in a threefold monolingual experimental setup of a) single, b) joint, and c) optimized training and shed light on the dependency of downstream-tasks on the size of corpora used to compute word embeddings.},
    booktitle = {Proceedings of the 17th IEEE International Conference on Machine Learning and Applications (ICMLA)},
    location = {Orlando, Florida, USA},
    pdf = {https://arxiv.org/pdf/1807.10675.pdf},
    year = 2018
    }
  • [https://www.researchgate.net/profile/Marco_Schmidt3/publication/327940813_BIOfid_a_Platform_to_Enhance_Accessibility_of_Biodiversity_Data/links/5bae3e3e92851ca9ed2cd60f/BIOfid-a-Platform-to-Enhance-Accessibility-of-Biodiversity-Data.pdf?origin=publication_detail] C. Weiland, C. Driller, M. Koch, M. Schmidt, G. Abrami, S. Ahmed, A. Mehler, A. Pachzelt, G. Kasperek, A. Hausinger, and T. Hörnschemeyer, “BIOfid, a platform to enhance accessibility of biodiversity data,” in Proceedings of the 10th International Conference on Ecological Informatics, 2018.
    [BibTeX]

    @inproceedings{Weiland:et:al:2018,
      author    = {Claus Weiland and Christine Driller and Markus Koch and Marco Schmidt and Giuseppe Abrami and Sajawel Ahmed and Alexander Mehler and Adrian Pachzelt and Gerwin Kasperek and Angela Hausinger and Thomas Hörnschemeyer},
      title     = {{BIOfid}, a Platform to Enhance Accessibility of Biodiversity Data},
      booktitle = {Proceedings of the 10th International Conference on Ecological Informatics},
      year      = {2018},
      url       = {https://www.researchgate.net/profile/Marco_Schmidt3/publication/327940813_BIOfid_a_Platform_to_Enhance_Accessibility_of_Biodiversity_Data/links/5bae3e3e92851ca9ed2cd60f/BIOfid-a-Platform-to-Enhance-Accessibility-of-Biodiversity-Data.pdf?origin=publication_detail},
      location  = {Jena, Germany}
    }
  • [https://doi.org/10.3897/biss.2.25876] [DOI] C. Driller, M. Koch, M. Schmidt, C. Weiland, T. Hörnschemeyer, T. Hickler, G. Abrami, S. Ahmed, R. Gleim, W. Hemati, T. Uslu, A. Mehler, A. Pachzelt, J. Rexhepi, T. Risse, J. Schuster, G. Kasperek, and A. Hausinger, “Workflow and Current Achievements of BIOfid, an Information Service Mobilizing Biodiversity Data from Literature Sources,” Biodiversity Information Science and Standards, vol. 2, p. e25876, 2018.
    [Abstract] [BibTeX]

    BIOfid is a specialized information service currently being developed to mobilize biodiversity data dormant in printed historical and modern literature and to offer a platform for open access journals on the science of biodiversity. Our team of librarians, computer scientists and biologists produce high-quality text digitizations, develop new text-mining tools and generate detailed ontologies enabling semantic text analysis and semantic search by means of user-specific queries. In a pilot project we focus on German publications on the distribution and ecology of vascular plants, birds, moths and butterflies extending back to the Linnaeus period about 250 years ago. The three organism groups have been selected according to current demands of the relevant research community in Germany. The text corpus defined for this purpose comprises over 400 volumes with more than 100,000 pages to be digitized and will be complemented by journals from other digitization projects, copyright-free and project-related literature. With TextImager (Natural Language Processing & Text Visualization) and TextAnnotator (Discourse Semantic Annotation) we have already extended and launched tools that focus on the text-analytical section of our project. Furthermore, taxonomic and anatomical ontologies elaborated by us for the taxa prioritized by the project’s target group - German institutions and scientists active in biodiversity research - are constantly improved and expanded to maximize scientific data output. Our poster describes the general workflow of our project ranging from literature acquisition via software development, to data availability on the BIOfid web portal (http://biofid.de/), and the implementation into existing platforms which serve to promote global accessibility of biodiversity data.
    @article{Driller:et:al:2018,
            author = {Christine Driller and Markus Koch and Marco Schmidt and Claus Weiland and Thomas Hörnschemeyer and Thomas Hickler and Giuseppe Abrami and Sajawel Ahmed and Rüdiger Gleim and Wahed Hemati and Tolga Uslu and Alexander Mehler and Adrian Pachzelt and Jashar Rexhepi and Thomas Risse and Janina Schuster and Gerwin Kasperek and Angela Hausinger},
            title = {Workflow and Current Achievements of BIOfid, an Information Service Mobilizing Biodiversity Data from Literature Sources},
            journal = {Biodiversity Information Science and Standards},
            volume = {2},
            pages = {e25876},
            year = {2018},
            publisher = {Pensoft Publishers},
            doi = {10.3897/biss.2.25876},
            url = {https://doi.org/10.3897/biss.2.25876},
            abstract = {BIOfid is a specialized information service currently being developed to mobilize biodiversity data dormant in printed historical and modern literature and to offer a platform for open access journals on the science of biodiversity. Our team of librarians, computer scientists and biologists produce high-quality text digitizations, develop new text-mining tools and generate detailed ontologies enabling semantic text analysis and semantic search by means of user-specific queries. In a pilot project we focus on German publications on the distribution and ecology of vascular plants, birds, moths and butterflies extending back to the Linnaeus period about 250 years ago. The three organism groups have been selected according to current demands of the relevant research community in Germany. The text corpus defined for this purpose comprises over 400 volumes with more than 100,000 pages to be digitized and will be complemented by journals from other digitization projects, copyright-free and project-related literature. With TextImager (Natural Language Processing \& Text Visualization) and TextAnnotator (Discourse Semantic Annotation) we have already extended and launched tools that focus on the text-analytical section of our project. Furthermore, taxonomic and anatomical ontologies elaborated by us for the taxa prioritized by the project’s target group - German institutions and scientists active in biodiversity research - are constantly improved and expanded to maximize scientific data output. Our poster describes the general workflow of our project ranging from literature acquisition via software development, to data availability on the BIOfid web portal (http://biofid.de/), and the implementation into existing platforms which serve to promote global accessibility of biodiversity data.}
    }
  • G. Abrami, S. Ahmed, R. Gleim, W. Hemati, A. Mehler, and T. Uslu, Natural Language Processing and Text Mining for BIOfid, 2018.
    [BibTeX]

    @misc{Abrami:et:al:2018b,
     author = {Abrami, Giuseppe and Ahmed, Sajawel and Gleim, R{\"u}diger and Hemati, Wahed and Mehler, Alexander and Uslu, Tolga},
     title = {{Natural Language Processing and Text Mining for BIOfid}},
     howpublished = {Presentation at the 1st Meeting of the Scientific Advisory Board of the BIOfid Project},
     address = {Goethe-University, Frankfurt am Main, Germany},
     year = {2018},
     month = mar,
     day = {08}
    }