% Conference paper (LREC 2018) by Hamdy Mubarak on Arabic lemmatization.
% The hexadecimal citation key appears to be an export-generated identifier;
% it is kept unchanged so that existing citations continue to resolve.
% The conference name is stored only in booktitle: it is not a publication
% series, and repeating it in a series field makes some styles print it twice.
@inproceedings{3b2a72afd1ff47b49f77f8dc4ba2c78b,
title = "Build fast and accurate lemmatization for {Arabic}",
abstract = "In this paper we describe the complexity of building a lemmatizer for Arabic which has a rich and complex morphology, and show some differences between lemmatization and surface stemming, i.e. removing prefixes and suffixes from words. We discuss the need for a fast and accurate lemmatization to enhance Arabic Information Retrieval results. We also introduce a new dataset that can be used to test lemmatization accuracy, and an efficient lemmatization algorithm that outperforms state-of-the-art Arabic lemmatization in terms of accuracy and speed. We share the dataset and the code for research purposes.",
keywords = "Arabic NLP, Diacritization, Information Retrieval, Lemmatization, Stemming",
author = "Hamdy Mubarak",
note = "Publisher Copyright: {\textcopyright} LREC 2018 - 11th International Conference on Language Resources and Evaluation. All rights reserved.; 11th International Conference on Language Resources and Evaluation, LREC 2018 ; Conference date: 07-05-2018 Through 12-05-2018",
year = "2018",
language = "English",
publisher = "European Language Resources Association (ELRA)",
pages = "1128--1132",
editor = "Nicoletta Calzolari and Khalid Choukri and Christopher Cieri and Thierry Declerck and Sara Goggi and Koiti Hasida and Hitoshi Isahara and Bente Maegaard and Joseph Mariani and Helene Mazo and Asuncion Moreno and Jan Odijk and Stelios Piperidis and Takenobu Tokunaga",
booktitle = "LREC 2018 - 11th International Conference on Language Resources and Evaluation",
}