% Workshop paper by Mubarak and Darwish (ANLP 2014, held with EMNLP 2014), pages 1--7.
% The proceedings name is stored in booktitle only; repeating it in series makes
% standard styles print it twice. Proper nouns in the title are brace-protected so
% sentence-casing styles keep their capitals.
@inproceedings{f9fe5431202b48f1872e7450cfed0d6a,
  author = "Mubarak, Hamdy and Darwish, Kareem",
  title = "Using {Twitter} to Collect a Multi-Dialectal Corpus of {Arabic}",
  booktitle = "ANLP 2014 - EMNLP 2014 Workshop on Arabic Natural Language Processing, Proceedings",
  editor = "Habash, Nizar and Vogel, Stephan",
  publisher = "Association for Computational Linguistics (ACL)",
  address = "United States",
  year = "2014",
  month = oct,
  pages = "1--7",
  language = "English",
  note = "Publisher Copyright: {\textcopyright}2014 Association for Computational Linguistics; EMNLP 2014 Workshop on Arabic Natural Language Processing, ANLP 2014 ; Conference date: 25-10-2014",
  abstract = "This paper describes the collection and classification of a multi-dialectal corpus of Arabic based on the geographical information of tweets. We mapped information of user locations to one of the Arab countries, and extracted tweets that have dialectal word(s). Manual evaluation of the extracted corpus shows that the accuracy of assignment of tweets to some countries (like Saudi Arabia and Egypt) is above 93\% while the accuracy for other countries, such Algeria and Syria is below 70\%.",
}