% Conference paper "ClusterLDA" in the WiDS-PSU 2025 proceedings (IEEE).
% Names are stored in plain "Last, First" form; the proceedings title is
% carried by booktitle only, so styles do not print it twice.
@inproceedings{2714e4231ff546f38311de9473913291,
  author    = {Sutan, Juan Kenichi and Alshehri, Jumanah and Aljurbua, Rafaa and Obradovic, Zoran},
  title     = {{ClusterLDA}: Clustering-Based Topic Extraction for Online News},
  booktitle = {Proceedings - 2025 8th International Women in Data Science Conference at Prince Sultan University, {WiDS-PSU} 2025},
  editor    = {Saba, Tanzila and Rehman, Amjad},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  year      = {2025},
  pages     = {145--150},
  doi       = {10.1109/WiDS-PSU64963.2025.00039},
  language  = {English},
  keywords  = {Clustering, Latent Dirichlet Allocation (LDA), Text Mining, Topic Modeling},
  abstract  = {Topic extraction from large text datasets is a critical task for understanding and analyzing thematic patterns across documents. This study explores three methods for improving topic modeling: standard Latent Dirichlet Allocation (LDA), a clustering-based approach combined with LDA, and an experimental method enhanced with Wikipedia-supplemented clustering. We demonstrate that applying clustering prior to LDA significantly improves topic extraction by aligning the modeling process with the natural structure of the data, allowing for the identification of smaller, yet meaningful, topics that are often overshadowed in standard LDA due to thematic imbalances. Additionally, we applied the improved topic modeling approach to construct an entity-topic network that contextualizes entities' sentiments within the topics they were mentioned in, providing a nuanced view of the dataset. Although our hypothesis that Wikipedia-enhanced clustering would further improve topic extraction was not supported, as it introduced noise and worsened performance, ClusterLDA approach proved effective in enhancing the granularity of topic extraction and addressing the limitations of standard LDA. These findings highlight the potential of clustering to support more coherent and semantically relevant topic extraction, offering a foundation for future advancements in topic modeling and sentiment analysis.},
  note      = {Publisher Copyright: {\textcopyright} 2025 IEEE.; 8th International Women in Data Science Conference at Prince Sultan University, WiDS-PSU 2025 ; Conference date: 13-04-2025 Through 14-04-2025},
}