# cluster info

Users per cluster:

- 0: 18.5k - boycott bollywood
- 1: 26.8k - Sushant Singh Rajput murder conspiracy theories
- 2: 67k - love jihad and violence against women
- 3: 144k - religious/ethnic conflict within India/with Pakistan
- 4: 111k - claims of hinduphobia/anti-Indian sentiment

```python
# To get the new topics I generated keywords from my labeled tweets from the last narrative round,
# and used those keywords to search in 'overclustered' clusters.
# The keywords I used are (topic number -> list of search keywords):
keywords_for_overclustering = {
    0: [
        'boycottbollywood', 'boycottbollywoodcompletely', 'bollywood', 'boycottpathaan',
        'boycottbollywoodforever', 'boycottpathan', 'pathan', 'boycottbrahmastra',
        'boycottpathaanmovie', 'boycottpathanmovie', 'gutterwood', 'bullywood', 'srk',
        'boycottbollywoodmafia',
    ],
    1: [
        'ssr', 'justiceforsushantsinghrajput', 'ssrcase', 'nosushantnobollywood', 'bollywood',
        'suicide', 'justice',
    ],
    2: [
        'hindu', 'raped', 'minor', 'husband', 'muslim', 'murder', 'abducted', 'married', 'rape',
        'convert', 'killed', 'shraddha', 'converted', 'stoplovejihad', 'lovejihad', 'death',
        'kidnapped', 'victim', 'refused', 'forced', 'brahmin',
    ],
    3: [
        'muslim', 'hindu', 'islam', 'islamic', 'mosque', 'secularism', 'terrorists', 'pakistan',
        'islamists', 'islamist', 'secular', 'quran', 'temple', 'mughals', 'jihad', 'victim',
        'terrorism', 'minority', 'converted', 'masjid', 'mosques', 'terrorist', 'pakistani',
        'violence', 'convert', 'christian', 'killing', 'sikh', 'mob', 'sharia', 'jihadist',
        'islamophobia', 'sikhs', 'radical', 'namaz', 'jihadi', 'stoplandjihad', 'conversion',
    ],
    4: [
        'hindu', 'india', 'pakistan', 'muslim', 'temple', 'kashmir', 'punjab', 'bengal', 'attack',
        'bjp', 'bangladesh', 'hinduism', 'sikh', 'bharat', 'hindutva', 'violence', 'mandir', 'isi',
        'pakistani', 'terrorists', 'khalistanis', 'kashmiri', 'genocide', 'pak', 'Khalistan',
        'secularism', 'namaz', 'islamic', 'propaganda', 'islam', 'khalistani', 'mob', 'terrorist',
        'maharashtra', 'terrorism', 'hinduphobic', 'terror', 'hindustan', 'genocide', 'killing',
        'incident', 'culprits', 'riot', 'atiq', 'bi', 'hindusunderattack',
    ],
}

# The keywords were used to flag the smaller clusters for review, and then I included only those
# that were a) coherent topically and b) largely inflammatory/false.
# I'm including the dictionary of subclusters to the topics here to give a sense of the size of
# each topic (topic number -> IDs of the subclusters assigned to it):
keyword_clust_num_to_clusters_of_interest = {
    0: [2105, 2180, 2646],
    1: [
        115, 277, 169, 352, 774, 900, 1067, 1118, 1255, 1277, 1509, 1660, 1772, 1789, 1843, 1845,
        1901, 1951, 2053, 2057, 2080, 2105, 2146, 2156, 2189, 2222, 2278, 2316, 2321, 2327, 2412,
        2452, 2527, 2765, 2824, 2894, 2896,
    ],
    2: [
        88, 153, 192, 440, 465, 485, 689, 742, 949, 1369, 1389, 1461, 1530, 1536, 1586, 1750,
        1963, 2347, 2520, 2573, 2711, 2867,
    ],
    3: [
        166, 253, 311, 469, 482, 557, 571, 582, 603, 648, 665, 755, 820, 905, 906, 680, 1008,
        1035, 1151, 1212, 1249, 1276, 1303, 1308, 1322, 1330, 1360, 1393, 1416, 1477, 1538, 1593,
        1596, 1606, 1618, 1693, 1733, 1778, 1780, 1817, 1858, 1879, 1881, 1902, 1953, 2089, 2093,
        2094, 2132, 2134, 2161, 2255, 2271, 2273, 2363, 2493, 2568, 2571, 2698, 2730, 2747, 2977,
    ],
    4: [
        102, 196, 199, 258, 301, 311, 317, 356, 362, 378, 505, 507, 548, 628, 877, 939, 990, 1096,
        1488, 1490, 1500, 1505, 1549, 1732, 1740, 1955, 2203, 2293, 2335, 2381, 2555, 2761, 2883,
    ],
}
```