# cluster info
users per cluster
- 0: 18.5k
- boycott bollywood
- 1: 26.8k
- Sushant Singh Rajput murder conspiracy theories
- 2: 67k
- love jihad and violence against women
- 3: 144k
- religious/ethnic conflict within India/with Pakistan
- 4: 111k
- claims of hinduphobia/anti-Indian sentiment
```python
# To get the new topics I generated keywords from my labeled tweets from the last narrative round,
# and used those keywords to search in 'overclustered' clusters.
# The keywords I used are stored in `keywords_for_overclustering`, keyed by topic number (0-4).
# Each value is the list of lowercase keywords/hashtags searched for that topic.
# NOTE(review): the list for topic 1 was left unclosed in the original note; it is closed
# after 'justice' here -- confirm that no keywords were lost.
# NOTE(review): 'genocide' appears twice under topic 4 and 'Khalistan' is the only
# capitalized keyword; both are kept exactly as written -- confirm whether intended.
keywords_for_overclustering = {
    0: ['boycottbollywood', 'boycottbollywoodcompletely', 'bollywood',
        'boycottpathaan', 'boycottbollywoodforever',
        'boycottpathan', 'pathan', 'boycottbrahmastra',
        'boycottpathaanmovie', 'boycottpathanmovie', 'gutterwood',
        'bullywood', 'srk', 'boycottbollywoodmafia'],
    1: ['ssr', 'justiceforsushantsinghrajput', 'ssrcase',
        'nosushantnobollywood', 'bollywood', 'suicide', 'justice'],
    2: ['hindu', 'raped', 'minor', 'husband',
        'muslim', 'murder', 'abducted', 'married',
        'rape', 'convert', 'killed', 'shraddha', 'converted',
        'stoplovejihad', 'lovejihad', 'death',
        'kidnapped', 'victim', 'refused', 'forced',
        'brahmin'],
    3: ['muslim', 'hindu', 'islam', 'islamic', 'mosque',
        'secularism', 'terrorists', 'pakistan', 'islamists', 'islamist',
        'secular', 'quran', 'temple', 'mughals', 'jihad', 'victim',
        'terrorism', 'minority', 'converted', 'masjid', 'mosques',
        'terrorist', 'pakistani', 'violence', 'convert', 'christian',
        'killing', 'sikh', 'mob', 'sharia', 'jihadist', 'islamophobia',
        'sikhs', 'radical', 'namaz', 'jihadi', 'stoplandjihad', 'conversion'],
    4: ['hindu', 'india', 'pakistan', 'muslim', 'temple',
        'kashmir', 'punjab', 'bengal', 'attack', 'bjp',
        'bangladesh', 'hinduism', 'sikh', 'bharat', 'hindutva',
        'violence', 'mandir', 'isi', 'pakistani', 'terrorists',
        'khalistanis', 'kashmiri', 'genocide', 'pak',
        'Khalistan', 'secularism', 'namaz', 'islamic',
        'propaganda', 'islam', 'khalistani', 'mob', 'terrorist',
        'maharashtra', 'terrorism', 'hinduphobic', 'terror', 'hindustan', 'genocide',
        'killing', 'incident', 'culprits', 'riot', 'atiq', 'bi', 'hindusunderattack'],
}
# Keyword matches only flagged the smaller clusters for manual review. A flagged cluster was
# kept only when it was (a) topically coherent and (b) largely inflammatory/false.
# The mapping below goes from topic number to the ids of the subclusters that were kept,
# which gives a sense of how large each topic is.
# NOTE(review): subcluster 2105 is listed under both topic 0 and topic 1, and 311 under both
# topic 3 and topic 4 -- confirm the overlap is intended.
keyword_clust_num_to_clusters_of_interest = {
    # topic 0
    0: [2105, 2180, 2646],
    # topic 1
    1: [
        115, 277, 169, 352, 774, 900, 1067, 1118, 1255, 1277,
        1509, 1660, 1772, 1789, 1843, 1845, 1901, 1951, 2053, 2057,
        2080, 2105, 2146, 2156, 2189, 2222, 2278, 2316, 2321, 2327,
        2412, 2452, 2527, 2765, 2824, 2894, 2896,
    ],
    # topic 2
    2: [
        88, 153, 192, 440, 465, 485, 689, 742, 949, 1369,
        1389, 1461, 1530, 1536, 1586, 1750, 1963, 2347, 2520, 2573,
        2711, 2867,
    ],
    # topic 3
    3: [
        166, 253, 311, 469, 482, 557, 571, 582, 603, 648,
        665, 755, 820, 905, 906, 680, 1008, 1035, 1151, 1212,
        1249, 1276, 1303, 1308, 1322, 1330, 1360, 1393, 1416, 1477,
        1538, 1593, 1596, 1606, 1618, 1693, 1733, 1778, 1780, 1817,
        1858, 1879, 1881, 1902, 1953, 2089, 2093, 2094, 2132, 2134,
        2161, 2255, 2271, 2273, 2363, 2493, 2568, 2571, 2698, 2730,
        2747, 2977,
    ],
    # topic 4
    4: [
        102, 196, 199, 258, 301, 311, 317, 356, 362, 378,
        505, 507, 548, 628, 877, 939, 990, 1096, 1488, 1490,
        1500, 1505, 1549, 1732, 1740, 1955, 2203, 2293, 2335, 2381,
        2555, 2761, 2883,
    ],
}
```