You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

212 lines
7.4 KiB

  1. {
  2. "cells": [
  3. {
  4. "cell_type": "markdown",
  5. "metadata": {},
  6. "source": [
  7. "# Identifier les leaders d’opinion du domaine de l’IA sur Twitter\n",
  8. "\n",
  9. "Auteur : Jiayue LIU (MSc Data Management, Paris School of Business)\n",
  10. "\n",
  11. "Date : 18 avril 2021"
  12. ]
  13. },
  14. {
  15. "cell_type": "code",
  16. "execution_count": null,
  17. "metadata": {},
  18. "outputs": [],
  19. "source": [
  20. "# Installer toutes les librairies nécessaires à l'exercice\n",
  21. "import tweepy\n",
  22. "import pandas as pd\n",
  23. "pd.options.mode.chained_assignment = None\n",
  24. "import igraph as ig\n",
  25. "import datetime\n",
  26. "\n",
  27. "# Authentification API\n",
  28. "auth = tweepy.OAuthHandler(\n",
  29. " 'g5ktEfyoenGVaxGFbbz5Xt6CH', \n",
  30. " 'D5RFlzzO5FMDvFFkUf5piWFF1mNKpgzEZpZEjC40uP7ZA4QhrY')\n",
  31. "auth.set_access_token(\n",
  32. " '1313171160973139973-eVa2VAFWUoha0lLgUzVwCQwQycWJ0c', \n",
  33. " 'c4DdmZV6DWV2NwjpBTy5cZlN9tdPvwACbUrwWQyj3RKfX')\n",
  34. "api = tweepy.API(auth,wait_on_rate_limit=True)"
  35. ]
  36. },
  37. {
  38. "cell_type": "code",
  39. "execution_count": null,
  40. "metadata": {},
  41. "outputs": [],
  42. "source": [
  43. "# Extraire les tweets contenant les mots-clés définis\n",
  44. "hashtags = ['#IA', '#IntelligenceArtificielle']\n",
  45. "results = tweepy.Cursor(api.search, q=hashtags, lang='fr').items()\n",
  46. "\n",
  47. "# Convertir les résultats de recherche du json en dataframe\n",
  48. "json_data = [r._json for r in results]\n",
  49. "results_df = pd.json_normalize(json_data)\n",
  50. "\n",
  51. "results_df.to_csv(\"tweets_database.csv\", sep=\",\")"
  52. ]
  53. },
  54. {
  55. "cell_type": "code",
  56. "execution_count": null,
  57. "metadata": {},
  58. "outputs": [],
  59. "source": [
  60. "# Garder des informations qui nous intéresseraient en renommant les colonnes\n",
  61. "simple_results = results_df[['created_at',\n",
  62. " 'user.location',\n",
  63. " 'user.screen_name',\n",
  64. " 'user.followers_count',\n",
  65. " 'entities.user_mentions']]\n",
  66. "simple_results.columns = ['time',\n",
  67. " 'location',\n",
  68. " 'user_id',\n",
  69. " 'num_followers',\n",
  70. " 'mentions']\n",
  71. "\n",
  72. "# Afficher le résultat brute mais simplifié\n",
  73. "today = datetime.date.today()\n",
  74. "week_ago = today - datetime.timedelta(days=7)\n",
  75. "print(\"Pendant la semaine du\", week_ago.strftime(\"%d/%m/%Y\"),\n",
  76. " \"au\", today.strftime(\"%d/%m/%Y\"),\n",
  77. " \", les tweets en français et ayant pour hashtags #IA ou #IntelligenceArtificielle sont les suivants : \\n\",\n",
  78. " simple_results)"
  79. ]
  80. },
  81. {
  82. "cell_type": "code",
  83. "execution_count": null,
  84. "metadata": {},
  85. "outputs": [],
  86. "source": [
  87. "# Convertir la colonne \"mentions\" en liste simple\n",
  88. "mentioned_users = []\n",
  89. "for mention in simple_results.mentions:\n",
  90. " mentioned_users.append(list(map(lambda d: d['screen_name'], mention)))\n",
  91. "simple_results['mentions'] = mentioned_users\n",
  92. "\n",
  93. "# Stocker tous les edges et nodes dans des dataframes\n",
  94. "edges_df = simple_results.loc[:, ['mentions', 'user_id', 'num_followers']]\n",
  95. "edges_df = edges_df.explode('mentions').reset_index().drop('index',1)\n",
  96. "\n",
  97. "mention_list = edges_df.mentions.to_list()\n",
  98. "user_list = edges_df.user_id.to_list()\n",
  99. "nodes_list = set(user_list + mention_list)\n",
  100. "\n",
  101. "edges = edges_df.dropna().reset_index().drop('index',1)\n",
  102. "nodes = pd.DataFrame(nodes_list)\n",
  103. "nodes.columns = (['user_id'])\n",
  104. "nodes = pd.merge(nodes, edges, on='user_id', how='left')\n",
  105. "nodes = nodes.drop(columns=['mentions']).groupby(by='user_id').mean().reset_index()\n",
  106. "\n",
  107. "print(\"La liste des mentions entre les utilisateurs : \\n\",\n",
  108. " edges)\n",
  109. "print(\"La liste des utilisateurs Twitter ayant publié du contenu relatif à l'IA durant la semaine passée : \\n\",\n",
  110. " nodes)"
  111. ]
  112. },
  113. {
  114. "cell_type": "code",
  115. "execution_count": null,
  116. "metadata": {},
  117. "outputs": [],
  118. "source": [
  119. "# Générer le graphe représentant le réseau social avec le package iGraph\n",
  120. "\n",
  121. "kol_map = ig.Graph.DataFrame(edges,\n",
  122. " directed = True,\n",
  123. " vertices = nodes)\n",
  124. "kol_map.vs['name'] = nodes['user_id']\n",
  125. "kol_map.vs['num_followers'] = nodes['num_followers']*0.001"
  126. ]
  127. },
  128. {
  129. "cell_type": "code",
  130. "execution_count": null,
  131. "metadata": {},
  132. "outputs": [],
  133. "source": [
  134. "# Comparer le nombre d'abonnés des utilisateurs du réseau\n",
  135. "nodes['num_followers'] = nodes['num_followers'].astype(pd.Int64Dtype())\n",
  136. "rank_followers = nodes.sort_values(by='num_followers',\n",
  137. " ascending=False)\n",
  138. "rank_followers"
  139. ]
  140. },
  141. {
  142. "cell_type": "code",
  143. "execution_count": null,
  144. "metadata": {},
  145. "outputs": [],
  146. "source": [
  147. "# Calculer la centralité de degré en utilisant le package igraph\n",
  148. "out_degrees = pd.DataFrame({'node': nodes['user_id'],\n",
  149. " 'degree':kol_map.degree(mode=\"out\")})\n",
  150. "out_degrees = out_degrees.sort_values(by='degree',\n",
  151. " ascending=False)\n",
  152. "\n",
  153. "print(\"Les dix comptes Twitter ayant été le plus mentionnés durant la semaine passée sont : \\n\",\n",
  154. " out_degrees.head(10))"
  155. ]
  156. },
  157. {
  158. "cell_type": "code",
  159. "execution_count": null,
  160. "metadata": {},
  161. "outputs": [],
  162. "source": [
  163. "# Apppliquer la méthode \"Fruchterman-Reingold force-directed\" pour construire le réseau\n",
  164. "layout = kol_map.layout('fr')\n",
  165. "\n",
  166. "visual_style = {}\n",
  167. "visual_style[\"vertex_size\"] = kol_map.degree()\n",
  168. "visual_style[\"vertex_color\"] = \"#1DA1F2\"\n",
  169. "visual_style[\"vertex_label\"] = kol_map.vs[\"name\"]\n",
  170. "visual_style[\"vertex_label_size\"] = 5\n",
  171. "visual_style[\"edge_arrow_size\"] = 0.5\n",
  172. "visual_style[\"layout\"] = layout\n",
  173. "visual_style[\"bbox\"] = (500, 500)\n",
  174. "visual_style[\"margin\"] = 20\n",
  175. "\n",
  176. "kol_map0 = kol_map.copy()\n",
  177. "visual_style0 = visual_style.copy()\n",
  178. "visual_style0[\"vertex_size\"] = kol_map.vs['num_followers']\n",
  179. "\n",
  180. "# Afficher et sauvegarder les graphes générés\n",
  181. "print(\"Carte représentant le réseau d'influence des comptes Twitter du domaine de l'IA : \\n\",\n",
  182. " \"(la taille des noeuds est proportionnelle à leur degré sortant) \\n\")\n",
  183. "ig.plot(kol_map, \"twitter_network_mapping_degree.pdf\", **visual_style)\n",
  184. "\n",
  185. "print(\"Carte représentant le réseau d'influence des comptes Twitter du domaine de l'IA : \\n\"\n",
  186. " \"(la taille des noeuds est proportionnelle à leur nombre d'abonnés) \\n\")\n",
  187. "ig.plot(kol_map0, \"twitter_network_mapping_follower.pdf\", **visual_style0)"
  188. ]
  189. }
  190. ],
  191. "metadata": {
  192. "kernelspec": {
  193. "display_name": "Python 3",
  194. "language": "python",
  195. "name": "python3"
  196. },
  197. "language_info": {
  198. "codemirror_mode": {
  199. "name": "ipython",
  200. "version": 3
  201. },
  202. "file_extension": ".py",
  203. "mimetype": "text/x-python",
  204. "name": "python",
  205. "nbconvert_exporter": "python",
  206. "pygments_lexer": "ipython3",
  207. "version": "3.8.5"
  208. }
  209. },
  210. "nbformat": 4,
  211. "nbformat_minor": 4
  212. }