Version mise au propre

4 years ago · 55705f1102
4 changed files with 1406 additions and 0 deletions
--- a/analyse_twitter_LIU.ipynb
+++ b/analyse_twitter_LIU.ipynb
@ -0,0 +1,212 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Identifier les leaders d’opinion du domaine de l’IA sur Twitter\n",
+    "\n",
+    "Auteur : Jiayue LIU (MSc Data Management, Paris School of Business)\n",
+    "\n",
+    "Date : 18 avril 2021"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Import every library required by the analysis\n",
+    "import datetime\n",
+    "import os\n",
+    "\n",
+    "import igraph as ig\n",
+    "import pandas as pd\n",
+    "import tweepy\n",
+    "\n",
+    "# Disable pandas' chained-assignment warning for the whole notebook\n",
+    "pd.options.mode.chained_assignment = None\n",
+    "\n",
+    "# API authentication.\n",
+    "# Credentials are read from the environment so that no secret is stored in the\n",
+    "# notebook; a missing variable raises KeyError before any request is sent.\n",
+    "# NOTE(review): the keys that used to be hardcoded here are part of the\n",
+    "# repository history and should be revoked and regenerated.\n",
+    "auth = tweepy.OAuthHandler(\n",
+    "    os.environ['TWITTER_CONSUMER_KEY'],\n",
+    "    os.environ['TWITTER_CONSUMER_SECRET'])\n",
+    "auth.set_access_token(\n",
+    "    os.environ['TWITTER_ACCESS_TOKEN'],\n",
+    "    os.environ['TWITTER_ACCESS_TOKEN_SECRET'])\n",
+    "api = tweepy.API(auth, wait_on_rate_limit=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Fetch the tweets that contain at least one of the tracked hashtags.\n",
+    "# The search takes a single query string, so the hashtags are joined with the\n",
+    "# OR operator: a tweet matching either of them is returned.\n",
+    "hashtags = ['#IA', '#IntelligenceArtificielle']\n",
+    "query = ' OR '.join(hashtags)\n",
+    "results = tweepy.Cursor(api.search, q=query, lang='fr').items()\n",
+    "\n",
+    "# Flatten the JSON payload of every status into one dataframe row\n",
+    "json_data = [r._json for r in results]\n",
+    "results_df = pd.json_normalize(json_data)\n",
+    "\n",
+    "# Keep a copy of the raw search results on disk\n",
+    "results_df.to_csv(\"tweets_database.csv\", sep=\",\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Keep only the fields of interest, under shorter column names\n",
+    "column_names = {'created_at': 'time',\n",
+    "                'user.location': 'location',\n",
+    "                'user.screen_name': 'user_id',\n",
+    "                'user.followers_count': 'num_followers',\n",
+    "                'entities.user_mentions': 'mentions'}\n",
+    "simple_results = results_df[list(column_names)].rename(columns=column_names)\n",
+    "\n",
+    "# Show the raw but simplified result, labelled with the last seven days\n",
+    "today = datetime.date.today()\n",
+    "week_ago = today - datetime.timedelta(days=7)\n",
+    "period_start = week_ago.strftime(\"%d/%m/%Y\")\n",
+    "period_end = today.strftime(\"%d/%m/%Y\")\n",
+    "print(\"Pendant la semaine du\", period_start,\n",
+    "      \"au\", period_end,\n",
+    "      \", les tweets en français et ayant pour hashtags #IA ou #IntelligenceArtificielle sont les suivants : \\n\",\n",
+    "      simple_results)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Replace each tweet's list of mention entities by the list of mentioned screen names\n",
+    "mentioned_users = [[entity['screen_name'] for entity in mention]\n",
+    "                   for mention in simple_results.mentions]\n",
+    "simple_results['mentions'] = mentioned_users\n",
+    "\n",
+    "# One row per (mentioned account, author) pair; a tweet without any mention\n",
+    "# yields a single row whose \"mentions\" value is NaN.\n",
+    "edges_df = simple_results.loc[:, ['mentions', 'user_id', 'num_followers']]\n",
+    "edges_df = edges_df.explode('mentions').reset_index(drop=True)\n",
+    "\n",
+    "# Edges: only the rows that actually carry a mention\n",
+    "edges = edges_df.dropna().reset_index(drop=True)\n",
+    "\n",
+    "# Nodes: every author plus every mentioned account. NaN placeholders are left\n",
+    "# out and the names are sorted so that the node order is deterministic.\n",
+    "user_list = edges_df.user_id.to_list()\n",
+    "mention_list = edges_df.mentions.dropna().to_list()\n",
+    "nodes_list = sorted(set(user_list + mention_list))\n",
+    "\n",
+    "# Follower counts are only available for authors. They are averaged per author\n",
+    "# over all of their rows (including tweets without a mention); accounts that\n",
+    "# are only mentioned keep NaN.\n",
+    "followers = edges_df.groupby('user_id', as_index=False)['num_followers'].mean()\n",
+    "nodes = pd.DataFrame({'user_id': nodes_list}).merge(followers, on='user_id', how='left')\n",
+    "\n",
+    "print(\"La liste des mentions entre les utilisateurs : \\n\",\n",
+    "      edges)\n",
+    "print(\"La liste des utilisateurs Twitter ayant publié du contenu relatif à l'IA durant la semaine passée : \\n\",\n",
+    "      nodes)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Générer le graphe représentant le réseau social avec le package iGraph\n",
+    "\n",
+    "kol_map = ig.Graph.DataFrame(edges,\n",
+    "                       directed = True,\n",
+    "                       vertices = nodes)\n",
+    "kol_map.vs['name'] = nodes['user_id']\n",
+    "kol_map.vs['num_followers'] = nodes['num_followers']*0.001"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Compare the follower counts of the accounts in the network.\n",
+    "# Counts are per-user means and may be fractional: they are rounded before the\n",
+    "# cast because converting a non-integral float to the nullable Int64 dtype raises.\n",
+    "# The ranking is built on a new frame so that re-running the cell is harmless.\n",
+    "followers_as_int = nodes['num_followers'].round().astype(pd.Int64Dtype())\n",
+    "rank_followers = (nodes\n",
+    "                  .assign(num_followers=followers_as_int)\n",
+    "                  .sort_values(by='num_followers', ascending=False))\n",
+    "rank_followers"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Degree centrality computed with igraph: out-degree of every vertex, paired\n",
+    "# with the account names and ranked from highest to lowest\n",
+    "vertex_out_degrees = kol_map.degree(mode=\"out\")\n",
+    "out_degrees = (pd.DataFrame({'node': nodes['user_id'],\n",
+    "                             'degree': vertex_out_degrees})\n",
+    "               .sort_values(by='degree', ascending=False))\n",
+    "\n",
+    "top_ten = out_degrees.head(10)\n",
+    "print(\"Les dix comptes Twitter ayant été le plus mentionnés durant la semaine passée sont : \\n\",\n",
+    "      top_ten)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Lay the network out with the Fruchterman-Reingold force-directed algorithm\n",
+    "layout = kol_map.layout('fr')\n",
+    "\n",
+    "# Drawing options of the first map. Vertices are sized by out-degree, which is\n",
+    "# the measure announced in the caption printed below.\n",
+    "visual_style = {\n",
+    "    \"vertex_size\": kol_map.degree(mode=\"out\"),\n",
+    "    \"vertex_color\": \"#1DA1F2\",\n",
+    "    \"vertex_label\": kol_map.vs[\"name\"],\n",
+    "    \"vertex_label_size\": 5,\n",
+    "    \"edge_arrow_size\": 0.5,\n",
+    "    \"layout\": layout,\n",
+    "    \"bbox\": (500, 500),\n",
+    "    \"margin\": 20,\n",
+    "}\n",
+    "\n",
+    "# Second map: same options, vertices sized by the scaled follower count instead\n",
+    "kol_map0 = kol_map.copy()\n",
+    "visual_style0 = visual_style.copy()\n",
+    "visual_style0[\"vertex_size\"] = kol_map.vs['num_followers']\n",
+    "\n",
+    "# Display and save the generated graphs\n",
+    "print(\"Carte représentant le réseau d'influence des comptes Twitter du domaine de l'IA : \\n\",\n",
+    "     \"(la taille des noeuds est proportionnelle à leur degré sortant) \\n\")\n",
+    "ig.plot(kol_map, \"twitter_network_mapping_degree.pdf\", **visual_style)\n",
+    "\n",
+    "print(\"Carte représentant le réseau d'influence des comptes Twitter du domaine de l'IA : \\n\"\n",
+    "     \"(la taille des noeuds est proportionnelle à leur nombre d'abonnés) \\n\")\n",
+    "ig.plot(kol_map0, \"twitter_network_mapping_follower.pdf\", **visual_style0)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
--- a/tweets_database.csv
+++ b/tweets_database.csv
--- a/twitter_network_mapping_degree.pdf
+++ b/twitter_network_mapping_degree.pdf
--- a/twitter_network_mapping_follower.pdf
+++ b/twitter_network_mapping_follower.pdf