JIAYUE LIU
4 years ago
4 changed files with 1406 additions and 0 deletions
-
212analyse_twitter_LIU.ipynb
-
1194tweets_database.csv
-
BINtwitter_network_mapping_degree.pdf
-
BINtwitter_network_mapping_follower.pdf
@ -0,0 +1,212 @@ |
|||||
|
{ |
||||
|
"cells": [ |
||||
|
{ |
||||
|
"cell_type": "markdown", |
||||
|
"metadata": {}, |
||||
|
"source": [ |
||||
|
"# Identifier les leaders d’opinion du domaine de l’IA sur Twitter\n", |
||||
|
"\n", |
||||
|
"Auteur : Jiayue LIU (MSc Data Management, Paris School of Business)\n", |
||||
|
"\n", |
||||
|
"Date : 18 avril 2021" |
||||
|
] |
||||
|
}, |
||||
|
{ |
||||
|
"cell_type": "code", |
||||
|
"execution_count": null, |
||||
|
"metadata": {}, |
||||
|
"outputs": [], |
||||
|
"source": [ |
||||
|
"# Importer toutes les bibliothèques nécessaires à l'exercice\n", |
||||
|
"import datetime\n", |
||||
|
"import os\n", |
||||
|
"\n", |
||||
|
"import igraph as ig\n", |
||||
|
"import pandas as pd\n", |
||||
|
"import tweepy\n", |
||||
|
"\n", |
||||
|
"pd.options.mode.chained_assignment = None\n", |
||||
|
"\n", |
||||
|
"# Authentification API\n", |
||||
|
"auth = tweepy.OAuthHandler(\n", |
||||
|
" 'g5ktEfyoenGVaxGFbbz5Xt6CH', \n", |
||||
|
" 'D5RFlzzO5FMDvFFkUf5piWFF1mNKpgzEZpZEjC40uP7ZA4QhrY')\n", |
||||
|
"auth.set_access_token(\n", |
||||
|
" '1313171160973139973-eVa2VAFWUoha0lLgUzVwCQwQycWJ0c', \n", |
||||
|
" 'c4DdmZV6DWV2NwjpBTy5cZlN9tdPvwACbUrwWQyj3RKfX')\n", |
||||
|
"api = tweepy.API(auth,wait_on_rate_limit=True)" |
||||
|
] |
||||
|
}, |
||||
|
{ |
||||
|
"cell_type": "code", |
||||
|
"execution_count": null, |
||||
|
"metadata": {}, |
||||
|
"outputs": [], |
||||
|
"source": [ |
||||
|
"# Extraire les tweets contenant les mots-clés définis\n", |
||||
|
"hashtags = ['#IA', '#IntelligenceArtificielle']\n", |
||||
|
"results = tweepy.Cursor(api.search, q=hashtags, lang='fr').items()\n", |
||||
|
"\n", |
||||
|
"# Convertir les résultats de recherche du json en dataframe\n", |
||||
|
"json_data = [r._json for r in results]\n", |
||||
|
"results_df = pd.json_normalize(json_data)\n", |
||||
|
"\n", |
||||
|
"results_df.to_csv(\"tweets_database.csv\", sep=\",\")" |
||||
|
] |
||||
|
}, |
||||
|
{ |
||||
|
"cell_type": "code", |
||||
|
"execution_count": null, |
||||
|
"metadata": {}, |
||||
|
"outputs": [], |
||||
|
"source": [ |
||||
|
"# Garder des informations qui nous intéresseraient en renommant les colonnes\n", |
||||
|
"simple_results = results_df[['created_at',\n", |
||||
|
" 'user.location',\n", |
||||
|
" 'user.screen_name',\n", |
||||
|
" 'user.followers_count',\n", |
||||
|
" 'entities.user_mentions']]\n", |
||||
|
"simple_results.columns = ['time',\n", |
||||
|
" 'location',\n", |
||||
|
" 'user_id',\n", |
||||
|
" 'num_followers',\n", |
||||
|
" 'mentions']\n", |
||||
|
"\n", |
||||
|
"# Afficher le résultat brute mais simplifié\n", |
||||
|
"today = datetime.date.today()\n", |
||||
|
"week_ago = today - datetime.timedelta(days=7)\n", |
||||
|
"print(\"Pendant la semaine du\", week_ago.strftime(\"%d/%m/%Y\"),\n", |
||||
|
" \"au\", today.strftime(\"%d/%m/%Y\"),\n", |
||||
|
" \", les tweets en français et ayant pour hashtags #IA ou #IntelligenceArtificielle sont les suivants : \\n\",\n", |
||||
|
" simple_results)" |
||||
|
] |
||||
|
}, |
||||
|
{ |
||||
|
"cell_type": "code", |
||||
|
"execution_count": null, |
||||
|
"metadata": {}, |
||||
|
"outputs": [], |
||||
|
"source": [ |
||||
|
"# Convertir la colonne \"mentions\" en liste simple\n", |
||||
|
"mentioned_users = []\n", |
||||
|
"for mention in simple_results.mentions:\n", |
||||
|
" mentioned_users.append(list(map(lambda d: d['screen_name'], mention)))\n", |
||||
|
"simple_results['mentions'] = mentioned_users\n", |
||||
|
"\n", |
||||
|
"# Stocker tous les edges et nodes dans des dataframes\n", |
||||
|
"edges_df = simple_results.loc[:, ['mentions', 'user_id', 'num_followers']]\n", |
||||
|
"edges_df = edges_df.explode('mentions').reset_index().drop('index',1)\n", |
||||
|
"\n", |
||||
|
"mention_list = edges_df.mentions.to_list()\n", |
||||
|
"user_list = edges_df.user_id.to_list()\n", |
||||
|
"nodes_list = set(user_list + mention_list)\n", |
||||
|
"\n", |
||||
|
"edges = edges_df.dropna().reset_index().drop('index',1)\n", |
||||
|
"nodes = pd.DataFrame(nodes_list)\n", |
||||
|
"nodes.columns = (['user_id'])\n", |
||||
|
"nodes = pd.merge(nodes, edges, on='user_id', how='left')\n", |
||||
|
"nodes = nodes.drop(columns=['mentions']).groupby(by='user_id').mean().reset_index()\n", |
||||
|
"\n", |
||||
|
"print(\"La liste des mentions entre les utilisateurs : \\n\",\n", |
||||
|
" edges)\n", |
||||
|
"print(\"La liste des utilisateurs Twitter ayant publié du contenu relatif à l'IA durant la semaine passée : \\n\",\n", |
||||
|
" nodes)" |
||||
|
] |
||||
|
}, |
||||
|
{ |
||||
|
"cell_type": "code", |
||||
|
"execution_count": null, |
||||
|
"metadata": {}, |
||||
|
"outputs": [], |
||||
|
"source": [ |
||||
|
"# Générer le graphe représentant le réseau social avec le package iGraph\n", |
||||
|
"\n", |
||||
|
"kol_map = ig.Graph.DataFrame(edges,\n", |
||||
|
" directed = True,\n", |
||||
|
" vertices = nodes)\n", |
||||
|
"kol_map.vs['name'] = nodes['user_id']\n", |
||||
|
"kol_map.vs['num_followers'] = nodes['num_followers']*0.001" |
||||
|
] |
||||
|
}, |
||||
|
{ |
||||
|
"cell_type": "code", |
||||
|
"execution_count": null, |
||||
|
"metadata": {}, |
||||
|
"outputs": [], |
||||
|
"source": [ |
||||
|
"# Comparer le nombre d'abonnés des utilisateurs du réseau\n", |
||||
|
"nodes['num_followers'] = nodes['num_followers'].astype(pd.Int64Dtype())\n", |
||||
|
"rank_followers = nodes.sort_values(by='num_followers',\n", |
||||
|
" ascending=False)\n", |
||||
|
"rank_followers" |
||||
|
] |
||||
|
}, |
||||
|
{ |
||||
|
"cell_type": "code", |
||||
|
"execution_count": null, |
||||
|
"metadata": {}, |
||||
|
"outputs": [], |
||||
|
"source": [ |
||||
|
"# Calculer la centralité de degré en utilisant le package igraph\n", |
||||
|
"out_degrees = pd.DataFrame({'node': nodes['user_id'],\n", |
||||
|
" 'degree':kol_map.degree(mode=\"out\")})\n", |
||||
|
"out_degrees = out_degrees.sort_values(by='degree',\n", |
||||
|
" ascending=False)\n", |
||||
|
"\n", |
||||
|
"print(\"Les dix comptes Twitter ayant été le plus mentionnés durant la semaine passée sont : \\n\",\n", |
||||
|
" out_degrees.head(10))" |
||||
|
] |
||||
|
}, |
||||
|
{ |
||||
|
"cell_type": "code", |
||||
|
"execution_count": null, |
||||
|
"metadata": {}, |
||||
|
"outputs": [], |
||||
|
"source": [ |
||||
|
"# Apppliquer la méthode \"Fruchterman-Reingold force-directed\" pour construire le réseau\n", |
||||
|
"layout = kol_map.layout('fr')\n", |
||||
|
"\n", |
||||
|
"visual_style = {}\n", |
||||
|
"visual_style[\"vertex_size\"] = kol_map.degree()\n", |
||||
|
"visual_style[\"vertex_color\"] = \"#1DA1F2\"\n", |
||||
|
"visual_style[\"vertex_label\"] = kol_map.vs[\"name\"]\n", |
||||
|
"visual_style[\"vertex_label_size\"] = 5\n", |
||||
|
"visual_style[\"edge_arrow_size\"] = 0.5\n", |
||||
|
"visual_style[\"layout\"] = layout\n", |
||||
|
"visual_style[\"bbox\"] = (500, 500)\n", |
||||
|
"visual_style[\"margin\"] = 20\n", |
||||
|
"\n", |
||||
|
"kol_map0 = kol_map.copy()\n", |
||||
|
"visual_style0 = visual_style.copy()\n", |
||||
|
"visual_style0[\"vertex_size\"] = kol_map.vs['num_followers']\n", |
||||
|
"\n", |
||||
|
"# Afficher et sauvegarder les graphes générés\n", |
||||
|
"print(\"Carte représentant le réseau d'influence des comptes Twitter du domaine de l'IA : \\n\",\n", |
||||
|
" \"(la taille des noeuds est proportionnelle à leur degré sortant) \\n\")\n", |
||||
|
"ig.plot(kol_map, \"twitter_network_mapping_degree.pdf\", **visual_style)\n", |
||||
|
"\n", |
||||
|
"print(\"Carte représentant le réseau d'influence des comptes Twitter du domaine de l'IA : \\n\"\n", |
||||
|
" \"(la taille des noeuds est proportionnelle à leur nombre d'abonnés) \\n\")\n", |
||||
|
"ig.plot(kol_map0, \"twitter_network_mapping_follower.pdf\", **visual_style0)" |
||||
|
] |
||||
|
} |
||||
|
], |
||||
|
"metadata": { |
||||
|
"kernelspec": { |
||||
|
"display_name": "Python 3", |
||||
|
"language": "python", |
||||
|
"name": "python3" |
||||
|
}, |
||||
|
"language_info": { |
||||
|
"codemirror_mode": { |
||||
|
"name": "ipython", |
||||
|
"version": 3 |
||||
|
}, |
||||
|
"file_extension": ".py", |
||||
|
"mimetype": "text/x-python", |
||||
|
"name": "python", |
||||
|
"nbconvert_exporter": "python", |
||||
|
"pygments_lexer": "ipython3", |
||||
|
"version": "3.8.5" |
||||
|
} |
||||
|
}, |
||||
|
"nbformat": 4, |
||||
|
"nbformat_minor": 4 |
||||
|
} |
1194
tweets_database.csv
File diff suppressed because it is too large
View File
File diff suppressed because it is too large
View File
Write
Preview
Loading…
Cancel
Save
Reference in new issue