{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Identifier les leaders d’opinion du domaine de l’IA sur Twitter\n",
"\n",
"Auteur : Jiayue LIU (MSc Data Management, Paris School of Business)\n",
"\n",
"Date : 18 avril 2021"
]
},
{
"cell_type": "code",
"execution_count": 204,
"metadata": {},
"outputs": [],
"source": [
"# Import every library needed for the exercise\n",
"import datetime\n",
"import os\n",
"\n",
"import igraph as ig\n",
"import pandas as pd\n",
"import tweepy\n",
"\n",
"# Disable pandas' chained-assignment warning for the whole notebook\n",
"pd.options.mode.chained_assignment = None\n",
"\n",
"# API authentication.\n",
"# The four secrets are read from environment variables so that no credential\n",
"# is stored in the notebook; a missing variable raises KeyError right here.\n",
"# NOTE(review): any key that was ever saved in this notebook must be treated\n",
"# as leaked and revoked in the Twitter developer portal.\n",
"auth = tweepy.OAuthHandler(\n",
"    os.environ['TWITTER_CONSUMER_KEY'],\n",
"    os.environ['TWITTER_CONSUMER_SECRET'])\n",
"auth.set_access_token(\n",
"    os.environ['TWITTER_ACCESS_TOKEN'],\n",
"    os.environ['TWITTER_ACCESS_TOKEN_SECRET'])\n",
"api = tweepy.API(auth, wait_on_rate_limit=True)"
]
},
{
"cell_type": "code",
"execution_count": 321,
"metadata": {},
"outputs": [],
"source": [
"# Fetch the tweets that contain any of the chosen hashtags\n",
"hashtags = ['#IA', '#IntelligenceArtificielle']\n",
"# The search takes a single query string, so the hashtags are combined\n",
"# explicitly with the OR operator instead of passing the Python list as-is.\n",
"search_query = ' OR '.join(hashtags)\n",
"results = tweepy.Cursor(api.search, q=search_query, lang='fr').items()\n",
"\n",
"# Convert the search results from JSON to a dataframe\n",
"json_data = [r._json for r in results]\n",
"results_df = pd.json_normalize(json_data)\n",
"\n",
"# Save the flattened results to disk\n",
"results_df.to_csv(\"tweets_database.csv\", sep=\",\")"
]
},
{
"cell_type": "code",
"execution_count": 471,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Pendant la semaine du 11/04/2021 au 18/04/2021 , les tweets en français et ayant pour hashtags #IA ou #IntelligenceArtificielle sont les suivants : \n",
" time location user_id \\\n",
"0 Sun Apr 18 14:27:29 +0000 2021 France /Japon cerise_masquee \n",
"1 Sun Apr 18 14:00:32 +0000 2021 Paris ORSYS \n",
"2 Sun Apr 18 13:48:21 +0000 2021 France mdrechsler \n",
"3 Sun Apr 18 13:34:32 +0000 2021 RISKINTEL4 \n",
"4 Sun Apr 18 13:30:17 +0000 2021 NACREspirale \n",
".. ... ... ... \n",
"384 Sat Apr 10 11:21:06 +0000 2021 Avignon, France ThibFay \n",
"385 Sat Apr 10 11:03:50 +0000 2021 Paris YvesPDB \n",
"386 Sat Apr 10 10:22:04 +0000 2021 Paris, France DailyDigital \n",
"387 Sat Apr 10 10:02:25 +0000 2021 PierreRamette \n",
"388 Sat Apr 10 10:00:01 +0000 2021 Paris, France LaForge_AI \n",
"\n",
" num_followers mentions \n",
"0 201 [{'screen_name': 'LaForge_AI', 'name': 'La For... \n",
"1 5440 [] \n",
"2 20850 [{'screen_name': 'mdrechsler', 'name': 'Michèl... \n",
"3 103 [{'screen_name': 'LaForge_AI', 'name': 'La For... \n",
"4 2753 [{'screen_name': 'LaForge_AI', 'name': 'La For... \n",
".. ... ... \n",
"384 164 [{'screen_name': 'LaForge_AI', 'name': 'La For... \n",
"385 31445 [{'screen_name': 'Inst_Lecanuet', 'name': 'Ins... \n",
"386 13819 [] \n",
"387 244 [{'screen_name': 'LaForge_AI', 'name': 'La For... \n",
"388 9932 [] \n",
"\n",
"[389 rows x 5 columns]\n"
]
}
],
"source": [
"# Keep only the fields of interest and give them shorter column names\n",
"column_names = {'created_at': 'time',\n",
"                'user.location': 'location',\n",
"                'user.screen_name': 'user_id',\n",
"                'user.followers_count': 'num_followers',\n",
"                'entities.user_mentions': 'mentions'}\n",
"simple_results = results_df[list(column_names)].rename(columns=column_names)\n",
"\n",
"# Print the raw but simplified result, with the date range computed from today\n",
"today = datetime.date.today()\n",
"week_ago = today - datetime.timedelta(days=7)\n",
"date_format = \"%d/%m/%Y\"\n",
"print(\"Pendant la semaine du\", week_ago.strftime(date_format),\n",
"      \"au\", today.strftime(date_format),\n",
"      \", les tweets en français et ayant pour hashtags #IA ou #IntelligenceArtificielle sont les suivants : \\n\",\n",
"      simple_results)"
]
},
{
"cell_type": "code",
"execution_count": 472,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"La liste des mentions entre les utilisateurs : \n",
" mentions user_id num_followers\n",
"0 LaForge_AI cerise_masquee 201\n",
"1 mdrechsler mdrechsler 20850\n",
"2 LaForge_AI RISKINTEL4 103\n",
"3 LaForge_AI NACREspirale 2753\n",
"4 VincentCespedes mpbarrouillet 893\n",
".. ... ... ...\n",
"314 TeensInAI ActuIAFr 11902\n",
"315 LaForge_AI ClaudioCimelli 1470\n",
"316 LaForge_AI ThibFay 164\n",
"317 Inst_Lecanuet YvesPDB 31445\n",
"318 LaForge_AI PierreRamette 244\n",
"\n",
"[319 rows x 3 columns]\n",
"La liste des utilisateurs Twitter ayant publié du contenu relatif à l'IA durant la semaine passée : \n",
" user_id num_followers\n",
"0 236News 4417.0\n",
"1 49mamie51 244.0\n",
"2 4inData NaN\n",
"3 AFD_France 85619.0\n",
"4 AIVids NaN\n",
".. ... ...\n",
"292 thot NaN\n",
"293 toniojj 1617.0\n",
"294 univbordeaux NaN\n",
"295 xavierquerat 8685.0\n",
"296 zdnetfr NaN\n",
"\n",
"[297 rows x 2 columns]\n"
]
}
],
"source": [
"# Turn the \"mentions\" column (a list of mention dicts per tweet) into lists of screen names\n",
"mentioned_users = [[mention['screen_name'] for mention in tweet_mentions]\n",
"                   for tweet_mentions in simple_results.mentions]\n",
"\n",
"# Store every edge and node in dataframes.\n",
"# assign() returns a new frame, so simple_results keeps its original\n",
"# \"mentions\" column and this cell can be re-run without failing.\n",
"edges_df = (\n",
"    simple_results.loc[:, ['mentions', 'user_id', 'num_followers']]\n",
"    .assign(mentions=mentioned_users)\n",
"    .explode('mentions')\n",
"    .reset_index(drop=True)  # replaces drop('index', 1), whose positional axis pandas 2.0 removed\n",
")\n",
"\n",
"mention_list = edges_df.mentions.to_list()\n",
"user_list = edges_df.user_id.to_list()\n",
"# Tweets without any mention explode to NaN, which is not an account name;\n",
"# sorting gives the nodes a deterministic order.\n",
"nodes_list = sorted({name for name in user_list + mention_list if pd.notna(name)})\n",
"\n",
"# An edge needs a mentioned account, so rows without one are dropped\n",
"edges = edges_df.dropna().reset_index(drop=True)\n",
"\n",
"# Mean follower count per author, taken from edges_df (before the rows without\n",
"# mention are dropped) so that authors who never mention anyone keep their count.\n",
"# Accounts that only appear as a mention have no known count and stay NaN.\n",
"followers = edges_df.groupby('user_id', as_index=False)['num_followers'].mean()\n",
"nodes = pd.DataFrame({'user_id': nodes_list}).merge(followers, on='user_id', how='left')\n",
"\n",
"print(\"La liste des mentions entre les utilisateurs : \\n\",\n",
"      edges)\n",
"print(\"La liste des utilisateurs Twitter ayant publié du contenu relatif à l'IA durant la semaine passée : \\n\",\n",
"      nodes)"
]
},
{
"cell_type": "code",
"execution_count": 483,
"metadata": {},
"outputs": [],
"source": [
"# Build the graph of the social network with the igraph package\n",
"\n",
"# NOTE(review): recent python-igraph releases may require use_vids=False when\n",
"# the edge columns hold names rather than integer ids — confirm before upgrading.\n",
"kol_map = ig.Graph.DataFrame(edges, directed=True, vertices=nodes)\n",
"kol_map.vs['name'] = nodes['user_id']\n",
"\n",
"# Follower counts are scaled down by this factor; the plotting cell uses the\n",
"# resulting attribute as a vertex size.\n",
"follower_scale = 0.001\n",
"kol_map.vs['num_followers'] = nodes['num_followers']*follower_scale"
]
},
{
"cell_type": "code",
"execution_count": 489,
"metadata": {},
"outputs": [],
"source": [
"# Compare the follower counts of the accounts in the network\n",
"# A converted copy is ranked instead of recasting `nodes` in place, so the\n",
"# frame read by the graph cell keeps its float dtype when cells are re-run.\n",
"# The per-account means are rounded first because a fractional value cannot\n",
"# be cast to the nullable integer dtype.\n",
"rank_followers = (\n",
"    nodes\n",
"    .assign(num_followers=nodes['num_followers'].round().astype(pd.Int64Dtype()))\n",
"    .sort_values(by='num_followers', ascending=False)\n",
")\n",
"rank_followers"
]
},
{
"cell_type": "code",
"execution_count": 476,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Les dix comptes Twitter ayant été le plus mentionnés durant la semaine passée sont : \n",
" node degree\n",
"233 eduscol_EMI 26\n",
"130 ModisFrance 25\n",
"268 mdrechsler 22\n",
"71 Edu_Num 20\n",
"7 ActuIAFr 14\n",
"90 GroupeLaPoste 10\n",
"9 AgenceRecherche 10\n",
"3 AFD_France 9\n",
"143 OpenvalueFR 9\n",
"223 ctricot 8\n"
]
}
],
"source": [
"# Compute degree centrality with the igraph package (out-degree of every vertex),\n",
"# then rank the accounts from highest to lowest degree\n",
"out_degrees = (\n",
"    pd.DataFrame({'node': nodes['user_id'],\n",
"                  'degree': kol_map.degree(mode=\"out\")})\n",
"    .sort_values(by='degree', ascending=False)\n",
")\n",
"\n",
"top_ten = out_degrees.head(10)\n",
"print(\"Les dix comptes Twitter ayant été le plus mentionnés durant la semaine passée sont : \\n\",\n",
"      top_ten)"
]
},
{
"cell_type": "code",
"execution_count": 497,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Carte représentant le réseau d'influence des comptes Twitter du domaine de l'IA : \n",
" (la taille des noeuds est proportionnelle à leur degré sortant) \n",
"\n",
"Carte représentant le réseau d'influence des comptes Twitter du domaine de l'IA : \n",
"(la taille des noeuds est proportionnelle à leur nombre d'abonnés) \n",
"\n"
]
},
{
"data": {
"image/svg+xml": [
"\n",
"\n"
],
"text/plain": [
""
]
},
"execution_count": 497,
"metadata": {
"image/svg+xml": {
"isolated": true
}
},
"output_type": "execute_result"
}
],
"source": [
"# Apply the Fruchterman-Reingold force-directed method to lay out the network\n",
"layout = kol_map.layout('fr')\n",
"\n",
"# Drawing options for the degree-based map\n",
"visual_style = {\n",
"    # Out-degree is used so that the vertex sizes match the caption printed\n",
"    # below (\"degré sortant\") and the out-degree ranking of the centrality cell;\n",
"    # degree() without a mode would count incoming edges as well.\n",
"    \"vertex_size\": kol_map.degree(mode=\"out\"),\n",
"    \"vertex_color\": \"#1DA1F2\",\n",
"    \"vertex_label\": kol_map.vs[\"name\"],\n",
"    \"vertex_label_size\": 5,\n",
"    \"edge_arrow_size\": 0.5,\n",
"    \"layout\": layout,\n",
"    \"bbox\": (500, 500),\n",
"    \"margin\": 20,\n",
"}\n",
"\n",
"# Follower-based map: same options, vertices sized by the num_followers attribute\n",
"kol_map0 = kol_map.copy()\n",
"visual_style0 = visual_style.copy()\n",
"visual_style0[\"vertex_size\"] = kol_map.vs['num_followers']\n",
"\n",
"# Plot and save the generated graphs\n",
"# (only the last ig.plot(...) call is the value of the cell, so only that map\n",
"# is rendered inline; both maps are written to their PDF files)\n",
"print(\"Carte représentant le réseau d'influence des comptes Twitter du domaine de l'IA : \\n\",\n",
"      \"(la taille des noeuds est proportionnelle à leur degré sortant) \\n\")\n",
"ig.plot(kol_map, \"twitter_network_mapping_degree.pdf\", **visual_style)\n",
"\n",
"print(\"Carte représentant le réseau d'influence des comptes Twitter du domaine de l'IA : \\n\"\n",
"      \"(la taille des noeuds est proportionnelle à leur nombre d'abonnés) \\n\")\n",
"ig.plot(kol_map0, \"twitter_network_mapping_follower.pdf\", **visual_style0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}