You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1584 lines
62 KiB
1584 lines
62 KiB
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "c425d5f5",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"#import de la bibliothèque pandas\n",
|
|
"import pandas as pd"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "a232e20f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"#n'afficher que 10 lignes\n",
|
|
"pd.set_option('display.max_rows', 10)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "1c8fff82",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>siren</th>\n",
|
|
" <th>nic</th>\n",
|
|
" <th>siret</th>\n",
|
|
" <th>statutDiffusionEtablissement</th>\n",
|
|
" <th>dateCreationEtablissement</th>\n",
|
|
" <th>trancheEffectifsEtablissement</th>\n",
|
|
" <th>anneeEffectifsEtablissement</th>\n",
|
|
" <th>activitePrincipaleRegistreMetiersEtablissement</th>\n",
|
|
" <th>dateDernierTraitementEtablissement</th>\n",
|
|
" <th>etablissementSiege</th>\n",
|
|
" <th>...</th>\n",
|
|
" <th>codePaysEtranger2Etablissement</th>\n",
|
|
" <th>libellePaysEtranger2Etablissement</th>\n",
|
|
" <th>etatAdministratifEtablissement</th>\n",
|
|
" <th>enseigne1Etablissement</th>\n",
|
|
" <th>enseigne2Etablissement</th>\n",
|
|
" <th>enseigne3Etablissement</th>\n",
|
|
" <th>denominationUsuelleEtablissement</th>\n",
|
|
" <th>activitePrincipaleEtablissement</th>\n",
|
|
" <th>nomenclatureActivitePrincipaleEtablissement</th>\n",
|
|
" <th>caractereEmployeurEtablissement</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>5620190</td>\n",
|
|
" <td>65</td>\n",
|
|
" <td>562019000065</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>2016-01-01</td>\n",
|
|
" <td>21</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-08-29T09:00:21</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>49.39A</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>5680145</td>\n",
|
|
" <td>17</td>\n",
|
|
" <td>568014500017</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>1956-01-01</td>\n",
|
|
" <td>21</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-10-04T04:12:29</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>22.22Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>5720164</td>\n",
|
|
" <td>28</td>\n",
|
|
" <td>572016400028</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>1983-09-28</td>\n",
|
|
" <td>22</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-08-29T09:00:21</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>86.10Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>5720784</td>\n",
|
|
" <td>31</td>\n",
|
|
" <td>572078400031</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>1993-04-01</td>\n",
|
|
" <td>22</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-08-29T09:00:21</td>\n",
|
|
" <td>False</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>25.72Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>5780960</td>\n",
|
|
" <td>26</td>\n",
|
|
" <td>578096000026</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>1981-12-28</td>\n",
|
|
" <td>21</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-08-29T09:00:21</td>\n",
|
|
" <td>False</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>HOTEL ROYAL THALASSO</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>55.10Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>...</th>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>82442</th>\n",
|
|
" <td>998893002</td>\n",
|
|
" <td>142</td>\n",
|
|
" <td>99889300200142</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>2013-05-01</td>\n",
|
|
" <td>21</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-08-29T10:50:43</td>\n",
|
|
" <td>False</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>49.39B</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>82443</th>\n",
|
|
" <td>999990005</td>\n",
|
|
" <td>38</td>\n",
|
|
" <td>99999000500038</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>1993-07-01</td>\n",
|
|
" <td>22</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-08-29T10:50:43</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>49.41A</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>82444</th>\n",
|
|
" <td>999990005</td>\n",
|
|
" <td>491</td>\n",
|
|
" <td>99999000500491</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>2017-04-01</td>\n",
|
|
" <td>21</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-08-29T10:50:43</td>\n",
|
|
" <td>False</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>49.41A</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>82445</th>\n",
|
|
" <td>999990286</td>\n",
|
|
" <td>18</td>\n",
|
|
" <td>99999028600018</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>1979-11-30</td>\n",
|
|
" <td>22</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-08-29T10:50:43</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>55.10Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>82446</th>\n",
|
|
" <td>999990369</td>\n",
|
|
" <td>87</td>\n",
|
|
" <td>99999036900087</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>2014-03-31</td>\n",
|
|
" <td>21</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-10-02T03:38:31</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>66.30Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"<p>82447 rows × 77 columns</p>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" siren nic siret statutDiffusionEtablissement \\\n",
|
|
"0 5620190 65 562019000065 O \n",
|
|
"1 5680145 17 568014500017 O \n",
|
|
"2 5720164 28 572016400028 O \n",
|
|
"3 5720784 31 572078400031 O \n",
|
|
"4 5780960 26 578096000026 O \n",
|
|
"... ... ... ... ... \n",
|
|
"82442 998893002 142 99889300200142 O \n",
|
|
"82443 999990005 38 99999000500038 O \n",
|
|
"82444 999990005 491 99999000500491 O \n",
|
|
"82445 999990286 18 99999028600018 O \n",
|
|
"82446 999990369 87 99999036900087 O \n",
|
|
"\n",
|
|
" dateCreationEtablissement trancheEffectifsEtablissement \\\n",
|
|
"0 2016-01-01 21 \n",
|
|
"1 1956-01-01 21 \n",
|
|
"2 1983-09-28 22 \n",
|
|
"3 1993-04-01 22 \n",
|
|
"4 1981-12-28 21 \n",
|
|
"... ... ... \n",
|
|
"82442 2013-05-01 21 \n",
|
|
"82443 1993-07-01 22 \n",
|
|
"82444 2017-04-01 21 \n",
|
|
"82445 1979-11-30 22 \n",
|
|
"82446 2014-03-31 21 \n",
|
|
"\n",
|
|
" anneeEffectifsEtablissement \\\n",
|
|
"0 2020.0 \n",
|
|
"1 2020.0 \n",
|
|
"2 2020.0 \n",
|
|
"3 2020.0 \n",
|
|
"4 2020.0 \n",
|
|
"... ... \n",
|
|
"82442 2020.0 \n",
|
|
"82443 2020.0 \n",
|
|
"82444 2020.0 \n",
|
|
"82445 2020.0 \n",
|
|
"82446 2020.0 \n",
|
|
"\n",
|
|
" activitePrincipaleRegistreMetiersEtablissement \\\n",
|
|
"0 NaN \n",
|
|
"1 NaN \n",
|
|
"2 NaN \n",
|
|
"3 NaN \n",
|
|
"4 NaN \n",
|
|
"... ... \n",
|
|
"82442 NaN \n",
|
|
"82443 NaN \n",
|
|
"82444 NaN \n",
|
|
"82445 NaN \n",
|
|
"82446 NaN \n",
|
|
"\n",
|
|
" dateDernierTraitementEtablissement etablissementSiege ... \\\n",
|
|
"0 2022-08-29T09:00:21 True ... \n",
|
|
"1 2022-10-04T04:12:29 True ... \n",
|
|
"2 2022-08-29T09:00:21 True ... \n",
|
|
"3 2022-08-29T09:00:21 False ... \n",
|
|
"4 2022-08-29T09:00:21 False ... \n",
|
|
"... ... ... ... \n",
|
|
"82442 2022-08-29T10:50:43 False ... \n",
|
|
"82443 2022-08-29T10:50:43 True ... \n",
|
|
"82444 2022-08-29T10:50:43 False ... \n",
|
|
"82445 2022-08-29T10:50:43 True ... \n",
|
|
"82446 2022-10-02T03:38:31 True ... \n",
|
|
"\n",
|
|
" codePaysEtranger2Etablissement libellePaysEtranger2Etablissement \\\n",
|
|
"0 NaN NaN \n",
|
|
"1 NaN NaN \n",
|
|
"2 NaN NaN \n",
|
|
"3 NaN NaN \n",
|
|
"4 NaN NaN \n",
|
|
"... ... ... \n",
|
|
"82442 NaN NaN \n",
|
|
"82443 NaN NaN \n",
|
|
"82444 NaN NaN \n",
|
|
"82445 NaN NaN \n",
|
|
"82446 NaN NaN \n",
|
|
"\n",
|
|
" etatAdministratifEtablissement enseigne1Etablissement \\\n",
|
|
"0 A NaN \n",
|
|
"1 A NaN \n",
|
|
"2 A NaN \n",
|
|
"3 A NaN \n",
|
|
"4 A HOTEL ROYAL THALASSO \n",
|
|
"... ... ... \n",
|
|
"82442 A NaN \n",
|
|
"82443 A NaN \n",
|
|
"82444 A NaN \n",
|
|
"82445 A NaN \n",
|
|
"82446 A NaN \n",
|
|
"\n",
|
|
" enseigne2Etablissement enseigne3Etablissement \\\n",
|
|
"0 NaN NaN \n",
|
|
"1 NaN NaN \n",
|
|
"2 NaN NaN \n",
|
|
"3 NaN NaN \n",
|
|
"4 NaN NaN \n",
|
|
"... ... ... \n",
|
|
"82442 NaN NaN \n",
|
|
"82443 NaN NaN \n",
|
|
"82444 NaN NaN \n",
|
|
"82445 NaN NaN \n",
|
|
"82446 NaN NaN \n",
|
|
"\n",
|
|
" denominationUsuelleEtablissement activitePrincipaleEtablissement \\\n",
|
|
"0 NaN 49.39A \n",
|
|
"1 NaN 22.22Z \n",
|
|
"2 NaN 86.10Z \n",
|
|
"3 NaN 25.72Z \n",
|
|
"4 NaN 55.10Z \n",
|
|
"... ... ... \n",
|
|
"82442 NaN 49.39B \n",
|
|
"82443 NaN 49.41A \n",
|
|
"82444 NaN 49.41A \n",
|
|
"82445 NaN 55.10Z \n",
|
|
"82446 NaN 66.30Z \n",
|
|
"\n",
|
|
" nomenclatureActivitePrincipaleEtablissement \\\n",
|
|
"0 NAFRev2 \n",
|
|
"1 NAFRev2 \n",
|
|
"2 NAFRev2 \n",
|
|
"3 NAFRev2 \n",
|
|
"4 NAFRev2 \n",
|
|
"... ... \n",
|
|
"82442 NAFRev2 \n",
|
|
"82443 NAFRev2 \n",
|
|
"82444 NAFRev2 \n",
|
|
"82445 NAFRev2 \n",
|
|
"82446 NAFRev2 \n",
|
|
"\n",
|
|
" caractereEmployeurEtablissement \n",
|
|
"0 O \n",
|
|
"1 O \n",
|
|
"2 O \n",
|
|
"3 O \n",
|
|
"4 O \n",
|
|
"... ... \n",
|
|
"82442 O \n",
|
|
"82443 O \n",
|
|
"82444 O \n",
|
|
"82445 O \n",
|
|
"82446 O \n",
|
|
"\n",
|
|
"[82447 rows x 77 columns]"
|
|
]
|
|
},
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"#import du fichier excel\n",
|
|
"# SIREN (9 digits), NIC (5) and SIRET (14) are fixed-width identifiers, not quantities:\n",
"# read them as text and restore the leading zeros lost by a numeric read (5620190 -> 005620190).\n",
"ID_WIDTHS = {'siren': 9, 'nic': 5, 'siret': 14}\n",
"entreprise50 = pd.read_excel('sirenentr.xlsx', dtype={col: str for col in ID_WIDTHS})\n",
"entreprise50 = entreprise50.assign(\n",
"    **{col: entreprise50[col].str.zfill(width) for col, width in ID_WIDTHS.items()}\n",
")\n",
|
|
"entreprise50"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "7400d118",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"(82447, 77)"
|
|
]
|
|
},
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"#Afficher le nombre de lignes et de colonnes\n",
|
|
"entreprise50.shape"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "b7284e73",
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>siren</th>\n",
|
|
" <th>nic</th>\n",
|
|
" <th>siret</th>\n",
|
|
" <th>statutDiffusionEtablissement</th>\n",
|
|
" <th>dateCreationEtablissement</th>\n",
|
|
" <th>trancheEffectifsEtablissement</th>\n",
|
|
" <th>anneeEffectifsEtablissement</th>\n",
|
|
" <th>activitePrincipaleRegistreMetiersEtablissement</th>\n",
|
|
" <th>dateDernierTraitementEtablissement</th>\n",
|
|
" <th>etablissementSiege</th>\n",
|
|
" <th>...</th>\n",
|
|
" <th>codeCedex2Etablissement</th>\n",
|
|
" <th>libelleCedex2Etablissement</th>\n",
|
|
" <th>etatAdministratifEtablissement</th>\n",
|
|
" <th>enseigne1Etablissement</th>\n",
|
|
" <th>enseigne2Etablissement</th>\n",
|
|
" <th>enseigne3Etablissement</th>\n",
|
|
" <th>denominationUsuelleEtablissement</th>\n",
|
|
" <th>activitePrincipaleEtablissement</th>\n",
|
|
" <th>nomenclatureActivitePrincipaleEtablissement</th>\n",
|
|
" <th>caractereEmployeurEtablissement</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>5620190</td>\n",
|
|
" <td>65</td>\n",
|
|
" <td>562019000065</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>2016-01-01</td>\n",
|
|
" <td>21</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-08-29T09:00:21</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>49.39A</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>5680145</td>\n",
|
|
" <td>17</td>\n",
|
|
" <td>568014500017</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>1956-01-01</td>\n",
|
|
" <td>21</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-10-04T04:12:29</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>22.22Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>5720164</td>\n",
|
|
" <td>28</td>\n",
|
|
" <td>572016400028</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>1983-09-28</td>\n",
|
|
" <td>22</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-08-29T09:00:21</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>86.10Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>5720784</td>\n",
|
|
" <td>31</td>\n",
|
|
" <td>572078400031</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>1993-04-01</td>\n",
|
|
" <td>22</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-08-29T09:00:21</td>\n",
|
|
" <td>False</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>25.72Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>5780960</td>\n",
|
|
" <td>26</td>\n",
|
|
" <td>578096000026</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>1981-12-28</td>\n",
|
|
" <td>21</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-08-29T09:00:21</td>\n",
|
|
" <td>False</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>HOTEL ROYAL THALASSO</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>55.10Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>...</th>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>82442</th>\n",
|
|
" <td>998893002</td>\n",
|
|
" <td>142</td>\n",
|
|
" <td>99889300200142</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>2013-05-01</td>\n",
|
|
" <td>21</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-08-29T10:50:43</td>\n",
|
|
" <td>False</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>49.39B</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>82443</th>\n",
|
|
" <td>999990005</td>\n",
|
|
" <td>38</td>\n",
|
|
" <td>99999000500038</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>1993-07-01</td>\n",
|
|
" <td>22</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-08-29T10:50:43</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>49.41A</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>82444</th>\n",
|
|
" <td>999990005</td>\n",
|
|
" <td>491</td>\n",
|
|
" <td>99999000500491</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>2017-04-01</td>\n",
|
|
" <td>21</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-08-29T10:50:43</td>\n",
|
|
" <td>False</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>49.41A</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>82445</th>\n",
|
|
" <td>999990286</td>\n",
|
|
" <td>18</td>\n",
|
|
" <td>99999028600018</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>1979-11-30</td>\n",
|
|
" <td>22</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-08-29T10:50:43</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>55.10Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>82446</th>\n",
|
|
" <td>999990369</td>\n",
|
|
" <td>87</td>\n",
|
|
" <td>99999036900087</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>2014-03-31</td>\n",
|
|
" <td>21</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-10-02T03:38:31</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>66.30Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"<p>82447 rows × 71 columns</p>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" siren nic siret statutDiffusionEtablissement \\\n",
|
|
"0 5620190 65 562019000065 O \n",
|
|
"1 5680145 17 568014500017 O \n",
|
|
"2 5720164 28 572016400028 O \n",
|
|
"3 5720784 31 572078400031 O \n",
|
|
"4 5780960 26 578096000026 O \n",
|
|
"... ... ... ... ... \n",
|
|
"82442 998893002 142 99889300200142 O \n",
|
|
"82443 999990005 38 99999000500038 O \n",
|
|
"82444 999990005 491 99999000500491 O \n",
|
|
"82445 999990286 18 99999028600018 O \n",
|
|
"82446 999990369 87 99999036900087 O \n",
|
|
"\n",
|
|
" dateCreationEtablissement trancheEffectifsEtablissement \\\n",
|
|
"0 2016-01-01 21 \n",
|
|
"1 1956-01-01 21 \n",
|
|
"2 1983-09-28 22 \n",
|
|
"3 1993-04-01 22 \n",
|
|
"4 1981-12-28 21 \n",
|
|
"... ... ... \n",
|
|
"82442 2013-05-01 21 \n",
|
|
"82443 1993-07-01 22 \n",
|
|
"82444 2017-04-01 21 \n",
|
|
"82445 1979-11-30 22 \n",
|
|
"82446 2014-03-31 21 \n",
|
|
"\n",
|
|
" anneeEffectifsEtablissement \\\n",
|
|
"0 2020.0 \n",
|
|
"1 2020.0 \n",
|
|
"2 2020.0 \n",
|
|
"3 2020.0 \n",
|
|
"4 2020.0 \n",
|
|
"... ... \n",
|
|
"82442 2020.0 \n",
|
|
"82443 2020.0 \n",
|
|
"82444 2020.0 \n",
|
|
"82445 2020.0 \n",
|
|
"82446 2020.0 \n",
|
|
"\n",
|
|
" activitePrincipaleRegistreMetiersEtablissement \\\n",
|
|
"0 NaN \n",
|
|
"1 NaN \n",
|
|
"2 NaN \n",
|
|
"3 NaN \n",
|
|
"4 NaN \n",
|
|
"... ... \n",
|
|
"82442 NaN \n",
|
|
"82443 NaN \n",
|
|
"82444 NaN \n",
|
|
"82445 NaN \n",
|
|
"82446 NaN \n",
|
|
"\n",
|
|
" dateDernierTraitementEtablissement etablissementSiege ... \\\n",
|
|
"0 2022-08-29T09:00:21 True ... \n",
|
|
"1 2022-10-04T04:12:29 True ... \n",
|
|
"2 2022-08-29T09:00:21 True ... \n",
|
|
"3 2022-08-29T09:00:21 False ... \n",
|
|
"4 2022-08-29T09:00:21 False ... \n",
|
|
"... ... ... ... \n",
|
|
"82442 2022-08-29T10:50:43 False ... \n",
|
|
"82443 2022-08-29T10:50:43 True ... \n",
|
|
"82444 2022-08-29T10:50:43 False ... \n",
|
|
"82445 2022-08-29T10:50:43 True ... \n",
|
|
"82446 2022-10-02T03:38:31 True ... \n",
|
|
"\n",
|
|
" codeCedex2Etablissement libelleCedex2Etablissement \\\n",
|
|
"0 NaN NaN \n",
|
|
"1 NaN NaN \n",
|
|
"2 NaN NaN \n",
|
|
"3 NaN NaN \n",
|
|
"4 NaN NaN \n",
|
|
"... ... ... \n",
|
|
"82442 NaN NaN \n",
|
|
"82443 NaN NaN \n",
|
|
"82444 NaN NaN \n",
|
|
"82445 NaN NaN \n",
|
|
"82446 NaN NaN \n",
|
|
"\n",
|
|
" etatAdministratifEtablissement enseigne1Etablissement \\\n",
|
|
"0 A NaN \n",
|
|
"1 A NaN \n",
|
|
"2 A NaN \n",
|
|
"3 A NaN \n",
|
|
"4 A HOTEL ROYAL THALASSO \n",
|
|
"... ... ... \n",
|
|
"82442 A NaN \n",
|
|
"82443 A NaN \n",
|
|
"82444 A NaN \n",
|
|
"82445 A NaN \n",
|
|
"82446 A NaN \n",
|
|
"\n",
|
|
" enseigne2Etablissement enseigne3Etablissement \\\n",
|
|
"0 NaN NaN \n",
|
|
"1 NaN NaN \n",
|
|
"2 NaN NaN \n",
|
|
"3 NaN NaN \n",
|
|
"4 NaN NaN \n",
|
|
"... ... ... \n",
|
|
"82442 NaN NaN \n",
|
|
"82443 NaN NaN \n",
|
|
"82444 NaN NaN \n",
|
|
"82445 NaN NaN \n",
|
|
"82446 NaN NaN \n",
|
|
"\n",
|
|
" denominationUsuelleEtablissement activitePrincipaleEtablissement \\\n",
|
|
"0 NaN 49.39A \n",
|
|
"1 NaN 22.22Z \n",
|
|
"2 NaN 86.10Z \n",
|
|
"3 NaN 25.72Z \n",
|
|
"4 NaN 55.10Z \n",
|
|
"... ... ... \n",
|
|
"82442 NaN 49.39B \n",
|
|
"82443 NaN 49.41A \n",
|
|
"82444 NaN 49.41A \n",
|
|
"82445 NaN 55.10Z \n",
|
|
"82446 NaN 66.30Z \n",
|
|
"\n",
|
|
" nomenclatureActivitePrincipaleEtablissement \\\n",
|
|
"0 NAFRev2 \n",
|
|
"1 NAFRev2 \n",
|
|
"2 NAFRev2 \n",
|
|
"3 NAFRev2 \n",
|
|
"4 NAFRev2 \n",
|
|
"... ... \n",
|
|
"82442 NAFRev2 \n",
|
|
"82443 NAFRev2 \n",
|
|
"82444 NAFRev2 \n",
|
|
"82445 NAFRev2 \n",
|
|
"82446 NAFRev2 \n",
|
|
"\n",
|
|
" caractereEmployeurEtablissement \n",
|
|
"0 O \n",
|
|
"1 O \n",
|
|
"2 O \n",
|
|
"3 O \n",
|
|
"4 O \n",
|
|
"... ... \n",
|
|
"82442 O \n",
|
|
"82443 O \n",
|
|
"82444 O \n",
|
|
"82445 O \n",
|
|
"82446 O \n",
|
|
"\n",
|
|
"[82447 rows x 71 columns]"
|
|
]
|
|
},
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"#retrait des colonnes inutiles\n",
|
|
"# drop() returns a new DataFrame: reassign it so the columns are really gone for later cells.\n",
"# errors='ignore' keeps the cell re-runnable once the columns have already been removed.\n",
"entreprise50 = entreprise50.drop(\n",
"    columns=[\n",
"        'unitePurgeeUniteLegale',\n",
"        'prenom4UniteLegale',\n",
"        'pseudonymeUniteLegale',\n",
"        'libelleCommuneEtranger2Etablissement',\n",
"        'codePaysEtranger2Etablissement',\n",
"        'libellePaysEtranger2Etablissement',\n",
"    ],\n",
"    errors='ignore',\n",
")\n",
"entreprise50"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "f7e8e157",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"#drop les colonnes inutiles\n",
|
|
"# TODO: list the column names to remove. drop() targets row labels by default, so columns= is\n",
"# required here; an empty list is a no-op, which keeps Restart & Run All from raising KeyError.\n",
"colonnes_inutiles = []\n",
"entreprise50.drop(columns=colonnes_inutiles)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 43,
|
|
"id": "8f4fbab9",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"0 False\n",
|
|
"1 False\n",
|
|
"2 False\n",
|
|
"3 False\n",
|
|
"4 False\n",
|
|
" ... \n",
|
|
"82442 False\n",
|
|
"82443 False\n",
|
|
"82444 False\n",
|
|
"82445 False\n",
|
|
"82446 False\n",
|
|
"Length: 82447, dtype: bool"
|
|
]
|
|
},
|
|
"execution_count": 43,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"#identifier les lignes dupliquées \n",
|
|
"entreprise50.duplicated(subset=None, keep='first')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 44,
|
|
"id": "cde83087",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>siren</th>\n",
|
|
" <th>nic</th>\n",
|
|
" <th>siret</th>\n",
|
|
" <th>statutDiffusionEtablissement</th>\n",
|
|
" <th>dateCreationEtablissement</th>\n",
|
|
" <th>trancheEffectifsEtablissement</th>\n",
|
|
" <th>anneeEffectifsEtablissement</th>\n",
|
|
" <th>activitePrincipaleRegistreMetiersEtablissement</th>\n",
|
|
" <th>dateDernierTraitementEtablissement</th>\n",
|
|
" <th>etablissementSiege</th>\n",
|
|
" <th>...</th>\n",
|
|
" <th>codePaysEtranger2Etablissement</th>\n",
|
|
" <th>libellePaysEtranger2Etablissement</th>\n",
|
|
" <th>etatAdministratifEtablissement</th>\n",
|
|
" <th>enseigne1Etablissement</th>\n",
|
|
" <th>enseigne2Etablissement</th>\n",
|
|
" <th>enseigne3Etablissement</th>\n",
|
|
" <th>denominationUsuelleEtablissement</th>\n",
|
|
" <th>activitePrincipaleEtablissement</th>\n",
|
|
" <th>nomenclatureActivitePrincipaleEtablissement</th>\n",
|
|
" <th>caractereEmployeurEtablissement</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>5620190</td>\n",
|
|
" <td>65</td>\n",
|
|
" <td>562019000065</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>2016-01-01</td>\n",
|
|
" <td>21</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-08-29T09:00:21</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>49.39A</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>5680145</td>\n",
|
|
" <td>17</td>\n",
|
|
" <td>568014500017</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>1956-01-01</td>\n",
|
|
" <td>21</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-10-04T04:12:29</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>22.22Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>5720164</td>\n",
|
|
" <td>28</td>\n",
|
|
" <td>572016400028</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>1983-09-28</td>\n",
|
|
" <td>22</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-08-29T09:00:21</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>86.10Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>5720784</td>\n",
|
|
" <td>31</td>\n",
|
|
" <td>572078400031</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>1993-04-01</td>\n",
|
|
" <td>22</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-08-29T09:00:21</td>\n",
|
|
" <td>False</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>25.72Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>5780960</td>\n",
|
|
" <td>26</td>\n",
|
|
" <td>578096000026</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>1981-12-28</td>\n",
|
|
" <td>21</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-08-29T09:00:21</td>\n",
|
|
" <td>False</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>HOTEL ROYAL THALASSO</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>55.10Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>...</th>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>82442</th>\n",
|
|
" <td>998893002</td>\n",
|
|
" <td>142</td>\n",
|
|
" <td>99889300200142</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>2013-05-01</td>\n",
|
|
" <td>21</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-08-29T10:50:43</td>\n",
|
|
" <td>False</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>49.39B</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>82443</th>\n",
|
|
" <td>999990005</td>\n",
|
|
" <td>38</td>\n",
|
|
" <td>99999000500038</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>1993-07-01</td>\n",
|
|
" <td>22</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-08-29T10:50:43</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>49.41A</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>82444</th>\n",
|
|
" <td>999990005</td>\n",
|
|
" <td>491</td>\n",
|
|
" <td>99999000500491</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>2017-04-01</td>\n",
|
|
" <td>21</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-08-29T10:50:43</td>\n",
|
|
" <td>False</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>49.41A</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>82445</th>\n",
|
|
" <td>999990286</td>\n",
|
|
" <td>18</td>\n",
|
|
" <td>99999028600018</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>1979-11-30</td>\n",
|
|
" <td>22</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-08-29T10:50:43</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>55.10Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>82446</th>\n",
|
|
" <td>999990369</td>\n",
|
|
" <td>87</td>\n",
|
|
" <td>99999036900087</td>\n",
|
|
" <td>O</td>\n",
|
|
" <td>2014-03-31</td>\n",
|
|
" <td>21</td>\n",
|
|
" <td>2020.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2022-10-02T03:38:31</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>66.30Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"<p>82447 rows × 77 columns</p>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" siren nic siret statutDiffusionEtablissement \\\n",
|
|
"0 5620190 65 562019000065 O \n",
|
|
"1 5680145 17 568014500017 O \n",
|
|
"2 5720164 28 572016400028 O \n",
|
|
"3 5720784 31 572078400031 O \n",
|
|
"4 5780960 26 578096000026 O \n",
|
|
"... ... ... ... ... \n",
|
|
"82442 998893002 142 99889300200142 O \n",
|
|
"82443 999990005 38 99999000500038 O \n",
|
|
"82444 999990005 491 99999000500491 O \n",
|
|
"82445 999990286 18 99999028600018 O \n",
|
|
"82446 999990369 87 99999036900087 O \n",
|
|
"\n",
|
|
" dateCreationEtablissement trancheEffectifsEtablissement \\\n",
|
|
"0 2016-01-01 21 \n",
|
|
"1 1956-01-01 21 \n",
|
|
"2 1983-09-28 22 \n",
|
|
"3 1993-04-01 22 \n",
|
|
"4 1981-12-28 21 \n",
|
|
"... ... ... \n",
|
|
"82442 2013-05-01 21 \n",
|
|
"82443 1993-07-01 22 \n",
|
|
"82444 2017-04-01 21 \n",
|
|
"82445 1979-11-30 22 \n",
|
|
"82446 2014-03-31 21 \n",
|
|
"\n",
|
|
" anneeEffectifsEtablissement \\\n",
|
|
"0 2020.0 \n",
|
|
"1 2020.0 \n",
|
|
"2 2020.0 \n",
|
|
"3 2020.0 \n",
|
|
"4 2020.0 \n",
|
|
"... ... \n",
|
|
"82442 2020.0 \n",
|
|
"82443 2020.0 \n",
|
|
"82444 2020.0 \n",
|
|
"82445 2020.0 \n",
|
|
"82446 2020.0 \n",
|
|
"\n",
|
|
" activitePrincipaleRegistreMetiersEtablissement \\\n",
|
|
"0 NaN \n",
|
|
"1 NaN \n",
|
|
"2 NaN \n",
|
|
"3 NaN \n",
|
|
"4 NaN \n",
|
|
"... ... \n",
|
|
"82442 NaN \n",
|
|
"82443 NaN \n",
|
|
"82444 NaN \n",
|
|
"82445 NaN \n",
|
|
"82446 NaN \n",
|
|
"\n",
|
|
" dateDernierTraitementEtablissement etablissementSiege ... \\\n",
|
|
"0 2022-08-29T09:00:21 True ... \n",
|
|
"1 2022-10-04T04:12:29 True ... \n",
|
|
"2 2022-08-29T09:00:21 True ... \n",
|
|
"3 2022-08-29T09:00:21 False ... \n",
|
|
"4 2022-08-29T09:00:21 False ... \n",
|
|
"... ... ... ... \n",
|
|
"82442 2022-08-29T10:50:43 False ... \n",
|
|
"82443 2022-08-29T10:50:43 True ... \n",
|
|
"82444 2022-08-29T10:50:43 False ... \n",
|
|
"82445 2022-08-29T10:50:43 True ... \n",
|
|
"82446 2022-10-02T03:38:31 True ... \n",
|
|
"\n",
|
|
" codePaysEtranger2Etablissement libellePaysEtranger2Etablissement \\\n",
|
|
"0 NaN NaN \n",
|
|
"1 NaN NaN \n",
|
|
"2 NaN NaN \n",
|
|
"3 NaN NaN \n",
|
|
"4 NaN NaN \n",
|
|
"... ... ... \n",
|
|
"82442 NaN NaN \n",
|
|
"82443 NaN NaN \n",
|
|
"82444 NaN NaN \n",
|
|
"82445 NaN NaN \n",
|
|
"82446 NaN NaN \n",
|
|
"\n",
|
|
" etatAdministratifEtablissement enseigne1Etablissement \\\n",
|
|
"0 A NaN \n",
|
|
"1 A NaN \n",
|
|
"2 A NaN \n",
|
|
"3 A NaN \n",
|
|
"4 A HOTEL ROYAL THALASSO \n",
|
|
"... ... ... \n",
|
|
"82442 A NaN \n",
|
|
"82443 A NaN \n",
|
|
"82444 A NaN \n",
|
|
"82445 A NaN \n",
|
|
"82446 A NaN \n",
|
|
"\n",
|
|
" enseigne2Etablissement enseigne3Etablissement \\\n",
|
|
"0 NaN NaN \n",
|
|
"1 NaN NaN \n",
|
|
"2 NaN NaN \n",
|
|
"3 NaN NaN \n",
|
|
"4 NaN NaN \n",
|
|
"... ... ... \n",
|
|
"82442 NaN NaN \n",
|
|
"82443 NaN NaN \n",
|
|
"82444 NaN NaN \n",
|
|
"82445 NaN NaN \n",
|
|
"82446 NaN NaN \n",
|
|
"\n",
|
|
" denominationUsuelleEtablissement activitePrincipaleEtablissement \\\n",
|
|
"0 NaN 49.39A \n",
|
|
"1 NaN 22.22Z \n",
|
|
"2 NaN 86.10Z \n",
|
|
"3 NaN 25.72Z \n",
|
|
"4 NaN 55.10Z \n",
|
|
"... ... ... \n",
|
|
"82442 NaN 49.39B \n",
|
|
"82443 NaN 49.41A \n",
|
|
"82444 NaN 49.41A \n",
|
|
"82445 NaN 55.10Z \n",
|
|
"82446 NaN 66.30Z \n",
|
|
"\n",
|
|
" nomenclatureActivitePrincipaleEtablissement \\\n",
|
|
"0 NAFRev2 \n",
|
|
"1 NAFRev2 \n",
|
|
"2 NAFRev2 \n",
|
|
"3 NAFRev2 \n",
|
|
"4 NAFRev2 \n",
|
|
"... ... \n",
|
|
"82442 NAFRev2 \n",
|
|
"82443 NAFRev2 \n",
|
|
"82444 NAFRev2 \n",
|
|
"82445 NAFRev2 \n",
|
|
"82446 NAFRev2 \n",
|
|
"\n",
|
|
" caractereEmployeurEtablissement \n",
|
|
"0 O \n",
|
|
"1 O \n",
|
|
"2 O \n",
|
|
"3 O \n",
|
|
"4 O \n",
|
|
"... ... \n",
|
|
"82442 O \n",
|
|
"82443 O \n",
|
|
"82444 O \n",
|
|
"82445 O \n",
|
|
"82446 O \n",
|
|
"\n",
|
|
"[82447 rows x 77 columns]"
|
|
]
|
|
},
|
|
"execution_count": 44,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Remove duplicated rows. drop_duplicates() returns a new DataFrame and leaves the\n",
|
|
"# original untouched, so the result is assigned back to entreprise50.\n",
|
|
"entreprise50 = entreprise50.drop_duplicates()\n",
|
|
"# Show the resulting frame as the cell output.\n",
|
|
"entreprise50"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "8cdf8558",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 45,
|
|
"id": "22e04dc2",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"DataFrame is written to Excel File successfully.\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Export the cleaned DataFrame to an Excel workbook. The path is relative, so the\n",
|
|
"# file is created in the kernel's current working directory.\n",
|
|
"file_name = 'Sirenplus.xlsx'\n",
|
|
"entreprise50.to_excel(excel_writer=file_name)\n",
|
|
"print('DataFrame is written to Excel File successfully.')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "6253e566",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.7"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|