You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
955 lines
39 KiB
955 lines
39 KiB
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "cf14db47",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Import pandas\n",
|
|
"import pandas as pd"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "78c7afbb",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"/Users/angecharbelledurand/opt/anaconda3/lib/python3.9/site-packages/IPython/core/interactiveshell.py:3444: DtypeWarning: Columns (6,14,15) have mixed types.Specify dtype option on import or set low_memory=False.\n",
|
|
" exec(code_obj, self.user_global_ns, self.user_ns)\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>siren</th>\n",
|
|
" <th>nic</th>\n",
|
|
" <th>siret</th>\n",
|
|
" <th>dateCreationEtablissement</th>\n",
|
|
" <th>trancheEffectifsEtablissement</th>\n",
|
|
" <th>anneeEffectifsEtablissement</th>\n",
|
|
" <th>activitePrincipaleRegistreMetiersEtablissement</th>\n",
|
|
" <th>dateDernierTraitementEtablissement</th>\n",
|
|
" <th>etablissementSiege</th>\n",
|
|
" <th>nombrePeriodesEtablissement</th>\n",
|
|
" <th>dateDebut</th>\n",
|
|
" <th>etatAdministratifEtablissement</th>\n",
|
|
" <th>enseigne1Etablissement</th>\n",
|
|
" <th>enseigne2Etablissement</th>\n",
|
|
" <th>enseigne3Etablissement</th>\n",
|
|
" <th>denominationUsuelleEtablissement</th>\n",
|
|
" <th>activitePrincipaleEtablissement</th>\n",
|
|
" <th>nomenclatureActivitePrincipaleEtablissement</th>\n",
|
|
" <th>caractereEmployeurEtablissement</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>5420120</td>\n",
|
|
" <td>15</td>\n",
|
|
" <td>542012000015</td>\n",
|
|
" <td>1989-01-27 00:00:00</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>2018.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2020-08-25 10:10:13</td>\n",
|
|
" <td>False</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>2008-01-01 00:00:00</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>10.81Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>5420120</td>\n",
|
|
" <td>31</td>\n",
|
|
" <td>542012000031</td>\n",
|
|
" <td>1900-01-01 00:00:00</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>2018.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2021-01-01 03:35:01</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>6</td>\n",
|
|
" <td>2008-04-23 00:00:00</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>70.10Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>5520176</td>\n",
|
|
" <td>16</td>\n",
|
|
" <td>552017600016</td>\n",
|
|
" <td>1955-01-01 00:00:00</td>\n",
|
|
" <td>12</td>\n",
|
|
" <td>2018.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2021-08-01 21:30:57</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>2008-01-01 00:00:00</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>17.21A</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>5520176</td>\n",
|
|
" <td>32</td>\n",
|
|
" <td>552017600032</td>\n",
|
|
" <td>1999-08-30 00:00:00</td>\n",
|
|
" <td>12</td>\n",
|
|
" <td>2018.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2020-08-25 10:10:13</td>\n",
|
|
" <td>False</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>2008-01-01 00:00:00</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>17.21A</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>5520242</td>\n",
|
|
" <td>16</td>\n",
|
|
" <td>552024200016</td>\n",
|
|
" <td>1900-01-01 00:00:00</td>\n",
|
|
" <td>12</td>\n",
|
|
" <td>2018.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2021-08-01 21:30:57</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>2008-01-01 00:00:00</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>20.30Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>...</th>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1095676</th>\n",
|
|
" <td>999990005</td>\n",
|
|
" <td>38</td>\n",
|
|
" <td>99999000500038</td>\n",
|
|
" <td>1993-07-01 00:00:00</td>\n",
|
|
" <td>32</td>\n",
|
|
" <td>2018.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2021-08-01 20:15:35</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>2010-12-15 00:00:00</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>49.41A</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1095677</th>\n",
|
|
" <td>999990062</td>\n",
|
|
" <td>39</td>\n",
|
|
" <td>99999006200039</td>\n",
|
|
" <td>2007-11-05 00:00:00</td>\n",
|
|
" <td>12</td>\n",
|
|
" <td>2018.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2021-02-23 18:21:09</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>2008-01-01 00:00:00</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>64.19Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1095678</th>\n",
|
|
" <td>999990286</td>\n",
|
|
" <td>18</td>\n",
|
|
" <td>99999028600018</td>\n",
|
|
" <td>1979-11-30 00:00:00</td>\n",
|
|
" <td>22</td>\n",
|
|
" <td>2018.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2021-04-04 20:15:10</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>2008-01-01 00:00:00</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>55.10Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1095679</th>\n",
|
|
" <td>999990369</td>\n",
|
|
" <td>87</td>\n",
|
|
" <td>99999036900087</td>\n",
|
|
" <td>2014-03-31 00:00:00</td>\n",
|
|
" <td>21</td>\n",
|
|
" <td>2018.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2021-02-23 18:21:09</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>2014-03-31 00:00:00</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>66.30Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1095680</th>\n",
|
|
" <td>999990401</td>\n",
|
|
" <td>96</td>\n",
|
|
" <td>99999040100096</td>\n",
|
|
" <td>2009-06-23 00:00:00</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>2018.0</td>\n",
|
|
" <td>2712ZZ</td>\n",
|
|
" <td>2021-03-19 03:37:02</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>2009-06-23 00:00:00</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>27.12Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"<p>1095681 rows × 19 columns</p>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" siren nic siret dateCreationEtablissement \\\n",
|
|
"0 5420120 15 542012000015 1989-01-27 00:00:00 \n",
|
|
"1 5420120 31 542012000031 1900-01-01 00:00:00 \n",
|
|
"2 5520176 16 552017600016 1955-01-01 00:00:00 \n",
|
|
"3 5520176 32 552017600032 1999-08-30 00:00:00 \n",
|
|
"4 5520242 16 552024200016 1900-01-01 00:00:00 \n",
|
|
"... ... ... ... ... \n",
|
|
"1095676 999990005 38 99999000500038 1993-07-01 00:00:00 \n",
|
|
"1095677 999990062 39 99999006200039 2007-11-05 00:00:00 \n",
|
|
"1095678 999990286 18 99999028600018 1979-11-30 00:00:00 \n",
|
|
"1095679 999990369 87 99999036900087 2014-03-31 00:00:00 \n",
|
|
"1095680 999990401 96 99999040100096 2009-06-23 00:00:00 \n",
|
|
"\n",
|
|
" trancheEffectifsEtablissement anneeEffectifsEtablissement \\\n",
|
|
"0 2 2018.0 \n",
|
|
"1 3 2018.0 \n",
|
|
"2 12 2018.0 \n",
|
|
"3 12 2018.0 \n",
|
|
"4 12 2018.0 \n",
|
|
"... ... ... \n",
|
|
"1095676 32 2018.0 \n",
|
|
"1095677 12 2018.0 \n",
|
|
"1095678 22 2018.0 \n",
|
|
"1095679 21 2018.0 \n",
|
|
"1095680 3 2018.0 \n",
|
|
"\n",
|
|
" activitePrincipaleRegistreMetiersEtablissement \\\n",
|
|
"0 NaN \n",
|
|
"1 NaN \n",
|
|
"2 NaN \n",
|
|
"3 NaN \n",
|
|
"4 NaN \n",
|
|
"... ... \n",
|
|
"1095676 NaN \n",
|
|
"1095677 NaN \n",
|
|
"1095678 NaN \n",
|
|
"1095679 NaN \n",
|
|
"1095680 2712ZZ \n",
|
|
"\n",
|
|
" dateDernierTraitementEtablissement etablissementSiege \\\n",
|
|
"0 2020-08-25 10:10:13 False \n",
|
|
"1 2021-01-01 03:35:01 True \n",
|
|
"2 2021-08-01 21:30:57 True \n",
|
|
"3 2020-08-25 10:10:13 False \n",
|
|
"4 2021-08-01 21:30:57 True \n",
|
|
"... ... ... \n",
|
|
"1095676 2021-08-01 20:15:35 True \n",
|
|
"1095677 2021-02-23 18:21:09 True \n",
|
|
"1095678 2021-04-04 20:15:10 True \n",
|
|
"1095679 2021-02-23 18:21:09 True \n",
|
|
"1095680 2021-03-19 03:37:02 True \n",
|
|
"\n",
|
|
" nombrePeriodesEtablissement dateDebut \\\n",
|
|
"0 4 2008-01-01 00:00:00 \n",
|
|
"1 6 2008-04-23 00:00:00 \n",
|
|
"2 4 2008-01-01 00:00:00 \n",
|
|
"3 4 2008-01-01 00:00:00 \n",
|
|
"4 4 2008-01-01 00:00:00 \n",
|
|
"... ... ... \n",
|
|
"1095676 5 2010-12-15 00:00:00 \n",
|
|
"1095677 2 2008-01-01 00:00:00 \n",
|
|
"1095678 4 2008-01-01 00:00:00 \n",
|
|
"1095679 1 2014-03-31 00:00:00 \n",
|
|
"1095680 1 2009-06-23 00:00:00 \n",
|
|
"\n",
|
|
" etatAdministratifEtablissement enseigne1Etablissement \\\n",
|
|
"0 A NaN \n",
|
|
"1 A NaN \n",
|
|
"2 A NaN \n",
|
|
"3 A NaN \n",
|
|
"4 A NaN \n",
|
|
"... ... ... \n",
|
|
"1095676 A NaN \n",
|
|
"1095677 A NaN \n",
|
|
"1095678 A NaN \n",
|
|
"1095679 A NaN \n",
|
|
"1095680 A NaN \n",
|
|
"\n",
|
|
" enseigne2Etablissement enseigne3Etablissement \\\n",
|
|
"0 NaN NaN \n",
|
|
"1 NaN NaN \n",
|
|
"2 NaN NaN \n",
|
|
"3 NaN NaN \n",
|
|
"4 NaN NaN \n",
|
|
"... ... ... \n",
|
|
"1095676 NaN NaN \n",
|
|
"1095677 NaN NaN \n",
|
|
"1095678 NaN NaN \n",
|
|
"1095679 NaN NaN \n",
|
|
"1095680 NaN NaN \n",
|
|
"\n",
|
|
" denominationUsuelleEtablissement activitePrincipaleEtablissement \\\n",
|
|
"0 NaN 10.81Z \n",
|
|
"1 NaN 70.10Z \n",
|
|
"2 NaN 17.21A \n",
|
|
"3 NaN 17.21A \n",
|
|
"4 NaN 20.30Z \n",
|
|
"... ... ... \n",
|
|
"1095676 NaN 49.41A \n",
|
|
"1095677 NaN 64.19Z \n",
|
|
"1095678 NaN 55.10Z \n",
|
|
"1095679 NaN 66.30Z \n",
|
|
"1095680 NaN 27.12Z \n",
|
|
"\n",
|
|
" nomenclatureActivitePrincipaleEtablissement \\\n",
|
|
"0 NAFRev2 \n",
|
|
"1 NAFRev2 \n",
|
|
"2 NAFRev2 \n",
|
|
"3 NAFRev2 \n",
|
|
"4 NAFRev2 \n",
|
|
"... ... \n",
|
|
"1095676 NAFRev2 \n",
|
|
"1095677 NAFRev2 \n",
|
|
"1095678 NAFRev2 \n",
|
|
"1095679 NAFRev2 \n",
|
|
"1095680 NAFRev2 \n",
|
|
"\n",
|
|
" caractereEmployeurEtablissement \n",
|
|
"0 O \n",
|
|
"1 O \n",
|
|
"2 O \n",
|
|
"3 O \n",
|
|
"4 O \n",
|
|
"... ... \n",
|
|
"1095676 O \n",
|
|
"1095677 O \n",
|
|
"1095678 O \n",
|
|
"1095679 O \n",
|
|
"1095680 O \n",
|
|
"\n",
|
|
"[1095681 rows x 19 columns]"
|
|
]
|
|
},
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Load the establishments CSV.\n",
|
|
"# siren, nic and siret are identifiers, not quantities: read them as text so that\n",
"# leading zeros are kept (the stored output shows 5420120 next to 999990005, i.e.\n",
"# values of different widths after integer parsing - TODO confirm against the CSV).\n",
"# low_memory=False parses each column in one pass, which is what the DtypeWarning\n",
"# raised for columns 6, 14 and 15 recommends.\n",
"data = pd.read_csv(\n",
"    'te_siren_admin.csv',\n",
"    dtype={'siren': str, 'nic': str, 'siret': str},\n",
"    low_memory=False,\n",
")\n",
|
|
"data"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "0ddc4c99",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>siren</th>\n",
|
|
" <th>nic</th>\n",
|
|
" <th>siret</th>\n",
|
|
" <th>dateCreationEtablissement</th>\n",
|
|
" <th>trancheEffectifsEtablissement</th>\n",
|
|
" <th>anneeEffectifsEtablissement</th>\n",
|
|
" <th>activitePrincipaleRegistreMetiersEtablissement</th>\n",
|
|
" <th>dateDernierTraitementEtablissement</th>\n",
|
|
" <th>etablissementSiege</th>\n",
|
|
" <th>nombrePeriodesEtablissement</th>\n",
|
|
" <th>dateDebut</th>\n",
|
|
" <th>etatAdministratifEtablissement</th>\n",
|
|
" <th>enseigne1Etablissement</th>\n",
|
|
" <th>enseigne2Etablissement</th>\n",
|
|
" <th>enseigne3Etablissement</th>\n",
|
|
" <th>denominationUsuelleEtablissement</th>\n",
|
|
" <th>activitePrincipaleEtablissement</th>\n",
|
|
" <th>nomenclatureActivitePrincipaleEtablissement</th>\n",
|
|
" <th>caractereEmployeurEtablissement</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>5420120</td>\n",
|
|
" <td>15</td>\n",
|
|
" <td>542012000015</td>\n",
|
|
" <td>1989-01-27 00:00:00</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>2018.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2020-08-25 10:10:13</td>\n",
|
|
" <td>False</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>2008-01-01 00:00:00</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>10.81Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>5420120</td>\n",
|
|
" <td>31</td>\n",
|
|
" <td>542012000031</td>\n",
|
|
" <td>1900-01-01 00:00:00</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>2018.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2021-01-01 03:35:01</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>6</td>\n",
|
|
" <td>2008-04-23 00:00:00</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>70.10Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>5520176</td>\n",
|
|
" <td>16</td>\n",
|
|
" <td>552017600016</td>\n",
|
|
" <td>1955-01-01 00:00:00</td>\n",
|
|
" <td>12</td>\n",
|
|
" <td>2018.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2021-08-01 21:30:57</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>2008-01-01 00:00:00</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>17.21A</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>5520176</td>\n",
|
|
" <td>32</td>\n",
|
|
" <td>552017600032</td>\n",
|
|
" <td>1999-08-30 00:00:00</td>\n",
|
|
" <td>12</td>\n",
|
|
" <td>2018.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2020-08-25 10:10:13</td>\n",
|
|
" <td>False</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>2008-01-01 00:00:00</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>17.21A</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>5520242</td>\n",
|
|
" <td>16</td>\n",
|
|
" <td>552024200016</td>\n",
|
|
" <td>1900-01-01 00:00:00</td>\n",
|
|
" <td>12</td>\n",
|
|
" <td>2018.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2021-08-01 21:30:57</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>2008-01-01 00:00:00</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>20.30Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>...</th>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" <td>...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1095676</th>\n",
|
|
" <td>999990005</td>\n",
|
|
" <td>38</td>\n",
|
|
" <td>99999000500038</td>\n",
|
|
" <td>1993-07-01 00:00:00</td>\n",
|
|
" <td>32</td>\n",
|
|
" <td>2018.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2021-08-01 20:15:35</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>2010-12-15 00:00:00</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>49.41A</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1095677</th>\n",
|
|
" <td>999990062</td>\n",
|
|
" <td>39</td>\n",
|
|
" <td>99999006200039</td>\n",
|
|
" <td>2007-11-05 00:00:00</td>\n",
|
|
" <td>12</td>\n",
|
|
" <td>2018.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2021-02-23 18:21:09</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>2008-01-01 00:00:00</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>64.19Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1095678</th>\n",
|
|
" <td>999990286</td>\n",
|
|
" <td>18</td>\n",
|
|
" <td>99999028600018</td>\n",
|
|
" <td>1979-11-30 00:00:00</td>\n",
|
|
" <td>22</td>\n",
|
|
" <td>2018.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2021-04-04 20:15:10</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>2008-01-01 00:00:00</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>55.10Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1095679</th>\n",
|
|
" <td>999990369</td>\n",
|
|
" <td>87</td>\n",
|
|
" <td>99999036900087</td>\n",
|
|
" <td>2014-03-31 00:00:00</td>\n",
|
|
" <td>21</td>\n",
|
|
" <td>2018.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2021-02-23 18:21:09</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>2014-03-31 00:00:00</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>66.30Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1095680</th>\n",
|
|
" <td>999990401</td>\n",
|
|
" <td>96</td>\n",
|
|
" <td>99999040100096</td>\n",
|
|
" <td>2009-06-23 00:00:00</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>2018.0</td>\n",
|
|
" <td>2712ZZ</td>\n",
|
|
" <td>2021-03-19 03:37:02</td>\n",
|
|
" <td>True</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>2009-06-23 00:00:00</td>\n",
|
|
" <td>A</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>27.12Z</td>\n",
|
|
" <td>NAFRev2</td>\n",
|
|
" <td>O</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"<p>965958 rows × 19 columns</p>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" siren nic siret dateCreationEtablissement \\\n",
|
|
"0 5420120 15 542012000015 1989-01-27 00:00:00 \n",
|
|
"1 5420120 31 542012000031 1900-01-01 00:00:00 \n",
|
|
"2 5520176 16 552017600016 1955-01-01 00:00:00 \n",
|
|
"3 5520176 32 552017600032 1999-08-30 00:00:00 \n",
|
|
"4 5520242 16 552024200016 1900-01-01 00:00:00 \n",
|
|
"... ... ... ... ... \n",
|
|
"1095676 999990005 38 99999000500038 1993-07-01 00:00:00 \n",
|
|
"1095677 999990062 39 99999006200039 2007-11-05 00:00:00 \n",
|
|
"1095678 999990286 18 99999028600018 1979-11-30 00:00:00 \n",
|
|
"1095679 999990369 87 99999036900087 2014-03-31 00:00:00 \n",
|
|
"1095680 999990401 96 99999040100096 2009-06-23 00:00:00 \n",
|
|
"\n",
|
|
" trancheEffectifsEtablissement anneeEffectifsEtablissement \\\n",
|
|
"0 2 2018.0 \n",
|
|
"1 3 2018.0 \n",
|
|
"2 12 2018.0 \n",
|
|
"3 12 2018.0 \n",
|
|
"4 12 2018.0 \n",
|
|
"... ... ... \n",
|
|
"1095676 32 2018.0 \n",
|
|
"1095677 12 2018.0 \n",
|
|
"1095678 22 2018.0 \n",
|
|
"1095679 21 2018.0 \n",
|
|
"1095680 3 2018.0 \n",
|
|
"\n",
|
|
" activitePrincipaleRegistreMetiersEtablissement \\\n",
|
|
"0 NaN \n",
|
|
"1 NaN \n",
|
|
"2 NaN \n",
|
|
"3 NaN \n",
|
|
"4 NaN \n",
|
|
"... ... \n",
|
|
"1095676 NaN \n",
|
|
"1095677 NaN \n",
|
|
"1095678 NaN \n",
|
|
"1095679 NaN \n",
|
|
"1095680 2712ZZ \n",
|
|
"\n",
|
|
" dateDernierTraitementEtablissement etablissementSiege \\\n",
|
|
"0 2020-08-25 10:10:13 False \n",
|
|
"1 2021-01-01 03:35:01 True \n",
|
|
"2 2021-08-01 21:30:57 True \n",
|
|
"3 2020-08-25 10:10:13 False \n",
|
|
"4 2021-08-01 21:30:57 True \n",
|
|
"... ... ... \n",
|
|
"1095676 2021-08-01 20:15:35 True \n",
|
|
"1095677 2021-02-23 18:21:09 True \n",
|
|
"1095678 2021-04-04 20:15:10 True \n",
|
|
"1095679 2021-02-23 18:21:09 True \n",
|
|
"1095680 2021-03-19 03:37:02 True \n",
|
|
"\n",
|
|
" nombrePeriodesEtablissement dateDebut \\\n",
|
|
"0 4 2008-01-01 00:00:00 \n",
|
|
"1 6 2008-04-23 00:00:00 \n",
|
|
"2 4 2008-01-01 00:00:00 \n",
|
|
"3 4 2008-01-01 00:00:00 \n",
|
|
"4 4 2008-01-01 00:00:00 \n",
|
|
"... ... ... \n",
|
|
"1095676 5 2010-12-15 00:00:00 \n",
|
|
"1095677 2 2008-01-01 00:00:00 \n",
|
|
"1095678 4 2008-01-01 00:00:00 \n",
|
|
"1095679 1 2014-03-31 00:00:00 \n",
|
|
"1095680 1 2009-06-23 00:00:00 \n",
|
|
"\n",
|
|
" etatAdministratifEtablissement enseigne1Etablissement \\\n",
|
|
"0 A NaN \n",
|
|
"1 A NaN \n",
|
|
"2 A NaN \n",
|
|
"3 A NaN \n",
|
|
"4 A NaN \n",
|
|
"... ... ... \n",
|
|
"1095676 A NaN \n",
|
|
"1095677 A NaN \n",
|
|
"1095678 A NaN \n",
|
|
"1095679 A NaN \n",
|
|
"1095680 A NaN \n",
|
|
"\n",
|
|
" enseigne2Etablissement enseigne3Etablissement \\\n",
|
|
"0 NaN NaN \n",
|
|
"1 NaN NaN \n",
|
|
"2 NaN NaN \n",
|
|
"3 NaN NaN \n",
|
|
"4 NaN NaN \n",
|
|
"... ... ... \n",
|
|
"1095676 NaN NaN \n",
|
|
"1095677 NaN NaN \n",
|
|
"1095678 NaN NaN \n",
|
|
"1095679 NaN NaN \n",
|
|
"1095680 NaN NaN \n",
|
|
"\n",
|
|
" denominationUsuelleEtablissement activitePrincipaleEtablissement \\\n",
|
|
"0 NaN 10.81Z \n",
|
|
"1 NaN 70.10Z \n",
|
|
"2 NaN 17.21A \n",
|
|
"3 NaN 17.21A \n",
|
|
"4 NaN 20.30Z \n",
|
|
"... ... ... \n",
|
|
"1095676 NaN 49.41A \n",
|
|
"1095677 NaN 64.19Z \n",
|
|
"1095678 NaN 55.10Z \n",
|
|
"1095679 NaN 66.30Z \n",
|
|
"1095680 NaN 27.12Z \n",
|
|
"\n",
|
|
" nomenclatureActivitePrincipaleEtablissement \\\n",
|
|
"0 NAFRev2 \n",
|
|
"1 NAFRev2 \n",
|
|
"2 NAFRev2 \n",
|
|
"3 NAFRev2 \n",
|
|
"4 NAFRev2 \n",
|
|
"... ... \n",
|
|
"1095676 NAFRev2 \n",
|
|
"1095677 NAFRev2 \n",
|
|
"1095678 NAFRev2 \n",
|
|
"1095679 NAFRev2 \n",
|
|
"1095680 NAFRev2 \n",
|
|
"\n",
|
|
" caractereEmployeurEtablissement \n",
|
|
"0 O \n",
|
|
"1 O \n",
|
|
"2 O \n",
|
|
"3 O \n",
|
|
"4 O \n",
|
|
"... ... \n",
|
|
"1095676 O \n",
|
|
"1095677 O \n",
|
|
"1095678 O \n",
|
|
"1095679 O \n",
|
|
"1095680 O \n",
|
|
"\n",
|
|
"[965958 rows x 19 columns]"
|
|
]
|
|
},
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Keep only the establishments whose administrative state is 'A' (active)\n",
|
|
"data = data[data['etatAdministratifEtablissement'] == 'A']\n",
|
|
"data"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "ca5a157e",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"DataFrame is written to Excel File successfully.\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Export the filtered data to an .xlsx workbook\n",
|
|
"file_name = 'SirenAdminNet.xlsx'\n",
|
|
"data.to_excel(excel_writer=file_name)\n",
|
|
"print('DataFrame is written to Excel File successfully.')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "d6213677",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.7"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|