{ "cells": [ { "cell_type": "code", "execution_count": 3, "id": "c425d5f5", "metadata": {}, "outputs": [], "source": [ "# Import the pandas library\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 4, "id": "a232e20f", "metadata": {}, "outputs": [], "source": [ "# Display at most 10 rows when a DataFrame is rendered\n", "pd.set_option('display.max_rows', 10)" ] }, { "cell_type": "code", "execution_count": 5, "id": "1c8fff82", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sirennicsiretstatutDiffusionEtablissementdateCreationEtablissementtrancheEffectifsEtablissementanneeEffectifsEtablissementactivitePrincipaleRegistreMetiersEtablissementdateDernierTraitementEtablissementetablissementSiege...codePaysEtranger2EtablissementlibellePaysEtranger2EtablissementetatAdministratifEtablissementenseigne1Etablissementenseigne2Etablissementenseigne3EtablissementdenominationUsuelleEtablissementactivitePrincipaleEtablissementnomenclatureActivitePrincipaleEtablissementcaractereEmployeurEtablissement
0562019065562019000065O2016-01-01212020.0NaN2022-08-29T09:00:21True...NaNNaNANaNNaNNaNNaN49.39ANAFRev2O
1568014517568014500017O1956-01-01212020.0NaN2022-10-04T04:12:29True...NaNNaNANaNNaNNaNNaN22.22ZNAFRev2O
2572016428572016400028O1983-09-28222020.0NaN2022-08-29T09:00:21True...NaNNaNANaNNaNNaNNaN86.10ZNAFRev2O
3572078431572078400031O1993-04-01222020.0NaN2022-08-29T09:00:21False...NaNNaNANaNNaNNaNNaN25.72ZNAFRev2O
4578096026578096000026O1981-12-28212020.0NaN2022-08-29T09:00:21False...NaNNaNAHOTEL ROYAL THALASSONaNNaNNaN55.10ZNAFRev2O
..................................................................
8244299889300214299889300200142O2013-05-01212020.0NaN2022-08-29T10:50:43False...NaNNaNANaNNaNNaNNaN49.39BNAFRev2O
824439999900053899999000500038O1993-07-01222020.0NaN2022-08-29T10:50:43True...NaNNaNANaNNaNNaNNaN49.41ANAFRev2O
8244499999000549199999000500491O2017-04-01212020.0NaN2022-08-29T10:50:43False...NaNNaNANaNNaNNaNNaN49.41ANAFRev2O
824459999902861899999028600018O1979-11-30222020.0NaN2022-08-29T10:50:43True...NaNNaNANaNNaNNaNNaN55.10ZNAFRev2O
824469999903698799999036900087O2014-03-31212020.0NaN2022-10-02T03:38:31True...NaNNaNANaNNaNNaNNaN66.30ZNAFRev2O
\n", "

82447 rows × 77 columns

\n", "
" ], "text/plain": [ " siren nic siret statutDiffusionEtablissement \\\n", "0 5620190 65 562019000065 O \n", "1 5680145 17 568014500017 O \n", "2 5720164 28 572016400028 O \n", "3 5720784 31 572078400031 O \n", "4 5780960 26 578096000026 O \n", "... ... ... ... ... \n", "82442 998893002 142 99889300200142 O \n", "82443 999990005 38 99999000500038 O \n", "82444 999990005 491 99999000500491 O \n", "82445 999990286 18 99999028600018 O \n", "82446 999990369 87 99999036900087 O \n", "\n", " dateCreationEtablissement trancheEffectifsEtablissement \\\n", "0 2016-01-01 21 \n", "1 1956-01-01 21 \n", "2 1983-09-28 22 \n", "3 1993-04-01 22 \n", "4 1981-12-28 21 \n", "... ... ... \n", "82442 2013-05-01 21 \n", "82443 1993-07-01 22 \n", "82444 2017-04-01 21 \n", "82445 1979-11-30 22 \n", "82446 2014-03-31 21 \n", "\n", " anneeEffectifsEtablissement \\\n", "0 2020.0 \n", "1 2020.0 \n", "2 2020.0 \n", "3 2020.0 \n", "4 2020.0 \n", "... ... \n", "82442 2020.0 \n", "82443 2020.0 \n", "82444 2020.0 \n", "82445 2020.0 \n", "82446 2020.0 \n", "\n", " activitePrincipaleRegistreMetiersEtablissement \\\n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "... ... \n", "82442 NaN \n", "82443 NaN \n", "82444 NaN \n", "82445 NaN \n", "82446 NaN \n", "\n", " dateDernierTraitementEtablissement etablissementSiege ... \\\n", "0 2022-08-29T09:00:21 True ... \n", "1 2022-10-04T04:12:29 True ... \n", "2 2022-08-29T09:00:21 True ... \n", "3 2022-08-29T09:00:21 False ... \n", "4 2022-08-29T09:00:21 False ... \n", "... ... ... ... \n", "82442 2022-08-29T10:50:43 False ... \n", "82443 2022-08-29T10:50:43 True ... \n", "82444 2022-08-29T10:50:43 False ... \n", "82445 2022-08-29T10:50:43 True ... \n", "82446 2022-10-02T03:38:31 True ... \n", "\n", " codePaysEtranger2Etablissement libellePaysEtranger2Etablissement \\\n", "0 NaN NaN \n", "1 NaN NaN \n", "2 NaN NaN \n", "3 NaN NaN \n", "4 NaN NaN \n", "... ... ... 
\n", "82442 NaN NaN \n", "82443 NaN NaN \n", "82444 NaN NaN \n", "82445 NaN NaN \n", "82446 NaN NaN \n", "\n", " etatAdministratifEtablissement enseigne1Etablissement \\\n", "0 A NaN \n", "1 A NaN \n", "2 A NaN \n", "3 A NaN \n", "4 A HOTEL ROYAL THALASSO \n", "... ... ... \n", "82442 A NaN \n", "82443 A NaN \n", "82444 A NaN \n", "82445 A NaN \n", "82446 A NaN \n", "\n", " enseigne2Etablissement enseigne3Etablissement \\\n", "0 NaN NaN \n", "1 NaN NaN \n", "2 NaN NaN \n", "3 NaN NaN \n", "4 NaN NaN \n", "... ... ... \n", "82442 NaN NaN \n", "82443 NaN NaN \n", "82444 NaN NaN \n", "82445 NaN NaN \n", "82446 NaN NaN \n", "\n", " denominationUsuelleEtablissement activitePrincipaleEtablissement \\\n", "0 NaN 49.39A \n", "1 NaN 22.22Z \n", "2 NaN 86.10Z \n", "3 NaN 25.72Z \n", "4 NaN 55.10Z \n", "... ... ... \n", "82442 NaN 49.39B \n", "82443 NaN 49.41A \n", "82444 NaN 49.41A \n", "82445 NaN 55.10Z \n", "82446 NaN 66.30Z \n", "\n", " nomenclatureActivitePrincipaleEtablissement \\\n", "0 NAFRev2 \n", "1 NAFRev2 \n", "2 NAFRev2 \n", "3 NAFRev2 \n", "4 NAFRev2 \n", "... ... \n", "82442 NAFRev2 \n", "82443 NAFRev2 \n", "82444 NAFRev2 \n", "82445 NAFRev2 \n", "82446 NAFRev2 \n", "\n", " caractereEmployeurEtablissement \n", "0 O \n", "1 O \n", "2 O \n", "3 O \n", "4 O \n", "... ... 
\n", "82442 O \n", "82443 O \n", "82444 O \n", "82445 O \n", "82446 O \n", "\n", "[82447 rows x 77 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load the Excel workbook into a DataFrame and display it\n", "# NOTE(review): siren/nic/siret appear to be parsed as numbers (siren 5620190 is shown with 7 digits),\n", "# so leading zeros are presumably lost - TODO confirm; consider reading them with dtype=str\n", "entreprise50 = pd.read_excel(io='sirenentr.xlsx')\n", "entreprise50" ] }, { "cell_type": "code", "execution_count": 6, "id": "7400d118", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(82447, 77)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Show the number of rows and columns\n", "entreprise50.shape" ] }, { "cell_type": "code", "execution_count": 7, "id": "b7284e73", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sirennicsiretstatutDiffusionEtablissementdateCreationEtablissementtrancheEffectifsEtablissementanneeEffectifsEtablissementactivitePrincipaleRegistreMetiersEtablissementdateDernierTraitementEtablissementetablissementSiege...codeCedex2EtablissementlibelleCedex2EtablissementetatAdministratifEtablissementenseigne1Etablissementenseigne2Etablissementenseigne3EtablissementdenominationUsuelleEtablissementactivitePrincipaleEtablissementnomenclatureActivitePrincipaleEtablissementcaractereEmployeurEtablissement
0562019065562019000065O2016-01-01212020.0NaN2022-08-29T09:00:21True...NaNNaNANaNNaNNaNNaN49.39ANAFRev2O
1568014517568014500017O1956-01-01212020.0NaN2022-10-04T04:12:29True...NaNNaNANaNNaNNaNNaN22.22ZNAFRev2O
2572016428572016400028O1983-09-28222020.0NaN2022-08-29T09:00:21True...NaNNaNANaNNaNNaNNaN86.10ZNAFRev2O
3572078431572078400031O1993-04-01222020.0NaN2022-08-29T09:00:21False...NaNNaNANaNNaNNaNNaN25.72ZNAFRev2O
4578096026578096000026O1981-12-28212020.0NaN2022-08-29T09:00:21False...NaNNaNAHOTEL ROYAL THALASSONaNNaNNaN55.10ZNAFRev2O
..................................................................
8244299889300214299889300200142O2013-05-01212020.0NaN2022-08-29T10:50:43False...NaNNaNANaNNaNNaNNaN49.39BNAFRev2O
824439999900053899999000500038O1993-07-01222020.0NaN2022-08-29T10:50:43True...NaNNaNANaNNaNNaNNaN49.41ANAFRev2O
8244499999000549199999000500491O2017-04-01212020.0NaN2022-08-29T10:50:43False...NaNNaNANaNNaNNaNNaN49.41ANAFRev2O
824459999902861899999028600018O1979-11-30222020.0NaN2022-08-29T10:50:43True...NaNNaNANaNNaNNaNNaN55.10ZNAFRev2O
824469999903698799999036900087O2014-03-31212020.0NaN2022-10-02T03:38:31True...NaNNaNANaNNaNNaNNaN66.30ZNAFRev2O
\n", "

82447 rows × 71 columns

\n", "
" ], "text/plain": [ " siren nic siret statutDiffusionEtablissement \\\n", "0 5620190 65 562019000065 O \n", "1 5680145 17 568014500017 O \n", "2 5720164 28 572016400028 O \n", "3 5720784 31 572078400031 O \n", "4 5780960 26 578096000026 O \n", "... ... ... ... ... \n", "82442 998893002 142 99889300200142 O \n", "82443 999990005 38 99999000500038 O \n", "82444 999990005 491 99999000500491 O \n", "82445 999990286 18 99999028600018 O \n", "82446 999990369 87 99999036900087 O \n", "\n", " dateCreationEtablissement trancheEffectifsEtablissement \\\n", "0 2016-01-01 21 \n", "1 1956-01-01 21 \n", "2 1983-09-28 22 \n", "3 1993-04-01 22 \n", "4 1981-12-28 21 \n", "... ... ... \n", "82442 2013-05-01 21 \n", "82443 1993-07-01 22 \n", "82444 2017-04-01 21 \n", "82445 1979-11-30 22 \n", "82446 2014-03-31 21 \n", "\n", " anneeEffectifsEtablissement \\\n", "0 2020.0 \n", "1 2020.0 \n", "2 2020.0 \n", "3 2020.0 \n", "4 2020.0 \n", "... ... \n", "82442 2020.0 \n", "82443 2020.0 \n", "82444 2020.0 \n", "82445 2020.0 \n", "82446 2020.0 \n", "\n", " activitePrincipaleRegistreMetiersEtablissement \\\n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "... ... \n", "82442 NaN \n", "82443 NaN \n", "82444 NaN \n", "82445 NaN \n", "82446 NaN \n", "\n", " dateDernierTraitementEtablissement etablissementSiege ... \\\n", "0 2022-08-29T09:00:21 True ... \n", "1 2022-10-04T04:12:29 True ... \n", "2 2022-08-29T09:00:21 True ... \n", "3 2022-08-29T09:00:21 False ... \n", "4 2022-08-29T09:00:21 False ... \n", "... ... ... ... \n", "82442 2022-08-29T10:50:43 False ... \n", "82443 2022-08-29T10:50:43 True ... \n", "82444 2022-08-29T10:50:43 False ... \n", "82445 2022-08-29T10:50:43 True ... \n", "82446 2022-10-02T03:38:31 True ... \n", "\n", " codeCedex2Etablissement libelleCedex2Etablissement \\\n", "0 NaN NaN \n", "1 NaN NaN \n", "2 NaN NaN \n", "3 NaN NaN \n", "4 NaN NaN \n", "... ... ... 
\n", "82442 NaN NaN \n", "82443 NaN NaN \n", "82444 NaN NaN \n", "82445 NaN NaN \n", "82446 NaN NaN \n", "\n", " etatAdministratifEtablissement enseigne1Etablissement \\\n", "0 A NaN \n", "1 A NaN \n", "2 A NaN \n", "3 A NaN \n", "4 A HOTEL ROYAL THALASSO \n", "... ... ... \n", "82442 A NaN \n", "82443 A NaN \n", "82444 A NaN \n", "82445 A NaN \n", "82446 A NaN \n", "\n", " enseigne2Etablissement enseigne3Etablissement \\\n", "0 NaN NaN \n", "1 NaN NaN \n", "2 NaN NaN \n", "3 NaN NaN \n", "4 NaN NaN \n", "... ... ... \n", "82442 NaN NaN \n", "82443 NaN NaN \n", "82444 NaN NaN \n", "82445 NaN NaN \n", "82446 NaN NaN \n", "\n", " denominationUsuelleEtablissement activitePrincipaleEtablissement \\\n", "0 NaN 49.39A \n", "1 NaN 22.22Z \n", "2 NaN 86.10Z \n", "3 NaN 25.72Z \n", "4 NaN 55.10Z \n", "... ... ... \n", "82442 NaN 49.39B \n", "82443 NaN 49.41A \n", "82444 NaN 49.41A \n", "82445 NaN 55.10Z \n", "82446 NaN 66.30Z \n", "\n", " nomenclatureActivitePrincipaleEtablissement \\\n", "0 NAFRev2 \n", "1 NAFRev2 \n", "2 NAFRev2 \n", "3 NAFRev2 \n", "4 NAFRev2 \n", "... ... \n", "82442 NAFRev2 \n", "82443 NAFRev2 \n", "82444 NAFRev2 \n", "82445 NAFRev2 \n", "82446 NAFRev2 \n", "\n", " caractereEmployeurEtablissement \n", "0 O \n", "1 O \n", "2 O \n", "3 O \n", "4 O \n", "... ... 
\n", "82442 O \n", "82443 O \n", "82444 O \n", "82445 O \n", "82446 O \n", "\n", "[82447 rows x 71 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Remove the columns that are not needed.\n", "# DataFrame.drop returns a new frame, so the result is assigned back; otherwise the\n", "# columns would still be present in every later cell and in the exported file.\n", "# errors='ignore' keeps the cell safe to re-run once the columns are already gone.\n", "entreprise50 = entreprise50.drop(columns=['unitePurgeeUniteLegale', 'prenom4UniteLegale', 'pseudonymeUniteLegale', 'libelleCommuneEtranger2Etablissement', 'codePaysEtranger2Etablissement', 'libellePaysEtranger2Etablissement'], errors='ignore')\n", "entreprise50" ] }, { "cell_type": "code", "execution_count": null, "id": "f7e8e157", "metadata": {}, "outputs": [], "source": [ "# Check the number of rows and columns at this point.\n", "# (This cell previously held a placeholder drop(['']) call, which raises KeyError.)\n", "entreprise50.shape" ] }, { "cell_type": "code", "execution_count": 43, "id": "8f4fbab9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 False\n", "1 False\n", "2 False\n", "3 False\n", "4 False\n", " ... \n", "82442 False\n", "82443 False\n", "82444 False\n", "82445 False\n", "82446 False\n", "Length: 82447, dtype: bool" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Flag the duplicated rows (True marks a repeat of an earlier row)\n", "entreprise50.duplicated()" ] }, { "cell_type": "code", "execution_count": 44, "id": "cde83087", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sirennicsiretstatutDiffusionEtablissementdateCreationEtablissementtrancheEffectifsEtablissementanneeEffectifsEtablissementactivitePrincipaleRegistreMetiersEtablissementdateDernierTraitementEtablissementetablissementSiege...codePaysEtranger2EtablissementlibellePaysEtranger2EtablissementetatAdministratifEtablissementenseigne1Etablissementenseigne2Etablissementenseigne3EtablissementdenominationUsuelleEtablissementactivitePrincipaleEtablissementnomenclatureActivitePrincipaleEtablissementcaractereEmployeurEtablissement
0562019065562019000065O2016-01-01212020.0NaN2022-08-29T09:00:21True...NaNNaNANaNNaNNaNNaN49.39ANAFRev2O
1568014517568014500017O1956-01-01212020.0NaN2022-10-04T04:12:29True...NaNNaNANaNNaNNaNNaN22.22ZNAFRev2O
2572016428572016400028O1983-09-28222020.0NaN2022-08-29T09:00:21True...NaNNaNANaNNaNNaNNaN86.10ZNAFRev2O
3572078431572078400031O1993-04-01222020.0NaN2022-08-29T09:00:21False...NaNNaNANaNNaNNaNNaN25.72ZNAFRev2O
4578096026578096000026O1981-12-28212020.0NaN2022-08-29T09:00:21False...NaNNaNAHOTEL ROYAL THALASSONaNNaNNaN55.10ZNAFRev2O
..................................................................
8244299889300214299889300200142O2013-05-01212020.0NaN2022-08-29T10:50:43False...NaNNaNANaNNaNNaNNaN49.39BNAFRev2O
824439999900053899999000500038O1993-07-01222020.0NaN2022-08-29T10:50:43True...NaNNaNANaNNaNNaNNaN49.41ANAFRev2O
8244499999000549199999000500491O2017-04-01212020.0NaN2022-08-29T10:50:43False...NaNNaNANaNNaNNaNNaN49.41ANAFRev2O
824459999902861899999028600018O1979-11-30222020.0NaN2022-08-29T10:50:43True...NaNNaNANaNNaNNaNNaN55.10ZNAFRev2O
824469999903698799999036900087O2014-03-31212020.0NaN2022-10-02T03:38:31True...NaNNaNANaNNaNNaNNaN66.30ZNAFRev2O
\n", "

82447 rows × 77 columns

\n", "
" ], "text/plain": [ " siren nic siret statutDiffusionEtablissement \\\n", "0 5620190 65 562019000065 O \n", "1 5680145 17 568014500017 O \n", "2 5720164 28 572016400028 O \n", "3 5720784 31 572078400031 O \n", "4 5780960 26 578096000026 O \n", "... ... ... ... ... \n", "82442 998893002 142 99889300200142 O \n", "82443 999990005 38 99999000500038 O \n", "82444 999990005 491 99999000500491 O \n", "82445 999990286 18 99999028600018 O \n", "82446 999990369 87 99999036900087 O \n", "\n", " dateCreationEtablissement trancheEffectifsEtablissement \\\n", "0 2016-01-01 21 \n", "1 1956-01-01 21 \n", "2 1983-09-28 22 \n", "3 1993-04-01 22 \n", "4 1981-12-28 21 \n", "... ... ... \n", "82442 2013-05-01 21 \n", "82443 1993-07-01 22 \n", "82444 2017-04-01 21 \n", "82445 1979-11-30 22 \n", "82446 2014-03-31 21 \n", "\n", " anneeEffectifsEtablissement \\\n", "0 2020.0 \n", "1 2020.0 \n", "2 2020.0 \n", "3 2020.0 \n", "4 2020.0 \n", "... ... \n", "82442 2020.0 \n", "82443 2020.0 \n", "82444 2020.0 \n", "82445 2020.0 \n", "82446 2020.0 \n", "\n", " activitePrincipaleRegistreMetiersEtablissement \\\n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "... ... \n", "82442 NaN \n", "82443 NaN \n", "82444 NaN \n", "82445 NaN \n", "82446 NaN \n", "\n", " dateDernierTraitementEtablissement etablissementSiege ... \\\n", "0 2022-08-29T09:00:21 True ... \n", "1 2022-10-04T04:12:29 True ... \n", "2 2022-08-29T09:00:21 True ... \n", "3 2022-08-29T09:00:21 False ... \n", "4 2022-08-29T09:00:21 False ... \n", "... ... ... ... \n", "82442 2022-08-29T10:50:43 False ... \n", "82443 2022-08-29T10:50:43 True ... \n", "82444 2022-08-29T10:50:43 False ... \n", "82445 2022-08-29T10:50:43 True ... \n", "82446 2022-10-02T03:38:31 True ... \n", "\n", " codePaysEtranger2Etablissement libellePaysEtranger2Etablissement \\\n", "0 NaN NaN \n", "1 NaN NaN \n", "2 NaN NaN \n", "3 NaN NaN \n", "4 NaN NaN \n", "... ... ... 
\n", "82442 NaN NaN \n", "82443 NaN NaN \n", "82444 NaN NaN \n", "82445 NaN NaN \n", "82446 NaN NaN \n", "\n", " etatAdministratifEtablissement enseigne1Etablissement \\\n", "0 A NaN \n", "1 A NaN \n", "2 A NaN \n", "3 A NaN \n", "4 A HOTEL ROYAL THALASSO \n", "... ... ... \n", "82442 A NaN \n", "82443 A NaN \n", "82444 A NaN \n", "82445 A NaN \n", "82446 A NaN \n", "\n", " enseigne2Etablissement enseigne3Etablissement \\\n", "0 NaN NaN \n", "1 NaN NaN \n", "2 NaN NaN \n", "3 NaN NaN \n", "4 NaN NaN \n", "... ... ... \n", "82442 NaN NaN \n", "82443 NaN NaN \n", "82444 NaN NaN \n", "82445 NaN NaN \n", "82446 NaN NaN \n", "\n", " denominationUsuelleEtablissement activitePrincipaleEtablissement \\\n", "0 NaN 49.39A \n", "1 NaN 22.22Z \n", "2 NaN 86.10Z \n", "3 NaN 25.72Z \n", "4 NaN 55.10Z \n", "... ... ... \n", "82442 NaN 49.39B \n", "82443 NaN 49.41A \n", "82444 NaN 49.41A \n", "82445 NaN 55.10Z \n", "82446 NaN 66.30Z \n", "\n", " nomenclatureActivitePrincipaleEtablissement \\\n", "0 NAFRev2 \n", "1 NAFRev2 \n", "2 NAFRev2 \n", "3 NAFRev2 \n", "4 NAFRev2 \n", "... ... \n", "82442 NAFRev2 \n", "82443 NAFRev2 \n", "82444 NAFRev2 \n", "82445 NAFRev2 \n", "82446 NAFRev2 \n", "\n", " caractereEmployeurEtablissement \n", "0 O \n", "1 O \n", "2 O \n", "3 O \n", "4 O \n", "... ... 
\n", "82442 O \n", "82443 O \n", "82444 O \n", "82445 O \n", "82446 O \n", "\n", "[82447 rows x 77 columns]" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Remove the duplicated rows.\n", "# drop_duplicates returns a new frame, so the result is assigned back; otherwise the\n", "# export below would still contain the duplicates.\n", "entreprise50 = entreprise50.drop_duplicates()\n", "entreprise50" ] }, { "cell_type": "code", "execution_count": null, "id": "8cdf8558", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 45, "id": "22e04dc2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "DataFrame is written to Excel File successfully.\n" ] } ], "source": [ "# Export the cleaned DataFrame to an Excel file\n", "# NOTE(review): to_excel also writes the row index as the first column by default;\n", "# pass index=False if that column is not wanted - TODO confirm with consumers of the file\n", "file_name = 'Sirenplus.xlsx'\n", "entreprise50.to_excel(file_name)\n", "print('DataFrame is written to Excel File successfully.')" ] }, { "cell_type": "code", "execution_count": null, "id": "6253e566", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" } }, "nbformat": 4, "nbformat_minor": 5 }