You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

955 lines
39 KiB

2 years ago
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 1,
  6. "id": "cf14db47",
  7. "metadata": {},
  8. "outputs": [],
  9. "source": [
  10. "# Import pandas\n",
  11. "import pandas as pd"
  12. ]
  13. },
  14. {
  15. "cell_type": "code",
  16. "execution_count": 2,
  17. "id": "78c7afbb",
  18. "metadata": {},
  19. "outputs": [
  20. {
  21. "name": "stderr",
  22. "output_type": "stream",
  23. "text": [
  24. "/Users/angecharbelledurand/opt/anaconda3/lib/python3.9/site-packages/IPython/core/interactiveshell.py:3444: DtypeWarning: Columns (6,14,15) have mixed types.Specify dtype option on import or set low_memory=False.\n",
  25. " exec(code_obj, self.user_global_ns, self.user_ns)\n"
  26. ]
  27. },
  28. {
  29. "data": {
  30. "text/html": [
  31. "<div>\n",
  32. "<style scoped>\n",
  33. " .dataframe tbody tr th:only-of-type {\n",
  34. " vertical-align: middle;\n",
  35. " }\n",
  36. "\n",
  37. " .dataframe tbody tr th {\n",
  38. " vertical-align: top;\n",
  39. " }\n",
  40. "\n",
  41. " .dataframe thead th {\n",
  42. " text-align: right;\n",
  43. " }\n",
  44. "</style>\n",
  45. "<table border=\"1\" class=\"dataframe\">\n",
  46. " <thead>\n",
  47. " <tr style=\"text-align: right;\">\n",
  48. " <th></th>\n",
  49. " <th>siren</th>\n",
  50. " <th>nic</th>\n",
  51. " <th>siret</th>\n",
  52. " <th>dateCreationEtablissement</th>\n",
  53. " <th>trancheEffectifsEtablissement</th>\n",
  54. " <th>anneeEffectifsEtablissement</th>\n",
  55. " <th>activitePrincipaleRegistreMetiersEtablissement</th>\n",
  56. " <th>dateDernierTraitementEtablissement</th>\n",
  57. " <th>etablissementSiege</th>\n",
  58. " <th>nombrePeriodesEtablissement</th>\n",
  59. " <th>dateDebut</th>\n",
  60. " <th>etatAdministratifEtablissement</th>\n",
  61. " <th>enseigne1Etablissement</th>\n",
  62. " <th>enseigne2Etablissement</th>\n",
  63. " <th>enseigne3Etablissement</th>\n",
  64. " <th>denominationUsuelleEtablissement</th>\n",
  65. " <th>activitePrincipaleEtablissement</th>\n",
  66. " <th>nomenclatureActivitePrincipaleEtablissement</th>\n",
  67. " <th>caractereEmployeurEtablissement</th>\n",
  68. " </tr>\n",
  69. " </thead>\n",
  70. " <tbody>\n",
  71. " <tr>\n",
  72. " <th>0</th>\n",
  73. " <td>5420120</td>\n",
  74. " <td>15</td>\n",
  75. " <td>542012000015</td>\n",
  76. " <td>1989-01-27 00:00:00</td>\n",
  77. " <td>2</td>\n",
  78. " <td>2018.0</td>\n",
  79. " <td>NaN</td>\n",
  80. " <td>2020-08-25 10:10:13</td>\n",
  81. " <td>False</td>\n",
  82. " <td>4</td>\n",
  83. " <td>2008-01-01 00:00:00</td>\n",
  84. " <td>A</td>\n",
  85. " <td>NaN</td>\n",
  86. " <td>NaN</td>\n",
  87. " <td>NaN</td>\n",
  88. " <td>NaN</td>\n",
  89. " <td>10.81Z</td>\n",
  90. " <td>NAFRev2</td>\n",
  91. " <td>O</td>\n",
  92. " </tr>\n",
  93. " <tr>\n",
  94. " <th>1</th>\n",
  95. " <td>5420120</td>\n",
  96. " <td>31</td>\n",
  97. " <td>542012000031</td>\n",
  98. " <td>1900-01-01 00:00:00</td>\n",
  99. " <td>3</td>\n",
  100. " <td>2018.0</td>\n",
  101. " <td>NaN</td>\n",
  102. " <td>2021-01-01 03:35:01</td>\n",
  103. " <td>True</td>\n",
  104. " <td>6</td>\n",
  105. " <td>2008-04-23 00:00:00</td>\n",
  106. " <td>A</td>\n",
  107. " <td>NaN</td>\n",
  108. " <td>NaN</td>\n",
  109. " <td>NaN</td>\n",
  110. " <td>NaN</td>\n",
  111. " <td>70.10Z</td>\n",
  112. " <td>NAFRev2</td>\n",
  113. " <td>O</td>\n",
  114. " </tr>\n",
  115. " <tr>\n",
  116. " <th>2</th>\n",
  117. " <td>5520176</td>\n",
  118. " <td>16</td>\n",
  119. " <td>552017600016</td>\n",
  120. " <td>1955-01-01 00:00:00</td>\n",
  121. " <td>12</td>\n",
  122. " <td>2018.0</td>\n",
  123. " <td>NaN</td>\n",
  124. " <td>2021-08-01 21:30:57</td>\n",
  125. " <td>True</td>\n",
  126. " <td>4</td>\n",
  127. " <td>2008-01-01 00:00:00</td>\n",
  128. " <td>A</td>\n",
  129. " <td>NaN</td>\n",
  130. " <td>NaN</td>\n",
  131. " <td>NaN</td>\n",
  132. " <td>NaN</td>\n",
  133. " <td>17.21A</td>\n",
  134. " <td>NAFRev2</td>\n",
  135. " <td>O</td>\n",
  136. " </tr>\n",
  137. " <tr>\n",
  138. " <th>3</th>\n",
  139. " <td>5520176</td>\n",
  140. " <td>32</td>\n",
  141. " <td>552017600032</td>\n",
  142. " <td>1999-08-30 00:00:00</td>\n",
  143. " <td>12</td>\n",
  144. " <td>2018.0</td>\n",
  145. " <td>NaN</td>\n",
  146. " <td>2020-08-25 10:10:13</td>\n",
  147. " <td>False</td>\n",
  148. " <td>4</td>\n",
  149. " <td>2008-01-01 00:00:00</td>\n",
  150. " <td>A</td>\n",
  151. " <td>NaN</td>\n",
  152. " <td>NaN</td>\n",
  153. " <td>NaN</td>\n",
  154. " <td>NaN</td>\n",
  155. " <td>17.21A</td>\n",
  156. " <td>NAFRev2</td>\n",
  157. " <td>O</td>\n",
  158. " </tr>\n",
  159. " <tr>\n",
  160. " <th>4</th>\n",
  161. " <td>5520242</td>\n",
  162. " <td>16</td>\n",
  163. " <td>552024200016</td>\n",
  164. " <td>1900-01-01 00:00:00</td>\n",
  165. " <td>12</td>\n",
  166. " <td>2018.0</td>\n",
  167. " <td>NaN</td>\n",
  168. " <td>2021-08-01 21:30:57</td>\n",
  169. " <td>True</td>\n",
  170. " <td>4</td>\n",
  171. " <td>2008-01-01 00:00:00</td>\n",
  172. " <td>A</td>\n",
  173. " <td>NaN</td>\n",
  174. " <td>NaN</td>\n",
  175. " <td>NaN</td>\n",
  176. " <td>NaN</td>\n",
  177. " <td>20.30Z</td>\n",
  178. " <td>NAFRev2</td>\n",
  179. " <td>O</td>\n",
  180. " </tr>\n",
  181. " <tr>\n",
  182. " <th>...</th>\n",
  183. " <td>...</td>\n",
  184. " <td>...</td>\n",
  185. " <td>...</td>\n",
  186. " <td>...</td>\n",
  187. " <td>...</td>\n",
  188. " <td>...</td>\n",
  189. " <td>...</td>\n",
  190. " <td>...</td>\n",
  191. " <td>...</td>\n",
  192. " <td>...</td>\n",
  193. " <td>...</td>\n",
  194. " <td>...</td>\n",
  195. " <td>...</td>\n",
  196. " <td>...</td>\n",
  197. " <td>...</td>\n",
  198. " <td>...</td>\n",
  199. " <td>...</td>\n",
  200. " <td>...</td>\n",
  201. " <td>...</td>\n",
  202. " </tr>\n",
  203. " <tr>\n",
  204. " <th>1095676</th>\n",
  205. " <td>999990005</td>\n",
  206. " <td>38</td>\n",
  207. " <td>99999000500038</td>\n",
  208. " <td>1993-07-01 00:00:00</td>\n",
  209. " <td>32</td>\n",
  210. " <td>2018.0</td>\n",
  211. " <td>NaN</td>\n",
  212. " <td>2021-08-01 20:15:35</td>\n",
  213. " <td>True</td>\n",
  214. " <td>5</td>\n",
  215. " <td>2010-12-15 00:00:00</td>\n",
  216. " <td>A</td>\n",
  217. " <td>NaN</td>\n",
  218. " <td>NaN</td>\n",
  219. " <td>NaN</td>\n",
  220. " <td>NaN</td>\n",
  221. " <td>49.41A</td>\n",
  222. " <td>NAFRev2</td>\n",
  223. " <td>O</td>\n",
  224. " </tr>\n",
  225. " <tr>\n",
  226. " <th>1095677</th>\n",
  227. " <td>999990062</td>\n",
  228. " <td>39</td>\n",
  229. " <td>99999006200039</td>\n",
  230. " <td>2007-11-05 00:00:00</td>\n",
  231. " <td>12</td>\n",
  232. " <td>2018.0</td>\n",
  233. " <td>NaN</td>\n",
  234. " <td>2021-02-23 18:21:09</td>\n",
  235. " <td>True</td>\n",
  236. " <td>2</td>\n",
  237. " <td>2008-01-01 00:00:00</td>\n",
  238. " <td>A</td>\n",
  239. " <td>NaN</td>\n",
  240. " <td>NaN</td>\n",
  241. " <td>NaN</td>\n",
  242. " <td>NaN</td>\n",
  243. " <td>64.19Z</td>\n",
  244. " <td>NAFRev2</td>\n",
  245. " <td>O</td>\n",
  246. " </tr>\n",
  247. " <tr>\n",
  248. " <th>1095678</th>\n",
  249. " <td>999990286</td>\n",
  250. " <td>18</td>\n",
  251. " <td>99999028600018</td>\n",
  252. " <td>1979-11-30 00:00:00</td>\n",
  253. " <td>22</td>\n",
  254. " <td>2018.0</td>\n",
  255. " <td>NaN</td>\n",
  256. " <td>2021-04-04 20:15:10</td>\n",
  257. " <td>True</td>\n",
  258. " <td>4</td>\n",
  259. " <td>2008-01-01 00:00:00</td>\n",
  260. " <td>A</td>\n",
  261. " <td>NaN</td>\n",
  262. " <td>NaN</td>\n",
  263. " <td>NaN</td>\n",
  264. " <td>NaN</td>\n",
  265. " <td>55.10Z</td>\n",
  266. " <td>NAFRev2</td>\n",
  267. " <td>O</td>\n",
  268. " </tr>\n",
  269. " <tr>\n",
  270. " <th>1095679</th>\n",
  271. " <td>999990369</td>\n",
  272. " <td>87</td>\n",
  273. " <td>99999036900087</td>\n",
  274. " <td>2014-03-31 00:00:00</td>\n",
  275. " <td>21</td>\n",
  276. " <td>2018.0</td>\n",
  277. " <td>NaN</td>\n",
  278. " <td>2021-02-23 18:21:09</td>\n",
  279. " <td>True</td>\n",
  280. " <td>1</td>\n",
  281. " <td>2014-03-31 00:00:00</td>\n",
  282. " <td>A</td>\n",
  283. " <td>NaN</td>\n",
  284. " <td>NaN</td>\n",
  285. " <td>NaN</td>\n",
  286. " <td>NaN</td>\n",
  287. " <td>66.30Z</td>\n",
  288. " <td>NAFRev2</td>\n",
  289. " <td>O</td>\n",
  290. " </tr>\n",
  291. " <tr>\n",
  292. " <th>1095680</th>\n",
  293. " <td>999990401</td>\n",
  294. " <td>96</td>\n",
  295. " <td>99999040100096</td>\n",
  296. " <td>2009-06-23 00:00:00</td>\n",
  297. " <td>3</td>\n",
  298. " <td>2018.0</td>\n",
  299. " <td>2712ZZ</td>\n",
  300. " <td>2021-03-19 03:37:02</td>\n",
  301. " <td>True</td>\n",
  302. " <td>1</td>\n",
  303. " <td>2009-06-23 00:00:00</td>\n",
  304. " <td>A</td>\n",
  305. " <td>NaN</td>\n",
  306. " <td>NaN</td>\n",
  307. " <td>NaN</td>\n",
  308. " <td>NaN</td>\n",
  309. " <td>27.12Z</td>\n",
  310. " <td>NAFRev2</td>\n",
  311. " <td>O</td>\n",
  312. " </tr>\n",
  313. " </tbody>\n",
  314. "</table>\n",
  315. "<p>1095681 rows × 19 columns</p>\n",
  316. "</div>"
  317. ],
  318. "text/plain": [
  319. " siren nic siret dateCreationEtablissement \\\n",
  320. "0 5420120 15 542012000015 1989-01-27 00:00:00 \n",
  321. "1 5420120 31 542012000031 1900-01-01 00:00:00 \n",
  322. "2 5520176 16 552017600016 1955-01-01 00:00:00 \n",
  323. "3 5520176 32 552017600032 1999-08-30 00:00:00 \n",
  324. "4 5520242 16 552024200016 1900-01-01 00:00:00 \n",
  325. "... ... ... ... ... \n",
  326. "1095676 999990005 38 99999000500038 1993-07-01 00:00:00 \n",
  327. "1095677 999990062 39 99999006200039 2007-11-05 00:00:00 \n",
  328. "1095678 999990286 18 99999028600018 1979-11-30 00:00:00 \n",
  329. "1095679 999990369 87 99999036900087 2014-03-31 00:00:00 \n",
  330. "1095680 999990401 96 99999040100096 2009-06-23 00:00:00 \n",
  331. "\n",
  332. " trancheEffectifsEtablissement anneeEffectifsEtablissement \\\n",
  333. "0 2 2018.0 \n",
  334. "1 3 2018.0 \n",
  335. "2 12 2018.0 \n",
  336. "3 12 2018.0 \n",
  337. "4 12 2018.0 \n",
  338. "... ... ... \n",
  339. "1095676 32 2018.0 \n",
  340. "1095677 12 2018.0 \n",
  341. "1095678 22 2018.0 \n",
  342. "1095679 21 2018.0 \n",
  343. "1095680 3 2018.0 \n",
  344. "\n",
  345. " activitePrincipaleRegistreMetiersEtablissement \\\n",
  346. "0 NaN \n",
  347. "1 NaN \n",
  348. "2 NaN \n",
  349. "3 NaN \n",
  350. "4 NaN \n",
  351. "... ... \n",
  352. "1095676 NaN \n",
  353. "1095677 NaN \n",
  354. "1095678 NaN \n",
  355. "1095679 NaN \n",
  356. "1095680 2712ZZ \n",
  357. "\n",
  358. " dateDernierTraitementEtablissement etablissementSiege \\\n",
  359. "0 2020-08-25 10:10:13 False \n",
  360. "1 2021-01-01 03:35:01 True \n",
  361. "2 2021-08-01 21:30:57 True \n",
  362. "3 2020-08-25 10:10:13 False \n",
  363. "4 2021-08-01 21:30:57 True \n",
  364. "... ... ... \n",
  365. "1095676 2021-08-01 20:15:35 True \n",
  366. "1095677 2021-02-23 18:21:09 True \n",
  367. "1095678 2021-04-04 20:15:10 True \n",
  368. "1095679 2021-02-23 18:21:09 True \n",
  369. "1095680 2021-03-19 03:37:02 True \n",
  370. "\n",
  371. " nombrePeriodesEtablissement dateDebut \\\n",
  372. "0 4 2008-01-01 00:00:00 \n",
  373. "1 6 2008-04-23 00:00:00 \n",
  374. "2 4 2008-01-01 00:00:00 \n",
  375. "3 4 2008-01-01 00:00:00 \n",
  376. "4 4 2008-01-01 00:00:00 \n",
  377. "... ... ... \n",
  378. "1095676 5 2010-12-15 00:00:00 \n",
  379. "1095677 2 2008-01-01 00:00:00 \n",
  380. "1095678 4 2008-01-01 00:00:00 \n",
  381. "1095679 1 2014-03-31 00:00:00 \n",
  382. "1095680 1 2009-06-23 00:00:00 \n",
  383. "\n",
  384. " etatAdministratifEtablissement enseigne1Etablissement \\\n",
  385. "0 A NaN \n",
  386. "1 A NaN \n",
  387. "2 A NaN \n",
  388. "3 A NaN \n",
  389. "4 A NaN \n",
  390. "... ... ... \n",
  391. "1095676 A NaN \n",
  392. "1095677 A NaN \n",
  393. "1095678 A NaN \n",
  394. "1095679 A NaN \n",
  395. "1095680 A NaN \n",
  396. "\n",
  397. " enseigne2Etablissement enseigne3Etablissement \\\n",
  398. "0 NaN NaN \n",
  399. "1 NaN NaN \n",
  400. "2 NaN NaN \n",
  401. "3 NaN NaN \n",
  402. "4 NaN NaN \n",
  403. "... ... ... \n",
  404. "1095676 NaN NaN \n",
  405. "1095677 NaN NaN \n",
  406. "1095678 NaN NaN \n",
  407. "1095679 NaN NaN \n",
  408. "1095680 NaN NaN \n",
  409. "\n",
  410. " denominationUsuelleEtablissement activitePrincipaleEtablissement \\\n",
  411. "0 NaN 10.81Z \n",
  412. "1 NaN 70.10Z \n",
  413. "2 NaN 17.21A \n",
  414. "3 NaN 17.21A \n",
  415. "4 NaN 20.30Z \n",
  416. "... ... ... \n",
  417. "1095676 NaN 49.41A \n",
  418. "1095677 NaN 64.19Z \n",
  419. "1095678 NaN 55.10Z \n",
  420. "1095679 NaN 66.30Z \n",
  421. "1095680 NaN 27.12Z \n",
  422. "\n",
  423. " nomenclatureActivitePrincipaleEtablissement \\\n",
  424. "0 NAFRev2 \n",
  425. "1 NAFRev2 \n",
  426. "2 NAFRev2 \n",
  427. "3 NAFRev2 \n",
  428. "4 NAFRev2 \n",
  429. "... ... \n",
  430. "1095676 NAFRev2 \n",
  431. "1095677 NAFRev2 \n",
  432. "1095678 NAFRev2 \n",
  433. "1095679 NAFRev2 \n",
  434. "1095680 NAFRev2 \n",
  435. "\n",
  436. " caractereEmployeurEtablissement \n",
  437. "0 O \n",
  438. "1 O \n",
  439. "2 O \n",
  440. "3 O \n",
  441. "4 O \n",
  442. "... ... \n",
  443. "1095676 O \n",
  444. "1095677 O \n",
  445. "1095678 O \n",
  446. "1095679 O \n",
  447. "1095680 O \n",
  448. "\n",
  449. "[1095681 rows x 19 columns]"
  450. ]
  451. },
  452. "execution_count": 2,
  453. "metadata": {},
  454. "output_type": "execute_result"
  455. }
  456. ],
  457. "source": [
  458. "#import du fichier csv\n",
  459. "data = pd.read_csv('te_siren_admin.csv')\n",
  460. "data"
  461. ]
  462. },
  463. {
  464. "cell_type": "code",
  465. "execution_count": 3,
  466. "id": "0ddc4c99",
  467. "metadata": {},
  468. "outputs": [
  469. {
  470. "data": {
  471. "text/html": [
  472. "<div>\n",
  473. "<style scoped>\n",
  474. " .dataframe tbody tr th:only-of-type {\n",
  475. " vertical-align: middle;\n",
  476. " }\n",
  477. "\n",
  478. " .dataframe tbody tr th {\n",
  479. " vertical-align: top;\n",
  480. " }\n",
  481. "\n",
  482. " .dataframe thead th {\n",
  483. " text-align: right;\n",
  484. " }\n",
  485. "</style>\n",
  486. "<table border=\"1\" class=\"dataframe\">\n",
  487. " <thead>\n",
  488. " <tr style=\"text-align: right;\">\n",
  489. " <th></th>\n",
  490. " <th>siren</th>\n",
  491. " <th>nic</th>\n",
  492. " <th>siret</th>\n",
  493. " <th>dateCreationEtablissement</th>\n",
  494. " <th>trancheEffectifsEtablissement</th>\n",
  495. " <th>anneeEffectifsEtablissement</th>\n",
  496. " <th>activitePrincipaleRegistreMetiersEtablissement</th>\n",
  497. " <th>dateDernierTraitementEtablissement</th>\n",
  498. " <th>etablissementSiege</th>\n",
  499. " <th>nombrePeriodesEtablissement</th>\n",
  500. " <th>dateDebut</th>\n",
  501. " <th>etatAdministratifEtablissement</th>\n",
  502. " <th>enseigne1Etablissement</th>\n",
  503. " <th>enseigne2Etablissement</th>\n",
  504. " <th>enseigne3Etablissement</th>\n",
  505. " <th>denominationUsuelleEtablissement</th>\n",
  506. " <th>activitePrincipaleEtablissement</th>\n",
  507. " <th>nomenclatureActivitePrincipaleEtablissement</th>\n",
  508. " <th>caractereEmployeurEtablissement</th>\n",
  509. " </tr>\n",
  510. " </thead>\n",
  511. " <tbody>\n",
  512. " <tr>\n",
  513. " <th>0</th>\n",
  514. " <td>5420120</td>\n",
  515. " <td>15</td>\n",
  516. " <td>542012000015</td>\n",
  517. " <td>1989-01-27 00:00:00</td>\n",
  518. " <td>2</td>\n",
  519. " <td>2018.0</td>\n",
  520. " <td>NaN</td>\n",
  521. " <td>2020-08-25 10:10:13</td>\n",
  522. " <td>False</td>\n",
  523. " <td>4</td>\n",
  524. " <td>2008-01-01 00:00:00</td>\n",
  525. " <td>A</td>\n",
  526. " <td>NaN</td>\n",
  527. " <td>NaN</td>\n",
  528. " <td>NaN</td>\n",
  529. " <td>NaN</td>\n",
  530. " <td>10.81Z</td>\n",
  531. " <td>NAFRev2</td>\n",
  532. " <td>O</td>\n",
  533. " </tr>\n",
  534. " <tr>\n",
  535. " <th>1</th>\n",
  536. " <td>5420120</td>\n",
  537. " <td>31</td>\n",
  538. " <td>542012000031</td>\n",
  539. " <td>1900-01-01 00:00:00</td>\n",
  540. " <td>3</td>\n",
  541. " <td>2018.0</td>\n",
  542. " <td>NaN</td>\n",
  543. " <td>2021-01-01 03:35:01</td>\n",
  544. " <td>True</td>\n",
  545. " <td>6</td>\n",
  546. " <td>2008-04-23 00:00:00</td>\n",
  547. " <td>A</td>\n",
  548. " <td>NaN</td>\n",
  549. " <td>NaN</td>\n",
  550. " <td>NaN</td>\n",
  551. " <td>NaN</td>\n",
  552. " <td>70.10Z</td>\n",
  553. " <td>NAFRev2</td>\n",
  554. " <td>O</td>\n",
  555. " </tr>\n",
  556. " <tr>\n",
  557. " <th>2</th>\n",
  558. " <td>5520176</td>\n",
  559. " <td>16</td>\n",
  560. " <td>552017600016</td>\n",
  561. " <td>1955-01-01 00:00:00</td>\n",
  562. " <td>12</td>\n",
  563. " <td>2018.0</td>\n",
  564. " <td>NaN</td>\n",
  565. " <td>2021-08-01 21:30:57</td>\n",
  566. " <td>True</td>\n",
  567. " <td>4</td>\n",
  568. " <td>2008-01-01 00:00:00</td>\n",
  569. " <td>A</td>\n",
  570. " <td>NaN</td>\n",
  571. " <td>NaN</td>\n",
  572. " <td>NaN</td>\n",
  573. " <td>NaN</td>\n",
  574. " <td>17.21A</td>\n",
  575. " <td>NAFRev2</td>\n",
  576. " <td>O</td>\n",
  577. " </tr>\n",
  578. " <tr>\n",
  579. " <th>3</th>\n",
  580. " <td>5520176</td>\n",
  581. " <td>32</td>\n",
  582. " <td>552017600032</td>\n",
  583. " <td>1999-08-30 00:00:00</td>\n",
  584. " <td>12</td>\n",
  585. " <td>2018.0</td>\n",
  586. " <td>NaN</td>\n",
  587. " <td>2020-08-25 10:10:13</td>\n",
  588. " <td>False</td>\n",
  589. " <td>4</td>\n",
  590. " <td>2008-01-01 00:00:00</td>\n",
  591. " <td>A</td>\n",
  592. " <td>NaN</td>\n",
  593. " <td>NaN</td>\n",
  594. " <td>NaN</td>\n",
  595. " <td>NaN</td>\n",
  596. " <td>17.21A</td>\n",
  597. " <td>NAFRev2</td>\n",
  598. " <td>O</td>\n",
  599. " </tr>\n",
  600. " <tr>\n",
  601. " <th>4</th>\n",
  602. " <td>5520242</td>\n",
  603. " <td>16</td>\n",
  604. " <td>552024200016</td>\n",
  605. " <td>1900-01-01 00:00:00</td>\n",
  606. " <td>12</td>\n",
  607. " <td>2018.0</td>\n",
  608. " <td>NaN</td>\n",
  609. " <td>2021-08-01 21:30:57</td>\n",
  610. " <td>True</td>\n",
  611. " <td>4</td>\n",
  612. " <td>2008-01-01 00:00:00</td>\n",
  613. " <td>A</td>\n",
  614. " <td>NaN</td>\n",
  615. " <td>NaN</td>\n",
  616. " <td>NaN</td>\n",
  617. " <td>NaN</td>\n",
  618. " <td>20.30Z</td>\n",
  619. " <td>NAFRev2</td>\n",
  620. " <td>O</td>\n",
  621. " </tr>\n",
  622. " <tr>\n",
  623. " <th>...</th>\n",
  624. " <td>...</td>\n",
  625. " <td>...</td>\n",
  626. " <td>...</td>\n",
  627. " <td>...</td>\n",
  628. " <td>...</td>\n",
  629. " <td>...</td>\n",
  630. " <td>...</td>\n",
  631. " <td>...</td>\n",
  632. " <td>...</td>\n",
  633. " <td>...</td>\n",
  634. " <td>...</td>\n",
  635. " <td>...</td>\n",
  636. " <td>...</td>\n",
  637. " <td>...</td>\n",
  638. " <td>...</td>\n",
  639. " <td>...</td>\n",
  640. " <td>...</td>\n",
  641. " <td>...</td>\n",
  642. " <td>...</td>\n",
  643. " </tr>\n",
  644. " <tr>\n",
  645. " <th>1095676</th>\n",
  646. " <td>999990005</td>\n",
  647. " <td>38</td>\n",
  648. " <td>99999000500038</td>\n",
  649. " <td>1993-07-01 00:00:00</td>\n",
  650. " <td>32</td>\n",
  651. " <td>2018.0</td>\n",
  652. " <td>NaN</td>\n",
  653. " <td>2021-08-01 20:15:35</td>\n",
  654. " <td>True</td>\n",
  655. " <td>5</td>\n",
  656. " <td>2010-12-15 00:00:00</td>\n",
  657. " <td>A</td>\n",
  658. " <td>NaN</td>\n",
  659. " <td>NaN</td>\n",
  660. " <td>NaN</td>\n",
  661. " <td>NaN</td>\n",
  662. " <td>49.41A</td>\n",
  663. " <td>NAFRev2</td>\n",
  664. " <td>O</td>\n",
  665. " </tr>\n",
  666. " <tr>\n",
  667. " <th>1095677</th>\n",
  668. " <td>999990062</td>\n",
  669. " <td>39</td>\n",
  670. " <td>99999006200039</td>\n",
  671. " <td>2007-11-05 00:00:00</td>\n",
  672. " <td>12</td>\n",
  673. " <td>2018.0</td>\n",
  674. " <td>NaN</td>\n",
  675. " <td>2021-02-23 18:21:09</td>\n",
  676. " <td>True</td>\n",
  677. " <td>2</td>\n",
  678. " <td>2008-01-01 00:00:00</td>\n",
  679. " <td>A</td>\n",
  680. " <td>NaN</td>\n",
  681. " <td>NaN</td>\n",
  682. " <td>NaN</td>\n",
  683. " <td>NaN</td>\n",
  684. " <td>64.19Z</td>\n",
  685. " <td>NAFRev2</td>\n",
  686. " <td>O</td>\n",
  687. " </tr>\n",
  688. " <tr>\n",
  689. " <th>1095678</th>\n",
  690. " <td>999990286</td>\n",
  691. " <td>18</td>\n",
  692. " <td>99999028600018</td>\n",
  693. " <td>1979-11-30 00:00:00</td>\n",
  694. " <td>22</td>\n",
  695. " <td>2018.0</td>\n",
  696. " <td>NaN</td>\n",
  697. " <td>2021-04-04 20:15:10</td>\n",
  698. " <td>True</td>\n",
  699. " <td>4</td>\n",
  700. " <td>2008-01-01 00:00:00</td>\n",
  701. " <td>A</td>\n",
  702. " <td>NaN</td>\n",
  703. " <td>NaN</td>\n",
  704. " <td>NaN</td>\n",
  705. " <td>NaN</td>\n",
  706. " <td>55.10Z</td>\n",
  707. " <td>NAFRev2</td>\n",
  708. " <td>O</td>\n",
  709. " </tr>\n",
  710. " <tr>\n",
  711. " <th>1095679</th>\n",
  712. " <td>999990369</td>\n",
  713. " <td>87</td>\n",
  714. " <td>99999036900087</td>\n",
  715. " <td>2014-03-31 00:00:00</td>\n",
  716. " <td>21</td>\n",
  717. " <td>2018.0</td>\n",
  718. " <td>NaN</td>\n",
  719. " <td>2021-02-23 18:21:09</td>\n",
  720. " <td>True</td>\n",
  721. " <td>1</td>\n",
  722. " <td>2014-03-31 00:00:00</td>\n",
  723. " <td>A</td>\n",
  724. " <td>NaN</td>\n",
  725. " <td>NaN</td>\n",
  726. " <td>NaN</td>\n",
  727. " <td>NaN</td>\n",
  728. " <td>66.30Z</td>\n",
  729. " <td>NAFRev2</td>\n",
  730. " <td>O</td>\n",
  731. " </tr>\n",
  732. " <tr>\n",
  733. " <th>1095680</th>\n",
  734. " <td>999990401</td>\n",
  735. " <td>96</td>\n",
  736. " <td>99999040100096</td>\n",
  737. " <td>2009-06-23 00:00:00</td>\n",
  738. " <td>3</td>\n",
  739. " <td>2018.0</td>\n",
  740. " <td>2712ZZ</td>\n",
  741. " <td>2021-03-19 03:37:02</td>\n",
  742. " <td>True</td>\n",
  743. " <td>1</td>\n",
  744. " <td>2009-06-23 00:00:00</td>\n",
  745. " <td>A</td>\n",
  746. " <td>NaN</td>\n",
  747. " <td>NaN</td>\n",
  748. " <td>NaN</td>\n",
  749. " <td>NaN</td>\n",
  750. " <td>27.12Z</td>\n",
  751. " <td>NAFRev2</td>\n",
  752. " <td>O</td>\n",
  753. " </tr>\n",
  754. " </tbody>\n",
  755. "</table>\n",
  756. "<p>965958 rows × 19 columns</p>\n",
  757. "</div>"
  758. ],
  759. "text/plain": [
  760. " siren nic siret dateCreationEtablissement \\\n",
  761. "0 5420120 15 542012000015 1989-01-27 00:00:00 \n",
  762. "1 5420120 31 542012000031 1900-01-01 00:00:00 \n",
  763. "2 5520176 16 552017600016 1955-01-01 00:00:00 \n",
  764. "3 5520176 32 552017600032 1999-08-30 00:00:00 \n",
  765. "4 5520242 16 552024200016 1900-01-01 00:00:00 \n",
  766. "... ... ... ... ... \n",
  767. "1095676 999990005 38 99999000500038 1993-07-01 00:00:00 \n",
  768. "1095677 999990062 39 99999006200039 2007-11-05 00:00:00 \n",
  769. "1095678 999990286 18 99999028600018 1979-11-30 00:00:00 \n",
  770. "1095679 999990369 87 99999036900087 2014-03-31 00:00:00 \n",
  771. "1095680 999990401 96 99999040100096 2009-06-23 00:00:00 \n",
  772. "\n",
  773. " trancheEffectifsEtablissement anneeEffectifsEtablissement \\\n",
  774. "0 2 2018.0 \n",
  775. "1 3 2018.0 \n",
  776. "2 12 2018.0 \n",
  777. "3 12 2018.0 \n",
  778. "4 12 2018.0 \n",
  779. "... ... ... \n",
  780. "1095676 32 2018.0 \n",
  781. "1095677 12 2018.0 \n",
  782. "1095678 22 2018.0 \n",
  783. "1095679 21 2018.0 \n",
  784. "1095680 3 2018.0 \n",
  785. "\n",
  786. " activitePrincipaleRegistreMetiersEtablissement \\\n",
  787. "0 NaN \n",
  788. "1 NaN \n",
  789. "2 NaN \n",
  790. "3 NaN \n",
  791. "4 NaN \n",
  792. "... ... \n",
  793. "1095676 NaN \n",
  794. "1095677 NaN \n",
  795. "1095678 NaN \n",
  796. "1095679 NaN \n",
  797. "1095680 2712ZZ \n",
  798. "\n",
  799. " dateDernierTraitementEtablissement etablissementSiege \\\n",
  800. "0 2020-08-25 10:10:13 False \n",
  801. "1 2021-01-01 03:35:01 True \n",
  802. "2 2021-08-01 21:30:57 True \n",
  803. "3 2020-08-25 10:10:13 False \n",
  804. "4 2021-08-01 21:30:57 True \n",
  805. "... ... ... \n",
  806. "1095676 2021-08-01 20:15:35 True \n",
  807. "1095677 2021-02-23 18:21:09 True \n",
  808. "1095678 2021-04-04 20:15:10 True \n",
  809. "1095679 2021-02-23 18:21:09 True \n",
  810. "1095680 2021-03-19 03:37:02 True \n",
  811. "\n",
  812. " nombrePeriodesEtablissement dateDebut \\\n",
  813. "0 4 2008-01-01 00:00:00 \n",
  814. "1 6 2008-04-23 00:00:00 \n",
  815. "2 4 2008-01-01 00:00:00 \n",
  816. "3 4 2008-01-01 00:00:00 \n",
  817. "4 4 2008-01-01 00:00:00 \n",
  818. "... ... ... \n",
  819. "1095676 5 2010-12-15 00:00:00 \n",
  820. "1095677 2 2008-01-01 00:00:00 \n",
  821. "1095678 4 2008-01-01 00:00:00 \n",
  822. "1095679 1 2014-03-31 00:00:00 \n",
  823. "1095680 1 2009-06-23 00:00:00 \n",
  824. "\n",
  825. " etatAdministratifEtablissement enseigne1Etablissement \\\n",
  826. "0 A NaN \n",
  827. "1 A NaN \n",
  828. "2 A NaN \n",
  829. "3 A NaN \n",
  830. "4 A NaN \n",
  831. "... ... ... \n",
  832. "1095676 A NaN \n",
  833. "1095677 A NaN \n",
  834. "1095678 A NaN \n",
  835. "1095679 A NaN \n",
  836. "1095680 A NaN \n",
  837. "\n",
  838. " enseigne2Etablissement enseigne3Etablissement \\\n",
  839. "0 NaN NaN \n",
  840. "1 NaN NaN \n",
  841. "2 NaN NaN \n",
  842. "3 NaN NaN \n",
  843. "4 NaN NaN \n",
  844. "... ... ... \n",
  845. "1095676 NaN NaN \n",
  846. "1095677 NaN NaN \n",
  847. "1095678 NaN NaN \n",
  848. "1095679 NaN NaN \n",
  849. "1095680 NaN NaN \n",
  850. "\n",
  851. " denominationUsuelleEtablissement activitePrincipaleEtablissement \\\n",
  852. "0 NaN 10.81Z \n",
  853. "1 NaN 70.10Z \n",
  854. "2 NaN 17.21A \n",
  855. "3 NaN 17.21A \n",
  856. "4 NaN 20.30Z \n",
  857. "... ... ... \n",
  858. "1095676 NaN 49.41A \n",
  859. "1095677 NaN 64.19Z \n",
  860. "1095678 NaN 55.10Z \n",
  861. "1095679 NaN 66.30Z \n",
  862. "1095680 NaN 27.12Z \n",
  863. "\n",
  864. " nomenclatureActivitePrincipaleEtablissement \\\n",
  865. "0 NAFRev2 \n",
  866. "1 NAFRev2 \n",
  867. "2 NAFRev2 \n",
  868. "3 NAFRev2 \n",
  869. "4 NAFRev2 \n",
  870. "... ... \n",
  871. "1095676 NAFRev2 \n",
  872. "1095677 NAFRev2 \n",
  873. "1095678 NAFRev2 \n",
  874. "1095679 NAFRev2 \n",
  875. "1095680 NAFRev2 \n",
  876. "\n",
  877. " caractereEmployeurEtablissement \n",
  878. "0 O \n",
  879. "1 O \n",
  880. "2 O \n",
  881. "3 O \n",
  882. "4 O \n",
  883. "... ... \n",
  884. "1095676 O \n",
  885. "1095677 O \n",
  886. "1095678 O \n",
  887. "1095679 O \n",
  888. "1095680 O \n",
  889. "\n",
  890. "[965958 rows x 19 columns]"
  891. ]
  892. },
  893. "execution_count": 3,
  894. "metadata": {},
  895. "output_type": "execute_result"
  896. }
  897. ],
  898. "source": [
  899. "#filtrer sur les etatAdministratifEtablissement Actif\n",
  900. "data=data.loc[data.etatAdministratifEtablissement=='A']\n",
  901. "data"
  902. ]
  903. },
  904. {
  905. "cell_type": "code",
  906. "execution_count": 7,
  907. "id": "ca5a157e",
  908. "metadata": {},
  909. "outputs": [
  910. {
  911. "name": "stdout",
  912. "output_type": "stream",
  913. "text": [
  914. "DataFrame is written to Excel File successfully.\n"
  915. ]
  916. }
  917. ],
  918. "source": [
  919. "#export du fichier en .xlsx\n",
  920. "file_name = 'SirenAdminNet.xlsx'\n",
  921. "data.to_excel(file_name)\n",
  922. "print('DataFrame is written to Excel File successfully.')"
  923. ]
  924. },
  925. {
  926. "cell_type": "code",
  927. "execution_count": null,
  928. "id": "d6213677",
  929. "metadata": {},
  930. "outputs": [],
  931. "source": []
  932. }
  933. ],
  934. "metadata": {
  935. "kernelspec": {
  936. "display_name": "Python 3 (ipykernel)",
  937. "language": "python",
  938. "name": "python3"
  939. },
  940. "language_info": {
  941. "codemirror_mode": {
  942. "name": "ipython",
  943. "version": 3
  944. },
  945. "file_extension": ".py",
  946. "mimetype": "text/x-python",
  947. "name": "python",
  948. "nbconvert_exporter": "python",
  949. "pygments_lexer": "ipython3",
  950. "version": "3.9.7"
  951. }
  952. },
  953. "nbformat": 4,
  954. "nbformat_minor": 5
  955. }