You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

979 lines
40 KiB

2 years ago
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 6,
  6. "id": "33e83475",
  7. "metadata": {},
  8. "outputs": [],
  9. "source": [
  10. "#import de la bibliothèque pandas\n",
  11. "import pandas as pd"
  12. ]
  13. },
  14. {
  15. "cell_type": "code",
  16. "execution_count": 7,
  17. "id": "f091507f",
  18. "metadata": {},
  19. "outputs": [
  20. {
  21. "name": "stderr",
  22. "output_type": "stream",
  23. "text": [
  24. "/Users/angecharbelledurand/opt/anaconda3/lib/python3.9/site-packages/IPython/core/interactiveshell.py:3444: DtypeWarning: Columns (7,15,16) have mixed types.Specify dtype option on import or set low_memory=False.\n",
  25. " exec(code_obj, self.user_global_ns, self.user_ns)\n"
  26. ]
  27. },
  28. {
  29. "data": {
  30. "text/html": [
  31. "<div>\n",
  32. "<style scoped>\n",
  33. " .dataframe tbody tr th:only-of-type {\n",
  34. " vertical-align: middle;\n",
  35. " }\n",
  36. "\n",
  37. " .dataframe tbody tr th {\n",
  38. " vertical-align: top;\n",
  39. " }\n",
  40. "\n",
  41. " .dataframe thead th {\n",
  42. " text-align: right;\n",
  43. " }\n",
  44. "</style>\n",
  45. "<table border=\"1\" class=\"dataframe\">\n",
  46. " <thead>\n",
  47. " <tr style=\"text-align: right;\">\n",
  48. " <th></th>\n",
  49. " <th>Unnamed: 0</th>\n",
  50. " <th>siren</th>\n",
  51. " <th>nic</th>\n",
  52. " <th>siret</th>\n",
  53. " <th>dateCreationEtablissement</th>\n",
  54. " <th>trancheEffectifsEtablissement</th>\n",
  55. " <th>anneeEffectifsEtablissement</th>\n",
  56. " <th>activitePrincipaleRegistreMetiersEtablissement</th>\n",
  57. " <th>dateDernierTraitementEtablissement</th>\n",
  58. " <th>etablissementSiege</th>\n",
  59. " <th>nombrePeriodesEtablissement</th>\n",
  60. " <th>dateDebut</th>\n",
  61. " <th>etatAdministratifEtablissement</th>\n",
  62. " <th>enseigne1Etablissement</th>\n",
  63. " <th>enseigne2Etablissement</th>\n",
  64. " <th>enseigne3Etablissement</th>\n",
  65. " <th>denominationUsuelleEtablissement</th>\n",
  66. " <th>activitePrincipaleEtablissement</th>\n",
  67. " <th>nomenclatureActivitePrincipaleEtablissement</th>\n",
  68. " <th>caractereEmployeurEtablissement</th>\n",
  69. " </tr>\n",
  70. " </thead>\n",
  71. " <tbody>\n",
  72. " <tr>\n",
  73. " <th>0</th>\n",
  74. " <td>0</td>\n",
  75. " <td>5420120</td>\n",
  76. " <td>15</td>\n",
  77. " <td>542012000015</td>\n",
  78. " <td>1989-01-27 00:00:00</td>\n",
  79. " <td>2</td>\n",
  80. " <td>2018.0</td>\n",
  81. " <td>NaN</td>\n",
  82. " <td>2020-08-25 10:10:13</td>\n",
  83. " <td>False</td>\n",
  84. " <td>4</td>\n",
  85. " <td>2008-01-01 00:00:00</td>\n",
  86. " <td>A</td>\n",
  87. " <td>NaN</td>\n",
  88. " <td>NaN</td>\n",
  89. " <td>NaN</td>\n",
  90. " <td>NaN</td>\n",
  91. " <td>10.81Z</td>\n",
  92. " <td>NAFRev2</td>\n",
  93. " <td>O</td>\n",
  94. " </tr>\n",
  95. " <tr>\n",
  96. " <th>1</th>\n",
  97. " <td>1</td>\n",
  98. " <td>5420120</td>\n",
  99. " <td>31</td>\n",
  100. " <td>542012000031</td>\n",
  101. " <td>1900-01-01 00:00:00</td>\n",
  102. " <td>3</td>\n",
  103. " <td>2018.0</td>\n",
  104. " <td>NaN</td>\n",
  105. " <td>2021-01-01 03:35:01</td>\n",
  106. " <td>True</td>\n",
  107. " <td>6</td>\n",
  108. " <td>2008-04-23 00:00:00</td>\n",
  109. " <td>A</td>\n",
  110. " <td>NaN</td>\n",
  111. " <td>NaN</td>\n",
  112. " <td>NaN</td>\n",
  113. " <td>NaN</td>\n",
  114. " <td>70.10Z</td>\n",
  115. " <td>NAFRev2</td>\n",
  116. " <td>O</td>\n",
  117. " </tr>\n",
  118. " <tr>\n",
  119. " <th>2</th>\n",
  120. " <td>2</td>\n",
  121. " <td>5520176</td>\n",
  122. " <td>16</td>\n",
  123. " <td>552017600016</td>\n",
  124. " <td>1955-01-01 00:00:00</td>\n",
  125. " <td>12</td>\n",
  126. " <td>2018.0</td>\n",
  127. " <td>NaN</td>\n",
  128. " <td>2021-08-01 21:30:57</td>\n",
  129. " <td>True</td>\n",
  130. " <td>4</td>\n",
  131. " <td>2008-01-01 00:00:00</td>\n",
  132. " <td>A</td>\n",
  133. " <td>NaN</td>\n",
  134. " <td>NaN</td>\n",
  135. " <td>NaN</td>\n",
  136. " <td>NaN</td>\n",
  137. " <td>17.21A</td>\n",
  138. " <td>NAFRev2</td>\n",
  139. " <td>O</td>\n",
  140. " </tr>\n",
  141. " <tr>\n",
  142. " <th>3</th>\n",
  143. " <td>3</td>\n",
  144. " <td>5520176</td>\n",
  145. " <td>32</td>\n",
  146. " <td>552017600032</td>\n",
  147. " <td>1999-08-30 00:00:00</td>\n",
  148. " <td>12</td>\n",
  149. " <td>2018.0</td>\n",
  150. " <td>NaN</td>\n",
  151. " <td>2020-08-25 10:10:13</td>\n",
  152. " <td>False</td>\n",
  153. " <td>4</td>\n",
  154. " <td>2008-01-01 00:00:00</td>\n",
  155. " <td>A</td>\n",
  156. " <td>NaN</td>\n",
  157. " <td>NaN</td>\n",
  158. " <td>NaN</td>\n",
  159. " <td>NaN</td>\n",
  160. " <td>17.21A</td>\n",
  161. " <td>NAFRev2</td>\n",
  162. " <td>O</td>\n",
  163. " </tr>\n",
  164. " <tr>\n",
  165. " <th>4</th>\n",
  166. " <td>4</td>\n",
  167. " <td>5520242</td>\n",
  168. " <td>16</td>\n",
  169. " <td>552024200016</td>\n",
  170. " <td>1900-01-01 00:00:00</td>\n",
  171. " <td>12</td>\n",
  172. " <td>2018.0</td>\n",
  173. " <td>NaN</td>\n",
  174. " <td>2021-08-01 21:30:57</td>\n",
  175. " <td>True</td>\n",
  176. " <td>4</td>\n",
  177. " <td>2008-01-01 00:00:00</td>\n",
  178. " <td>A</td>\n",
  179. " <td>NaN</td>\n",
  180. " <td>NaN</td>\n",
  181. " <td>NaN</td>\n",
  182. " <td>NaN</td>\n",
  183. " <td>20.30Z</td>\n",
  184. " <td>NAFRev2</td>\n",
  185. " <td>O</td>\n",
  186. " </tr>\n",
  187. " <tr>\n",
  188. " <th>...</th>\n",
  189. " <td>...</td>\n",
  190. " <td>...</td>\n",
  191. " <td>...</td>\n",
  192. " <td>...</td>\n",
  193. " <td>...</td>\n",
  194. " <td>...</td>\n",
  195. " <td>...</td>\n",
  196. " <td>...</td>\n",
  197. " <td>...</td>\n",
  198. " <td>...</td>\n",
  199. " <td>...</td>\n",
  200. " <td>...</td>\n",
  201. " <td>...</td>\n",
  202. " <td>...</td>\n",
  203. " <td>...</td>\n",
  204. " <td>...</td>\n",
  205. " <td>...</td>\n",
  206. " <td>...</td>\n",
  207. " <td>...</td>\n",
  208. " <td>...</td>\n",
  209. " </tr>\n",
  210. " <tr>\n",
  211. " <th>965953</th>\n",
  212. " <td>1095676</td>\n",
  213. " <td>999990005</td>\n",
  214. " <td>38</td>\n",
  215. " <td>99999000500038</td>\n",
  216. " <td>1993-07-01 00:00:00</td>\n",
  217. " <td>32</td>\n",
  218. " <td>2018.0</td>\n",
  219. " <td>NaN</td>\n",
  220. " <td>2021-08-01 20:15:35</td>\n",
  221. " <td>True</td>\n",
  222. " <td>5</td>\n",
  223. " <td>2010-12-15 00:00:00</td>\n",
  224. " <td>A</td>\n",
  225. " <td>NaN</td>\n",
  226. " <td>NaN</td>\n",
  227. " <td>NaN</td>\n",
  228. " <td>NaN</td>\n",
  229. " <td>49.41A</td>\n",
  230. " <td>NAFRev2</td>\n",
  231. " <td>O</td>\n",
  232. " </tr>\n",
  233. " <tr>\n",
  234. " <th>965954</th>\n",
  235. " <td>1095677</td>\n",
  236. " <td>999990062</td>\n",
  237. " <td>39</td>\n",
  238. " <td>99999006200039</td>\n",
  239. " <td>2007-11-05 00:00:00</td>\n",
  240. " <td>12</td>\n",
  241. " <td>2018.0</td>\n",
  242. " <td>NaN</td>\n",
  243. " <td>2021-02-23 18:21:09</td>\n",
  244. " <td>True</td>\n",
  245. " <td>2</td>\n",
  246. " <td>2008-01-01 00:00:00</td>\n",
  247. " <td>A</td>\n",
  248. " <td>NaN</td>\n",
  249. " <td>NaN</td>\n",
  250. " <td>NaN</td>\n",
  251. " <td>NaN</td>\n",
  252. " <td>64.19Z</td>\n",
  253. " <td>NAFRev2</td>\n",
  254. " <td>O</td>\n",
  255. " </tr>\n",
  256. " <tr>\n",
  257. " <th>965955</th>\n",
  258. " <td>1095678</td>\n",
  259. " <td>999990286</td>\n",
  260. " <td>18</td>\n",
  261. " <td>99999028600018</td>\n",
  262. " <td>1979-11-30 00:00:00</td>\n",
  263. " <td>22</td>\n",
  264. " <td>2018.0</td>\n",
  265. " <td>NaN</td>\n",
  266. " <td>2021-04-04 20:15:10</td>\n",
  267. " <td>True</td>\n",
  268. " <td>4</td>\n",
  269. " <td>2008-01-01 00:00:00</td>\n",
  270. " <td>A</td>\n",
  271. " <td>NaN</td>\n",
  272. " <td>NaN</td>\n",
  273. " <td>NaN</td>\n",
  274. " <td>NaN</td>\n",
  275. " <td>55.10Z</td>\n",
  276. " <td>NAFRev2</td>\n",
  277. " <td>O</td>\n",
  278. " </tr>\n",
  279. " <tr>\n",
  280. " <th>965956</th>\n",
  281. " <td>1095679</td>\n",
  282. " <td>999990369</td>\n",
  283. " <td>87</td>\n",
  284. " <td>99999036900087</td>\n",
  285. " <td>2014-03-31 00:00:00</td>\n",
  286. " <td>21</td>\n",
  287. " <td>2018.0</td>\n",
  288. " <td>NaN</td>\n",
  289. " <td>2021-02-23 18:21:09</td>\n",
  290. " <td>True</td>\n",
  291. " <td>1</td>\n",
  292. " <td>2014-03-31 00:00:00</td>\n",
  293. " <td>A</td>\n",
  294. " <td>NaN</td>\n",
  295. " <td>NaN</td>\n",
  296. " <td>NaN</td>\n",
  297. " <td>NaN</td>\n",
  298. " <td>66.30Z</td>\n",
  299. " <td>NAFRev2</td>\n",
  300. " <td>O</td>\n",
  301. " </tr>\n",
  302. " <tr>\n",
  303. " <th>965957</th>\n",
  304. " <td>1095680</td>\n",
  305. " <td>999990401</td>\n",
  306. " <td>96</td>\n",
  307. " <td>99999040100096</td>\n",
  308. " <td>2009-06-23 00:00:00</td>\n",
  309. " <td>3</td>\n",
  310. " <td>2018.0</td>\n",
  311. " <td>2712ZZ</td>\n",
  312. " <td>2021-03-19 03:37:02</td>\n",
  313. " <td>True</td>\n",
  314. " <td>1</td>\n",
  315. " <td>2009-06-23 00:00:00</td>\n",
  316. " <td>A</td>\n",
  317. " <td>NaN</td>\n",
  318. " <td>NaN</td>\n",
  319. " <td>NaN</td>\n",
  320. " <td>NaN</td>\n",
  321. " <td>27.12Z</td>\n",
  322. " <td>NAFRev2</td>\n",
  323. " <td>O</td>\n",
  324. " </tr>\n",
  325. " </tbody>\n",
  326. "</table>\n",
  327. "<p>965958 rows × 20 columns</p>\n",
  328. "</div>"
  329. ],
  330. "text/plain": [
  331. " Unnamed: 0 siren nic siret dateCreationEtablissement \\\n",
  332. "0 0 5420120 15 542012000015 1989-01-27 00:00:00 \n",
  333. "1 1 5420120 31 542012000031 1900-01-01 00:00:00 \n",
  334. "2 2 5520176 16 552017600016 1955-01-01 00:00:00 \n",
  335. "3 3 5520176 32 552017600032 1999-08-30 00:00:00 \n",
  336. "4 4 5520242 16 552024200016 1900-01-01 00:00:00 \n",
  337. "... ... ... ... ... ... \n",
  338. "965953 1095676 999990005 38 99999000500038 1993-07-01 00:00:00 \n",
  339. "965954 1095677 999990062 39 99999006200039 2007-11-05 00:00:00 \n",
  340. "965955 1095678 999990286 18 99999028600018 1979-11-30 00:00:00 \n",
  341. "965956 1095679 999990369 87 99999036900087 2014-03-31 00:00:00 \n",
  342. "965957 1095680 999990401 96 99999040100096 2009-06-23 00:00:00 \n",
  343. "\n",
  344. " trancheEffectifsEtablissement anneeEffectifsEtablissement \\\n",
  345. "0 2 2018.0 \n",
  346. "1 3 2018.0 \n",
  347. "2 12 2018.0 \n",
  348. "3 12 2018.0 \n",
  349. "4 12 2018.0 \n",
  350. "... ... ... \n",
  351. "965953 32 2018.0 \n",
  352. "965954 12 2018.0 \n",
  353. "965955 22 2018.0 \n",
  354. "965956 21 2018.0 \n",
  355. "965957 3 2018.0 \n",
  356. "\n",
  357. " activitePrincipaleRegistreMetiersEtablissement \\\n",
  358. "0 NaN \n",
  359. "1 NaN \n",
  360. "2 NaN \n",
  361. "3 NaN \n",
  362. "4 NaN \n",
  363. "... ... \n",
  364. "965953 NaN \n",
  365. "965954 NaN \n",
  366. "965955 NaN \n",
  367. "965956 NaN \n",
  368. "965957 2712ZZ \n",
  369. "\n",
  370. " dateDernierTraitementEtablissement etablissementSiege \\\n",
  371. "0 2020-08-25 10:10:13 False \n",
  372. "1 2021-01-01 03:35:01 True \n",
  373. "2 2021-08-01 21:30:57 True \n",
  374. "3 2020-08-25 10:10:13 False \n",
  375. "4 2021-08-01 21:30:57 True \n",
  376. "... ... ... \n",
  377. "965953 2021-08-01 20:15:35 True \n",
  378. "965954 2021-02-23 18:21:09 True \n",
  379. "965955 2021-04-04 20:15:10 True \n",
  380. "965956 2021-02-23 18:21:09 True \n",
  381. "965957 2021-03-19 03:37:02 True \n",
  382. "\n",
  383. " nombrePeriodesEtablissement dateDebut \\\n",
  384. "0 4 2008-01-01 00:00:00 \n",
  385. "1 6 2008-04-23 00:00:00 \n",
  386. "2 4 2008-01-01 00:00:00 \n",
  387. "3 4 2008-01-01 00:00:00 \n",
  388. "4 4 2008-01-01 00:00:00 \n",
  389. "... ... ... \n",
  390. "965953 5 2010-12-15 00:00:00 \n",
  391. "965954 2 2008-01-01 00:00:00 \n",
  392. "965955 4 2008-01-01 00:00:00 \n",
  393. "965956 1 2014-03-31 00:00:00 \n",
  394. "965957 1 2009-06-23 00:00:00 \n",
  395. "\n",
  396. " etatAdministratifEtablissement enseigne1Etablissement \\\n",
  397. "0 A NaN \n",
  398. "1 A NaN \n",
  399. "2 A NaN \n",
  400. "3 A NaN \n",
  401. "4 A NaN \n",
  402. "... ... ... \n",
  403. "965953 A NaN \n",
  404. "965954 A NaN \n",
  405. "965955 A NaN \n",
  406. "965956 A NaN \n",
  407. "965957 A NaN \n",
  408. "\n",
  409. " enseigne2Etablissement enseigne3Etablissement \\\n",
  410. "0 NaN NaN \n",
  411. "1 NaN NaN \n",
  412. "2 NaN NaN \n",
  413. "3 NaN NaN \n",
  414. "4 NaN NaN \n",
  415. "... ... ... \n",
  416. "965953 NaN NaN \n",
  417. "965954 NaN NaN \n",
  418. "965955 NaN NaN \n",
  419. "965956 NaN NaN \n",
  420. "965957 NaN NaN \n",
  421. "\n",
  422. " denominationUsuelleEtablissement activitePrincipaleEtablissement \\\n",
  423. "0 NaN 10.81Z \n",
  424. "1 NaN 70.10Z \n",
  425. "2 NaN 17.21A \n",
  426. "3 NaN 17.21A \n",
  427. "4 NaN 20.30Z \n",
  428. "... ... ... \n",
  429. "965953 NaN 49.41A \n",
  430. "965954 NaN 64.19Z \n",
  431. "965955 NaN 55.10Z \n",
  432. "965956 NaN 66.30Z \n",
  433. "965957 NaN 27.12Z \n",
  434. "\n",
  435. " nomenclatureActivitePrincipaleEtablissement \\\n",
  436. "0 NAFRev2 \n",
  437. "1 NAFRev2 \n",
  438. "2 NAFRev2 \n",
  439. "3 NAFRev2 \n",
  440. "4 NAFRev2 \n",
  441. "... ... \n",
  442. "965953 NAFRev2 \n",
  443. "965954 NAFRev2 \n",
  444. "965955 NAFRev2 \n",
  445. "965956 NAFRev2 \n",
  446. "965957 NAFRev2 \n",
  447. "\n",
  448. " caractereEmployeurEtablissement \n",
  449. "0 O \n",
  450. "1 O \n",
  451. "2 O \n",
  452. "3 O \n",
  453. "4 O \n",
  454. "... ... \n",
  455. "965953 O \n",
  456. "965954 O \n",
  457. "965955 O \n",
  458. "965956 O \n",
  459. "965957 O \n",
  460. "\n",
  461. "[965958 rows x 20 columns]"
  462. ]
  463. },
  464. "execution_count": 7,
  465. "metadata": {},
  466. "output_type": "execute_result"
  467. }
  468. ],
  469. "source": [
  470. "#import du fichier csv\n",
  471. "dt = pd.read_csv('SirenAdminNet.csv')\n",
  472. "dt"
  473. ]
  474. },
  475. {
  476. "cell_type": "code",
  477. "execution_count": 8,
  478. "id": "b430c37b",
  479. "metadata": {},
  480. "outputs": [
  481. {
  482. "data": {
  483. "text/html": [
  484. "<div>\n",
  485. "<style scoped>\n",
  486. " .dataframe tbody tr th:only-of-type {\n",
  487. " vertical-align: middle;\n",
  488. " }\n",
  489. "\n",
  490. " .dataframe tbody tr th {\n",
  491. " vertical-align: top;\n",
  492. " }\n",
  493. "\n",
  494. " .dataframe thead th {\n",
  495. " text-align: right;\n",
  496. " }\n",
  497. "</style>\n",
  498. "<table border=\"1\" class=\"dataframe\">\n",
  499. " <thead>\n",
  500. " <tr style=\"text-align: right;\">\n",
  501. " <th></th>\n",
  502. " <th>Unnamed: 0</th>\n",
  503. " <th>siren</th>\n",
  504. " <th>nic</th>\n",
  505. " <th>siret</th>\n",
  506. " <th>dateCreationEtablissement</th>\n",
  507. " <th>trancheEffectifsEtablissement</th>\n",
  508. " <th>anneeEffectifsEtablissement</th>\n",
  509. " <th>activitePrincipaleRegistreMetiersEtablissement</th>\n",
  510. " <th>dateDernierTraitementEtablissement</th>\n",
  511. " <th>etablissementSiege</th>\n",
  512. " <th>nombrePeriodesEtablissement</th>\n",
  513. " <th>dateDebut</th>\n",
  514. " <th>etatAdministratifEtablissement</th>\n",
  515. " <th>enseigne1Etablissement</th>\n",
  516. " <th>enseigne2Etablissement</th>\n",
  517. " <th>enseigne3Etablissement</th>\n",
  518. " <th>denominationUsuelleEtablissement</th>\n",
  519. " <th>activitePrincipaleEtablissement</th>\n",
  520. " <th>nomenclatureActivitePrincipaleEtablissement</th>\n",
  521. " <th>caractereEmployeurEtablissement</th>\n",
  522. " </tr>\n",
  523. " </thead>\n",
  524. " <tbody>\n",
  525. " <tr>\n",
  526. " <th>0</th>\n",
  527. " <td>0</td>\n",
  528. " <td>5420120</td>\n",
  529. " <td>15</td>\n",
  530. " <td>542012000015</td>\n",
  531. " <td>1989-01-27 00:00:00</td>\n",
  532. " <td>2</td>\n",
  533. " <td>2018.0</td>\n",
  534. " <td>NaN</td>\n",
  535. " <td>2020-08-25 10:10:13</td>\n",
  536. " <td>False</td>\n",
  537. " <td>4</td>\n",
  538. " <td>2008-01-01 00:00:00</td>\n",
  539. " <td>A</td>\n",
  540. " <td>NaN</td>\n",
  541. " <td>NaN</td>\n",
  542. " <td>NaN</td>\n",
  543. " <td>NaN</td>\n",
  544. " <td>10.81Z</td>\n",
  545. " <td>NAFRev2</td>\n",
  546. " <td>O</td>\n",
  547. " </tr>\n",
  548. " <tr>\n",
  549. " <th>1</th>\n",
  550. " <td>1</td>\n",
  551. " <td>5420120</td>\n",
  552. " <td>31</td>\n",
  553. " <td>542012000031</td>\n",
  554. " <td>1900-01-01 00:00:00</td>\n",
  555. " <td>3</td>\n",
  556. " <td>2018.0</td>\n",
  557. " <td>NaN</td>\n",
  558. " <td>2021-01-01 03:35:01</td>\n",
  559. " <td>True</td>\n",
  560. " <td>6</td>\n",
  561. " <td>2008-04-23 00:00:00</td>\n",
  562. " <td>A</td>\n",
  563. " <td>NaN</td>\n",
  564. " <td>NaN</td>\n",
  565. " <td>NaN</td>\n",
  566. " <td>NaN</td>\n",
  567. " <td>70.10Z</td>\n",
  568. " <td>NAFRev2</td>\n",
  569. " <td>O</td>\n",
  570. " </tr>\n",
  571. " <tr>\n",
  572. " <th>2</th>\n",
  573. " <td>2</td>\n",
  574. " <td>5520176</td>\n",
  575. " <td>16</td>\n",
  576. " <td>552017600016</td>\n",
  577. " <td>1955-01-01 00:00:00</td>\n",
  578. " <td>12</td>\n",
  579. " <td>2018.0</td>\n",
  580. " <td>NaN</td>\n",
  581. " <td>2021-08-01 21:30:57</td>\n",
  582. " <td>True</td>\n",
  583. " <td>4</td>\n",
  584. " <td>2008-01-01 00:00:00</td>\n",
  585. " <td>A</td>\n",
  586. " <td>NaN</td>\n",
  587. " <td>NaN</td>\n",
  588. " <td>NaN</td>\n",
  589. " <td>NaN</td>\n",
  590. " <td>17.21A</td>\n",
  591. " <td>NAFRev2</td>\n",
  592. " <td>O</td>\n",
  593. " </tr>\n",
  594. " <tr>\n",
  595. " <th>3</th>\n",
  596. " <td>3</td>\n",
  597. " <td>5520176</td>\n",
  598. " <td>32</td>\n",
  599. " <td>552017600032</td>\n",
  600. " <td>1999-08-30 00:00:00</td>\n",
  601. " <td>12</td>\n",
  602. " <td>2018.0</td>\n",
  603. " <td>NaN</td>\n",
  604. " <td>2020-08-25 10:10:13</td>\n",
  605. " <td>False</td>\n",
  606. " <td>4</td>\n",
  607. " <td>2008-01-01 00:00:00</td>\n",
  608. " <td>A</td>\n",
  609. " <td>NaN</td>\n",
  610. " <td>NaN</td>\n",
  611. " <td>NaN</td>\n",
  612. " <td>NaN</td>\n",
  613. " <td>17.21A</td>\n",
  614. " <td>NAFRev2</td>\n",
  615. " <td>O</td>\n",
  616. " </tr>\n",
  617. " <tr>\n",
  618. " <th>4</th>\n",
  619. " <td>4</td>\n",
  620. " <td>5520242</td>\n",
  621. " <td>16</td>\n",
  622. " <td>552024200016</td>\n",
  623. " <td>1900-01-01 00:00:00</td>\n",
  624. " <td>12</td>\n",
  625. " <td>2018.0</td>\n",
  626. " <td>NaN</td>\n",
  627. " <td>2021-08-01 21:30:57</td>\n",
  628. " <td>True</td>\n",
  629. " <td>4</td>\n",
  630. " <td>2008-01-01 00:00:00</td>\n",
  631. " <td>A</td>\n",
  632. " <td>NaN</td>\n",
  633. " <td>NaN</td>\n",
  634. " <td>NaN</td>\n",
  635. " <td>NaN</td>\n",
  636. " <td>20.30Z</td>\n",
  637. " <td>NAFRev2</td>\n",
  638. " <td>O</td>\n",
  639. " </tr>\n",
  640. " <tr>\n",
  641. " <th>...</th>\n",
  642. " <td>...</td>\n",
  643. " <td>...</td>\n",
  644. " <td>...</td>\n",
  645. " <td>...</td>\n",
  646. " <td>...</td>\n",
  647. " <td>...</td>\n",
  648. " <td>...</td>\n",
  649. " <td>...</td>\n",
  650. " <td>...</td>\n",
  651. " <td>...</td>\n",
  652. " <td>...</td>\n",
  653. " <td>...</td>\n",
  654. " <td>...</td>\n",
  655. " <td>...</td>\n",
  656. " <td>...</td>\n",
  657. " <td>...</td>\n",
  658. " <td>...</td>\n",
  659. " <td>...</td>\n",
  660. " <td>...</td>\n",
  661. " <td>...</td>\n",
  662. " </tr>\n",
  663. " <tr>\n",
  664. " <th>965953</th>\n",
  665. " <td>1095676</td>\n",
  666. " <td>999990005</td>\n",
  667. " <td>38</td>\n",
  668. " <td>99999000500038</td>\n",
  669. " <td>1993-07-01 00:00:00</td>\n",
  670. " <td>32</td>\n",
  671. " <td>2018.0</td>\n",
  672. " <td>NaN</td>\n",
  673. " <td>2021-08-01 20:15:35</td>\n",
  674. " <td>True</td>\n",
  675. " <td>5</td>\n",
  676. " <td>2010-12-15 00:00:00</td>\n",
  677. " <td>A</td>\n",
  678. " <td>NaN</td>\n",
  679. " <td>NaN</td>\n",
  680. " <td>NaN</td>\n",
  681. " <td>NaN</td>\n",
  682. " <td>49.41A</td>\n",
  683. " <td>NAFRev2</td>\n",
  684. " <td>O</td>\n",
  685. " </tr>\n",
  686. " <tr>\n",
  687. " <th>965954</th>\n",
  688. " <td>1095677</td>\n",
  689. " <td>999990062</td>\n",
  690. " <td>39</td>\n",
  691. " <td>99999006200039</td>\n",
  692. " <td>2007-11-05 00:00:00</td>\n",
  693. " <td>12</td>\n",
  694. " <td>2018.0</td>\n",
  695. " <td>NaN</td>\n",
  696. " <td>2021-02-23 18:21:09</td>\n",
  697. " <td>True</td>\n",
  698. " <td>2</td>\n",
  699. " <td>2008-01-01 00:00:00</td>\n",
  700. " <td>A</td>\n",
  701. " <td>NaN</td>\n",
  702. " <td>NaN</td>\n",
  703. " <td>NaN</td>\n",
  704. " <td>NaN</td>\n",
  705. " <td>64.19Z</td>\n",
  706. " <td>NAFRev2</td>\n",
  707. " <td>O</td>\n",
  708. " </tr>\n",
  709. " <tr>\n",
  710. " <th>965955</th>\n",
  711. " <td>1095678</td>\n",
  712. " <td>999990286</td>\n",
  713. " <td>18</td>\n",
  714. " <td>99999028600018</td>\n",
  715. " <td>1979-11-30 00:00:00</td>\n",
  716. " <td>22</td>\n",
  717. " <td>2018.0</td>\n",
  718. " <td>NaN</td>\n",
  719. " <td>2021-04-04 20:15:10</td>\n",
  720. " <td>True</td>\n",
  721. " <td>4</td>\n",
  722. " <td>2008-01-01 00:00:00</td>\n",
  723. " <td>A</td>\n",
  724. " <td>NaN</td>\n",
  725. " <td>NaN</td>\n",
  726. " <td>NaN</td>\n",
  727. " <td>NaN</td>\n",
  728. " <td>55.10Z</td>\n",
  729. " <td>NAFRev2</td>\n",
  730. " <td>O</td>\n",
  731. " </tr>\n",
  732. " <tr>\n",
  733. " <th>965956</th>\n",
  734. " <td>1095679</td>\n",
  735. " <td>999990369</td>\n",
  736. " <td>87</td>\n",
  737. " <td>99999036900087</td>\n",
  738. " <td>2014-03-31 00:00:00</td>\n",
  739. " <td>21</td>\n",
  740. " <td>2018.0</td>\n",
  741. " <td>NaN</td>\n",
  742. " <td>2021-02-23 18:21:09</td>\n",
  743. " <td>True</td>\n",
  744. " <td>1</td>\n",
  745. " <td>2014-03-31 00:00:00</td>\n",
  746. " <td>A</td>\n",
  747. " <td>NaN</td>\n",
  748. " <td>NaN</td>\n",
  749. " <td>NaN</td>\n",
  750. " <td>NaN</td>\n",
  751. " <td>66.30Z</td>\n",
  752. " <td>NAFRev2</td>\n",
  753. " <td>O</td>\n",
  754. " </tr>\n",
  755. " <tr>\n",
  756. " <th>965957</th>\n",
  757. " <td>1095680</td>\n",
  758. " <td>999990401</td>\n",
  759. " <td>96</td>\n",
  760. " <td>99999040100096</td>\n",
  761. " <td>2009-06-23 00:00:00</td>\n",
  762. " <td>3</td>\n",
  763. " <td>2018.0</td>\n",
  764. " <td>2712ZZ</td>\n",
  765. " <td>2021-03-19 03:37:02</td>\n",
  766. " <td>True</td>\n",
  767. " <td>1</td>\n",
  768. " <td>2009-06-23 00:00:00</td>\n",
  769. " <td>A</td>\n",
  770. " <td>NaN</td>\n",
  771. " <td>NaN</td>\n",
  772. " <td>NaN</td>\n",
  773. " <td>NaN</td>\n",
  774. " <td>27.12Z</td>\n",
  775. " <td>NAFRev2</td>\n",
  776. " <td>O</td>\n",
  777. " </tr>\n",
  778. " </tbody>\n",
  779. "</table>\n",
  780. "<p>832575 rows × 20 columns</p>\n",
  781. "</div>"
  782. ],
  783. "text/plain": [
  784. " Unnamed: 0 siren nic siret dateCreationEtablissement \\\n",
  785. "0 0 5420120 15 542012000015 1989-01-27 00:00:00 \n",
  786. "1 1 5420120 31 542012000031 1900-01-01 00:00:00 \n",
  787. "2 2 5520176 16 552017600016 1955-01-01 00:00:00 \n",
  788. "3 3 5520176 32 552017600032 1999-08-30 00:00:00 \n",
  789. "4 4 5520242 16 552024200016 1900-01-01 00:00:00 \n",
  790. "... ... ... ... ... ... \n",
  791. "965953 1095676 999990005 38 99999000500038 1993-07-01 00:00:00 \n",
  792. "965954 1095677 999990062 39 99999006200039 2007-11-05 00:00:00 \n",
  793. "965955 1095678 999990286 18 99999028600018 1979-11-30 00:00:00 \n",
  794. "965956 1095679 999990369 87 99999036900087 2014-03-31 00:00:00 \n",
  795. "965957 1095680 999990401 96 99999040100096 2009-06-23 00:00:00 \n",
  796. "\n",
  797. " trancheEffectifsEtablissement anneeEffectifsEtablissement \\\n",
  798. "0 2 2018.0 \n",
  799. "1 3 2018.0 \n",
  800. "2 12 2018.0 \n",
  801. "3 12 2018.0 \n",
  802. "4 12 2018.0 \n",
  803. "... ... ... \n",
  804. "965953 32 2018.0 \n",
  805. "965954 12 2018.0 \n",
  806. "965955 22 2018.0 \n",
  807. "965956 21 2018.0 \n",
  808. "965957 3 2018.0 \n",
  809. "\n",
  810. " activitePrincipaleRegistreMetiersEtablissement \\\n",
  811. "0 NaN \n",
  812. "1 NaN \n",
  813. "2 NaN \n",
  814. "3 NaN \n",
  815. "4 NaN \n",
  816. "... ... \n",
  817. "965953 NaN \n",
  818. "965954 NaN \n",
  819. "965955 NaN \n",
  820. "965956 NaN \n",
  821. "965957 2712ZZ \n",
  822. "\n",
  823. " dateDernierTraitementEtablissement etablissementSiege \\\n",
  824. "0 2020-08-25 10:10:13 False \n",
  825. "1 2021-01-01 03:35:01 True \n",
  826. "2 2021-08-01 21:30:57 True \n",
  827. "3 2020-08-25 10:10:13 False \n",
  828. "4 2021-08-01 21:30:57 True \n",
  829. "... ... ... \n",
  830. "965953 2021-08-01 20:15:35 True \n",
  831. "965954 2021-02-23 18:21:09 True \n",
  832. "965955 2021-04-04 20:15:10 True \n",
  833. "965956 2021-02-23 18:21:09 True \n",
  834. "965957 2021-03-19 03:37:02 True \n",
  835. "\n",
  836. " nombrePeriodesEtablissement dateDebut \\\n",
  837. "0 4 2008-01-01 00:00:00 \n",
  838. "1 6 2008-04-23 00:00:00 \n",
  839. "2 4 2008-01-01 00:00:00 \n",
  840. "3 4 2008-01-01 00:00:00 \n",
  841. "4 4 2008-01-01 00:00:00 \n",
  842. "... ... ... \n",
  843. "965953 5 2010-12-15 00:00:00 \n",
  844. "965954 2 2008-01-01 00:00:00 \n",
  845. "965955 4 2008-01-01 00:00:00 \n",
  846. "965956 1 2014-03-31 00:00:00 \n",
  847. "965957 1 2009-06-23 00:00:00 \n",
  848. "\n",
  849. " etatAdministratifEtablissement enseigne1Etablissement \\\n",
  850. "0 A NaN \n",
  851. "1 A NaN \n",
  852. "2 A NaN \n",
  853. "3 A NaN \n",
  854. "4 A NaN \n",
  855. "... ... ... \n",
  856. "965953 A NaN \n",
  857. "965954 A NaN \n",
  858. "965955 A NaN \n",
  859. "965956 A NaN \n",
  860. "965957 A NaN \n",
  861. "\n",
  862. " enseigne2Etablissement enseigne3Etablissement \\\n",
  863. "0 NaN NaN \n",
  864. "1 NaN NaN \n",
  865. "2 NaN NaN \n",
  866. "3 NaN NaN \n",
  867. "4 NaN NaN \n",
  868. "... ... ... \n",
  869. "965953 NaN NaN \n",
  870. "965954 NaN NaN \n",
  871. "965955 NaN NaN \n",
  872. "965956 NaN NaN \n",
  873. "965957 NaN NaN \n",
  874. "\n",
  875. " denominationUsuelleEtablissement activitePrincipaleEtablissement \\\n",
  876. "0 NaN 10.81Z \n",
  877. "1 NaN 70.10Z \n",
  878. "2 NaN 17.21A \n",
  879. "3 NaN 17.21A \n",
  880. "4 NaN 20.30Z \n",
  881. "... ... ... \n",
  882. "965953 NaN 49.41A \n",
  883. "965954 NaN 64.19Z \n",
  884. "965955 NaN 55.10Z \n",
  885. "965956 NaN 66.30Z \n",
  886. "965957 NaN 27.12Z \n",
  887. "\n",
  888. " nomenclatureActivitePrincipaleEtablissement \\\n",
  889. "0 NAFRev2 \n",
  890. "1 NAFRev2 \n",
  891. "2 NAFRev2 \n",
  892. "3 NAFRev2 \n",
  893. "4 NAFRev2 \n",
  894. "... ... \n",
  895. "965953 NAFRev2 \n",
  896. "965954 NAFRev2 \n",
  897. "965955 NAFRev2 \n",
  898. "965956 NAFRev2 \n",
  899. "965957 NAFRev2 \n",
  900. "\n",
  901. " caractereEmployeurEtablissement \n",
  902. "0 O \n",
  903. "1 O \n",
  904. "2 O \n",
  905. "3 O \n",
  906. "4 O \n",
  907. "... ... \n",
  908. "965953 O \n",
  909. "965954 O \n",
  910. "965955 O \n",
  911. "965956 O \n",
  912. "965957 O \n",
  913. "\n",
  914. "[832575 rows x 20 columns]"
  915. ]
  916. },
  917. "execution_count": 8,
  918. "metadata": {},
  919. "output_type": "execute_result"
  920. }
  921. ],
  922. "source": [
  923. "#filtrer sur les caractereEmployeurEtablissement O : unité légale employeuse\n",
  924. "dt=dt.loc[dt.caractereEmployeurEtablissement=='O']\n",
  925. "dt"
  926. ]
  927. },
  928. {
  929. "cell_type": "code",
  930. "execution_count": 11,
  931. "id": "b16cdb56",
  932. "metadata": {},
  933. "outputs": [
  934. {
  935. "name": "stdout",
  936. "output_type": "stream",
  937. "text": [
  938. "DataFrame is written to Excel File successfully.\n"
  939. ]
  940. }
  941. ],
  942. "source": [
  943. "#export du fichier en .xlsx\n",
  944. "file_name = 'SirenAdminNetFinal.xlsx'\n",
  945. "dt.to_excel(file_name)\n",
  946. "print('DataFrame is written to Excel File successfully.')"
  947. ]
  948. },
  949. {
  950. "cell_type": "code",
  951. "execution_count": null,
  952. "id": "9ecdee25",
  953. "metadata": {},
  954. "outputs": [],
  955. "source": []
  956. }
  957. ],
  958. "metadata": {
  959. "kernelspec": {
  960. "display_name": "Python 3 (ipykernel)",
  961. "language": "python",
  962. "name": "python3"
  963. },
  964. "language_info": {
  965. "codemirror_mode": {
  966. "name": "ipython",
  967. "version": 3
  968. },
  969. "file_extension": ".py",
  970. "mimetype": "text/x-python",
  971. "name": "python",
  972. "nbconvert_exporter": "python",
  973. "pygments_lexer": "ipython3",
  974. "version": "3.9.7"
  975. }
  976. },
  977. "nbformat": 4,
  978. "nbformat_minor": 5
  979. }