You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1584 lines
62 KiB

2 years ago
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 3,
  6. "id": "c425d5f5",
  7. "metadata": {},
  8. "outputs": [],
  9. "source": [
  10. "#import de la bibliothèque pandas\n",
  11. "import pandas as pd"
  12. ]
  13. },
  14. {
  15. "cell_type": "code",
  16. "execution_count": 4,
  17. "id": "a232e20f",
  18. "metadata": {},
  19. "outputs": [],
  20. "source": [
  21. "#affichage limité à 10 lignes\n",
  22. "pd.options.display.max_rows = 10"
  23. ]
  24. },
  25. {
  26. "cell_type": "code",
  27. "execution_count": 5,
  28. "id": "1c8fff82",
  29. "metadata": {},
  30. "outputs": [
  31. {
  32. "data": {
  33. "text/html": [
  34. "<div>\n",
  35. "<style scoped>\n",
  36. " .dataframe tbody tr th:only-of-type {\n",
  37. " vertical-align: middle;\n",
  38. " }\n",
  39. "\n",
  40. " .dataframe tbody tr th {\n",
  41. " vertical-align: top;\n",
  42. " }\n",
  43. "\n",
  44. " .dataframe thead th {\n",
  45. " text-align: right;\n",
  46. " }\n",
  47. "</style>\n",
  48. "<table border=\"1\" class=\"dataframe\">\n",
  49. " <thead>\n",
  50. " <tr style=\"text-align: right;\">\n",
  51. " <th></th>\n",
  52. " <th>siren</th>\n",
  53. " <th>nic</th>\n",
  54. " <th>siret</th>\n",
  55. " <th>statutDiffusionEtablissement</th>\n",
  56. " <th>dateCreationEtablissement</th>\n",
  57. " <th>trancheEffectifsEtablissement</th>\n",
  58. " <th>anneeEffectifsEtablissement</th>\n",
  59. " <th>activitePrincipaleRegistreMetiersEtablissement</th>\n",
  60. " <th>dateDernierTraitementEtablissement</th>\n",
  61. " <th>etablissementSiege</th>\n",
  62. " <th>...</th>\n",
  63. " <th>codePaysEtranger2Etablissement</th>\n",
  64. " <th>libellePaysEtranger2Etablissement</th>\n",
  65. " <th>etatAdministratifEtablissement</th>\n",
  66. " <th>enseigne1Etablissement</th>\n",
  67. " <th>enseigne2Etablissement</th>\n",
  68. " <th>enseigne3Etablissement</th>\n",
  69. " <th>denominationUsuelleEtablissement</th>\n",
  70. " <th>activitePrincipaleEtablissement</th>\n",
  71. " <th>nomenclatureActivitePrincipaleEtablissement</th>\n",
  72. " <th>caractereEmployeurEtablissement</th>\n",
  73. " </tr>\n",
  74. " </thead>\n",
  75. " <tbody>\n",
  76. " <tr>\n",
  77. " <th>0</th>\n",
  78. " <td>5620190</td>\n",
  79. " <td>65</td>\n",
  80. " <td>562019000065</td>\n",
  81. " <td>O</td>\n",
  82. " <td>2016-01-01</td>\n",
  83. " <td>21</td>\n",
  84. " <td>2020.0</td>\n",
  85. " <td>NaN</td>\n",
  86. " <td>2022-08-29T09:00:21</td>\n",
  87. " <td>True</td>\n",
  88. " <td>...</td>\n",
  89. " <td>NaN</td>\n",
  90. " <td>NaN</td>\n",
  91. " <td>A</td>\n",
  92. " <td>NaN</td>\n",
  93. " <td>NaN</td>\n",
  94. " <td>NaN</td>\n",
  95. " <td>NaN</td>\n",
  96. " <td>49.39A</td>\n",
  97. " <td>NAFRev2</td>\n",
  98. " <td>O</td>\n",
  99. " </tr>\n",
  100. " <tr>\n",
  101. " <th>1</th>\n",
  102. " <td>5680145</td>\n",
  103. " <td>17</td>\n",
  104. " <td>568014500017</td>\n",
  105. " <td>O</td>\n",
  106. " <td>1956-01-01</td>\n",
  107. " <td>21</td>\n",
  108. " <td>2020.0</td>\n",
  109. " <td>NaN</td>\n",
  110. " <td>2022-10-04T04:12:29</td>\n",
  111. " <td>True</td>\n",
  112. " <td>...</td>\n",
  113. " <td>NaN</td>\n",
  114. " <td>NaN</td>\n",
  115. " <td>A</td>\n",
  116. " <td>NaN</td>\n",
  117. " <td>NaN</td>\n",
  118. " <td>NaN</td>\n",
  119. " <td>NaN</td>\n",
  120. " <td>22.22Z</td>\n",
  121. " <td>NAFRev2</td>\n",
  122. " <td>O</td>\n",
  123. " </tr>\n",
  124. " <tr>\n",
  125. " <th>2</th>\n",
  126. " <td>5720164</td>\n",
  127. " <td>28</td>\n",
  128. " <td>572016400028</td>\n",
  129. " <td>O</td>\n",
  130. " <td>1983-09-28</td>\n",
  131. " <td>22</td>\n",
  132. " <td>2020.0</td>\n",
  133. " <td>NaN</td>\n",
  134. " <td>2022-08-29T09:00:21</td>\n",
  135. " <td>True</td>\n",
  136. " <td>...</td>\n",
  137. " <td>NaN</td>\n",
  138. " <td>NaN</td>\n",
  139. " <td>A</td>\n",
  140. " <td>NaN</td>\n",
  141. " <td>NaN</td>\n",
  142. " <td>NaN</td>\n",
  143. " <td>NaN</td>\n",
  144. " <td>86.10Z</td>\n",
  145. " <td>NAFRev2</td>\n",
  146. " <td>O</td>\n",
  147. " </tr>\n",
  148. " <tr>\n",
  149. " <th>3</th>\n",
  150. " <td>5720784</td>\n",
  151. " <td>31</td>\n",
  152. " <td>572078400031</td>\n",
  153. " <td>O</td>\n",
  154. " <td>1993-04-01</td>\n",
  155. " <td>22</td>\n",
  156. " <td>2020.0</td>\n",
  157. " <td>NaN</td>\n",
  158. " <td>2022-08-29T09:00:21</td>\n",
  159. " <td>False</td>\n",
  160. " <td>...</td>\n",
  161. " <td>NaN</td>\n",
  162. " <td>NaN</td>\n",
  163. " <td>A</td>\n",
  164. " <td>NaN</td>\n",
  165. " <td>NaN</td>\n",
  166. " <td>NaN</td>\n",
  167. " <td>NaN</td>\n",
  168. " <td>25.72Z</td>\n",
  169. " <td>NAFRev2</td>\n",
  170. " <td>O</td>\n",
  171. " </tr>\n",
  172. " <tr>\n",
  173. " <th>4</th>\n",
  174. " <td>5780960</td>\n",
  175. " <td>26</td>\n",
  176. " <td>578096000026</td>\n",
  177. " <td>O</td>\n",
  178. " <td>1981-12-28</td>\n",
  179. " <td>21</td>\n",
  180. " <td>2020.0</td>\n",
  181. " <td>NaN</td>\n",
  182. " <td>2022-08-29T09:00:21</td>\n",
  183. " <td>False</td>\n",
  184. " <td>...</td>\n",
  185. " <td>NaN</td>\n",
  186. " <td>NaN</td>\n",
  187. " <td>A</td>\n",
  188. " <td>HOTEL ROYAL THALASSO</td>\n",
  189. " <td>NaN</td>\n",
  190. " <td>NaN</td>\n",
  191. " <td>NaN</td>\n",
  192. " <td>55.10Z</td>\n",
  193. " <td>NAFRev2</td>\n",
  194. " <td>O</td>\n",
  195. " </tr>\n",
  196. " <tr>\n",
  197. " <th>...</th>\n",
  198. " <td>...</td>\n",
  199. " <td>...</td>\n",
  200. " <td>...</td>\n",
  201. " <td>...</td>\n",
  202. " <td>...</td>\n",
  203. " <td>...</td>\n",
  204. " <td>...</td>\n",
  205. " <td>...</td>\n",
  206. " <td>...</td>\n",
  207. " <td>...</td>\n",
  208. " <td>...</td>\n",
  209. " <td>...</td>\n",
  210. " <td>...</td>\n",
  211. " <td>...</td>\n",
  212. " <td>...</td>\n",
  213. " <td>...</td>\n",
  214. " <td>...</td>\n",
  215. " <td>...</td>\n",
  216. " <td>...</td>\n",
  217. " <td>...</td>\n",
  218. " <td>...</td>\n",
  219. " </tr>\n",
  220. " <tr>\n",
  221. " <th>82442</th>\n",
  222. " <td>998893002</td>\n",
  223. " <td>142</td>\n",
  224. " <td>99889300200142</td>\n",
  225. " <td>O</td>\n",
  226. " <td>2013-05-01</td>\n",
  227. " <td>21</td>\n",
  228. " <td>2020.0</td>\n",
  229. " <td>NaN</td>\n",
  230. " <td>2022-08-29T10:50:43</td>\n",
  231. " <td>False</td>\n",
  232. " <td>...</td>\n",
  233. " <td>NaN</td>\n",
  234. " <td>NaN</td>\n",
  235. " <td>A</td>\n",
  236. " <td>NaN</td>\n",
  237. " <td>NaN</td>\n",
  238. " <td>NaN</td>\n",
  239. " <td>NaN</td>\n",
  240. " <td>49.39B</td>\n",
  241. " <td>NAFRev2</td>\n",
  242. " <td>O</td>\n",
  243. " </tr>\n",
  244. " <tr>\n",
  245. " <th>82443</th>\n",
  246. " <td>999990005</td>\n",
  247. " <td>38</td>\n",
  248. " <td>99999000500038</td>\n",
  249. " <td>O</td>\n",
  250. " <td>1993-07-01</td>\n",
  251. " <td>22</td>\n",
  252. " <td>2020.0</td>\n",
  253. " <td>NaN</td>\n",
  254. " <td>2022-08-29T10:50:43</td>\n",
  255. " <td>True</td>\n",
  256. " <td>...</td>\n",
  257. " <td>NaN</td>\n",
  258. " <td>NaN</td>\n",
  259. " <td>A</td>\n",
  260. " <td>NaN</td>\n",
  261. " <td>NaN</td>\n",
  262. " <td>NaN</td>\n",
  263. " <td>NaN</td>\n",
  264. " <td>49.41A</td>\n",
  265. " <td>NAFRev2</td>\n",
  266. " <td>O</td>\n",
  267. " </tr>\n",
  268. " <tr>\n",
  269. " <th>82444</th>\n",
  270. " <td>999990005</td>\n",
  271. " <td>491</td>\n",
  272. " <td>99999000500491</td>\n",
  273. " <td>O</td>\n",
  274. " <td>2017-04-01</td>\n",
  275. " <td>21</td>\n",
  276. " <td>2020.0</td>\n",
  277. " <td>NaN</td>\n",
  278. " <td>2022-08-29T10:50:43</td>\n",
  279. " <td>False</td>\n",
  280. " <td>...</td>\n",
  281. " <td>NaN</td>\n",
  282. " <td>NaN</td>\n",
  283. " <td>A</td>\n",
  284. " <td>NaN</td>\n",
  285. " <td>NaN</td>\n",
  286. " <td>NaN</td>\n",
  287. " <td>NaN</td>\n",
  288. " <td>49.41A</td>\n",
  289. " <td>NAFRev2</td>\n",
  290. " <td>O</td>\n",
  291. " </tr>\n",
  292. " <tr>\n",
  293. " <th>82445</th>\n",
  294. " <td>999990286</td>\n",
  295. " <td>18</td>\n",
  296. " <td>99999028600018</td>\n",
  297. " <td>O</td>\n",
  298. " <td>1979-11-30</td>\n",
  299. " <td>22</td>\n",
  300. " <td>2020.0</td>\n",
  301. " <td>NaN</td>\n",
  302. " <td>2022-08-29T10:50:43</td>\n",
  303. " <td>True</td>\n",
  304. " <td>...</td>\n",
  305. " <td>NaN</td>\n",
  306. " <td>NaN</td>\n",
  307. " <td>A</td>\n",
  308. " <td>NaN</td>\n",
  309. " <td>NaN</td>\n",
  310. " <td>NaN</td>\n",
  311. " <td>NaN</td>\n",
  312. " <td>55.10Z</td>\n",
  313. " <td>NAFRev2</td>\n",
  314. " <td>O</td>\n",
  315. " </tr>\n",
  316. " <tr>\n",
  317. " <th>82446</th>\n",
  318. " <td>999990369</td>\n",
  319. " <td>87</td>\n",
  320. " <td>99999036900087</td>\n",
  321. " <td>O</td>\n",
  322. " <td>2014-03-31</td>\n",
  323. " <td>21</td>\n",
  324. " <td>2020.0</td>\n",
  325. " <td>NaN</td>\n",
  326. " <td>2022-10-02T03:38:31</td>\n",
  327. " <td>True</td>\n",
  328. " <td>...</td>\n",
  329. " <td>NaN</td>\n",
  330. " <td>NaN</td>\n",
  331. " <td>A</td>\n",
  332. " <td>NaN</td>\n",
  333. " <td>NaN</td>\n",
  334. " <td>NaN</td>\n",
  335. " <td>NaN</td>\n",
  336. " <td>66.30Z</td>\n",
  337. " <td>NAFRev2</td>\n",
  338. " <td>O</td>\n",
  339. " </tr>\n",
  340. " </tbody>\n",
  341. "</table>\n",
  342. "<p>82447 rows × 77 columns</p>\n",
  343. "</div>"
  344. ],
  345. "text/plain": [
  346. " siren nic siret statutDiffusionEtablissement \\\n",
  347. "0 5620190 65 562019000065 O \n",
  348. "1 5680145 17 568014500017 O \n",
  349. "2 5720164 28 572016400028 O \n",
  350. "3 5720784 31 572078400031 O \n",
  351. "4 5780960 26 578096000026 O \n",
  352. "... ... ... ... ... \n",
  353. "82442 998893002 142 99889300200142 O \n",
  354. "82443 999990005 38 99999000500038 O \n",
  355. "82444 999990005 491 99999000500491 O \n",
  356. "82445 999990286 18 99999028600018 O \n",
  357. "82446 999990369 87 99999036900087 O \n",
  358. "\n",
  359. " dateCreationEtablissement trancheEffectifsEtablissement \\\n",
  360. "0 2016-01-01 21 \n",
  361. "1 1956-01-01 21 \n",
  362. "2 1983-09-28 22 \n",
  363. "3 1993-04-01 22 \n",
  364. "4 1981-12-28 21 \n",
  365. "... ... ... \n",
  366. "82442 2013-05-01 21 \n",
  367. "82443 1993-07-01 22 \n",
  368. "82444 2017-04-01 21 \n",
  369. "82445 1979-11-30 22 \n",
  370. "82446 2014-03-31 21 \n",
  371. "\n",
  372. " anneeEffectifsEtablissement \\\n",
  373. "0 2020.0 \n",
  374. "1 2020.0 \n",
  375. "2 2020.0 \n",
  376. "3 2020.0 \n",
  377. "4 2020.0 \n",
  378. "... ... \n",
  379. "82442 2020.0 \n",
  380. "82443 2020.0 \n",
  381. "82444 2020.0 \n",
  382. "82445 2020.0 \n",
  383. "82446 2020.0 \n",
  384. "\n",
  385. " activitePrincipaleRegistreMetiersEtablissement \\\n",
  386. "0 NaN \n",
  387. "1 NaN \n",
  388. "2 NaN \n",
  389. "3 NaN \n",
  390. "4 NaN \n",
  391. "... ... \n",
  392. "82442 NaN \n",
  393. "82443 NaN \n",
  394. "82444 NaN \n",
  395. "82445 NaN \n",
  396. "82446 NaN \n",
  397. "\n",
  398. " dateDernierTraitementEtablissement etablissementSiege ... \\\n",
  399. "0 2022-08-29T09:00:21 True ... \n",
  400. "1 2022-10-04T04:12:29 True ... \n",
  401. "2 2022-08-29T09:00:21 True ... \n",
  402. "3 2022-08-29T09:00:21 False ... \n",
  403. "4 2022-08-29T09:00:21 False ... \n",
  404. "... ... ... ... \n",
  405. "82442 2022-08-29T10:50:43 False ... \n",
  406. "82443 2022-08-29T10:50:43 True ... \n",
  407. "82444 2022-08-29T10:50:43 False ... \n",
  408. "82445 2022-08-29T10:50:43 True ... \n",
  409. "82446 2022-10-02T03:38:31 True ... \n",
  410. "\n",
  411. " codePaysEtranger2Etablissement libellePaysEtranger2Etablissement \\\n",
  412. "0 NaN NaN \n",
  413. "1 NaN NaN \n",
  414. "2 NaN NaN \n",
  415. "3 NaN NaN \n",
  416. "4 NaN NaN \n",
  417. "... ... ... \n",
  418. "82442 NaN NaN \n",
  419. "82443 NaN NaN \n",
  420. "82444 NaN NaN \n",
  421. "82445 NaN NaN \n",
  422. "82446 NaN NaN \n",
  423. "\n",
  424. " etatAdministratifEtablissement enseigne1Etablissement \\\n",
  425. "0 A NaN \n",
  426. "1 A NaN \n",
  427. "2 A NaN \n",
  428. "3 A NaN \n",
  429. "4 A HOTEL ROYAL THALASSO \n",
  430. "... ... ... \n",
  431. "82442 A NaN \n",
  432. "82443 A NaN \n",
  433. "82444 A NaN \n",
  434. "82445 A NaN \n",
  435. "82446 A NaN \n",
  436. "\n",
  437. " enseigne2Etablissement enseigne3Etablissement \\\n",
  438. "0 NaN NaN \n",
  439. "1 NaN NaN \n",
  440. "2 NaN NaN \n",
  441. "3 NaN NaN \n",
  442. "4 NaN NaN \n",
  443. "... ... ... \n",
  444. "82442 NaN NaN \n",
  445. "82443 NaN NaN \n",
  446. "82444 NaN NaN \n",
  447. "82445 NaN NaN \n",
  448. "82446 NaN NaN \n",
  449. "\n",
  450. " denominationUsuelleEtablissement activitePrincipaleEtablissement \\\n",
  451. "0 NaN 49.39A \n",
  452. "1 NaN 22.22Z \n",
  453. "2 NaN 86.10Z \n",
  454. "3 NaN 25.72Z \n",
  455. "4 NaN 55.10Z \n",
  456. "... ... ... \n",
  457. "82442 NaN 49.39B \n",
  458. "82443 NaN 49.41A \n",
  459. "82444 NaN 49.41A \n",
  460. "82445 NaN 55.10Z \n",
  461. "82446 NaN 66.30Z \n",
  462. "\n",
  463. " nomenclatureActivitePrincipaleEtablissement \\\n",
  464. "0 NAFRev2 \n",
  465. "1 NAFRev2 \n",
  466. "2 NAFRev2 \n",
  467. "3 NAFRev2 \n",
  468. "4 NAFRev2 \n",
  469. "... ... \n",
  470. "82442 NAFRev2 \n",
  471. "82443 NAFRev2 \n",
  472. "82444 NAFRev2 \n",
  473. "82445 NAFRev2 \n",
  474. "82446 NAFRev2 \n",
  475. "\n",
  476. " caractereEmployeurEtablissement \n",
  477. "0 O \n",
  478. "1 O \n",
  479. "2 O \n",
  480. "3 O \n",
  481. "4 O \n",
  482. "... ... \n",
  483. "82442 O \n",
  484. "82443 O \n",
  485. "82444 O \n",
  486. "82445 O \n",
  487. "82446 O \n",
  488. "\n",
  489. "[82447 rows x 77 columns]"
  490. ]
  491. },
  492. "execution_count": 5,
  493. "metadata": {},
  494. "output_type": "execute_result"
  495. }
  496. ],
  497. "source": [
  498. "#import du fichier excel\n",
  499. "entreprise50 = pd.read_excel('sirenentr.xlsx')\n",
  500. "entreprise50"
  501. ]
  502. },
  503. {
  504. "cell_type": "code",
  505. "execution_count": 6,
  506. "id": "7400d118",
  507. "metadata": {},
  508. "outputs": [
  509. {
  510. "data": {
  511. "text/plain": [
  512. "(82447, 77)"
  513. ]
  514. },
  515. "execution_count": 6,
  516. "metadata": {},
  517. "output_type": "execute_result"
  518. }
  519. ],
  520. "source": [
  521. "#Afficher le nombre de lignes et de colonnes\n",
  522. "entreprise50.shape"
  523. ]
  524. },
  525. {
  526. "cell_type": "code",
  527. "execution_count": 7,
  528. "id": "b7284e73",
  529. "metadata": {
  530. "scrolled": true
  531. },
  532. "outputs": [
  533. {
  534. "data": {
  535. "text/html": [
  536. "<div>\n",
  537. "<style scoped>\n",
  538. " .dataframe tbody tr th:only-of-type {\n",
  539. " vertical-align: middle;\n",
  540. " }\n",
  541. "\n",
  542. " .dataframe tbody tr th {\n",
  543. " vertical-align: top;\n",
  544. " }\n",
  545. "\n",
  546. " .dataframe thead th {\n",
  547. " text-align: right;\n",
  548. " }\n",
  549. "</style>\n",
  550. "<table border=\"1\" class=\"dataframe\">\n",
  551. " <thead>\n",
  552. " <tr style=\"text-align: right;\">\n",
  553. " <th></th>\n",
  554. " <th>siren</th>\n",
  555. " <th>nic</th>\n",
  556. " <th>siret</th>\n",
  557. " <th>statutDiffusionEtablissement</th>\n",
  558. " <th>dateCreationEtablissement</th>\n",
  559. " <th>trancheEffectifsEtablissement</th>\n",
  560. " <th>anneeEffectifsEtablissement</th>\n",
  561. " <th>activitePrincipaleRegistreMetiersEtablissement</th>\n",
  562. " <th>dateDernierTraitementEtablissement</th>\n",
  563. " <th>etablissementSiege</th>\n",
  564. " <th>...</th>\n",
  565. " <th>codeCedex2Etablissement</th>\n",
  566. " <th>libelleCedex2Etablissement</th>\n",
  567. " <th>etatAdministratifEtablissement</th>\n",
  568. " <th>enseigne1Etablissement</th>\n",
  569. " <th>enseigne2Etablissement</th>\n",
  570. " <th>enseigne3Etablissement</th>\n",
  571. " <th>denominationUsuelleEtablissement</th>\n",
  572. " <th>activitePrincipaleEtablissement</th>\n",
  573. " <th>nomenclatureActivitePrincipaleEtablissement</th>\n",
  574. " <th>caractereEmployeurEtablissement</th>\n",
  575. " </tr>\n",
  576. " </thead>\n",
  577. " <tbody>\n",
  578. " <tr>\n",
  579. " <th>0</th>\n",
  580. " <td>5620190</td>\n",
  581. " <td>65</td>\n",
  582. " <td>562019000065</td>\n",
  583. " <td>O</td>\n",
  584. " <td>2016-01-01</td>\n",
  585. " <td>21</td>\n",
  586. " <td>2020.0</td>\n",
  587. " <td>NaN</td>\n",
  588. " <td>2022-08-29T09:00:21</td>\n",
  589. " <td>True</td>\n",
  590. " <td>...</td>\n",
  591. " <td>NaN</td>\n",
  592. " <td>NaN</td>\n",
  593. " <td>A</td>\n",
  594. " <td>NaN</td>\n",
  595. " <td>NaN</td>\n",
  596. " <td>NaN</td>\n",
  597. " <td>NaN</td>\n",
  598. " <td>49.39A</td>\n",
  599. " <td>NAFRev2</td>\n",
  600. " <td>O</td>\n",
  601. " </tr>\n",
  602. " <tr>\n",
  603. " <th>1</th>\n",
  604. " <td>5680145</td>\n",
  605. " <td>17</td>\n",
  606. " <td>568014500017</td>\n",
  607. " <td>O</td>\n",
  608. " <td>1956-01-01</td>\n",
  609. " <td>21</td>\n",
  610. " <td>2020.0</td>\n",
  611. " <td>NaN</td>\n",
  612. " <td>2022-10-04T04:12:29</td>\n",
  613. " <td>True</td>\n",
  614. " <td>...</td>\n",
  615. " <td>NaN</td>\n",
  616. " <td>NaN</td>\n",
  617. " <td>A</td>\n",
  618. " <td>NaN</td>\n",
  619. " <td>NaN</td>\n",
  620. " <td>NaN</td>\n",
  621. " <td>NaN</td>\n",
  622. " <td>22.22Z</td>\n",
  623. " <td>NAFRev2</td>\n",
  624. " <td>O</td>\n",
  625. " </tr>\n",
  626. " <tr>\n",
  627. " <th>2</th>\n",
  628. " <td>5720164</td>\n",
  629. " <td>28</td>\n",
  630. " <td>572016400028</td>\n",
  631. " <td>O</td>\n",
  632. " <td>1983-09-28</td>\n",
  633. " <td>22</td>\n",
  634. " <td>2020.0</td>\n",
  635. " <td>NaN</td>\n",
  636. " <td>2022-08-29T09:00:21</td>\n",
  637. " <td>True</td>\n",
  638. " <td>...</td>\n",
  639. " <td>NaN</td>\n",
  640. " <td>NaN</td>\n",
  641. " <td>A</td>\n",
  642. " <td>NaN</td>\n",
  643. " <td>NaN</td>\n",
  644. " <td>NaN</td>\n",
  645. " <td>NaN</td>\n",
  646. " <td>86.10Z</td>\n",
  647. " <td>NAFRev2</td>\n",
  648. " <td>O</td>\n",
  649. " </tr>\n",
  650. " <tr>\n",
  651. " <th>3</th>\n",
  652. " <td>5720784</td>\n",
  653. " <td>31</td>\n",
  654. " <td>572078400031</td>\n",
  655. " <td>O</td>\n",
  656. " <td>1993-04-01</td>\n",
  657. " <td>22</td>\n",
  658. " <td>2020.0</td>\n",
  659. " <td>NaN</td>\n",
  660. " <td>2022-08-29T09:00:21</td>\n",
  661. " <td>False</td>\n",
  662. " <td>...</td>\n",
  663. " <td>NaN</td>\n",
  664. " <td>NaN</td>\n",
  665. " <td>A</td>\n",
  666. " <td>NaN</td>\n",
  667. " <td>NaN</td>\n",
  668. " <td>NaN</td>\n",
  669. " <td>NaN</td>\n",
  670. " <td>25.72Z</td>\n",
  671. " <td>NAFRev2</td>\n",
  672. " <td>O</td>\n",
  673. " </tr>\n",
  674. " <tr>\n",
  675. " <th>4</th>\n",
  676. " <td>5780960</td>\n",
  677. " <td>26</td>\n",
  678. " <td>578096000026</td>\n",
  679. " <td>O</td>\n",
  680. " <td>1981-12-28</td>\n",
  681. " <td>21</td>\n",
  682. " <td>2020.0</td>\n",
  683. " <td>NaN</td>\n",
  684. " <td>2022-08-29T09:00:21</td>\n",
  685. " <td>False</td>\n",
  686. " <td>...</td>\n",
  687. " <td>NaN</td>\n",
  688. " <td>NaN</td>\n",
  689. " <td>A</td>\n",
  690. " <td>HOTEL ROYAL THALASSO</td>\n",
  691. " <td>NaN</td>\n",
  692. " <td>NaN</td>\n",
  693. " <td>NaN</td>\n",
  694. " <td>55.10Z</td>\n",
  695. " <td>NAFRev2</td>\n",
  696. " <td>O</td>\n",
  697. " </tr>\n",
  698. " <tr>\n",
  699. " <th>...</th>\n",
  700. " <td>...</td>\n",
  701. " <td>...</td>\n",
  702. " <td>...</td>\n",
  703. " <td>...</td>\n",
  704. " <td>...</td>\n",
  705. " <td>...</td>\n",
  706. " <td>...</td>\n",
  707. " <td>...</td>\n",
  708. " <td>...</td>\n",
  709. " <td>...</td>\n",
  710. " <td>...</td>\n",
  711. " <td>...</td>\n",
  712. " <td>...</td>\n",
  713. " <td>...</td>\n",
  714. " <td>...</td>\n",
  715. " <td>...</td>\n",
  716. " <td>...</td>\n",
  717. " <td>...</td>\n",
  718. " <td>...</td>\n",
  719. " <td>...</td>\n",
  720. " <td>...</td>\n",
  721. " </tr>\n",
  722. " <tr>\n",
  723. " <th>82442</th>\n",
  724. " <td>998893002</td>\n",
  725. " <td>142</td>\n",
  726. " <td>99889300200142</td>\n",
  727. " <td>O</td>\n",
  728. " <td>2013-05-01</td>\n",
  729. " <td>21</td>\n",
  730. " <td>2020.0</td>\n",
  731. " <td>NaN</td>\n",
  732. " <td>2022-08-29T10:50:43</td>\n",
  733. " <td>False</td>\n",
  734. " <td>...</td>\n",
  735. " <td>NaN</td>\n",
  736. " <td>NaN</td>\n",
  737. " <td>A</td>\n",
  738. " <td>NaN</td>\n",
  739. " <td>NaN</td>\n",
  740. " <td>NaN</td>\n",
  741. " <td>NaN</td>\n",
  742. " <td>49.39B</td>\n",
  743. " <td>NAFRev2</td>\n",
  744. " <td>O</td>\n",
  745. " </tr>\n",
  746. " <tr>\n",
  747. " <th>82443</th>\n",
  748. " <td>999990005</td>\n",
  749. " <td>38</td>\n",
  750. " <td>99999000500038</td>\n",
  751. " <td>O</td>\n",
  752. " <td>1993-07-01</td>\n",
  753. " <td>22</td>\n",
  754. " <td>2020.0</td>\n",
  755. " <td>NaN</td>\n",
  756. " <td>2022-08-29T10:50:43</td>\n",
  757. " <td>True</td>\n",
  758. " <td>...</td>\n",
  759. " <td>NaN</td>\n",
  760. " <td>NaN</td>\n",
  761. " <td>A</td>\n",
  762. " <td>NaN</td>\n",
  763. " <td>NaN</td>\n",
  764. " <td>NaN</td>\n",
  765. " <td>NaN</td>\n",
  766. " <td>49.41A</td>\n",
  767. " <td>NAFRev2</td>\n",
  768. " <td>O</td>\n",
  769. " </tr>\n",
  770. " <tr>\n",
  771. " <th>82444</th>\n",
  772. " <td>999990005</td>\n",
  773. " <td>491</td>\n",
  774. " <td>99999000500491</td>\n",
  775. " <td>O</td>\n",
  776. " <td>2017-04-01</td>\n",
  777. " <td>21</td>\n",
  778. " <td>2020.0</td>\n",
  779. " <td>NaN</td>\n",
  780. " <td>2022-08-29T10:50:43</td>\n",
  781. " <td>False</td>\n",
  782. " <td>...</td>\n",
  783. " <td>NaN</td>\n",
  784. " <td>NaN</td>\n",
  785. " <td>A</td>\n",
  786. " <td>NaN</td>\n",
  787. " <td>NaN</td>\n",
  788. " <td>NaN</td>\n",
  789. " <td>NaN</td>\n",
  790. " <td>49.41A</td>\n",
  791. " <td>NAFRev2</td>\n",
  792. " <td>O</td>\n",
  793. " </tr>\n",
  794. " <tr>\n",
  795. " <th>82445</th>\n",
  796. " <td>999990286</td>\n",
  797. " <td>18</td>\n",
  798. " <td>99999028600018</td>\n",
  799. " <td>O</td>\n",
  800. " <td>1979-11-30</td>\n",
  801. " <td>22</td>\n",
  802. " <td>2020.0</td>\n",
  803. " <td>NaN</td>\n",
  804. " <td>2022-08-29T10:50:43</td>\n",
  805. " <td>True</td>\n",
  806. " <td>...</td>\n",
  807. " <td>NaN</td>\n",
  808. " <td>NaN</td>\n",
  809. " <td>A</td>\n",
  810. " <td>NaN</td>\n",
  811. " <td>NaN</td>\n",
  812. " <td>NaN</td>\n",
  813. " <td>NaN</td>\n",
  814. " <td>55.10Z</td>\n",
  815. " <td>NAFRev2</td>\n",
  816. " <td>O</td>\n",
  817. " </tr>\n",
  818. " <tr>\n",
  819. " <th>82446</th>\n",
  820. " <td>999990369</td>\n",
  821. " <td>87</td>\n",
  822. " <td>99999036900087</td>\n",
  823. " <td>O</td>\n",
  824. " <td>2014-03-31</td>\n",
  825. " <td>21</td>\n",
  826. " <td>2020.0</td>\n",
  827. " <td>NaN</td>\n",
  828. " <td>2022-10-02T03:38:31</td>\n",
  829. " <td>True</td>\n",
  830. " <td>...</td>\n",
  831. " <td>NaN</td>\n",
  832. " <td>NaN</td>\n",
  833. " <td>A</td>\n",
  834. " <td>NaN</td>\n",
  835. " <td>NaN</td>\n",
  836. " <td>NaN</td>\n",
  837. " <td>NaN</td>\n",
  838. " <td>66.30Z</td>\n",
  839. " <td>NAFRev2</td>\n",
  840. " <td>O</td>\n",
  841. " </tr>\n",
  842. " </tbody>\n",
  843. "</table>\n",
  844. "<p>82447 rows × 71 columns</p>\n",
  845. "</div>"
  846. ],
  847. "text/plain": [
  848. " siren nic siret statutDiffusionEtablissement \\\n",
  849. "0 5620190 65 562019000065 O \n",
  850. "1 5680145 17 568014500017 O \n",
  851. "2 5720164 28 572016400028 O \n",
  852. "3 5720784 31 572078400031 O \n",
  853. "4 5780960 26 578096000026 O \n",
  854. "... ... ... ... ... \n",
  855. "82442 998893002 142 99889300200142 O \n",
  856. "82443 999990005 38 99999000500038 O \n",
  857. "82444 999990005 491 99999000500491 O \n",
  858. "82445 999990286 18 99999028600018 O \n",
  859. "82446 999990369 87 99999036900087 O \n",
  860. "\n",
  861. " dateCreationEtablissement trancheEffectifsEtablissement \\\n",
  862. "0 2016-01-01 21 \n",
  863. "1 1956-01-01 21 \n",
  864. "2 1983-09-28 22 \n",
  865. "3 1993-04-01 22 \n",
  866. "4 1981-12-28 21 \n",
  867. "... ... ... \n",
  868. "82442 2013-05-01 21 \n",
  869. "82443 1993-07-01 22 \n",
  870. "82444 2017-04-01 21 \n",
  871. "82445 1979-11-30 22 \n",
  872. "82446 2014-03-31 21 \n",
  873. "\n",
  874. " anneeEffectifsEtablissement \\\n",
  875. "0 2020.0 \n",
  876. "1 2020.0 \n",
  877. "2 2020.0 \n",
  878. "3 2020.0 \n",
  879. "4 2020.0 \n",
  880. "... ... \n",
  881. "82442 2020.0 \n",
  882. "82443 2020.0 \n",
  883. "82444 2020.0 \n",
  884. "82445 2020.0 \n",
  885. "82446 2020.0 \n",
  886. "\n",
  887. " activitePrincipaleRegistreMetiersEtablissement \\\n",
  888. "0 NaN \n",
  889. "1 NaN \n",
  890. "2 NaN \n",
  891. "3 NaN \n",
  892. "4 NaN \n",
  893. "... ... \n",
  894. "82442 NaN \n",
  895. "82443 NaN \n",
  896. "82444 NaN \n",
  897. "82445 NaN \n",
  898. "82446 NaN \n",
  899. "\n",
  900. " dateDernierTraitementEtablissement etablissementSiege ... \\\n",
  901. "0 2022-08-29T09:00:21 True ... \n",
  902. "1 2022-10-04T04:12:29 True ... \n",
  903. "2 2022-08-29T09:00:21 True ... \n",
  904. "3 2022-08-29T09:00:21 False ... \n",
  905. "4 2022-08-29T09:00:21 False ... \n",
  906. "... ... ... ... \n",
  907. "82442 2022-08-29T10:50:43 False ... \n",
  908. "82443 2022-08-29T10:50:43 True ... \n",
  909. "82444 2022-08-29T10:50:43 False ... \n",
  910. "82445 2022-08-29T10:50:43 True ... \n",
  911. "82446 2022-10-02T03:38:31 True ... \n",
  912. "\n",
  913. " codeCedex2Etablissement libelleCedex2Etablissement \\\n",
  914. "0 NaN NaN \n",
  915. "1 NaN NaN \n",
  916. "2 NaN NaN \n",
  917. "3 NaN NaN \n",
  918. "4 NaN NaN \n",
  919. "... ... ... \n",
  920. "82442 NaN NaN \n",
  921. "82443 NaN NaN \n",
  922. "82444 NaN NaN \n",
  923. "82445 NaN NaN \n",
  924. "82446 NaN NaN \n",
  925. "\n",
  926. " etatAdministratifEtablissement enseigne1Etablissement \\\n",
  927. "0 A NaN \n",
  928. "1 A NaN \n",
  929. "2 A NaN \n",
  930. "3 A NaN \n",
  931. "4 A HOTEL ROYAL THALASSO \n",
  932. "... ... ... \n",
  933. "82442 A NaN \n",
  934. "82443 A NaN \n",
  935. "82444 A NaN \n",
  936. "82445 A NaN \n",
  937. "82446 A NaN \n",
  938. "\n",
  939. " enseigne2Etablissement enseigne3Etablissement \\\n",
  940. "0 NaN NaN \n",
  941. "1 NaN NaN \n",
  942. "2 NaN NaN \n",
  943. "3 NaN NaN \n",
  944. "4 NaN NaN \n",
  945. "... ... ... \n",
  946. "82442 NaN NaN \n",
  947. "82443 NaN NaN \n",
  948. "82444 NaN NaN \n",
  949. "82445 NaN NaN \n",
  950. "82446 NaN NaN \n",
  951. "\n",
  952. " denominationUsuelleEtablissement activitePrincipaleEtablissement \\\n",
  953. "0 NaN 49.39A \n",
  954. "1 NaN 22.22Z \n",
  955. "2 NaN 86.10Z \n",
  956. "3 NaN 25.72Z \n",
  957. "4 NaN 55.10Z \n",
  958. "... ... ... \n",
  959. "82442 NaN 49.39B \n",
  960. "82443 NaN 49.41A \n",
  961. "82444 NaN 49.41A \n",
  962. "82445 NaN 55.10Z \n",
  963. "82446 NaN 66.30Z \n",
  964. "\n",
  965. " nomenclatureActivitePrincipaleEtablissement \\\n",
  966. "0 NAFRev2 \n",
  967. "1 NAFRev2 \n",
  968. "2 NAFRev2 \n",
  969. "3 NAFRev2 \n",
  970. "4 NAFRev2 \n",
  971. "... ... \n",
  972. "82442 NAFRev2 \n",
  973. "82443 NAFRev2 \n",
  974. "82444 NAFRev2 \n",
  975. "82445 NAFRev2 \n",
  976. "82446 NAFRev2 \n",
  977. "\n",
  978. " caractereEmployeurEtablissement \n",
  979. "0 O \n",
  980. "1 O \n",
  981. "2 O \n",
  982. "3 O \n",
  983. "4 O \n",
  984. "... ... \n",
  985. "82442 O \n",
  986. "82443 O \n",
  987. "82444 O \n",
  988. "82445 O \n",
  989. "82446 O \n",
  990. "\n",
  991. "[82447 rows x 71 columns]"
  992. ]
  993. },
  994. "execution_count": 7,
  995. "metadata": {},
  996. "output_type": "execute_result"
  997. }
  998. ],
  999. "source": [
  1000. "#retrait des colonnes inutiles\n",
  1001. "entreprise50.drop(['unitePurgeeUniteLegale', 'prenom4UniteLegale', 'pseudonymeUniteLegale', 'libelleCommuneEtranger2Etablissement', 'codePaysEtranger2Etablissement', 'libellePaysEtranger2Etablissement'], axis=1)\n"
  1002. ]
  1003. },
  1004. {
  1005. "cell_type": "code",
  1006. "execution_count": null,
  1007. "id": "f7e8e157",
  1008. "metadata": {},
  1009. "outputs": [],
  1010. "source": [
  1011. "#drop les colonnes inutiles\n",
  1012. "entreprise50.drop([''])"
  1013. ]
  1014. },
  1015. {
  1016. "cell_type": "code",
  1017. "execution_count": 43,
  1018. "id": "8f4fbab9",
  1019. "metadata": {},
  1020. "outputs": [
  1021. {
  1022. "data": {
  1023. "text/plain": [
  1024. "0 False\n",
  1025. "1 False\n",
  1026. "2 False\n",
  1027. "3 False\n",
  1028. "4 False\n",
  1029. " ... \n",
  1030. "82442 False\n",
  1031. "82443 False\n",
  1032. "82444 False\n",
  1033. "82445 False\n",
  1034. "82446 False\n",
  1035. "Length: 82447, dtype: bool"
  1036. ]
  1037. },
  1038. "execution_count": 43,
  1039. "metadata": {},
  1040. "output_type": "execute_result"
  1041. }
  1042. ],
  1043. "source": [
  1044. "#identifier les lignes dupliquées \n",
  1045. "entreprise50.duplicated()"
  1046. ]
  1047. },
  1048. {
  1049. "cell_type": "code",
  1050. "execution_count": 44,
  1051. "id": "cde83087",
  1052. "metadata": {},
  1053. "outputs": [
  1054. {
  1055. "data": {
  1056. "text/html": [
  1057. "<div>\n",
  1058. "<style scoped>\n",
  1059. " .dataframe tbody tr th:only-of-type {\n",
  1060. " vertical-align: middle;\n",
  1061. " }\n",
  1062. "\n",
  1063. " .dataframe tbody tr th {\n",
  1064. " vertical-align: top;\n",
  1065. " }\n",
  1066. "\n",
  1067. " .dataframe thead th {\n",
  1068. " text-align: right;\n",
  1069. " }\n",
  1070. "</style>\n",
  1071. "<table border=\"1\" class=\"dataframe\">\n",
  1072. " <thead>\n",
  1073. " <tr style=\"text-align: right;\">\n",
  1074. " <th></th>\n",
  1075. " <th>siren</th>\n",
  1076. " <th>nic</th>\n",
  1077. " <th>siret</th>\n",
  1078. " <th>statutDiffusionEtablissement</th>\n",
  1079. " <th>dateCreationEtablissement</th>\n",
  1080. " <th>trancheEffectifsEtablissement</th>\n",
  1081. " <th>anneeEffectifsEtablissement</th>\n",
  1082. " <th>activitePrincipaleRegistreMetiersEtablissement</th>\n",
  1083. " <th>dateDernierTraitementEtablissement</th>\n",
  1084. " <th>etablissementSiege</th>\n",
  1085. " <th>...</th>\n",
  1086. " <th>codePaysEtranger2Etablissement</th>\n",
  1087. " <th>libellePaysEtranger2Etablissement</th>\n",
  1088. " <th>etatAdministratifEtablissement</th>\n",
  1089. " <th>enseigne1Etablissement</th>\n",
  1090. " <th>enseigne2Etablissement</th>\n",
  1091. " <th>enseigne3Etablissement</th>\n",
  1092. " <th>denominationUsuelleEtablissement</th>\n",
  1093. " <th>activitePrincipaleEtablissement</th>\n",
  1094. " <th>nomenclatureActivitePrincipaleEtablissement</th>\n",
  1095. " <th>caractereEmployeurEtablissement</th>\n",
  1096. " </tr>\n",
  1097. " </thead>\n",
  1098. " <tbody>\n",
  1099. " <tr>\n",
  1100. " <th>0</th>\n",
  1101. " <td>5620190</td>\n",
  1102. " <td>65</td>\n",
  1103. " <td>562019000065</td>\n",
  1104. " <td>O</td>\n",
  1105. " <td>2016-01-01</td>\n",
  1106. " <td>21</td>\n",
  1107. " <td>2020.0</td>\n",
  1108. " <td>NaN</td>\n",
  1109. " <td>2022-08-29T09:00:21</td>\n",
  1110. " <td>True</td>\n",
  1111. " <td>...</td>\n",
  1112. " <td>NaN</td>\n",
  1113. " <td>NaN</td>\n",
  1114. " <td>A</td>\n",
  1115. " <td>NaN</td>\n",
  1116. " <td>NaN</td>\n",
  1117. " <td>NaN</td>\n",
  1118. " <td>NaN</td>\n",
  1119. " <td>49.39A</td>\n",
  1120. " <td>NAFRev2</td>\n",
  1121. " <td>O</td>\n",
  1122. " </tr>\n",
  1123. " <tr>\n",
  1124. " <th>1</th>\n",
  1125. " <td>5680145</td>\n",
  1126. " <td>17</td>\n",
  1127. " <td>568014500017</td>\n",
  1128. " <td>O</td>\n",
  1129. " <td>1956-01-01</td>\n",
  1130. " <td>21</td>\n",
  1131. " <td>2020.0</td>\n",
  1132. " <td>NaN</td>\n",
  1133. " <td>2022-10-04T04:12:29</td>\n",
  1134. " <td>True</td>\n",
  1135. " <td>...</td>\n",
  1136. " <td>NaN</td>\n",
  1137. " <td>NaN</td>\n",
  1138. " <td>A</td>\n",
  1139. " <td>NaN</td>\n",
  1140. " <td>NaN</td>\n",
  1141. " <td>NaN</td>\n",
  1142. " <td>NaN</td>\n",
  1143. " <td>22.22Z</td>\n",
  1144. " <td>NAFRev2</td>\n",
  1145. " <td>O</td>\n",
  1146. " </tr>\n",
  1147. " <tr>\n",
  1148. " <th>2</th>\n",
  1149. " <td>5720164</td>\n",
  1150. " <td>28</td>\n",
  1151. " <td>572016400028</td>\n",
  1152. " <td>O</td>\n",
  1153. " <td>1983-09-28</td>\n",
  1154. " <td>22</td>\n",
  1155. " <td>2020.0</td>\n",
  1156. " <td>NaN</td>\n",
  1157. " <td>2022-08-29T09:00:21</td>\n",
  1158. " <td>True</td>\n",
  1159. " <td>...</td>\n",
  1160. " <td>NaN</td>\n",
  1161. " <td>NaN</td>\n",
  1162. " <td>A</td>\n",
  1163. " <td>NaN</td>\n",
  1164. " <td>NaN</td>\n",
  1165. " <td>NaN</td>\n",
  1166. " <td>NaN</td>\n",
  1167. " <td>86.10Z</td>\n",
  1168. " <td>NAFRev2</td>\n",
  1169. " <td>O</td>\n",
  1170. " </tr>\n",
  1171. " <tr>\n",
  1172. " <th>3</th>\n",
  1173. " <td>5720784</td>\n",
  1174. " <td>31</td>\n",
  1175. " <td>572078400031</td>\n",
  1176. " <td>O</td>\n",
  1177. " <td>1993-04-01</td>\n",
  1178. " <td>22</td>\n",
  1179. " <td>2020.0</td>\n",
  1180. " <td>NaN</td>\n",
  1181. " <td>2022-08-29T09:00:21</td>\n",
  1182. " <td>False</td>\n",
  1183. " <td>...</td>\n",
  1184. " <td>NaN</td>\n",
  1185. " <td>NaN</td>\n",
  1186. " <td>A</td>\n",
  1187. " <td>NaN</td>\n",
  1188. " <td>NaN</td>\n",
  1189. " <td>NaN</td>\n",
  1190. " <td>NaN</td>\n",
  1191. " <td>25.72Z</td>\n",
  1192. " <td>NAFRev2</td>\n",
  1193. " <td>O</td>\n",
  1194. " </tr>\n",
  1195. " <tr>\n",
  1196. " <th>4</th>\n",
  1197. " <td>5780960</td>\n",
  1198. " <td>26</td>\n",
  1199. " <td>578096000026</td>\n",
  1200. " <td>O</td>\n",
  1201. " <td>1981-12-28</td>\n",
  1202. " <td>21</td>\n",
  1203. " <td>2020.0</td>\n",
  1204. " <td>NaN</td>\n",
  1205. " <td>2022-08-29T09:00:21</td>\n",
  1206. " <td>False</td>\n",
  1207. " <td>...</td>\n",
  1208. " <td>NaN</td>\n",
  1209. " <td>NaN</td>\n",
  1210. " <td>A</td>\n",
  1211. " <td>HOTEL ROYAL THALASSO</td>\n",
  1212. " <td>NaN</td>\n",
  1213. " <td>NaN</td>\n",
  1214. " <td>NaN</td>\n",
  1215. " <td>55.10Z</td>\n",
  1216. " <td>NAFRev2</td>\n",
  1217. " <td>O</td>\n",
  1218. " </tr>\n",
  1219. " <tr>\n",
  1220. " <th>...</th>\n",
  1221. " <td>...</td>\n",
  1222. " <td>...</td>\n",
  1223. " <td>...</td>\n",
  1224. " <td>...</td>\n",
  1225. " <td>...</td>\n",
  1226. " <td>...</td>\n",
  1227. " <td>...</td>\n",
  1228. " <td>...</td>\n",
  1229. " <td>...</td>\n",
  1230. " <td>...</td>\n",
  1231. " <td>...</td>\n",
  1232. " <td>...</td>\n",
  1233. " <td>...</td>\n",
  1234. " <td>...</td>\n",
  1235. " <td>...</td>\n",
  1236. " <td>...</td>\n",
  1237. " <td>...</td>\n",
  1238. " <td>...</td>\n",
  1239. " <td>...</td>\n",
  1240. " <td>...</td>\n",
  1241. " <td>...</td>\n",
  1242. " </tr>\n",
  1243. " <tr>\n",
  1244. " <th>82442</th>\n",
  1245. " <td>998893002</td>\n",
  1246. " <td>142</td>\n",
  1247. " <td>99889300200142</td>\n",
  1248. " <td>O</td>\n",
  1249. " <td>2013-05-01</td>\n",
  1250. " <td>21</td>\n",
  1251. " <td>2020.0</td>\n",
  1252. " <td>NaN</td>\n",
  1253. " <td>2022-08-29T10:50:43</td>\n",
  1254. " <td>False</td>\n",
  1255. " <td>...</td>\n",
  1256. " <td>NaN</td>\n",
  1257. " <td>NaN</td>\n",
  1258. " <td>A</td>\n",
  1259. " <td>NaN</td>\n",
  1260. " <td>NaN</td>\n",
  1261. " <td>NaN</td>\n",
  1262. " <td>NaN</td>\n",
  1263. " <td>49.39B</td>\n",
  1264. " <td>NAFRev2</td>\n",
  1265. " <td>O</td>\n",
  1266. " </tr>\n",
  1267. " <tr>\n",
  1268. " <th>82443</th>\n",
  1269. " <td>999990005</td>\n",
  1270. " <td>38</td>\n",
  1271. " <td>99999000500038</td>\n",
  1272. " <td>O</td>\n",
  1273. " <td>1993-07-01</td>\n",
  1274. " <td>22</td>\n",
  1275. " <td>2020.0</td>\n",
  1276. " <td>NaN</td>\n",
  1277. " <td>2022-08-29T10:50:43</td>\n",
  1278. " <td>True</td>\n",
  1279. " <td>...</td>\n",
  1280. " <td>NaN</td>\n",
  1281. " <td>NaN</td>\n",
  1282. " <td>A</td>\n",
  1283. " <td>NaN</td>\n",
  1284. " <td>NaN</td>\n",
  1285. " <td>NaN</td>\n",
  1286. " <td>NaN</td>\n",
  1287. " <td>49.41A</td>\n",
  1288. " <td>NAFRev2</td>\n",
  1289. " <td>O</td>\n",
  1290. " </tr>\n",
  1291. " <tr>\n",
  1292. " <th>82444</th>\n",
  1293. " <td>999990005</td>\n",
  1294. " <td>491</td>\n",
  1295. " <td>99999000500491</td>\n",
  1296. " <td>O</td>\n",
  1297. " <td>2017-04-01</td>\n",
  1298. " <td>21</td>\n",
  1299. " <td>2020.0</td>\n",
  1300. " <td>NaN</td>\n",
  1301. " <td>2022-08-29T10:50:43</td>\n",
  1302. " <td>False</td>\n",
  1303. " <td>...</td>\n",
  1304. " <td>NaN</td>\n",
  1305. " <td>NaN</td>\n",
  1306. " <td>A</td>\n",
  1307. " <td>NaN</td>\n",
  1308. " <td>NaN</td>\n",
  1309. " <td>NaN</td>\n",
  1310. " <td>NaN</td>\n",
  1311. " <td>49.41A</td>\n",
  1312. " <td>NAFRev2</td>\n",
  1313. " <td>O</td>\n",
  1314. " </tr>\n",
  1315. " <tr>\n",
  1316. " <th>82445</th>\n",
  1317. " <td>999990286</td>\n",
  1318. " <td>18</td>\n",
  1319. " <td>99999028600018</td>\n",
  1320. " <td>O</td>\n",
  1321. " <td>1979-11-30</td>\n",
  1322. " <td>22</td>\n",
  1323. " <td>2020.0</td>\n",
  1324. " <td>NaN</td>\n",
  1325. " <td>2022-08-29T10:50:43</td>\n",
  1326. " <td>True</td>\n",
  1327. " <td>...</td>\n",
  1328. " <td>NaN</td>\n",
  1329. " <td>NaN</td>\n",
  1330. " <td>A</td>\n",
  1331. " <td>NaN</td>\n",
  1332. " <td>NaN</td>\n",
  1333. " <td>NaN</td>\n",
  1334. " <td>NaN</td>\n",
  1335. " <td>55.10Z</td>\n",
  1336. " <td>NAFRev2</td>\n",
  1337. " <td>O</td>\n",
  1338. " </tr>\n",
  1339. " <tr>\n",
  1340. " <th>82446</th>\n",
  1341. " <td>999990369</td>\n",
  1342. " <td>87</td>\n",
  1343. " <td>99999036900087</td>\n",
  1344. " <td>O</td>\n",
  1345. " <td>2014-03-31</td>\n",
  1346. " <td>21</td>\n",
  1347. " <td>2020.0</td>\n",
  1348. " <td>NaN</td>\n",
  1349. " <td>2022-10-02T03:38:31</td>\n",
  1350. " <td>True</td>\n",
  1351. " <td>...</td>\n",
  1352. " <td>NaN</td>\n",
  1353. " <td>NaN</td>\n",
  1354. " <td>A</td>\n",
  1355. " <td>NaN</td>\n",
  1356. " <td>NaN</td>\n",
  1357. " <td>NaN</td>\n",
  1358. " <td>NaN</td>\n",
  1359. " <td>66.30Z</td>\n",
  1360. " <td>NAFRev2</td>\n",
  1361. " <td>O</td>\n",
  1362. " </tr>\n",
  1363. " </tbody>\n",
  1364. "</table>\n",
  1365. "<p>82447 rows × 77 columns</p>\n",
  1366. "</div>"
  1367. ],
  1368. "text/plain": [
  1369. " siren nic siret statutDiffusionEtablissement \\\n",
  1370. "0 5620190 65 562019000065 O \n",
  1371. "1 5680145 17 568014500017 O \n",
  1372. "2 5720164 28 572016400028 O \n",
  1373. "3 5720784 31 572078400031 O \n",
  1374. "4 5780960 26 578096000026 O \n",
  1375. "... ... ... ... ... \n",
  1376. "82442 998893002 142 99889300200142 O \n",
  1377. "82443 999990005 38 99999000500038 O \n",
  1378. "82444 999990005 491 99999000500491 O \n",
  1379. "82445 999990286 18 99999028600018 O \n",
  1380. "82446 999990369 87 99999036900087 O \n",
  1381. "\n",
  1382. " dateCreationEtablissement trancheEffectifsEtablissement \\\n",
  1383. "0 2016-01-01 21 \n",
  1384. "1 1956-01-01 21 \n",
  1385. "2 1983-09-28 22 \n",
  1386. "3 1993-04-01 22 \n",
  1387. "4 1981-12-28 21 \n",
  1388. "... ... ... \n",
  1389. "82442 2013-05-01 21 \n",
  1390. "82443 1993-07-01 22 \n",
  1391. "82444 2017-04-01 21 \n",
  1392. "82445 1979-11-30 22 \n",
  1393. "82446 2014-03-31 21 \n",
  1394. "\n",
  1395. " anneeEffectifsEtablissement \\\n",
  1396. "0 2020.0 \n",
  1397. "1 2020.0 \n",
  1398. "2 2020.0 \n",
  1399. "3 2020.0 \n",
  1400. "4 2020.0 \n",
  1401. "... ... \n",
  1402. "82442 2020.0 \n",
  1403. "82443 2020.0 \n",
  1404. "82444 2020.0 \n",
  1405. "82445 2020.0 \n",
  1406. "82446 2020.0 \n",
  1407. "\n",
  1408. " activitePrincipaleRegistreMetiersEtablissement \\\n",
  1409. "0 NaN \n",
  1410. "1 NaN \n",
  1411. "2 NaN \n",
  1412. "3 NaN \n",
  1413. "4 NaN \n",
  1414. "... ... \n",
  1415. "82442 NaN \n",
  1416. "82443 NaN \n",
  1417. "82444 NaN \n",
  1418. "82445 NaN \n",
  1419. "82446 NaN \n",
  1420. "\n",
  1421. " dateDernierTraitementEtablissement etablissementSiege ... \\\n",
  1422. "0 2022-08-29T09:00:21 True ... \n",
  1423. "1 2022-10-04T04:12:29 True ... \n",
  1424. "2 2022-08-29T09:00:21 True ... \n",
  1425. "3 2022-08-29T09:00:21 False ... \n",
  1426. "4 2022-08-29T09:00:21 False ... \n",
  1427. "... ... ... ... \n",
  1428. "82442 2022-08-29T10:50:43 False ... \n",
  1429. "82443 2022-08-29T10:50:43 True ... \n",
  1430. "82444 2022-08-29T10:50:43 False ... \n",
  1431. "82445 2022-08-29T10:50:43 True ... \n",
  1432. "82446 2022-10-02T03:38:31 True ... \n",
  1433. "\n",
  1434. " codePaysEtranger2Etablissement libellePaysEtranger2Etablissement \\\n",
  1435. "0 NaN NaN \n",
  1436. "1 NaN NaN \n",
  1437. "2 NaN NaN \n",
  1438. "3 NaN NaN \n",
  1439. "4 NaN NaN \n",
  1440. "... ... ... \n",
  1441. "82442 NaN NaN \n",
  1442. "82443 NaN NaN \n",
  1443. "82444 NaN NaN \n",
  1444. "82445 NaN NaN \n",
  1445. "82446 NaN NaN \n",
  1446. "\n",
  1447. " etatAdministratifEtablissement enseigne1Etablissement \\\n",
  1448. "0 A NaN \n",
  1449. "1 A NaN \n",
  1450. "2 A NaN \n",
  1451. "3 A NaN \n",
  1452. "4 A HOTEL ROYAL THALASSO \n",
  1453. "... ... ... \n",
  1454. "82442 A NaN \n",
  1455. "82443 A NaN \n",
  1456. "82444 A NaN \n",
  1457. "82445 A NaN \n",
  1458. "82446 A NaN \n",
  1459. "\n",
  1460. " enseigne2Etablissement enseigne3Etablissement \\\n",
  1461. "0 NaN NaN \n",
  1462. "1 NaN NaN \n",
  1463. "2 NaN NaN \n",
  1464. "3 NaN NaN \n",
  1465. "4 NaN NaN \n",
  1466. "... ... ... \n",
  1467. "82442 NaN NaN \n",
  1468. "82443 NaN NaN \n",
  1469. "82444 NaN NaN \n",
  1470. "82445 NaN NaN \n",
  1471. "82446 NaN NaN \n",
  1472. "\n",
  1473. " denominationUsuelleEtablissement activitePrincipaleEtablissement \\\n",
  1474. "0 NaN 49.39A \n",
  1475. "1 NaN 22.22Z \n",
  1476. "2 NaN 86.10Z \n",
  1477. "3 NaN 25.72Z \n",
  1478. "4 NaN 55.10Z \n",
  1479. "... ... ... \n",
  1480. "82442 NaN 49.39B \n",
  1481. "82443 NaN 49.41A \n",
  1482. "82444 NaN 49.41A \n",
  1483. "82445 NaN 55.10Z \n",
  1484. "82446 NaN 66.30Z \n",
  1485. "\n",
  1486. " nomenclatureActivitePrincipaleEtablissement \\\n",
  1487. "0 NAFRev2 \n",
  1488. "1 NAFRev2 \n",
  1489. "2 NAFRev2 \n",
  1490. "3 NAFRev2 \n",
  1491. "4 NAFRev2 \n",
  1492. "... ... \n",
  1493. "82442 NAFRev2 \n",
  1494. "82443 NAFRev2 \n",
  1495. "82444 NAFRev2 \n",
  1496. "82445 NAFRev2 \n",
  1497. "82446 NAFRev2 \n",
  1498. "\n",
  1499. " caractereEmployeurEtablissement \n",
  1500. "0 O \n",
  1501. "1 O \n",
  1502. "2 O \n",
  1503. "3 O \n",
  1504. "4 O \n",
  1505. "... ... \n",
  1506. "82442 O \n",
  1507. "82443 O \n",
  1508. "82444 O \n",
  1509. "82445 O \n",
  1510. "82446 O \n",
  1511. "\n",
  1512. "[82447 rows x 77 columns]"
  1513. ]
  1514. },
  1515. "execution_count": 44,
  1516. "metadata": {},
  1517. "output_type": "execute_result"
  1518. }
  1519. ],
  1520. "source": [
  1521. "# Preview entreprise50 without duplicate rows; the result is not assigned back, so entreprise50 itself is left unchanged\n",
  1522. "entreprise50.drop_duplicates()"
  1523. ]
  1524. },
  1525. {
  1526. "cell_type": "code",
  1527. "execution_count": null,
  1528. "id": "8cdf8558",
  1529. "metadata": {},
  1530. "outputs": [],
  1531. "source": []
  1532. },
  1533. {
  1534. "cell_type": "code",
  1535. "execution_count": 45,
  1536. "id": "22e04dc2",
  1537. "metadata": {},
  1538. "outputs": [
  1539. {
  1540. "name": "stdout",
  1541. "output_type": "stream",
  1542. "text": [
  1543. "DataFrame is written to Excel File successfully.\n"
  1544. ]
  1545. }
  1546. ],
  1547. "source": [
  1548. "# Export the cleaned DataFrame to an Excel workbook. drop_duplicates() returns a new DataFrame instead of modifying entreprise50, so it is chained here to make sure duplicate rows never reach the file.\n",
  1549. "file_name = 'Sirenplus.xlsx'\n",
  1550. "entreprise50.drop_duplicates().to_excel(file_name)\n",
  1551. "print('DataFrame is written to Excel File successfully.')"
  1552. ]
  1553. },
  1554. {
  1555. "cell_type": "code",
  1556. "execution_count": null,
  1557. "id": "6253e566",
  1558. "metadata": {},
  1559. "outputs": [],
  1560. "source": []
  1561. }
  1562. ],
  1563. "metadata": {
  1564. "kernelspec": {
  1565. "display_name": "Python 3 (ipykernel)",
  1566. "language": "python",
  1567. "name": "python3"
  1568. },
  1569. "language_info": {
  1570. "codemirror_mode": {
  1571. "name": "ipython",
  1572. "version": 3
  1573. },
  1574. "file_extension": ".py",
  1575. "mimetype": "text/x-python",
  1576. "name": "python",
  1577. "nbconvert_exporter": "python",
  1578. "pygments_lexer": "ipython3",
  1579. "version": "3.9.7"
  1580. }
  1581. },
  1582. "nbformat": 4,
  1583. "nbformat_minor": 5
  1584. }