Browse Source

version beta

beta
l_facheux 2 years ago
parent
commit
6f2da2f5f7
  1. 70
      Colonnes.py
  2. 16
      Doublons.py
  3. 1386
      Table_final.csv
  4. 19
      Values.py
  5. 69
      main.py
  6. 4
      requirements.txt

70
Colonnes.py

@ -0,0 +1,70 @@
# Import the libraries we will need
from typing import List
import pandas as pd
import numpy as np
import requests
import csv
import re
# Location of the CSV export to clean.
CSV_PATH = "C:\\Users\\luigg\\Data_cleaning\\Table_final.csv"

# Unused or irrelevant columns to remove from the table.
# NOTE(review): the names were previously written with a line break inside
# each string literal (a syntax error). This assumes the CSV headers carry no
# trailing newline -- confirm against Table_final.csv.
to_drop = [
    'identifiant',
    'adresse',
    'commune',
    'coordonnees_x',
    'coordonnees_y',
    'code_epsg',
    'code_ape',
    'libelle_ape',
    'code_eprtr',
    'libelle_eprtr',
    'sigleUniteLegale_imp',
    'activitePrincipaleUniteLegale_imp',
    'Catégorie_entreprise_imp',
    'numeroVoieEtablissement_imp',
    'typeVoieEtablissement_imp',
    'libelleVoieEtablissement_imp',
    'libelleCommuneEtablissement_imp',
    'codeCommuneEtablissement_imp',
    'adresse_imp',
    'geo_imp',
    'com_code_imp',
    'code_commune_imp',
    'Code Officiel_EPCI_imp',
    'Code_Officiel_region_imp',
    'codenaffix_imp',
    'Intitule_NAF_imp',
    'groupe_imp',
    'division_imp',
    'nom_etablissement_tndan',
    'code_operation_eliminatio_valorisation_tndan',
    'libelle_operation_eliminatio_valorisation_tndan',
    'code_departement_tndan',
    'pays_tndan',
    'pays_pdan',
    'code_dechet_pdan',
    'libelle_dechet_pdan',
    'quantite_pdan',
    'unite_pdan',
    'code_operation_eliminatio_valorisation_pndan',
    'libelle_operation_eliminatio_valorisation_pndan',
    'code_departement_pndan',
    'pays_pndan',
    'code_dechet_pndan',
    'libelle_dechet_pndan',
    'quantite_pndan',
    'unite_pndan',
    'code_operation_eliminatio_valorisation_tdan',
    'libelle_operation_eliminatio_valorisation_tdan',
    'code_departement_tdan',
    'pays_tdan',
    'code_dechet_tdan',
    'libelle_dechet_tdan',
]


def drop_unused_columns(table, columns=None):
    """Return a copy of *table* without the unused columns.

    Args:
        table: The DataFrame to clean. It is not modified.
        columns: Column labels to remove. Defaults to ``to_drop``.

    Returns:
        A new DataFrame holding every column of *table* except *columns*.

    Raises:
        KeyError: If one of *columns* is not a column of *table*.
    """
    labels = to_drop if columns is None else list(columns)
    return table.drop(labels, axis=1)


def main():
    """Load the CSV, show a preview, drop the unused columns, show it again."""
    datafram = pd.read_csv(CSV_PATH)
    # A bare ``head(5)`` displays nothing outside a notebook, so print it.
    print(datafram.head(5))
    datafram = drop_unused_columns(datafram)
    print(datafram.head(5))


if __name__ == "__main__":
    main()

16
Doublons.py

@ -0,0 +1,16 @@
import csv
import re

import numpy as np
import pandas as pd

# Location of the CSV export to clean.
CSV_PATH = r"C:\Users\luigg\Data_cleaning\Table_final.csv"

# Columns whose combined value identifies a duplicate row.
# NOTE(review): confirm that these columns exist in Table_final.csv;
# drop_duplicates raises KeyError for an unknown subset label.
DUPLICATE_KEYS = ['order_id', 'customer_id']


def drop_duplicate_rows(table, subset=None):
    """Return *table* without duplicate rows, keeping the last occurrence.

    Args:
        table: The DataFrame to de-duplicate. It is not modified.
        subset: Column labels that define a duplicate. Defaults to
            ``DUPLICATE_KEYS``.

    Returns:
        A new DataFrame with a fresh 0..n-1 index.

    Raises:
        KeyError: If one of the subset labels is not a column of *table*.
    """
    keys = DUPLICATE_KEYS if subset is None else list(subset)
    deduplicated = table.drop_duplicates(subset=keys, keep='last')
    return deduplicated.reset_index(drop=True)


def main():
    """Load the CSV, show a preview, then print the de-duplicated table."""
    datafram = pd.read_csv(CSV_PATH)
    # A bare ``head(5)`` displays nothing outside a notebook, so print it.
    print(datafram.head(5))
    nouvelle_table = drop_duplicate_rows(datafram)
    print(nouvelle_table)


if __name__ == "__main__":
    main()

1386
Table_final.csv
File diff suppressed because it is too large
View File

19
Values.py

@ -0,0 +1,19 @@
import pandas as pd
import numpy as np
import csv
# Location of the CSV export to clean. A raw string is required: in a normal
# string literal the ``\U`` of ``C:\Users`` starts a Unicode escape and is a
# syntax error.
CSV_PATH = r"C:\Users\luigg\Data_cleaning\Table_final.csv"

# Labels substituted for the 0/1 values.
replace_values = {0: 'Non', 1: 'Oui'}

# Columns in which 0/1 are replaced by the labels above.
# NOTE(review): assumes the CSV headers carry no trailing newline -- confirm
# against Table_final.csv.
FLAG_COLUMNS = [
    "engagement_manifeste_imp",
    "engagement_data_imp",
    "prelevements_eaux_souterraines_pre",
    "prelevements_mer_pre",
]


def label_flag_columns(table):
    """Return a copy of *table* with 0/1 replaced by 'Non'/'Oui'.

    Only the columns listed in ``FLAG_COLUMNS`` are affected; every other
    column is returned unchanged. *table* itself is not modified.
    """
    # A nested dict tells DataFrame.replace to apply the mapping per column.
    per_column = {column: replace_values for column in FLAG_COLUMNS}
    return table.replace(per_column)


def main():
    """Load the CSV, show a preview, relabel the flag columns, show it again."""
    table = pd.read_csv(CSV_PATH)
    # A bare ``head(5)`` displays nothing outside a notebook, so print it.
    print(table.head(5))
    table = label_flag_columns(table)
    print(table.head(5))


if __name__ == "__main__":
    main()

69
main.py

@ -0,0 +1,69 @@
# Import the libraries we will need
import pandas as pd
import numpy as np
import csv
import re
# Location of the CSV export to clean. A raw string is required: in a normal
# string literal the ``\U`` of ``C:\Users`` starts a Unicode escape and is a
# syntax error.
CSV_PATH = r"C:\Users\luigg\Data_cleaning\Table_final.csv"

# Unused or irrelevant columns to remove from the table.
# NOTE(review): the names were previously written with a line break inside
# each string literal (a syntax error). This assumes the CSV headers carry no
# trailing newline -- confirm against Table_final.csv.
to_drop = [
    'identifiant',
    'adresse',
    'commune',
    'coordonnees_x',
    'coordonnees_y',
    'code_epsg',
    'code_ape',
    'libelle_ape',
    'code_eprtr',
    'libelle_eprtr',
    'sigleUniteLegale_imp',
    'activitePrincipaleUniteLegale_imp',
    'Catégorie_entreprise_imp',
    'numeroVoieEtablissement_imp',
    'typeVoieEtablissement_imp',
    'libelleVoieEtablissement_imp',
    'libelleCommuneEtablissement_imp',
    'codeCommuneEtablissement_imp',
    'adresse_imp',
    'geo_imp',
    'com_code_imp',
    'code_commune_imp',
    'Code Officiel_EPCI_imp',
    'Code_Officiel_region_imp',
    'codenaffix_imp',
    'Intitule_NAF_imp',
    'groupe_imp',
    'division_imp',
    'nom_etablissement_tndan',
    'code_operation_eliminatio_valorisation_tndan',
    'libelle_operation_eliminatio_valorisation_tndan',
    'code_departement_tndan',
    'pays_tndan',
    'pays_pdan',
    'code_dechet_pdan',
    'libelle_dechet_pdan',
    'quantite_pdan',
    'unite_pdan',
    'code_operation_eliminatio_valorisation_pndan',
    'libelle_operation_eliminatio_valorisation_pndan',
    'code_departement_pndan',
    'pays_pndan',
    'code_dechet_pndan',
    'libelle_dechet_pndan',
    'quantite_pndan',
    'unite_pndan',
    'code_operation_eliminatio_valorisation_tdan',
    'libelle_operation_eliminatio_valorisation_tdan',
    'code_departement_tdan',
    'pays_tdan',
    'code_dechet_tdan',
    'libelle_dechet_tdan',
]


def drop_unused_columns(table, columns=None):
    """Return a copy of *table* without the unused columns.

    Args:
        table: The DataFrame to clean. It is not modified.
        columns: Column labels to remove. Defaults to ``to_drop``.

    Returns:
        A new DataFrame holding every column of *table* except *columns*.

    Raises:
        KeyError: If one of *columns* is not a column of *table*.
    """
    labels = to_drop if columns is None else list(columns)
    return table.drop(labels, axis=1)


def main():
    """Load the CSV, show a preview, drop the unused columns, show it again."""
    df = pd.read_csv(CSV_PATH)
    # A bare ``head(5)`` displays nothing outside a notebook, so print it.
    print(df.head(5))
    df = drop_unused_columns(df)
    print(df.head(5))


if __name__ == "__main__":
    main()

4
requirements.txt

@ -0,0 +1,4 @@
# Third-party packages only. csv and re ship with the Python standard
# library and cannot be installed with pip, so they are not listed here.
pandas
numpy
# Imported by Colonnes.py.
requests
Loading…
Cancel
Save