# Read every raw line of the Iris file. The context manager closes the
# handle even if reading fails part-way through.
with open("Iris.txt") as fichier:
    lignesBrutes = fichier.readlines()

# Preview the first five raw lines.
lignesBrutes[:5]

['"Sepal Length"\t"Sepal Width"\t"Petal Length"\t"Petal Width"\t"Species"\n',
 '5.1\t3.5\t1.4\t0.2\tsetosa\n',
 '4.9\t3\t1.4\t0.2\tsetosa\n',
 '4.7\t3.2\t1.3\t0.2\tsetosa\n',
 '4.6\t3.1\t1.5\t0.2\tsetosa\n']

# Step by step: strip the double quotes, drop the newline, then split each
# raw line on tabs.
lignes = []
for brute in lignesBrutes:
    sans_guillemets = brute.replace('"', "")
    sans_retour = sans_guillemets.replace("\n", "")
    lignes.append(sans_retour.split("\t"))
lignes[:5]

[['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width', 'Species'],
 ['5.1', '3.5', '1.4', '0.2', 'setosa'],
 ['4.9', '3', '1.4', '0.2', 'setosa'],
 ['4.7', '3.2', '1.3', '0.2', 'setosa'],
 ['4.6', '3.1', '1.5', '0.2', 'setosa']]

# Same clean-up written as a single comprehension.
lignes = [
    brute.replace("\n", "").replace('"', "").split("\t")
    for brute in lignesBrutes
]
lignes[:5]

[['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width', 'Species'],
 ['5.1', '3.5', '1.4', '0.2', 'setosa'],
 ['4.9', '3', '1.4', '0.2', 'setosa'],
 ['4.7', '3.2', '1.3', '0.2', 'setosa'],
 ['4.6', '3.1', '1.5', '0.2', 'setosa']]

def str2float(x):
    """Convert ``x`` to a float when possible, otherwise return it unchanged.

    Args:
        x: Value to convert (for example a text field such as ``"5.2"``).

    Returns:
        ``float(x)`` if the conversion succeeds, else ``x`` itself.
    """
    try:
        return float(x)
    except (TypeError, ValueError, OverflowError):
        # Catch only conversion failures so that KeyboardInterrupt,
        # SystemExit and unrelated errors are not silently swallowed.
        return x

# Quick check: one numeric string and one non-numeric string.
for exemple in ("5.2", "setosa"):
    print(str2float(exemple))

5.2
setosa

# The first parsed line holds the column names; the remaining ones are records.
noms = lignes[0]
# One dict per record, mapping each column name to its converted value.
iris = [
    {nom: str2float(valeur) for nom, valeur in zip(noms, ligne)}
    for ligne in lignes[1:]
]
iris[:5]

[{'Sepal Length': 5.1,
  'Sepal Width': 3.5,
  'Petal Length': 1.4,
  'Petal Width': 0.2,
  'Species': 'setosa'},
 {'Sepal Length': 4.9,
  'Sepal Width': 3.0,
  'Petal Length': 1.4,
  'Petal Width': 0.2,
  'Species': 'setosa'},
 {'Sepal Length': 4.7,
  'Sepal Width': 3.2,
  'Petal Length': 1.3,
  'Petal Width': 0.2,
  'Species': 'setosa'},
 {'Sepal Length': 4.6,
  'Sepal Width': 3.1,
  'Petal Length': 1.5,
  'Petal Width': 0.2,
  'Species': 'setosa'},
 {'Sepal Length': 5.0,
  'Sepal Width': 3.6,
  'Petal Length': 1.4,
  'Petal Width': 0.2,
  'Species': 'setosa'}]

# Ratio of sepal area to petal area for each iris, each area being taken as
# length * width. The sepal area uses "Sepal Width" (multiplying
# "Sepal Length" by itself would not be an area of the sepal).
# NOTE(review): the exercise text reads "pétale ... sépale"; the
# sepal-over-petal direction is kept here — confirm which one is wanted.
ratio = [
    (x["Sepal Length"] * x["Sepal Width"]) / (x["Petal Length"] * x["Petal Width"])
    for x in iris
]
ratio[:5]

[92.89285714285714,
 85.75000000000003,
 84.96153846153847,
 70.53333333333332,
 89.28571428571429]

# Keep only the records whose species is "setosa".
setosa = [fleur for fleur in iris if fleur["Species"] == "setosa"]
setosa[:5]

[{'Sepal Length': 5.1,
  'Sepal Width': 3.5,
  'Petal Length': 1.4,
  'Petal Width': 0.2,
  'Species': 'setosa'},
 {'Sepal Length': 4.9,
  'Sepal Width': 3.0,
  'Petal Length': 1.4,
  'Petal Width': 0.2,
  'Species': 'setosa'},
 {'Sepal Length': 4.7,
  'Sepal Width': 3.2,
  'Petal Length': 1.3,
  'Petal Width': 0.2,
  'Species': 'setosa'},
 {'Sepal Length': 4.6,
  'Sepal Width': 3.1,
  'Petal Length': 1.5,
  'Petal Width': 0.2,
  'Species': 'setosa'},
 {'Sepal Length': 5.0,
  'Sepal Width': 3.6,
  'Petal Length': 1.4,
  'Petal Width': 0.2,
  'Species': 'setosa'}]

import functools

# Column-wise sum over all records. The columns to add up are every name in
# `noms` except the last one; `vars` is the builtin function and cannot be
# sliced, so `noms` is used instead.
somme = functools.reduce(
    lambda a, b: {k: a[k] + b[k] for k in noms[:-1]},
    iris,
)
somme

{'Sepal Length': 876.5000000000002,
 'Sepal Width': 458.60000000000014,
 'Petal Length': 563.7000000000004,
 'Petal Width': 179.90000000000012}

# Mean of each summed column, rounded to two decimals. Dividing by
# len(iris) rather than a literal 150 keeps the result correct for any
# number of records.
moyenne = {cle: round(total / len(iris), 2) for cle, total in somme.items()}
moyenne

{'Sepal Length': 5.84,
 'Sepal Width': 3.06,
 'Petal Length': 3.76,
 'Petal Width': 1.2}

# Same means as one expression: fold the records into column sums, then
# divide each sum by the record count. Uses `noms` (the builtin `vars` is
# not subscriptable) and len(iris) instead of a literal 150.
{
    cle: round(total / len(iris), 2)
    for cle, total in functools.reduce(
        lambda a, b: {k: a[k] + b[k] for k in noms[:-1]}, iris
    ).items()
}

{'Sepal Length': 5.84,
 'Sepal Width': 3.06,
 'Petal Length': 3.76,
 'Petal Width': 1.2}

# Distinct species names, sorted.
especes = sorted({fleur['Species'] for fleur in iris})
especes

['setosa', 'versicolor', 'virginica']

# One list of records per species, in the same order as `especes`.
groupes = [[fleur for fleur in iris if fleur['Species'] == s] for s in especes]
len(groupes)

3

# Column-wise sums computed separately for each group. The columns are
# every name in `noms` except the last one; the builtin `vars` cannot be
# sliced, so `noms` is used instead.
sommes = [
    functools.reduce(lambda a, b: {k: a[k] + b[k] for k in noms[:-1]}, g)
    for g in groupes
]
sommes

[{'Sepal Length': 250.29999999999998,
  'Sepal Width': 171.40000000000003,
  'Petal Length': 73.10000000000001,
  'Petal Width': 12.299999999999995},
 {'Sepal Length': 296.8,
  'Sepal Width': 138.50000000000003,
  'Petal Length': 212.99999999999997,
  'Petal Width': 66.3},
 {'Sepal Length': 329.3999999999999,
  'Sepal Width': 148.7,
  'Petal Length': 277.59999999999997,
  'Petal Width': 101.29999999999998}]

# Number of records in each group.
tailles = list(map(len, groupes))
tailles

[50, 50, 50]

# Per-group means: each column sum divided by the size of its group,
# rounded to two decimals.
moyennes = [
    {cle: round(total / taille, 2) for cle, total in somme_groupe.items()}
    for somme_groupe, taille in zip(sommes, tailles)
]
moyennes

[{'Sepal Length': 5.01,
  'Sepal Width': 3.43,
  'Petal Length': 1.46,
  'Petal Width': 0.25},
 {'Sepal Length': 5.94,
  'Sepal Width': 2.77,
  'Petal Length': 4.26,
  'Petal Width': 1.33},
 {'Sepal Length': 6.59,
  'Sepal Width': 2.97,
  'Petal Length': 5.55,
  'Petal Width': 2.03}]

# Associate each species name with its dict of means.
{espece: moy for espece, moy in zip(especes, moyennes)}

{'setosa': {'Sepal Length': 5.01,
  'Sepal Width': 3.43,
  'Petal Length': 1.46,
  'Petal Width': 0.25},
 'versicolor': {'Sepal Length': 5.94,
  'Sepal Width': 2.77,
  'Petal Length': 4.26,
  'Petal Width': 1.33},
 'virginica': {'Sepal Length': 6.59,
  'Sepal Width': 2.97,
  'Petal Length': 5.55,
  'Petal Width': 2.03}}

# Per-species means as a single expression. Each species is paired with its
# own group of records in one pass, so keys and values cannot get out of
# step, and every group is filtered only once.
{
    s: {nom: sum(e[nom] for e in g) / len(g) for nom in noms[:-1]}
    for s, g in (
        (s, [e for e in iris if e["Species"] == s])
        for s in {d["Species"] for d in iris}
    )
}

{'setosa': {'Sepal Length': 5.005999999999999,
  'Sepal Width': 3.428000000000001,
  'Petal Length': 1.4620000000000002,
  'Petal Width': 0.2459999999999999},
 'virginica': {'Sepal Length': 6.587999999999998,
  'Sepal Width': 2.9739999999999998,
  'Petal Length': 5.552,
  'Petal Width': 2.026},
 'versicolor': {'Sepal Length': 5.936,
  'Sepal Width': 2.7700000000000005,
  'Petal Length': 4.26,
  'Petal Width': 1.3259999999999998}}

Séance 1 - correction

1- Télécharger le fichier et importer les lignes dans python comme ci-dessus

2- Visualiser les 5 premières lignes

3- À partir de la liste de chaînes obtenue, créer une liste de 151 listes

4- Créer une fonction permettant de transformer une chaîne en réel

5- Créer une liste de 150 dictionnaires

6- Calculer pour chaque iris le rapport entre la surface d'un pétale et la surface d'un sépale

7- Créer une sous-liste ne contenant que les iris setosa

8- Calculer la moyenne de chaque variable

En une ligne, pour le fun ;o)

9- Calculer la moyenne de chaque variable pour chaque espèce

En une ligne, pour le fun ;o)