a = 'bonjour'
print(a)

len(a)

bonjour

7


a = 'bonjour'
a[1:5]

'onjo'


a[0:3]

'bon'


a[:3]

'bon'


a[3:len(a)]

'jour'


a[3:]

'jour'


a[::]

'bonjour'


a[::-1]

'ruojnob'


a[::2]

'bnor'


a[1:5:2]

'oj'


a[5:1:-2]

'uj'


a = 'bonjour'
a.upper()

'BONJOUR'


a.capitalize()

'Bonjour'


a.find('j')

3


a.replace('jour', 'soir')

'bonsoir'


a.count('o')

2


a.split('j')

['bon', 'our']


a = (3, 1, 9, 7)
print(a)
a[0]

(3, 1, 9, 7)

3


a = [3, 1, 9, 7]
print(a)
len(a)

[3, 1, 9, 7]

4


a[0]

3


a[1:3]

[1, 9]


a = [3, 1, 9, 7]
a.reverse()
a

[7, 9, 1, 3]


a.sort()
a

[1, 3, 7, 9]


a.sort(reverse=True)
a

[9, 7, 3, 1]


a.pop()

1

a

[9, 7, 3]


a.append(5)
a

[9, 7, 3, 5]


a.insert(0, 6)
a

[6, 9, 7, 3, 5]


a.remove(7)
a

[6, 9, 3, 5]


a = [3, 1, 9, 7]
b = [1, 2]
a + b

[3, 1, 9, 7, 1, 2]


a * 2

[3, 1, 9, 7, 3, 1, 9, 7]


a = [3, 1, 9, 7]
[x**2 for x in a]

[9, 1, 81, 49]


[x**2 for x in a if x >= 4]

[81, 49]


[x**2 if x >= 4 else -x for x in a]

[-3, -1, 81, 49]


a = [1, 2, 3, 4]
b = a
a[0] = 5
b[1] = 9
print(a, b)

[5, 9, 3, 4] [5, 9, 3, 4]


a = [1, 2, 3, 4]
b = a.copy()
a[0] = 5
b[1] = 9
print(a, b)

[5, 2, 3, 4] [1, 9, 3, 4]


a = { 
    "nom": "Jollois", 
    "prenom": "FX", 
    "langues": ["R", "Python", "SQL", "SAS"], 
    "labo": { "nom": "LIPADE", "lieu": "CUSP"}
}
print(a)
len(a)

{'nom': 'Jollois', 'prenom': 'FX', 'langues': ['R', 'Python', 'SQL', 'SAS'], 'labo': {'nom': 'LIPADE', 'lieu': 'CUSP'}}

4


a = { 
    "nom": "Jollois", 
    "prenom": "FX", 
    "langues": ["R", "Python", "SQL", "SAS"], 
    "labo": { "nom": "LIPADE", "lieu": "CUSP"}
}
a["nom"]

'Jollois'


a["langues"]

['R', 'Python', 'SQL', 'SAS']


a["langues"][0]

'R'


a["labo"]

{'nom': 'LIPADE', 'lieu': 'CUSP'}


a["labo"]["lieu"]

'CUSP'


a = { 
    "nom": "Jollois", 
    "prenom": "FX", 
    "langues": ["R", "Python", "SQL", "SAS"], 
    "labo": { "nom": "LIPADE", "lieu": "CUSP"}
}
a["type"] = "MCF"
a.get("nom")

'Jollois'


a.keys()

dict_keys(['nom', 'prenom', 'langues', 'labo', 'type'])


a.values()

dict_values(['Jollois', 'FX', ['R', 'Python', 'SQL', 'SAS'], {'nom': 'LIPADE', 'lieu': 'CUSP'}, 'MCF'])


a.popitem()

('type', 'MCF')

a

{'nom': 'Jollois',
 'prenom': 'FX',
 'langues': ['R', 'Python', 'SQL', 'SAS'],
 'labo': {'nom': 'LIPADE', 'lieu': 'CUSP'}}


a.pop("nom")

'Jollois'

a

{'prenom': 'FX',
 'langues': ['R', 'Python', 'SQL', 'SAS'],
 'labo': {'nom': 'LIPADE', 'lieu': 'CUSP'}}


a = { "nom": "Jollois", "prenom": "FX" }
b = a
b["prenom"] = "Xavier"
print(a, b)
print(a)

{'nom': 'Jollois', 'prenom': 'Xavier'} {'nom': 'Jollois', 'prenom': 'Xavier'}
{'nom': 'Jollois', 'prenom': 'Xavier'}


a = { "nom": "Jollois", "prenom": "FX" }
b = a.copy()
b["prenom"] = "Xavier"
print(a, b)

{'nom': 'Jollois', 'prenom': 'FX'} {'nom': 'Jollois', 'prenom': 'Xavier'}


fruits = ["pommes", "bananes", "poires", "oranges"]
nombres = [5, 2, 10, 4]
# Pair each fruit with the count at the same position. zip() walks both lists
# in lockstep, so the list length no longer has to be hard-coded.
{fruit: nombre for fruit, nombre in zip(fruits, nombres)}

{'pommes': 5, 'bananes': 2, 'poires': 10, 'oranges': 4}


fruits = ["pommes", "bananes", "poires", "oranges"]
nombres = [5, 2, 10, 4]
dict(zip(fruits, nombres))

{'pommes': 5, 'bananes': 2, 'poires': 10, 'oranges': 4}


a = 3
if (a > 2):
    print("sup")

sup


if (a > 2):
    print("dans le IF")
    print("sup")

dans le IF
sup


if (a > 2):
    print("sup")
elif (a > 0):
    print("mid")
else:
    print("inf")

sup


for i in range(5):
    print(i)

print("dernière valeur de i :", i)

0
1
2
3
4
dernière valeur de i : 4


i = 0
while i < 5:
    print(i)
    i += 1

print("Valeur de i :", i)

0
1
2
3
4
Valeur de i : 5


def pi():
    """Return an approximation of pi rounded to six decimal places."""
    return 3.141593

pi()

3.141593


def afficheBonjour():
    """Print the greeting "Bonjour" on standard output."""
    salutation = "Bonjour"
    print(salutation)

afficheBonjour()

Bonjour


def afficheBonjour(nom):
    """Print "Bonjour" followed by ``nom``, separated by a single space."""
    mots = ("Bonjour", nom)
    print(*mots)

afficheBonjour("Jollois")

Bonjour Jollois


def afficheBonjour(nom, prenom):
    """Print "Bonjour", then ``prenom``, then ``nom``, separated by spaces.

    Note that the first name is printed before the last name even though
    ``nom`` is the first positional parameter.
    """
    mots = ("Bonjour", prenom, nom)
    print(*mots)

afficheBonjour("Jollois", "FX")
afficheBonjour(nom = "Jollois", prenom = "FX")
afficheBonjour(prenom = "FX", nom = "Jollois")

Bonjour FX Jollois
Bonjour FX Jollois
Bonjour FX Jollois


def afficheBonjour(nom, prenom="?"):
    """Print "Bonjour", then ``prenom``, then ``nom``, separated by spaces.

    ``prenom`` is optional; when omitted, the placeholder "?" is printed
    in its place.
    """
    mots = ("Bonjour", prenom, nom)
    print(*mots)

afficheBonjour("Jollois", "FX")
afficheBonjour("Jollois")

Bonjour FX Jollois
Bonjour ? Jollois


def somme(v):
    """Return the sum of the items of ``v``, or ``None`` if they cannot be summed.

    When ``sum`` raises ``TypeError`` (items that cannot be added together,
    or ``v`` not iterable), an error message is printed and ``None`` is
    returned instead of propagating the error.

    Only ``TypeError`` is handled: a bare ``except`` together with a
    ``return`` inside ``finally`` would also swallow ``KeyboardInterrupt``
    and ``SystemExit``, which must be allowed to propagate.
    """
    try:
        return sum(v)
    except TypeError:
        print("Erreur : somme impossible !")
        return None

a = somme([1, 3, 5])
print(a)
a = somme(["un", 3, 5])
print(a)

9
Erreur : somme impossible !
None


import matplotlib.pyplot
import scipy.stats
import numpy
import pandas
import seaborn

%matplotlib inline


tips = pandas.read_csv("https://fxjollois.github.io/donnees/tips.csv", header = 0, sep = ",")
tips.head()


tips.query('total_bill > 48') # que les factures de plus de 48


tips.query('day.isin(("Sat", "Sun"))') # que les factures ayant eu lieu un samedi ou un dimanche


tips.query('size > 4 & sex == "Male"') # que les tables de plus de 4 convives et payées par un homme


a = 48
tips.query("total_bill > @a") # idem première ligne ci-dessus


tips.filter(["sex", "total_bill"]) # que sex et total_bill donc


tips.filter(like = "ti") # que les variables ayant "ti" dans leur nom


tips.filter(regex = "t.*i") 
# que les variables ayant la lettre "t" puis la lettre "i" (avec ou sans caractères entre)


tips.filter(["sex", "smoker"]).drop_duplicates()


tips.sort_values(by = "total_bill") # Tri par total croissant


tips.sort_values(by = "total_bill", ascending = False) # Tri décroissant


tips.sort_values(by = ["smoker", "total_bill"], ascending = [True, False]) # Tri avec smoker croissant et total décroissant


tips.head() # 5 premières lignes par défaut


tips.head(10) # 10 premières lignes


tips.tail(3) # 3 dernières lignes


# Row identifier derived from the actual number of rows instead of a
# hard-coded 244, so the cell keeps working if the dataset changes size.
tips['n_row'] = range(len(tips))
# A scalar value is repeated on every row of the new column.
tips['nouv'] = "nouvelle valeur"
tips.head()


# Careful: `l.size` would return the DataFrame's `size` attribute (its total
# number of elements) rather than the "size" column, so that column has to be
# accessed with brackets: l['size'].
tips.assign(per_person = lambda l: round(l.total_bill / l['size'], 2))


tips.filter(["total_bill", "tip", "size"]).aggregate(["count", "mean"])


tips.filter(["total_bill", "tip", "size"]).mean()

total_bill    19.785943
tip            2.998279
size           2.569672
dtype: float64


tips.filter(["sex", "total_bill", "tip", "size"]).groupby("sex").mean()


tips.filter(["sex", "smoker", "total_bill", "tip", "size"]).groupby(["sex", "smoker"]).mean()


tips2 = tips.melt(id_vars = "n_row")
tips2


pandas.pivot(tips2, index = "n_row", columns = "variable", values = "value")


tips.describe()


tips.describe().round(2)


tips.total_bill.describe()

count    244.000000
mean      19.785943
std        8.902412
min        3.070000
25%       13.347500
50%       17.795000
75%       24.127500
max       50.810000
Name: total_bill, dtype: float64


tips["total_bill"].describe()

count    244.000000
mean      19.785943
std        8.902412
min        3.070000
25%       13.347500
50%       17.795000
75%       24.127500
max       50.810000
Name: total_bill, dtype: float64


tips.total_bill.mean()

19.78594262295082


tips.total_bill.std()

8.902411954856856


tips.total_bill.var()

79.25293861397827


tips.total_bill.min()

3.07


tips.total_bill.max()

50.81


tips.total_bill.median()

17.795


tips.total_bill.quantile([.01, .1, .9, .99])

0.01     7.250
0.10    10.340
0.90    32.235
0.99    48.227
Name: total_bill, dtype: float64


scipy.stats.normaltest(tips.total_bill)

NormaltestResult(statistic=45.11781912347332, pvalue=1.5951078766352608e-10)


scipy.stats.shapiro(tips.total_bill)

ShapiroResult(statistic=0.9197188019752502, pvalue=3.3245434183371003e-10)


tips.total_bill.hist()

<AxesSubplot:>


tips.total_bill.hist(bins = 20)

<AxesSubplot:>


tips.total_bill.plot(kind = "hist")

<AxesSubplot:ylabel='Frequency'>


tips.total_bill.plot(kind = "hist", density = True)

<AxesSubplot:ylabel='Frequency'>


tips.total_bill.plot(kind = "kde")

<AxesSubplot:ylabel='Density'>


# A mettre ensemble pour avoir densité + histogramme sur le même graphique
tips.total_bill.plot(kind = "hist", density = True, color = "lightgrey")
tips.total_bill.plot(kind = "kde")

<AxesSubplot:ylabel='Density'>


seaborn.histplot(tips.total_bill)

<AxesSubplot:xlabel='total_bill', ylabel='Count'>


seaborn.histplot(data = tips, x = "total_bill")

<AxesSubplot:xlabel='total_bill', ylabel='Count'>


seaborn.histplot(data = tips, x = "total_bill", bins = 20)

<AxesSubplot:xlabel='total_bill', ylabel='Count'>


seaborn.histplot(data = tips, x = "total_bill", bins = [0, 10, 25, 60], stat = "density")

<AxesSubplot:xlabel='total_bill', ylabel='Density'>


seaborn.histplot(data = tips, x = "total_bill", kde = True)

<AxesSubplot:xlabel='total_bill', ylabel='Count'>


tips.boxplot()

<AxesSubplot:>


tips.boxplot(column = "total_bill")

<AxesSubplot:>


tips.boxplot(column = "total_bill", grid = False)

<AxesSubplot:>


seaborn.boxplot(x = "total_bill", data = tips)

<AxesSubplot:xlabel='total_bill'>


seaborn.boxplot(y = "total_bill", data = tips)

<AxesSubplot:ylabel='total_bill'>


seaborn.boxplot(x = "total_bill", data = tips, whis = 3)

<AxesSubplot:xlabel='total_bill'>


seaborn.pointplot(x = "total_bill", data = tips)

<AxesSubplot:xlabel='total_bill'>


seaborn.violinplot(x = "total_bill", data = tips)

<AxesSubplot:xlabel='total_bill'>


seaborn.stripplot(x = "total_bill", data = tips, jitter = True)

<AxesSubplot:xlabel='total_bill'>


tips.sex.describe()

count      244
unique       2
top       Male
freq       157
Name: sex, dtype: object


tips.sex.unique()

array(['Female', 'Male'], dtype=object)


tips.sex.value_counts()

Male      157
Female     87
Name: sex, dtype: int64


pandas.crosstab(tips.sex, "freq")


pandas.crosstab(tips.sex, "freq", normalize = True) # Proportion


t = pandas.crosstab(tips.sex, "freq", normalize=True)
scipy.stats.chisquare(t.freq)

Power_divergenceResult(statistic=0.08230314431604406, pvalue=0.774200187925369)


scipy.stats.chisquare(t.freq, (.2, .8))

Power_divergenceResult(statistic=0.1531888269282451, pvalue=0.6955064385613343)


t = pandas.crosstab(tips.sex, "freq")


t.plot.bar()

<AxesSubplot:xlabel='sex'>


t.plot(kind = "bar")

<AxesSubplot:xlabel='sex'>


# En proportion
t = pandas.crosstab(tips.sex, "freq", normalize=True)
t.plot(kind = "bar")

<AxesSubplot:xlabel='sex'>


# En pourcentage
(t * 100).plot(kind = "bar")

<AxesSubplot:xlabel='sex'>


seaborn.countplot(x = "sex", data = tips)

<AxesSubplot:xlabel='sex', ylabel='count'>


# En pourcentage
t = pandas.crosstab(tips.sex, "freq", normalize=True)
t = t.assign(sex = t.index, freq = 100 * t.freq)
seaborn.barplot(x = "sex", y = "freq", data = t)

<AxesSubplot:xlabel='sex', ylabel='freq'>


t = pandas.crosstab(tips.sex, "freq")
t.plot.pie(subplots = True, figsize = (6, 6))

array([<AxesSubplot:ylabel='freq'>], dtype=object)


# Keep only the numeric columns before computing the correlation matrix:
# recent pandas versions no longer drop text columns silently in corr()
# and raise an error instead.
tips.select_dtypes(include = "number").corr()


tips.total_bill.cov(tips.tip)

8.323501629224854


tips.total_bill.corr(tips.tip)

0.6757341092113641


scipy.stats.pearsonr(tips.total_bill, tips.tip)

(0.6757341092113647, 6.6924706468630016e-34)


scipy.stats.kendalltau(tips.total_bill, tips.tip)

KendalltauResult(correlation=0.517180972142381, pvalue=2.4455728480214792e-32)


tips.plot.scatter("total_bill", "tip")

<AxesSubplot:xlabel='total_bill', ylabel='tip'>


pandas.plotting.scatter_matrix(tips)

/usr/local/lib/python3.9/site-packages/pandas/plotting/_matplotlib/tools.py:400: MatplotlibDeprecationWarning: 
The is_first_col function was deprecated in Matplotlib 3.4 and will be removed two minor releases later. Use ax.get_subplotspec().is_first_col() instead.
  if ax.is_first_col():

array([[<AxesSubplot:xlabel='total_bill', ylabel='total_bill'>,
        <AxesSubplot:xlabel='tip', ylabel='total_bill'>,
        <AxesSubplot:xlabel='size', ylabel='total_bill'>,
        <AxesSubplot:xlabel='n_row', ylabel='total_bill'>],
       [<AxesSubplot:xlabel='total_bill', ylabel='tip'>,
        <AxesSubplot:xlabel='tip', ylabel='tip'>,
        <AxesSubplot:xlabel='size', ylabel='tip'>,
        <AxesSubplot:xlabel='n_row', ylabel='tip'>],
       [<AxesSubplot:xlabel='total_bill', ylabel='size'>,
        <AxesSubplot:xlabel='tip', ylabel='size'>,
        <AxesSubplot:xlabel='size', ylabel='size'>,
        <AxesSubplot:xlabel='n_row', ylabel='size'>],
       [<AxesSubplot:xlabel='total_bill', ylabel='n_row'>,
        <AxesSubplot:xlabel='tip', ylabel='n_row'>,
        <AxesSubplot:xlabel='size', ylabel='n_row'>,
        <AxesSubplot:xlabel='n_row', ylabel='n_row'>]], dtype=object)


seaborn.jointplot(x = "total_bill", y = "tip", data = tips)

<seaborn.axisgrid.JointGrid at 0x1398803d0>


seaborn.jointplot(x = "total_bill", y = "tip", data = tips, kind = "reg")

<seaborn.axisgrid.JointGrid at 0x1398726d0>


seaborn.jointplot(x = "total_bill", y = "tip", data = tips, kind = "hex")

<seaborn.axisgrid.JointGrid at 0x139a9a9d0>


seaborn.jointplot(x = "total_bill", y = "tip", data = tips, kind = "kde")

<seaborn.axisgrid.JointGrid at 0x139be9250>


seaborn.regplot(x = "total_bill", y = "tip", data = tips)

<AxesSubplot:xlabel='total_bill', ylabel='tip'>


seaborn.regplot(x = "total_bill", y = "tip", data = tips, fit_reg = False)

<AxesSubplot:xlabel='total_bill', ylabel='tip'>


seaborn.regplot(x = "total_bill", y = "tip", data = tips, scatter = False)

<AxesSubplot:xlabel='total_bill', ylabel='tip'>


seaborn.pairplot(data = tips, vars = ["total_bill", "tip", "size"])

<seaborn.axisgrid.PairGrid at 0x139e6f6d0>


pandas.crosstab(tips.sex, tips.smoker)


pandas.crosstab(tips.sex, tips.smoker, margins = True)


pandas.crosstab(tips.sex, tips.smoker, normalize = True)


pandas.crosstab(tips.sex, tips.smoker, normalize = "index")


pandas.crosstab(tips.sex, tips.smoker, normalize = "index", margins = True)


pandas.crosstab(tips.sex, tips.smoker, normalize = "columns")


pandas.crosstab(tips.sex, tips.smoker, normalize = "columns", margins = True)


t = pandas.crosstab(tips.sex, tips.smoker)
scipy.stats.chi2_contingency(t)

(0.008763290531773594,
 0.925417020494423,
 1,
 array([[53.84016393, 33.15983607],
        [97.15983607, 59.84016393]]))


t = pandas.crosstab(tips.sex, tips.smoker)
t.plot.bar()

<AxesSubplot:xlabel='sex'>


t = pandas.crosstab(tips.sex, tips.smoker, normalize=True)
t.plot.bar()

<AxesSubplot:xlabel='sex'>


t = pandas.crosstab(tips.sex, tips.smoker, normalize="index")
t.plot.bar(stacked=True)

<AxesSubplot:xlabel='sex'>


t = pandas.crosstab(tips.sex, tips.smoker)
t.plot.pie(subplots=True, figsize = (12, 6))

/usr/local/lib/python3.9/site-packages/pandas/plotting/_matplotlib/tools.py:400: MatplotlibDeprecationWarning: 
The is_first_col function was deprecated in Matplotlib 3.4 and will be removed two minor releases later. Use ax.get_subplotspec().is_first_col() instead.
  if ax.is_first_col():

array([<AxesSubplot:ylabel='No'>, <AxesSubplot:ylabel='Yes'>],
      dtype=object)


seaborn.countplot(x = "sex", hue = "smoker", data = tips)

<AxesSubplot:xlabel='sex', ylabel='count'>


t = pandas.crosstab(tips.sex, tips.smoker, normalize = "columns")
t = t.assign(sex = t.index)
tm = pandas.melt(t, id_vars = "sex")
tm = tm.assign(value = 100 * tm.value)
seaborn.catplot(x = "sex", y = "value", col = "smoker", data = tm, kind = "bar")

<seaborn.axisgrid.FacetGrid at 0x13a3252e0>


# numeric_only=True makes the exclusion of the text columns explicit;
# recent pandas versions raise an error instead of dropping them silently.
tips.groupby("sex").mean(numeric_only = True)


tips.groupby("sex")["total_bill"].agg([numpy.mean, numpy.std, numpy.median, numpy.min, numpy.max])


billFemale = tips.total_bill[tips.sex == "Female"]

billMale = tips.total_bill[tips.sex == "Male"]

scipy.stats.ttest_ind(billFemale, billMale)

Ttest_indResult(statistic=-2.2777940289803134, pvalue=0.0236116668468594)


billGrouped = [tips.total_bill[tips.sex == s] for s in list(tips.sex.unique())]

scipy.stats.f_oneway(*billGrouped)

F_onewayResult(statistic=5.188345638458361, pvalue=0.023611666846859697)


tips.hist(column = "total_bill", by = "sex")

/usr/local/lib/python3.9/site-packages/pandas/plotting/_matplotlib/tools.py:400: MatplotlibDeprecationWarning: 
The is_first_col function was deprecated in Matplotlib 3.4 and will be removed two minor releases later. Use ax.get_subplotspec().is_first_col() instead.
  if ax.is_first_col():

array([<AxesSubplot:title={'center':'Female'}>,
       <AxesSubplot:title={'center':'Male'}>], dtype=object)


tips.boxplot(by = "sex")

array([[<AxesSubplot:title={'center':'n_row'}, xlabel='[sex]'>,
        <AxesSubplot:title={'center':'size'}, xlabel='[sex]'>],
       [<AxesSubplot:title={'center':'tip'}, xlabel='[sex]'>,
        <AxesSubplot:title={'center':'total_bill'}, xlabel='[sex]'>]],
      dtype=object)


tips.boxplot(column = "total_bill", by = "sex")

<AxesSubplot:title={'center':'total_bill'}, xlabel='sex'>


# One panel per sex, each showing the distribution of total_bill.
p = seaborn.FacetGrid(tips, row = "sex")
# histplot with a density-scaled histogram and a KDE curve replaces the
# deprecated distplot.
p.map(seaborn.histplot, "total_bill", kde = True, stat = "density")

/usr/local/lib/python3.9/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
  warnings.warn(msg, FutureWarning)
/usr/local/lib/python3.9/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
  warnings.warn(msg, FutureWarning)

<seaborn.axisgrid.FacetGrid at 0x13a933e80>


seaborn.catplot(x = "sex", y = "total_bill", data = tips, kind = "box")

<seaborn.axisgrid.FacetGrid at 0x13a937730>


seaborn.catplot(x = "sex", y = "total_bill", data = tips, kind = "point", join = False)

<seaborn.axisgrid.FacetGrid at 0x13aa84640>


seaborn.catplot(x = "sex", y = "total_bill", data = tips, kind = "violin")

<seaborn.axisgrid.FacetGrid at 0x13ab2b370>


seaborn.catplot(x = "sex", y = "total_bill", data = tips, kind = "strip")

<seaborn.axisgrid.FacetGrid at 0x13abb1550>


seaborn.catplot(y = "total_bill", data = tips, kind = "box")

<seaborn.axisgrid.FacetGrid at 0x13abeea90>


seaborn.catplot(x = "total_bill", data = tips, kind = "point")

<seaborn.axisgrid.FacetGrid at 0x13ac3d640>


seaborn.catplot(x = "total_bill", data = tips, kind = "violin")

<seaborn.axisgrid.FacetGrid at 0x13ac629d0>


seaborn.catplot(x = "total_bill", data = tips, kind = "strip", jitter = True)

<seaborn.axisgrid.FacetGrid at 0x13ad039a0>


seaborn.catplot(x = "sex", data = tips, kind = "count")

<seaborn.axisgrid.FacetGrid at 0x13ac625e0>


seaborn.catplot(x = "sex", hue = "smoker", data = tips, kind = "count")

<seaborn.axisgrid.FacetGrid at 0x13ad153d0>


seaborn.catplot(x = "sex", col = "smoker", data = tips, kind = "count")

<seaborn.axisgrid.FacetGrid at 0x13ae2e2e0>


t = pandas.crosstab(pandas.cut(tips.total_bill, bins = 6),
                    tips["size"],
                    values = tips.tip, aggfunc = numpy.mean)
seaborn.heatmap(t)

<AxesSubplot:xlabel='size', ylabel='total_bill'>


seaborn.lmplot(x = "total_bill", y = "tip", hue = "sex", col = "sex", data = tips)

<seaborn.axisgrid.FacetGrid at 0x13afbccd0>


p = seaborn.FacetGrid(tips, row = "sex", col = "smoker")
p.map(seaborn.histplot, "total_bill")

<seaborn.axisgrid.FacetGrid at 0x13b078df0>


seaborn.catplot(x = "sex", y = "total_bill", hue = "smoker", data = tips, kind = "box")

<seaborn.axisgrid.FacetGrid at 0x13b1df1c0>


seaborn.catplot(x = "sex", y = "total_bill", hue = "sex", col = "smoker", data = tips, 
                   kind = "point", join = False)

<seaborn.axisgrid.FacetGrid at 0x13b052cd0>


seaborn.catplot(x = "sex", y = "total_bill", hue = "smoker", data = tips, kind = "violin")

<seaborn.axisgrid.FacetGrid at 0x13b2ee490>


seaborn.catplot(x = "sex", y = "total_bill", hue = "smoker", col = "smoker", data = tips, 
                   kind = "strip", jitter = True)

<seaborn.axisgrid.FacetGrid at 0x13b3a8df0>


seaborn.catplot(x = "sex", row = "smoker", col = "time", data = tips, kind = "count")

<seaborn.axisgrid.FacetGrid at 0x13b4dd3a0>


seaborn.catplot(x = "sex", hue = "smoker", col = "time", data = tips, kind = "count")

<seaborn.axisgrid.FacetGrid at 0x13b62d760>


t = pandas.crosstab([tips.smoker, tips.time], tips.sex, normalize = "index")
t = t.reset_index().assign(smoker_time = lambda x: x.smoker + "_" + x.time).drop(columns = ["smoker", "time"])
tm = pandas.melt(t, id_vars = "smoker_time")
tm = tm.assign(value = 100 * tm.value)
seaborn.catplot(x = "smoker_time", y = "value", hue = "sex", data = tm, kind = "bar")

<seaborn.axisgrid.FacetGrid at 0x13aeda0d0>


seaborn.set(font_scale=2, style="white")

p = seaborn.catplot(x = "size", y = "tip", hue = "sex", data = tips, kind = "box", 
                    palette = "Set2", height = 8, aspect = 2, legend = False)

p.fig.suptitle("Taille et pourboire en fonction du sexe")

p.set_axis_labels("Nombre de convives", "Pourboire")

matplotlib.pyplot.legend(title='Sexe', loc='upper right')

matplotlib.pyplot.show()

Exemple de `range()`	Valeurs prises
range(5)	0, 1, 2, 3, 4
range(5, 10)	5, 6, 7, 8, 9
range(5, 10, 2)	5, 7, 9
range(10, 5, -1)	10, 9, 8, 7, 6

Fonction	Commentaire
`tips.head()`	Premières lignes du tableau (5 par défaut)
`tips.shape`	Nombre de lignes et de colonnes
`tips.count()`	Nombre de valeurs non manquantes (non NA) pour chaque colonne
`tips.info()`	Résumé combinant plusieurs informations (types des colonnes, nombre de valeurs non manquantes, mémoire utilisée)
`tips.columns`	Noms des colonnes
`list(tips)`	Liste des noms de colonnes

	sex	total_bill
0	Female	16.99
1	Male	10.34
2	Male	21.01
3	Male	23.68
4	Female	24.59
...	...	...
239	Male	29.03
240	Female	27.18
241	Male	22.67
242	Male	17.82
243	Female	18.78

	tip	time
0	1.01	Dinner
1	1.66	Dinner
2	3.50	Dinner
3	3.31	Dinner
4	3.61	Dinner
...	...	...
239	5.92	Dinner
240	2.00	Dinner
241	2.00	Dinner
242	1.75	Dinner
243	3.00	Dinner

	total_bill	tip	time
0	16.99	1.01	Dinner
1	10.34	1.66	Dinner
2	21.01	3.50	Dinner
3	23.68	3.31	Dinner
4	24.59	3.61	Dinner
...	...	...	...
239	29.03	5.92	Dinner
240	27.18	2.00	Dinner
241	22.67	2.00	Dinner
242	17.82	1.75	Dinner
243	18.78	3.00	Dinner

	total_bill	tip	sex	smoker	day	time	size
59	48.27	6.73	Male	No	Sat	Dinner	4
156	48.17	5.00	Male	No	Sun	Dinner	6
170	50.81	10.00	Male	Yes	Sat	Dinner	3
212	48.33	9.00	Male	No	Sat	Dinner	4

	total_bill	tip	sex	smoker	day	time	size
141	34.30	6.7	Male	No	Thur	Lunch	6
142	41.19	5.0	Male	No	Thur	Lunch	5
156	48.17	5.0	Male	No	Sun	Dinner	6
185	20.69	5.0	Male	No	Sun	Dinner	5
187	30.46	2.0	Male	Yes	Sun	Dinner	5
216	28.15	3.0	Male	Yes	Sat	Dinner	5

	total_bill	tip	sex	smoker	day	time	size
67	3.07	1.00	Female	Yes	Sat	Dinner	1
92	5.75	1.00	Female	Yes	Fri	Dinner	2
111	7.25	1.00	Female	No	Sat	Dinner	1
172	7.25	5.15	Male	Yes	Sun	Dinner	2
149	7.51	2.00	Male	No	Thur	Lunch	2
...	...	...	...	...	...	...	...
182	45.35	3.50	Male	Yes	Sun	Dinner	3
156	48.17	5.00	Male	No	Sun	Dinner	6
59	48.27	6.73	Male	No	Sat	Dinner	4
212	48.33	9.00	Male	No	Sat	Dinner	4
170	50.81	10.00	Male	Yes	Sat	Dinner	3

	total_bill	tip	size
count	244.000000	244.000000	244.000000
mean	19.785943	2.998279	2.569672

	total_bill	tip	size
sex
Female	18.056897	2.833448	2.459770
Male	20.744076	3.089618	2.630573

		total_bill	tip	size
sex	smoker
Female	No	18.105185	2.773519	2.592593
Female	Yes	17.977879	2.931515	2.242424
Male	No	19.791237	3.113402	2.711340
Male	Yes	22.284500	3.051167	2.500000

variable	day	nouv	sex	size	smoker	time	tip	total_bill
n_row
0	Sun	nouvelle valeur	Female	2	No	Dinner	1.01	16.99
1	Sun	nouvelle valeur	Male	3	No	Dinner	1.66	10.34
2	Sun	nouvelle valeur	Male	3	No	Dinner	3.50	21.01
3	Sun	nouvelle valeur	Male	2	No	Dinner	3.31	23.68
4	Sun	nouvelle valeur	Female	4	No	Dinner	3.61	24.59
...	...	...	...	...	...	...	...	...
239	Sat	nouvelle valeur	Male	3	No	Dinner	5.92	29.03
240	Sat	nouvelle valeur	Female	2	Yes	Dinner	2.00	27.18
241	Sat	nouvelle valeur	Male	2	Yes	Dinner	2.00	22.67
242	Sat	nouvelle valeur	Male	2	No	Dinner	1.75	17.82
243	Thur	nouvelle valeur	Female	2	No	Dinner	3.00	18.78

	total_bill	tip	size	n_row
count	244.00	244.00	244.00	244.00
mean	19.79	3.00	2.57	121.50
std	8.90	1.38	0.95	70.58
min	3.07	1.00	1.00	0.00
25%	13.35	2.00	2.00	60.75
50%	17.80	2.90	2.00	121.50
75%	24.13	3.56	3.00	182.25
max	50.81	10.00	6.00	243.00

	total_bill	tip	size	n_row
total_bill	1.000000	0.675734	0.598315	0.044526
tip	0.675734	1.000000	0.489299	-0.026709
size	0.598315	0.489299	1.000000	0.008061
n_row	0.044526	-0.026709	0.008061	1.000000

smoker	No	Yes
sex
Female	0.620690	0.379310
Male	0.617834	0.382166
All	0.618852	0.381148

smoker	No	Yes	All
sex
Female	0.357616	0.354839	0.356557
Male	0.642384	0.645161	0.643443

	mean	std	median	amin	amax
sex
Female	18.056897	8.009209	16.40	3.07	44.30
Male	20.744076	9.246469	18.35	7.25	50.81

Introduction à Python¶

Intérêt de Python¶

Éléments de base¶

Chaînes de caractères¶

Fonctions sur les chaînes¶

Tuples¶

Listes¶

Fonctions sur les listes¶

Opérateurs sur les listes¶

List comprehension¶

Passage de référence¶

Dictionnaires¶

Accès aux éléments¶

Fonctions sur les dictionnaires¶

Passage de référence¶

dict comprehension¶

Traitement conditionnel - if¶

Traitement itératif - for¶

Traitement itératif - while¶

Fonctions¶

Paramètres des fonctions¶

Gestion des erreurs¶

Utilisation de Python pour l'ingénierie de la donnée¶

Modules utiles¶

Données utilisées¶

Manipulation de données¶

Restriction¶

Projection¶

Tri et limitation des résultats¶

Ajout de nouvelles variables¶

Agrégat¶

Modification du format d'un data frame (reshaping)¶

Statistiques descriptives¶

Quantitative¶

Histogramme¶

Boîtes à moustaches¶

Qualitative¶

Diagramme en barres¶

Diagramme circulaire¶

Quantitative - quantitative¶

Nuage de points¶

Qualitative - qualitative¶

Diagramme en barres¶

Qualitative - quantitative¶

Représentations graphiques¶

Compléments sur seaborn¶

Représentation de 3 variables quantitatives¶

Représentation de 2 variables quantitatives avec 1 qualitative¶

Représentation d'1 variable quantitative et de 2 variables qualitatives¶

Représentation de 3 variables qualitatives¶

Compléments sur seaborn¶

Traitement conditionnel - `if`¶

Traitement itératif - `for`¶

Traitement itératif - `while`¶

Compléments sur `seaborn`¶

Compléments sur `seaborn`¶