import pandas
import numpy
import matplotlib.pyplot as plt
import seaborn
seaborn.set_style("white")

from prince import PCA

# Download the HadCRUT5 global series as a whitespace-separated table.
# Thirteen column names are supplied: twelve months plus the annual value.
temp = pandas.read_table(
    "https://crudata.uea.ac.uk/cru/data/temperature/HadCRUT5.0Analysis_gl.txt",
    # Raw string: "\s" is not a valid escape sequence in a plain literal and
    # triggers a warning on recent Python versions.
    sep=r"\s+",
    names=["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", "Annual"],
)
# Keep every other row, starting with the first one.
# NOTE(review): presumably the discarded rows are not temperature values
# (e.g. a coverage line per year) -- confirm against the file format.
temp = temp.iloc[::2]
temp = temp.iloc[:-1]  # drop the last row (i.e. 2024 is not finished yet)
temp

# Summary statistics of every column, rounded to two decimals.
temp.describe().round(decimals=2)

# One box per column of the table.
plt.figure(figsize=(16, 8))
seaborn.boxplot(data=temp)
plt.show()

# Annual value plotted against the year, where the year is taken from the
# index of `temp`. Dashed guide lines mark y = 0 and the years 1961 and 1990.
# NOTE(review): 1961 and 1990 presumably delimit the reference period of the
# anomalies -- confirm.
guide_style = dict(linestyle="dashed", color="gray", alpha=.5)

plt.figure(figsize=(16, 8))
plt.axhline(y=0, **guide_style)
for guide_year in (1961, 1990):
    plt.axvline(x=guide_year, **guide_style)
seaborn.lineplot(
    x="Annee",
    y="Annual",
    data=temp.assign(Annee=temp.index).reset_index(drop=True),
)
plt.show()

# Keep only the first twelve columns (the monthly values), leaving out the
# trailing "Annual" column.
temp2 = temp.iloc[:, :12]
temp2

# Fit a PCA that keeps twelve components, one per monthly column.
pca = PCA(n_components=12)
pca.fit(temp2)

PCA(n_components=12)

PCA(n_components=12)

# Table with one row per PCA dimension: eigenvalue, share of variance and
# cumulative share of variance.
explained = pca.percentage_of_variance_
eig = pandas.DataFrame({
    "Dimension": [f"Dim{k}" for k in range(1, 13)],
    "Valeurs propres": pca.eigenvalues_,
    "% expliquée": explained,
    "% expliquée cumulée": numpy.cumsum(explained),
})
eig.round(2)

# Bar chart of the share of variance per dimension.
plt.figure(figsize=(16, 8))
seaborn.barplot(x="Dimension", y="% expliquée", data=eig, color="steelblue")
plt.show()

pca.column_correlations.round(2)

coord_col = pca.column_correlations

# Correlation circle: each of the twelve variables is drawn as a dashed
# segment from the origin to its values on the first two components, with its
# label at the tip, inside a unit circle.
fig, axes = plt.subplots(figsize=(10, 10))
fig.suptitle("Cercle des corrélations")
axes.set_xlim(-1, 1)
axes.set_ylim(-1, 1)

axis_style = dict(color='lightgray', linestyle='--', linewidth=1)
axes.axvline(x=0, **axis_style)
axes.axhline(y=0, **axis_style)

first_two = coord_col.iloc[:12, :2].to_numpy()
for label, (xj, yj) in zip(coord_col.index[:12], first_two):
    axes.text(xj, yj, label, size=25)
    axes.plot([0, xj], [0, yj], color="gray", linestyle='dashed')

# `axes` is the only Axes of the figure just created, hence the current one.
unit_circle = plt.Circle((0, 0), 1, color='blue', fill=False)
axes.add_artist(unit_circle)

plt.show()

# Built-in plot on components 0 and 1, with rows shown and columns hidden.
plot_options = {
    "x_component": 0,
    "y_component": 1,
    "show_rows": True,
    "show_columns": False,
}
pca.plot(temp2, **plot_options)

# Coordinates of every row of `temp2` on the fitted components.
coord = pca.row_coordinates(temp2)
coord

# Row contributions as exposed by the fitted PCA.
contrib = pca.row_contributions_
contrib.round(2)

# Scatter of the rows on the first two components, each point labelled with
# its index value. A label is drawn large (30) when the row's contribution to
# either of the first two components exceeds .02, and "small" otherwise.
plt.figure(figsize=(16, 10))
seaborn.scatterplot(x=0, y=1, data=coord, color="gray", alpha=.25)
for i in range(coord.shape[0]):
    # Rows of `contrib` are matched to rows of `coord` by position.
    emphasised = contrib.iloc[i, 0] > .02 or contrib.iloc[i, 1] > .02
    plt.text(coord.iloc[i, 0], coord.iloc[i, 1], coord.index[i],
             fontsize=30 if emphasised else "small",
             ha="center", va="center")
# Render the figure explicitly, like the other figures of this script.
plt.show()

# Column means as a single-row table labelled "Moyenne".
pandas.DataFrame(temp.mean()).transpose().round(3).rename(index={0: "Moyenne"})

# Rows of selected years, picked by index label.
temp.filter(items=[1852, 1877, 1893, 1951], axis=0)

temp.filter(items=[1878, 1882, 1903, 1912, 1992], axis=0)

# Years 2015 to 2021 inclusive.
temp.filter(items=list(range(2015, 2022)), axis=0)

# Heatmap of the monthly table, transposed so that months are the rows.
plt.figure(figsize=(16, 4))
g = seaborn.heatmap(temp2.T, cmap="coolwarm")
# Render the figure explicitly, like the other figures of this script.
plt.show()

from prince import CA

# Correspondence analysis fitted on the monthly table with 100 added to every
# value.
# NOTE(review): the shift presumably exists because the table contains
# negative values, which CA does not accept -- confirm.
ca = CA()
ca.fit(temp2.add(100))

<prince.ca.CA at 0x13cd60520>

# Project the same shifted table the CA was fitted on (the fit call uses
# temp2 + 100). Passing the unshifted table would build row and column
# profiles from a different table than the one the model was fitted on.
temp2_ca = temp2 + 100
ca_row = ca.row_coordinates(temp2_ca)
ca_col = ca.column_coordinates(temp2_ca)

/usr/local/lib/python3.9/site-packages/prince/ca.py:206: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
  is_sparse = X.dtypes.apply(pd.api.types.is_sparse).all()

# Joint display of the CA results on the first two dimensions: row labels in
# steel blue, column labels in red.
plt.figure(figsize=(16, 8))

g = seaborn.scatterplot(x=0, y=1, data=ca_row)
for i in range(ca_row.shape[0]):
    plt.text(ca_row.iloc[i, 0], ca_row.iloc[i, 1], ca_row.index[i],
             ha="center", va="center", fontsize=15, color="steelblue")
for i in range(ca_col.shape[0]):
    # Column labels are used as they are: the former
    # .replace(" Actifs ayant un emploi RP1968", "") never matched any of the
    # month names used as column labels.
    plt.text(ca_col.iloc[i, 0], ca_col.iloc[i, 1], ca_col.index[i],
             ha="center", va="center", fontsize=15, color="red")

ax = plt.gca()
plt.show()

# Rows of selected years, picked by index label.
temp.filter(items=[1878, 1942, 1943, 1978], axis=0)

# Heatmap of the monthly table with one row per index value.
plt.figure(figsize=(4, 16))
g = seaborn.heatmap(temp2, cmap="coolwarm")
# Render the figure explicitly, like the other figures of this script.
plt.show()

	Jan	Feb	Mar	Apr	May	Jun	Jul	Aug	Sep	Oct	Nov	Dec	Annual
1850	-0.675	-0.333	-0.591	-0.589	-0.509	-0.344	-0.160	-0.208	-0.385	-0.533	-0.283	-0.404	-0.418
1851	-0.201	-0.469	-0.646	-0.542	-0.198	-0.137	-0.097	-0.102	-0.091	-0.008	-0.082	-0.228	-0.233
1852	-0.375	-0.477	-0.560	-0.585	-0.127	-0.084	0.005	-0.136	-0.002	-0.172	-0.305	0.065	-0.229
1853	-0.233	-0.404	-0.280	-0.386	-0.268	-0.142	-0.083	-0.057	-0.250	-0.392	-0.411	-0.337	-0.270
1854	-0.381	-0.361	-0.243	-0.334	-0.289	-0.299	-0.179	-0.239	-0.217	-0.095	-0.410	-0.450	-0.292
...	...	...	...	...	...	...	...	...	...	...	...	...	...
2019	0.800	0.844	1.076	0.939	0.778	0.809	0.857	0.858	0.803	0.956	0.937	1.037	0.891
2020	1.069	1.113	1.094	1.063	0.908	0.825	0.816	0.801	0.867	0.811	1.014	0.693	0.923
2021	0.701	0.565	0.726	0.760	0.706	0.713	0.792	0.799	0.868	0.907	0.854	0.751	0.762
2022	0.779	0.764	0.890	0.770	0.761	0.858	0.778	0.853	0.790	0.929	0.675	0.768	0.801
2023	0.779	0.869	1.125	0.927	0.871	1.052	1.150	1.199	1.352	1.287	1.334	1.259	1.100

	Jan	Feb	Mar	Apr	May	Jun	Jul	Aug	Sep	Oct	Nov	Dec	Annual
count	174.00	174.00	174.00	174.00	174.00	174.00	174.00	174.00	174.00	174.00	174.00	174.00	174.00
mean	-0.10	-0.10	-0.12	-0.08	-0.09	-0.06	-0.04	-0.02	-0.04	-0.04	-0.08	-0.11	-0.07
std	0.42	0.43	0.45	0.41	0.38	0.36	0.35	0.36	0.36	0.39	0.41	0.42	0.38
min	-1.04	-0.84	-0.84	-0.66	-0.66	-0.64	-0.59	-0.60	-0.64	-0.68	-0.68	-0.90	-0.60
25%	-0.39	-0.41	-0.43	-0.39	-0.35	-0.31	-0.29	-0.27	-0.30	-0.33	-0.39	-0.41	-0.34
50%	-0.18	-0.22	-0.21	-0.18	-0.17	-0.15	-0.12	-0.11	-0.13	-0.09	-0.14	-0.21	-0.18
75%	0.14	0.12	0.11	0.15	0.12	0.10	0.09	0.13	0.12	0.11	0.07	0.11	0.09
max	1.09	1.22	1.18	1.06	0.91	1.05	1.15	1.20	1.35	1.29	1.33	1.26	1.10

	Jan	Feb	Mar	Apr	May	Jun	Jul	Aug	Sep	Oct	Nov	Dec
1850	-0.675	-0.333	-0.591	-0.589	-0.509	-0.344	-0.160	-0.208	-0.385	-0.533	-0.283	-0.404
1851	-0.201	-0.469	-0.646	-0.542	-0.198	-0.137	-0.097	-0.102	-0.091	-0.008	-0.082	-0.228
1852	-0.375	-0.477	-0.560	-0.585	-0.127	-0.084	0.005	-0.136	-0.002	-0.172	-0.305	0.065
1853	-0.233	-0.404	-0.280	-0.386	-0.268	-0.142	-0.083	-0.057	-0.250	-0.392	-0.411	-0.337
1854	-0.381	-0.361	-0.243	-0.334	-0.289	-0.299	-0.179	-0.239	-0.217	-0.095	-0.410	-0.450
...	...	...	...	...	...	...	...	...	...	...	...	...
2019	0.800	0.844	1.076	0.939	0.778	0.809	0.857	0.858	0.803	0.956	0.937	1.037
2020	1.069	1.113	1.094	1.063	0.908	0.825	0.816	0.801	0.867	0.811	1.014	0.693
2021	0.701	0.565	0.726	0.760	0.706	0.713	0.792	0.799	0.868	0.907	0.854	0.751
2022	0.779	0.764	0.890	0.770	0.761	0.858	0.778	0.853	0.790	0.929	0.675	0.768
2023	0.779	0.869	1.125	0.927	0.871	1.052	1.150	1.199	1.352	1.287	1.334	1.259

	Dimension	Valeurs propres	% expliquée	% expliquée cumulée
0	Dim1	11.09	92.42	92.42
1	Dim2	0.31	2.59	95.01
2	Dim3	0.13	1.06	96.07
3	Dim4	0.11	0.90	96.97
4	Dim5	0.09	0.71	97.69
5	Dim6	0.07	0.57	98.26
6	Dim7	0.05	0.42	98.68
7	Dim8	0.04	0.36	99.03
8	Dim9	0.04	0.30	99.33
9	Dim10	0.03	0.25	99.59
10	Dim11	0.03	0.22	99.80
11	Dim12	0.02	0.20	100.00

component	0	1	2	3	4	5	6	7	8	9	10	11
variable
Jan	0.93	-0.26	-0.15	0.14	0.02	-0.10	-0.08	0.04	0.01	-0.01	-0.01	0.01
Feb	0.94	-0.27	-0.10	0.01	0.00	0.17	0.08	-0.04	0.01	0.00	0.02	0.00
Mar	0.96	-0.14	0.07	-0.20	-0.08	0.01	-0.09	0.04	0.02	-0.03	0.05	-0.00
Apr	0.97	-0.13	0.08	-0.09	-0.03	-0.04	0.02	-0.02	-0.04	0.05	-0.12	-0.03
May	0.97	-0.07	0.08	-0.01	0.05	-0.13	0.09	-0.06	-0.04	0.01	0.08	0.02
Jun	0.98	0.03	0.10	0.03	0.11	-0.01	0.06	0.05	0.07	-0.10	-0.03	-0.04
Jul	0.97	0.07	0.12	0.07	0.08	0.05	-0.04	0.05	0.06	0.11	0.02	0.02
Aug	0.98	0.10	0.08	0.07	-0.01	0.07	-0.05	-0.01	-0.09	-0.06	-0.02	0.08
Sep	0.97	0.12	0.02	0.11	-0.07	0.03	-0.04	-0.03	-0.05	0.00	0.04	-0.11
Oct	0.97	0.15	-0.03	0.02	-0.13	-0.03	-0.01	-0.10	0.11	-0.01	-0.02	0.03
Nov	0.96	0.18	-0.11	-0.03	-0.10	-0.02	0.10	0.13	-0.02	0.02	0.00	0.01
Dec	0.94	0.21	-0.18	-0.13	0.16	0.00	-0.05	-0.04	-0.02	0.01	-0.01	-0.01

Température HadCRUT¶

Description¶

Evolution annuelle sur la période¶

Analyse¶

Représentation des années¶

Années avec un début d'année chaud et une fin d'année froide¶

Années avec un début d'année froid et une fin d'année chaude¶

Années (beaucoup) plus chaudes que la moyenne¶

Heatmap classique de l'évolution pour repérer les années à comportement atypique¶

Et avec une AFC ?¶

component	0	1	2	3	4	5	6	7	8	9	10	11
1850	-3.012003	0.431466	0.098982	0.067951	0.351684	0.776386	0.283599	0.428619	-0.153287	0.164669	0.168333	0.241695
1851	-1.345836	0.930646	-0.244484	0.877566	0.095484	-0.327217	0.146827	0.047136	0.222303	-0.092849	0.229386	0.193750
1852	-1.297763	1.145892	-0.017672	0.485128	0.892949	-0.089029	-0.121513	-0.232531	0.060483	0.023844	0.559803	-0.200627
1853	-1.711012	-0.078725	0.439443	0.231846	0.498018	0.059116	-0.378079	0.288674	-0.123946	-0.158261	0.156853	0.194773
1854	-1.917393	-0.015155	0.328295	-0.011872	-0.298448	0.000950	-0.205764	-0.259100	0.332803	0.062479	0.155142	0.052413
...	...	...	...	...	...	...	...	...	...	...	...	...
2019	8.475412	0.330155	-0.027330	-0.425422	0.042390	0.056811	-0.228674	0.016499	0.122236	0.066653	-0.079167	0.064623
2020	8.726627	-0.751095	0.010678	0.015370	-0.295516	-0.032163	0.217470	0.304661	-0.092436	0.123502	0.022758	-0.139310
2021	7.402119	0.687969	0.207032	0.299010	-0.194183	-0.180343	-0.140000	-0.007238	0.035431	0.106444	-0.033782	-0.072324
2022	7.728384	0.150989	0.344504	0.155171	0.099516	0.005746	-0.218616	-0.225818	0.261071	-0.290057	0.016249	0.049978
2023	10.419954	1.656237	0.185007	0.225360	-0.289773	0.392665	-0.264498	0.112945	0.024298	-0.024447	0.184994	-0.269471

	Jan	Feb	Mar	Apr	May	Jun	Jul	Aug	Sep	Oct	Nov	Dec	Annual
1878	-0.070	0.229	0.361	0.189	-0.122	0.010	-0.077	-0.047	0.021	-0.165	-0.143	-0.323	-0.011
1882	-0.025	-0.117	-0.177	-0.315	-0.326	-0.392	-0.341	-0.245	-0.150	-0.399	-0.404	-0.655	-0.296
1903	-0.434	-0.240	-0.398	-0.588	-0.516	-0.548	-0.505	-0.601	-0.560	-0.665	-0.630	-0.714	-0.533
1912	-0.329	-0.315	-0.554	-0.338	-0.335	-0.285	-0.507	-0.572	-0.644	-0.679	-0.558	-0.590	-0.476
1992	0.363	0.348	0.309	0.162	0.168	0.156	0.005	0.038	-0.090	-0.023	-0.037	0.099	0.125

	Jan	Feb	Mar	Apr	May	Jun	Jul	Aug	Sep	Oct	Nov	Dec	Annual
2015	0.740	0.782	0.795	0.701	0.731	0.760	0.715	0.799	0.826	1.015	0.996	1.042	0.825
2016	1.088	1.224	1.182	1.030	0.880	0.777	0.778	0.949	0.846	0.819	0.844	0.779	0.933
2017	0.952	1.067	1.065	0.846	0.780	0.658	0.805	0.811	0.729	0.809	0.806	0.815	0.845
2018	0.711	0.796	0.790	0.822	0.713	0.738	0.733	0.735	0.676	0.869	0.745	0.824	0.763
2019	0.800	0.844	1.076	0.939	0.778	0.809	0.857	0.858	0.803	0.956	0.937	1.037	0.891
2020	1.069	1.113	1.094	1.063	0.908	0.825	0.816	0.801	0.867	0.811	1.014	0.693	0.923
2021	0.701	0.565	0.726	0.760	0.706	0.713	0.792	0.799	0.868	0.907	0.854	0.751	0.762

	Jan	Feb	Mar	Apr	May	Jun	Jul	Aug	Sep	Oct	Nov	Dec	Annual
1852	-0.375	-0.477	-0.560	-0.585	-0.127	-0.084	0.005	-0.136	-0.002	-0.172	-0.305	0.065	-0.229
1877	-0.411	-0.209	-0.091	-0.346	-0.411	-0.138	-0.016	0.211	0.108	0.113	0.034	-0.056	-0.101
1893	-1.045	-0.845	-0.427	-0.515	-0.572	-0.442	-0.244	-0.287	-0.365	-0.304	-0.405	-0.485	-0.495
1951	-0.360	-0.509	-0.262	-0.108	0.032	0.060	0.039	0.121	0.132	0.124	-0.072	0.069	-0.061

	Jan	Feb	Mar	Apr	May	Jun	Jul	Aug	Sep	Oct	Nov	Dec	Annual
1878	-0.070	0.229	0.361	0.189	-0.122	0.010	-0.077	-0.047	0.021	-0.165	-0.143	-0.323	-0.011
1942	0.258	0.017	-0.042	-0.015	0.124	0.030	-0.060	-0.057	-0.013	-0.087	-0.058	-0.080	0.001
1943	-0.183	-0.031	-0.168	0.023	0.036	-0.075	0.018	0.032	-0.013	0.228	0.104	0.106	0.006
1978	0.037	0.074	0.059	0.053	0.012	-0.055	0.016	-0.217	0.042	-0.060	0.092	0.010	0.005