import pandas
import numpy
import matplotlib.pyplot as plt
import seaborn
# Select seaborn's "white" style preset for the session.
seaborn.set_style("white")
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import scale
# Fetch the tab-separated Iris file from the course site and preview
# the first rows.
iris = pandas.read_csv(
    "https://fxjollois.github.io/donnees/Iris.txt",
    sep="\t",
)
iris.head()
| | Sepal Length | Sepal Width | Petal Length | Petal Width | Species |
---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
# Drop the "Species" column so that only the four numeric measurements
# remain, then preview the result.
iris2 = iris.drop(columns="Species")
iris2.head()
| | Sepal Length | Sepal Width | Petal Length | Petal Width |
---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 |
1 | 4.9 | 3.0 | 1.4 | 0.2 |
2 | 4.7 | 3.2 | 1.3 | 0.2 |
3 | 4.6 | 3.1 | 1.5 | 0.2 |
4 | 5.0 | 3.6 | 1.4 | 0.2 |
# Run DBSCAN with its default parameters on the measurements after
# passing them through sklearn.preprocessing.scale.
iris2_scaled = scale(iris2)
db = DBSCAN()
# fit() returns the estimator, so leaving it as the last expression
# still displays its repr.
db.fit(iris2_scaled)
DBSCAN()
# Number of observations per cluster label found by `db`
# (scikit-learn's DBSCAN labels noise samples -1).
db_labels = pandas.Series(db.labels_)
db_labels.value_counts()
 1    71
 0    45
-1    34
dtype: int64
# Attach the labels from `db` as a "classe" column and average every
# measurement within each label.
iris2_with_db = iris2.assign(classe=db.labels_)
iris2_with_db.groupby("classe").mean()
| | Sepal Length | Sepal Width | Petal Length | Petal Width |
---|---|---|---|---|
classe | ||||
-1 | 6.288235 | 2.982353 | 4.647059 | 1.538235 |
0 | 4.968889 | 3.386667 | 1.471111 | 0.246667 |
1 | 6.184507 | 2.884507 | 4.781690 | 1.640845 |
# Second DBSCAN run with explicit settings: eps of 0.6 and
# min_samples of 3, on the same scaled measurements.
iris2_scaled2 = scale(iris2)
db2 = DBSCAN(eps=0.6, min_samples=3)
# fit() returns the estimator, so leaving it as the last expression
# still displays its repr.
db2.fit(iris2_scaled2)
DBSCAN(eps=0.6, min_samples=3)
# Number of observations per cluster label found by `db2`
# (scikit-learn's DBSCAN labels noise samples -1).
db2_labels = pandas.Series(db2.labels_)
db2_labels.value_counts()
 1    86
 0    49
-1     9
 2     3
 3     3
dtype: int64
# Attach the labels from `db2` as a "classe" column and average every
# measurement within each label.
iris2_with_db2 = iris2.assign(classe=db2.labels_)
iris2_with_db2.groupby("classe").mean()
| | Sepal Length | Sepal Width | Petal Length | Petal Width |
---|---|---|---|---|
classe | ||||
-1 | 6.400000 | 2.811111 | 5.022222 | 1.644444 |
0 | 5.016327 | 3.451020 | 1.465306 | 0.244898 |
1 | 6.274419 | 2.910465 | 4.920930 | 1.694186 |
2 | 5.000000 | 2.400000 | 3.200000 | 1.033333 |
3 | 6.166667 | 2.233333 | 4.633333 | 1.433333 |