# Contents

from sklearn.datasets import make_moons
from sklearn.cluster import DBSCAN

import matplotlib.pyplot as plt
import numpy as np
# Synthetic 1000-sample dataset from make_moons; the fixed random_state keeps
# the generated points the same from run to run.
X, y = make_moons(n_samples=1000, noise=0.05, random_state=42)

# Two DBSCAN models that differ only in eps, so their clusterings can be
# compared. fit() returns the estimator itself, so it can be chained.
model1 = DBSCAN(eps=0.05, min_samples=5).fit(X)
model2 = DBSCAN(eps=0.2, min_samples=5).fit(X)
def plot_dbscan(dbscan, X, size, show_xlabels=True, show_ylabels=True):
    """Scatter-plot the clustering of a fitted DBSCAN model with pyplot.

    Samples are split into three groups using the model's fitted attributes:
    core samples (``core_sample_indices_`` / ``components_``), anomalies
    (label ``-1``) and the remaining samples, which are neither core nor
    anomalous. Each group is drawn with its own marker.

    Args:
        dbscan: Fitted estimator exposing ``labels_``, ``core_sample_indices_``,
            ``components_``, ``eps`` and ``min_samples``.
        X: Array of samples indexed by boolean mask; columns 0 and 1 are
            plotted (assumed to be the data ``dbscan`` was fitted on).
        size: Marker size (``s``) of the large discs drawn at core samples.
        show_xlabels: If true, label the x axis; otherwise hide its tick labels.
        show_ylabels: If true, label the y axis; otherwise hide its tick labels.
    """
    labels = dbscan.labels_

    # Boolean masks over the samples, one per group.
    is_core = np.zeros_like(labels, dtype=bool)
    is_core[dbscan.core_sample_indices_] = True
    is_anomaly = labels == -1
    is_other = ~is_core & ~is_anomaly

    core_points = dbscan.components_
    core_labels = labels[is_core]
    anomaly_points = X[is_anomaly]
    other_points = X[is_other]

    # Large discs around the core samples (the original author calls these
    # the epsilon circles), coloured by cluster label.
    plt.scatter(core_points[:, 0], core_points[:, 1],
                c=core_labels, marker='o', s=size, cmap="Paired")
    # The core samples themselves.
    plt.scatter(core_points[:, 0], core_points[:, 1],
                marker='*', s=20, c=core_labels)
    # The outliers.
    plt.scatter(anomaly_points[:, 0], anomaly_points[:, 1],
                c="r", marker="x", s=100)
    # The non-core samples.
    plt.scatter(other_points[:, 0], other_points[:, 1],
                c=labels[is_other], marker=".")

    if not show_xlabels:
        plt.tick_params(labelbottom=False)
    else:
        plt.xlabel("$x_1$", fontsize=14)

    if not show_ylabels:
        plt.tick_params(labelleft=False)
    else:
        plt.ylabel("$x_2$", fontsize=14, rotation=0)

    heading = "eps={:.2f}, min_samples={}".format(dbscan.eps, dbscan.min_samples)
    plt.title(heading, fontsize=14)
# Draw both fitted models side by side in one figure: a 1x2 grid of subplots.
plt.figure(figsize=(9, 3.2))

# Left panel: model1.
plt.subplot(1, 2, 1)
plot_dbscan(model1, X, size=100)

# Right panel: model2.
plt.subplot(1, 2, 2)
plot_dbscan(model2, X, size=100)

plt.show()
# Print the first 20 entries of each fitted attribute, for both models.
for model_name, fitted_model in (("model1", model1), ("model2", model2)):
    print(f'{model_name} labels: {fitted_model.labels_[:20]}')
    print(f'{model_name} core_sample_indices: {fitted_model.core_sample_indices_[:20]}')
    print(f'{model_name} components_: {fitted_model.components_[:20]}')