from sklearn.cluster import DBSCAN
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from sklearn.datasets import make_moons
# Generate the synthetic "two moons" dataset.
# moons_X holds the sample points, moon_y the label of each point.
moons_X, moon_y = make_moons(n_samples=2000)
def add_noise(X, y, noise_level=0.01):
    """Return a copy of ``X`` with uniform noise added to a random subset of rows.

    Args:
        X: 2-D array-like of points, one row per sample. It is not modified.
        y: Labels for ``X``. Only its length is used, to size the noisy subset.
        noise_level: Fraction of the samples to perturb.

    Returns:
        A new array with the same shape as ``X`` in which
        ``int(noise_level * len(y))`` distinct rows (capped at ``len(X)``)
        have every coordinate shifted by a value drawn uniformly from
        [-0.5, 0.5).
    """
    # Work on a copy so the caller's clean data is not silently overwritten.
    noisy = np.array(X)
    if not np.issubdtype(noisy.dtype, np.floating):
        # An in-place float add on an integer array would raise a casting error.
        noisy = noisy.astype(float)

    # The number of points we wish to make noisy.
    amt_noise = min(int(noise_level * len(y)), len(noisy))
    if amt_noise <= 0:
        return noisy

    # Sample without replacement: with repeated indices the fancy-indexed
    # "+=" below applies only one update per row, so fewer points than
    # requested would actually be perturbed.
    idx = np.random.choice(len(noisy), size=amt_noise, replace=False)

    # Match the noise width to the data instead of assuming two columns.
    noise = np.random.random((amt_noise, noisy.shape[1])) - 0.5
    noisy[idx, :] += noise
    return noisy
# Perturb a fraction of the points (add_noise's default noise_level), then
# draw every point coloured by its label.
moon_noise_X = add_noise(moons_X, moon_y)
plt.scatter(*moon_noise_X.T, c=moon_y)
# Cluster the noisy moons with DBSCAN. A point counts as a core point when at
# least min_samples points lie within distance eps of it.
dbsc = DBSCAN(eps=0.05, min_samples=10).fit(moon_noise_X)

# Get the cluster labels (DBSCAN labels noise points as -1).
labels = dbsc.labels_

# Boolean mask that is True for core points and False for everything else
# (border points and noise).
core_samples = np.zeros_like(labels, dtype=bool)
core_samples[dbsc.core_sample_indices_] = True

unique_labels = np.unique(labels)

# One colour per label. The first three keep the original fixed palette; any
# further labels get colours from a colormap so that zip() below does not
# silently drop clusters when DBSCAN finds more than three labels.
colors = ["red", "gold", "silver"]
n_extra = len(unique_labels) - len(colors)
if n_extra > 0:
    colors += [tuple(rgba) for rgba in plt.cm.Spectral(np.linspace(0, 1, n_extra))]

for label, color in zip(unique_labels, colors):
    class_member_mask = labels == label

    # Core points of this label: large markers.
    xy = moon_noise_X[class_member_mask & core_samples]
    plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=color, markersize=10)

    # Non-core points of this label: small markers.
    xy2 = moon_noise_X[class_member_mask & ~core_samples]
    plt.plot(xy2[:, 0], xy2[:, 1], 'o', markerfacecolor=color, markersize=5)

# The figure shows DBSCAN output, not K-Means, so the title says so.
plt.title("DBSCAN on Half Moons")
# Next: try this on the wholesale data