In [1]:
from sklearn.cluster import DBSCAN
from sklearn.cluster import KMeans
/home/black/miniconda3/envs/x/lib/python3.7/site-packages/sklearn/utils/__init__.py:4: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated since Python 3.3,and in 3.9 it will stop working
  from collections import Sequence
/home/black/miniconda3/envs/x/lib/python3.7/site-packages/sklearn/model_selection/_split.py:18: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated since Python 3.3,and in 3.9 it will stop working
  from collections import Iterable
/home/black/miniconda3/envs/x/lib/python3.7/site-packages/sklearn/model_selection/_search.py:16: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated since Python 3.3,and in 3.9 it will stop working
  from collections import Mapping, namedtuple, defaultdict, Sequence
In [2]:
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
In [3]:
from sklearn.datasets import make_moons
#moons_X: Data, moon_y: Labels
moons_X, moon_y = make_moons(n_samples = 2000)
In [4]:
def add_noise(X,y, noise_level = 0.01):
    #The number of points we wish to make noisy
    amt_noise = int(noise_level*len(y))
    #Pick amt_noise points at random
    idx = np.random.choice(len(X), size = amt_noise)
    #Add random noise to these selected points
    noise = np.random.random((amt_noise, 2) ) -0.5
    X[idx,:] += noise
    return X    
In [5]:
moon_noise_X = add_noise(moons_X, moon_y)
In [6]:
plt.scatter(moon_noise_X[:,0], moon_noise_X[:,1], c = moon_y)
Out[6]:
<matplotlib.collections.PathCollection at 0x7fb260f98610>
In [7]:
dbsc = DBSCAN(eps = 0.05, min_samples = 10).fit(moon_noise_X)
#Get the cluster labels
labels = dbsc.labels_
#Identify the core and border points
core_samples = np.zeros_like(labels, dtype = bool)
core_samples[dbsc.core_sample_indices_] = True
In [8]:
unique_labels = np.unique(labels)
colors = ["red", "gold", "silver"]
In [9]:
for (label, color) in zip(unique_labels, colors):
    class_member_mask = (labels == label)
    xy = moon_noise_X[class_member_mask & core_samples]
    plt.plot(xy[:,0],xy[:,1], 'o', markerfacecolor = color, markersize = 10)
    
    xy2 = moon_noise_X[class_member_mask & ~core_samples]
    plt.plot(xy2[:,0],xy2[:,1], 'o', markerfacecolor = color, markersize = 5)
    
plt.title("K-Means with two clusters on Half Moons")
Out[9]:
Text(0.5, 1.0, 'K-Means with two clusters on Half Moons')

Try for wholesale data

In [ ]: