Ad Code

Responsive Advertisement

Machine Learning Program / Project - 06

Question 06: Implement K-Means clustering/ hierarchical clustering on sales_data_sample.csv dataset. Determine the number of clusters using the elbow method.
Download the whole program / project code by clicking the following link:
Question ?
Answer
Programming Code:
Write the following code in: ML_P06.py
# ML Project Program 06 

# K-Means clustering/ hierarchical clustering on sales_data_sample.csv dataset
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the sales dataset. The file is decoded as latin1
# (presumably it contains bytes that are not valid UTF-8 -- confirm).
data = pd.read_csv("./sales_data_sample_dataset/sales_data_sample.csv", encoding='latin1')

# Quick overview of the raw table. Bare expressions only render inside a
# notebook, so print explicitly to get the same overview when this file is
# executed as a plain script. `DataFrame.info()` already prints by itself.
print(data)
data.info()
print(data.describe())
print(data.columns)
print(data.shape)

# Keep only the two columns used for clustering, then drop incomplete rows.
# `data` holds the column subset; `new_data` is the NaN-free version.
data = data[['QUANTITYORDERED', 'ORDERLINENUMBER']]
new_data = data.dropna(axis=0)
from sklearn.cluster import KMeans
import seaborn as sns

from sklearn.preprocessing import StandardScaler

# Candidate cluster counts examined by the elbow method.
ks = list(range(1, 11))


def _wcss_curve(features, cluster_counts):
    """Return the K-Means inertia (WCSS) of `features` for every k in `cluster_counts`."""
    return [
        KMeans(n_clusters=k, init='k-means++', random_state=42).fit(features).inertia_
        for k in cluster_counts
    ]


def _elbow_k(cluster_counts, inertias):
    """Pick the elbow of a WCSS curve.

    Both axes are rescaled to [0, 1] and the point lying farthest below the
    straight chord that joins the first and last points of the curve is
    taken as the elbow.

    Args:
        cluster_counts: increasing sequence of k values.
        inertias: WCSS obtained for each k, same length as `cluster_counts`.

    Returns:
        The k (as int) at the elbow; the first k if the curve is degenerate
        (single point, or inertia that does not decrease).
    """
    x = np.asarray(cluster_counts, dtype=float)
    y = np.asarray(inertias, dtype=float)
    x_span = x[-1] - x[0]
    y_span = y[0] - y[-1]
    if x_span <= 0 or y_span <= 0:
        return int(x[0])
    x_norm = (x - x[0]) / x_span
    y_norm = (y - y[-1]) / y_span
    # After rescaling the chord runs from (0, 1) to (1, 0), i.e. x + y = 1,
    # so the distance of a point below it is proportional to 1 - x - y.
    gap = 1.0 - x_norm - y_norm
    return int(x[np.argmax(gap)])


# --- Elbow method on the raw features ---------------------------------------
# Fit on `new_data` (the NaN-free frame): the labels are plotted against
# `new_data`, so the model must be trained on exactly those rows.
wcss = _wcss_curve(new_data, ks)

# Dedicated figure so the curve is never drawn onto another plot's axes.
_, elbow_ax = plt.subplots()
sns.lineplot(ax=elbow_ax, x=ks, y=wcss)

# Refit with the k found at the elbow. Reusing the last model of the sweep
# would colour the plots with k = 10 regardless of where the elbow is.
clustering = KMeans(n_clusters=_elbow_k(ks, wcss), init='k-means++', random_state=42).fit(new_data)

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))
sns.scatterplot(ax=axes[0], data=new_data, x='QUANTITYORDERED', y='ORDERLINENUMBER').set_title('without clustering')
sns.scatterplot(ax=axes[1], data=new_data, x='QUANTITYORDERED', y='ORDERLINENUMBER', hue=clustering.labels_).set_title('Using Elbow Clustering Method')

# Summary statistics of the two features (printed so they also show up when
# the file is run as a script rather than in a notebook).
print(new_data.describe().T)

# --- Elbow method on standardized features ----------------------------------
ss = StandardScaler()
scaled = ss.fit_transform(new_data)

wcss_sc = _wcss_curve(scaled, ks)

_, elbow_sc_ax = plt.subplots()
sns.lineplot(ax=elbow_sc_ax, x=ks, y=wcss_sc)

clustering_sc = KMeans(n_clusters=_elbow_k(ks, wcss_sc), init='k-means++', random_state=42).fit(scaled)

# Side-by-side comparison: raw points, clusters from scaled data, clusters
# from unscaled data.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 5))
sns.scatterplot(ax=axes[0], data=new_data, x='QUANTITYORDERED', y='ORDERLINENUMBER').set_title('without clustering')
sns.scatterplot(ax=axes[2], data=new_data, x='QUANTITYORDERED', y='ORDERLINENUMBER', hue=clustering.labels_).set_title('Using Elbow Clustering Method')
sns.scatterplot(ax=axes[1], data=new_data, x='QUANTITYORDERED', y='ORDERLINENUMBER', hue=clustering_sc.labels_).set_title('Using Elbow Clustering Method & Scaled Data')

# Needed to display the figures when running outside a notebook.
plt.show()

# Thanks For Reading.
Output:

Post a Comment

0 Comments

Ad Code

Responsive Advertisement