Question 06: Implement K-Means clustering / hierarchical clustering on the sales_data_sample.csv dataset. Determine the number of clusters using the elbow method. Download the whole program / project code by clicking the following link: Question ? Answer Programming Code: Write the following code in: ML_P06.py # ML Project Program 06
# K-Means clustering/ hierarchical clustering on sales_data_sample.csv dataset
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the sales dataset (decoded as latin1).
data = pd.read_csv("./sales_data_sample_dataset/sales_data_sample.csv", encoding='latin1')

# Quick look at the raw table. Outside a notebook a bare expression such as
# `data.describe()` is evaluated and then discarded, so the results are
# printed explicitly to stay visible when this runs as a script.
print(data)
data.info()  # info() writes its report to stdout by itself
print(data.describe())
print(data.columns)
print(data.shape)

# Keep only the two features used for clustering, then drop incomplete rows.
# `new_data` is the NaN-free frame used by the clustering and plotting steps.
data = data[['QUANTITYORDERED', 'ORDERLINENUMBER']]
new_data = data.dropna(axis=0)
from sklearn.cluster import KMeans
import seaborn as sns
# Elbow method on the unscaled features: fit K-Means once per candidate k and
# record the inertia (within-cluster sum of squares) of each fit.
ks = list(range(1, 11))  # candidate cluster counts, shared by the loop and the plot
wcss = []
for i in ks:
    clustering = KMeans(n_clusters=i, init='k-means++', random_state=42)
    # Fit on new_data (rows with missing values removed) rather than data:
    # the scatter plots pair clustering.labels_ with new_data, so both must
    # describe exactly the same rows.
    clustering.fit(new_data)
    wcss.append(clustering.inertia_)
# NOTE: after the loop, `clustering` is the model from the final iteration
# (n_clusters = 10); later code reads its labels_.

# Draw the elbow curve on its own figure instead of whatever axes is current.
plt.figure()
sns.lineplot(x=ks, y=wcss)
# Two panels over the same points: plain on the left, coloured by cluster
# label on the right.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))
sns.scatterplot(ax=axes[0], data=new_data, x='QUANTITYORDERED', y='ORDERLINENUMBER').set_title('without clustering')
# NOTE(review): `clustering` is the last model fitted by the elbow loop
# (n_clusters = 10), not a model refit at the k read off the elbow curve.
# Refit with the chosen k if the plot should show that clustering.
sns.scatterplot(ax=axes[1], data=new_data, x='QUANTITYORDERED', y='ORDERLINENUMBER', hue=clustering.labels_).set_title('Using Elbow Clustering Method')

# Per-feature summary statistics. Printed explicitly because a bare
# expression produces no output when the file runs as a script.
print(new_data.describe().T)
from sklearn.preprocessing import StandardScaler
# Standardise both features with StandardScaler so that neither one
# dominates the distance computation purely because of its scale.
ss = StandardScaler()
scaled = ss.fit_transform(new_data)

# Repeat the elbow search on the standardised features.
ks = list(range(1, 11))  # candidate cluster counts, shared by the loop and the plot
wcss_sc = []
for i in ks:
    clustering_sc = KMeans(n_clusters=i, init='k-means++', random_state=42)
    clustering_sc.fit(scaled)
    wcss_sc.append(clustering_sc.inertia_)
# NOTE: after the loop, `clustering_sc` is the model from the final iteration
# (n_clusters = 10); later code reads its labels_.

# Open a fresh figure: without it the curve is drawn on the current axes,
# which in a script run is a panel of the previously created scatter figure.
plt.figure()
sns.lineplot(x=ks, y=wcss_sc)
# Three panels over the same points, left to right: no clustering, labels
# from the model fitted on scaled data, labels from the model fitted on raw
# data.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 5))
sns.scatterplot(ax=axes[0], data=new_data, x='QUANTITYORDERED', y='ORDERLINENUMBER').set_title('without clustering')
sns.scatterplot(ax=axes[1], data=new_data, x='QUANTITYORDERED', y='ORDERLINENUMBER', hue=clustering_sc.labels_).set_title('Using Elbow Clustering Method & Scaled Data')
sns.scatterplot(ax=axes[2], data=new_data, x='QUANTITYORDERED', y='ORDERLINENUMBER', hue=clustering.labels_).set_title('Using Elbow Clustering Method')

# Display every figure created so far. Without this call a plain
# `python ML_P06.py` run builds the figures but never shows them.
plt.show()
# Thanks For Reading.
Output:
# ML Project Program 06 # K-Means clustering/ hierarchical clustering on sales_data_sample.csv dataset import numpy as np import pandas as pd import matplotlib.pyplot as plt data = pd.read_csv("./sales_data_sample_dataset/sales_data_sample.csv", encoding = 'latin1') data data.info() data.describe() data.columns data.shape data = data[['QUANTITYORDERED', 'ORDERLINENUMBER']] new_data = data.dropna(axis = 0) from sklearn.cluster import KMeans import seaborn as sns wcss = [] for i in range (1, 11): clustering = KMeans(n_clusters = i, init = 'k-means++', random_state=42 ) clustering.fit(data) wcss.append(clustering.inertia_) ks = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] sns.lineplot(x = ks, y = wcss) fig, axes = plt.subplots(nrows = 1, ncols = 2, figsize = (15, 5)) sns.scatterplot(ax = axes[0], data = new_data, x = 'QUANTITYORDERED', y = 'ORDERLINENUMBER').set_title('without clustering') sns.scatterplot(ax = axes[1], data = new_data, x = 'QUANTITYORDERED', y = 'ORDERLINENUMBER', hue = clustering.labels_).set_title('Using Elbow Clustering Method') new_data.describe().T from sklearn.preprocessing import StandardScaler ss = StandardScaler() scaled = ss.fit_transform(new_data,) wcss_sc = [] for i in range(1, 11): clustering_sc = KMeans(n_clusters = i, init = 'k-means++', random_state = 42) clustering_sc.fit(scaled) wcss_sc.append(clustering_sc.inertia_) ks = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] sns.lineplot(x = ks, y = wcss_sc) fig, axes = plt.subplots(nrows = 1, ncols = 3, figsize = (15, 5)) sns.scatterplot(ax = axes[0], data = new_data, x = 'QUANTITYORDERED', y = 'ORDERLINENUMBER').set_title('without clustering') sns.scatterplot(ax = axes[2], data = new_data, x = 'QUANTITYORDERED', y = 'ORDERLINENUMBER', hue = clustering.labels_).set_title('Using Elbow Clustering Method') sns.scatterplot(ax = axes[1], data = new_data, x = 'QUANTITYORDERED', y = 'ORDERLINENUMBER', hue = clustering_sc.labels_).set_title('Using Elbow Clustering Method & Scaled Data') # Thanks For Reading.
Output:
0 Comments