# K-means - Posts - OneCompiler

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Load the customer data set and run a quick exploratory pass over it.
df_cust = pd.read_csv("Mall_Customers.csv")
df_cust.head()

# Correlate numeric columns only. pandas 2.0+ no longer drops non-numeric
# columns silently in DataFrame.corr() and raises on them instead (e.g. on a
# text column such as 'Genre'); selecting numeric dtypes first behaves the
# same on old and new pandas versions.
df_cust.select_dtypes(include="number").corr()
df_cust.columns

# Remove the CustomerID column in place before plotting and clustering.
df_cust.drop(["CustomerID"], axis=1, inplace=True)

# Age against spending score.
df_cust.plot.scatter(x='Age', y='Spending Score (1-100)')

# Number of rows per 'Genre' value.
sns.countplot(x='Genre', data=df_cust)

# Number of rows per age, on a larger figure so the many age ticks fit.
plt.figure(figsize=(12, 10))
sns.countplot(x='Age', data=df_cust)

from sklearn.cluster import KMeans

# Feature matrix for clustering: annual income and spending score only.
X = df_cust[["Annual Income (k$)", "Spending Score (1-100)"]]

# Within-cluster sum of squares (KMeans.inertia_) for k = 1..10, collected in
# order so that wcss[k - 1] corresponds to k clusters.
wcss = []
for i in range(1, 11):
    km = KMeans(n_clusters=i)
    km.fit(X)
    wcss.append(km.inertia_)

# Build and fit the 5-cluster model in one step; fit() returns the estimator
# itself, so km1 is the fitted model.
km1 = KMeans(n_clusters=5).fit(X)

# Assign every row of X to its nearest fitted cluster centre.
y = km1.predict(X)

# Store the cluster assignment next to the original columns.
df_cust["label"] = y

df_cust.head()

# Scatter the customers in income/spending space, coloured by the 'label'
# column of df_cust.
plt.figure(figsize=(10, 6))
sns.scatterplot(
    # Must match the DataFrame column name exactly (same spelling as the
    # column selected for X); a stray space here makes seaborn fail to
    # resolve the column.
    x='Annual Income (k$)',
    y='Spending Score (1-100)',
    hue="label",
    # Five colours, one per expected cluster label.
    palette=['green', 'orange', 'brown', 'dodgerblue', 'red'],
    legend='full',
    data=df_cust,
    s=60,
)
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
plt.title('Spending Score (1-100) vs Annual Income (k$)')
plt.show()

from sklearn.cluster import AgglomerativeClustering
# Agglomerative (hierarchical) clustering of X into 5 clusters.
# The former `affinity='euclidean'` argument is omitted: `affinity` was
# deprecated in scikit-learn 1.2 and removed in 1.4 (passing it raises
# TypeError there). Ward linkage only supports Euclidean distance, which is
# the default, so the clustering is unchanged.
hc = AgglomerativeClustering(n_clusters=5, linkage='ward')
y_hc = hc.fit_predict(X)
y_hc