# K-Means clustering
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
# Load the customer data set and take a first look at it.
df_cust = pd.read_csv("Mall_Customers.csv")
# Bare expressions only display inside a notebook cell; print them so the
# script produces the same information when run from the command line.
print(df_cust.head())

# Restrict the correlation matrix to numeric columns: recent pandas versions
# raise on non-numeric columns instead of silently skipping them (the frame
# presumably contains a string 'Genre' column -- see the countplot below).
print(df_cust.select_dtypes(include="number").corr())
print(df_cust.columns)

# The identifier column is dropped before any plotting or clustering.
df_cust = df_cust.drop(columns=["CustomerID"])

# Age against spending score; DataFrame.plot creates its own figure.
df_cust.plot.scatter(x='Age', y='Spending Score (1-100)')

# seaborn draws on the current axes, so open a fresh figure first; otherwise
# the bars would be painted on top of the scatter plot above.
plt.figure()
sns.countplot(x='Genre', data=df_cust)

# One bar per distinct age value, on a larger canvas.
plt.figure(figsize=(12, 10))
sns.countplot(x='Age', data=df_cust)
from sklearn.cluster import KMeans
# Feature matrix used for clustering: annual income and spending score.
X = df_cust[["Annual Income (k$)", "Spending Score (1-100)"]]

# Elbow method: fit K-Means for k = 1..10 and record the inertia
# (within-cluster sum of squares) of each fit.
wcss = []
for i in range(1, 11):
    # n_init is pinned so the behaviour does not depend on the scikit-learn
    # version's default; random_state makes the curve reproducible.
    km = KMeans(n_clusters=i, n_init=10, random_state=42)
    km.fit(X)
    wcss.append(km.inertia_)

# Plot the curve so the recorded inertias are actually inspected.
plt.figure(figsize=(10, 6))
plt.plot(range(1, 11), wcss, marker='o')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.title('Elbow method')
# Final K-Means model with 5 clusters; seeded so the label assignment is
# reproducible between runs.
km1 = KMeans(n_clusters=5, n_init=10, random_state=42)
# fit_predict fits the model and returns the cluster index of every row of X.
y = km1.fit_predict(X)
df_cust["label"] = y
print(df_cust.head())

# Scatter of the two clustering features, coloured by K-Means label.
plt.figure(figsize=(10, 6))
sns.scatterplot(
    x='Annual Income (k$)',
    y='Spending Score (1-100)',
    hue='label',
    # An explicit list of colours makes seaborn treat the integer labels as
    # categories rather than as a continuous quantity.
    palette=sns.color_palette('tab10', km1.n_clusters),
    legend='full',
    data=df_cust,
)
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
plt.title('Spending Score (1-100) vs Annual Income (k$)')
plt.show()
from sklearn.cluster import AgglomerativeClustering
# Agglomerative (hierarchical) clustering with Ward linkage and 5 clusters.
# The distance keyword is omitted on purpose: `affinity` was removed from
# recent scikit-learn releases, and Ward linkage only supports the Euclidean
# distance, which is also the default.
hc = AgglomerativeClustering(n_clusters=5, linkage='ward')
y_hc = hc.fit_predict(X)
print(y_hc)

# Scatter of the two clustering features, coloured by hierarchical label.
plt.figure(figsize=(10, 6))
sns.scatterplot(
    x='Annual Income (k$)',
    y='Spending Score (1-100)',
    hue=y_hc,
    # An explicit list of colours makes seaborn treat the integer labels as
    # categories rather than as a continuous quantity.
    palette=sns.color_palette('tab10', hc.n_clusters),
    legend='full',
    data=df_cust,
)
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
plt.title('Spending Score (1-100) vs Annual Income (k$)')
plt.show()