# DE_ASS8
import pandas as pd
import numpy as np
# Load the housing dataset from the CSV file in the working directory.
housing = pd.read_csv("HousingData.csv")
# A bare expression only displays inside a notebook cell; print explicitly so
# the column names are also visible when this runs as a plain script.
print(housing.columns)
# Drop every row that contains at least one missing value.
housing.dropna(inplace=True)
# Sanity check: per-column count of remaining missing values (all zeros
# after the dropna above).
print(housing.isnull().sum())
import seaborn as sns
import matplotlib.pyplot as plt
# Annotated heatmap of the pairwise correlations between the dataset columns.
plt.figure(figsize=(10, 10))
sns.heatmap(housing.corr(), annot=True, linewidths=1)
# Show the heatmap as its own figure now; outside a notebook nothing is
# displayed until plt.show() is called.
plt.show()
from sklearn.cluster import KMeans
# Number of clusters; the single source of truth for the model and for any
# later code that iterates over the clusters.
k = 3
# Cluster on the CRIM and MEDV columns only. The explicit .copy() makes this
# an independent frame, so adding the label column below cannot trigger
# pandas' SettingWithCopyWarning or write back into `housing`.
data_sample = housing.loc[:, ['CRIM', 'MEDV']].copy()
# random_state pins the centroid initialisation: K-Means label numbers are
# otherwise arbitrary and can change from run to run.
model = KMeans(n_clusters=k, random_state=0)
# Fit the model and obtain the cluster index of every row in one step.
labels = model.fit_predict(data_sample)
data_sample['Label_data'] = labels
# Print explicitly; a bare expression shows nothing outside a notebook cell.
print(data_sample)
# Group the rows by cluster label. Each value is kept as a one-element list
# holding that cluster's DataFrame, so `clusters[i][0]` lookups keep working.
clusters = {
    i: [data_sample[data_sample['Label_data'] == i]]
    for i in range(k)
}
print(clusters[1][0]['MEDV'])

# Start a fresh figure so the scatter plot is not drawn on top of whatever
# figure happens to be current (e.g. an earlier heatmap in a script run).
plt.figure()
for i in range(k):
    members = clusters[i][0]
    # One scatter call per cluster so each cluster gets its own colour.
    plt.scatter(members['CRIM'], members['MEDV'], label=f'Cluster {i}')
plt.xlabel('CRIM')
plt.ylabel('MEDV')
plt.legend()
plt.show()