# OneCompiler

# DE_ASS8

# 1679

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.cluster import KMeans

# Purpose: load HousingData.csv, drop incomplete rows, draw a correlation
# heatmap of all columns, then run k-means on the CRIM and MEDV columns and
# scatter-plot the resulting clusters.

# --- Load and clean -------------------------------------------------------
housing = pd.read_csv("HousingData.csv")
housing.columns  # notebook-style inspection; shows nothing when run as a script

# Remove every row that contains at least one missing value (in place).
housing.dropna(inplace=True)
housing.isnull().sum()  # notebook-style check that no missing values remain

# --- Correlation heatmap --------------------------------------------------
plt.figure(figsize=(10, 10))
sns.heatmap(housing.corr(), annot=True, linewidths=1)

# --- K-means on two columns -----------------------------------------------
k = 3  # number of clusters; drives both the model and the loops below

# .copy() gives an independent frame, so adding the label column below never
# writes into (or warns about) a slice of `housing`.
data_sample = housing.loc[:, ['CRIM', 'MEDV']].copy()

# Use `k` rather than a repeated literal so the model and the per-cluster
# loops cannot drift apart.
model = KMeans(n_clusters=k)
model.fit(data_sample)
labels = model.predict(data_sample)

data_sample['Label_data'] = labels
data_sample  # notebook-style inspection; shows nothing when run as a script

# clusters[i] is a single-element list holding the rows assigned to cluster i.
# The list wrapper is kept so existing `clusters[i][0]` accesses keep working.
clusters = {
    i: [data_sample[data_sample['Label_data'] == i]]
    for i in range(k)
}
print(clusters[1][0]['MEDV'])

# --- Cluster scatter plot -------------------------------------------------
# Start a fresh figure: without it, a plain script run would draw the scatter
# points onto the heatmap's axes, which are still the current axes.
plt.figure()
for i in range(k):
    # One scatter call per cluster, so each cluster gets its own colour.
    plt.scatter(clusters[i][0]['CRIM'], clusters[i][0]['MEDV'])
plt.show()