# OneCompiler

# ML

# 220

# Q7. KNN (k-nearest neighbors classifier)

import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

# Load the Social Network Ads dataset from the CSV file
data = pd.read_csv('Social_Network_Ads.csv')

# Display the first few rows of the dataset
print("Dataset:")
print(data.head())

# Check the columns in the dataset
print("\nColumn names:", data.columns)

# One-hot encode the 'Gender' column; drop_first=True keeps one indicator
# column per category minus one, avoiding a redundant, perfectly
# correlated column.
data = pd.get_dummies(data, columns=['Gender'], drop_first=True)

# Specify the target column
target_column = 'Purchased'

# Separate features and target variable.
# NOTE(review): the commonly distributed Social_Network_Ads.csv carries a
# 'User ID' column -- confirm against the actual file. A row identifier has
# no predictive meaning and would distort KNN distances, so it is dropped
# when present (errors='ignore' makes this a no-op otherwise).
X = data.drop(columns=[target_column]).drop(columns=['User ID'], errors='ignore')
y = data[target_column]

# Split the dataset into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features: KNN is distance-based, so without scaling the
# column with the largest numeric range dominates the neighbor search.
# The scaler is fitted on the training split only so that no statistics
# from the test split leak into the model.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create and train the k-nearest neighbors classifier
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train_scaled, y_train)

# Make predictions on the testing set
y_pred = knn.predict(X_test_scaled)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Display the evaluation results
print("\nAccuracy:", accuracy)
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(class_report)



# Q8. K-Means algorithm

import numpy as np

import matplotlib.pyplot as plt

from sklearn.cluster import KMeans

from sklearn.datasets import make_blobs

# Generate a synthetic dataset: 300 samples drawn around 4 centers.
# y holds the generating blob labels; it is not used below because
# k-means is fitted on X alone.
X, y = make_blobs(n_samples=300, centers=4, random_state=42)

# Plot the synthetic dataset.
# No cmap here: without a `c` argument there is nothing to color-map, and
# matplotlib would ignore the colormap and emit a warning.
plt.scatter(X[:, 0], X[:, 1], s=50)
plt.title("Synthetic Dataset")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.show()

# Apply k-means algorithm.
# n_init is pinned explicitly because its default differs between
# scikit-learn releases; a fixed value keeps runs comparable across versions.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42)
kmeans.fit(X)

# Get cluster centers and labels
centers = kmeans.cluster_centers_
labels = kmeans.labels_

# Plot the data points colored by assigned cluster, with the cluster
# centers overlaid as red X markers
plt.scatter(X[:, 0], X[:, 1], c=labels, s=50, cmap='viridis')
plt.scatter(
    centers[:, 0],
    centers[:, 1],
    c='red',
    s=200,
    alpha=0.75,
    marker='X',
    label='Cluster Centers',
)
plt.title("K-Means Clustering")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.legend()
plt.show()