ML


#P1 Write a Python Program to Prepare a Scatter Plot on the Iris Dataset
import pandas as pd
import matplotlib.pyplot as plt

# Load the Iris dataset
df = pd.read_csv('iris.csv')

# Scatter plot of sepal length against sepal width
plt.scatter(df['SepalLengthCm'], df['SepalWidthCm'])
plt.xlabel('SepalLengthCm')
plt.ylabel('SepalWidthCm')
plt.show()
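If the CSV also carries a 'Species' column (an assumption about this particular file, not something the program above requires), the same plot can be coloured per class. A minimal sketch:

import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv('iris.csv')

# One scatter call per species so each class gets its own colour
for species, group in df.groupby('Species'):
    plt.scatter(group['SepalLengthCm'], group['SepalWidthCm'], label=species)
plt.xlabel('SepalLengthCm')
plt.ylabel('SepalWidthCm')
plt.legend()
plt.show()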



#P2 Remove Null Values

import pandas as pd

# Assuming your CSV file is named 'iris.csv'
df = pd.read_csv('iris.csv')

# Display original dataframe information
print("Original DataFrame:")
df.info()

# Check for null values
null_values = df.isnull().sum()

# Display null values
print("\nNull values in each column:")
print(null_values)

# Remove rows with null values (if any)
df_cleaned = df.dropna()

# Display cleaned dataframe
print("\nDataFrame after removing null values:")
print(df_cleaned)
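A quick sanity check on how many rows were dropped can be appended to the same script (it reuses the df and df_cleaned names defined above):

print(f"\nRows before: {len(df)}, rows after: {len(df_cleaned)}, removed: {len(df) - len(df_cleaned)}")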



#P3 Categorical to Numerical

import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Assuming your CSV file is named 'iris.csv'
df = pd.read_csv('iris.csv')

# Convert categorical (object) columns to numeric labels
df = df.apply(lambda x: LabelEncoder().fit_transform(x) if x.dtype == 'object' else x)

# Display the updated DataFrame
print(df)
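To see which integer each category received, the encoder can be kept in a variable instead of being created inside the lambda. The sketch below assumes 'Species' is the only object column in the Iris file:

le = LabelEncoder()
species_codes = le.fit_transform(pd.read_csv('iris.csv')['Species'])

# classes_ lists the original labels in the order of their numeric codes
print(dict(zip(le.classes_, range(len(le.classes_)))))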



#P4 Linear Regression

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Load data
data = pd.read_csv('housing_data.csv')

# Features and target variable
X = data[['area (in cm)', 'no of bedrooms']]
y = data['price in lakhs']

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create, train, and evaluate the model
model = LinearRegression().fit(X_train, y_train)
mse = mean_squared_error(y_test, model.predict(X_test))
print(f'Mean Squared Error: {mse}')

# Plot actual vs predicted prices
plt.scatter(X_test['area (in cm)'], y_test, color='black', label='Actual Prices')
plt.scatter(X_test['area (in cm)'], model.predict(X_test), color='blue', label='Predicted Prices')
plt.xlabel('Area (in cm)')
plt.ylabel('Price (in lakhs)')
plt.title('Linear Regression: Actual vs Predicted Prices')
plt.legend()
plt.show()
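The fitted model can also price an unseen listing. The numbers below are placeholders, not values taken from housing_data.csv; the column names must match the training features:

# Hypothetical new listing (placeholder values)
new_house = pd.DataFrame({'area (in cm)': [1200], 'no of bedrooms': [3]})
print(f'Predicted price (in lakhs): {model.predict(new_house)[0]:.2f}')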



#P5 Multiple Linear Regression

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset
data = pd.read_csv('Slip.csv')

# Features and target variable
# Replace 'feature1', 'feature2', 'feature3' with actual feature names or column indices
X = data[['feature1', 'feature2', 'feature3']]
y = data['target_variable']

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Check if the test set has at least two samples
if len(y_test) < 2:
    print("Warning: Test set has fewer than two samples. R-squared score is not well-defined.")
else:
    # Create, train, and evaluate the model
    model = LinearRegression().fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Evaluate the model
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # Print results
    print(f'Mean Squared Error: {mse}')
    print(f'R-squared: {r2}')
    print('Coefficients:', model.coef_)
    print('Intercept:', model.intercept_)
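As with the simple model, a prediction for unseen data can be added at the end of the else branch (kept indented so it stays inside it). The feature values are placeholders for whatever 'feature1'..'feature3' actually hold:

    # Hypothetical new row (placeholder values, indented inside the else block)
    new_row = pd.DataFrame({'feature1': [1.0], 'feature2': [2.0], 'feature3': [3.0]})
    print('Prediction for new row:', model.predict(new_row)[0])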



#P6 Decision Tree

import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the dataset from CSV (replace 'tennis.csv' with your actual filename)
tennis_data = pd.read_csv('tennis.csv')

# Convert categorical features and the target to numerical using one-hot encoding
tennis_data = pd.get_dummies(tennis_data, columns=['Outlook', 'Temperature', 'Humidity', 'Wind', 'PlayTennis'], drop_first=True)

# Features and target variable
X = tennis_data.drop('PlayTennis_Yes', axis=1)
y = tennis_data['PlayTennis_Yes']

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the Decision Tree model
model = DecisionTreeClassifier()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

# Example prediction for new data (already one-hot encoded; baseline categories
# dropped by drop_first, such as 'Outlook_Overcast', are handled by the reindex below)
new_data = {
    'Outlook_Overcast': [1],
    'Temperature_Mild': [1],
    'Humidity_Normal': [1],
    'Wind_Weak': [1]
}
new_df = pd.DataFrame(new_data)

# Reorder columns to match the order during training
new_df = new_df.reindex(columns=X.columns, fill_value=0)

# Now you can make predictions
prediction = model.predict(new_df)
print(f'Prediction for new data: {"Yes" if prediction[0] else "No"}')
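Optionally, the learned rules can be drawn with scikit-learn's plot_tree (matplotlib is assumed to be available, as in the earlier programs):

from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

# Visualize the fitted decision tree
plt.figure(figsize=(10, 6))
plot_tree(model, feature_names=list(X.columns), class_names=['No', 'Yes'], filled=True)
plt.show()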