ML
#P1 Write a Python Program to Prepare a Scatter Plot on the Iris Dataset
import pandas as pd
import matplotlib.pyplot as plt
df=pd.read_csv('iris.csv')
plt.scatter(df['SepalLengthCm'],df['SepalWidthCm'])
plt.xlabel('SepalLengthCm')
plt.ylabel('SepalWidthCm')
plt.show()
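# Optional extension (a minimal sketch): color the points by class so the three
# species are visible; this assumes the CSV has a 'Species' column, as in the
# standard Iris dataset.
for species, group in df.groupby('Species'):
    plt.scatter(group['SepalLengthCm'], group['SepalWidthCm'], label=species)
plt.xlabel('SepalLengthCm')
plt.ylabel('SepalWidthCm')
plt.legend()
plt.show()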
#P2 Remove Null Values
import pandas as pd
# Assuming your CSV file is named 'iris.csv'
df = pd.read_csv('iris.csv')
# Display original dataframe information
print("Original DataFrame:")
df.info()
# Check for null values
null_values = df.isnull().sum()
# Display null values
print("\nNull values in each column:")
print(null_values)
# Remove rows with null values (if any)
df_cleaned = df.dropna()
# Display cleaned dataframe
print("\nDataFrame after removing null values:")
print(df_cleaned)
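# Alternative (a minimal sketch): instead of dropping rows, fill missing values
# with the column mean; this assumes the affected columns are numeric.
df_filled = df.fillna(df.mean(numeric_only=True))
print("\nDataFrame after filling null values with column means:")
print(df_filled)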
#P3 Categorical to Numerical
import pandas as pd
from sklearn.preprocessing import LabelEncoder
# Assuming your CSV file is named 'iris.csv'
df = pd.read_csv('iris.csv')
# Convert categorical (object-dtype) columns to numeric codes
df = df.apply(lambda x: LabelEncoder().fit_transform(x) if x.dtype == 'object' else x)
# Display the updated DataFrame
print(df)
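# Alternative (a minimal sketch): one-hot encode instead of label encoding,
# assuming 'Species' is the only categorical column in iris.csv.
df_onehot = pd.get_dummies(pd.read_csv('iris.csv'), columns=['Species'])
print(df_onehot.head())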
#P4 Linear Regression
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
# Load data
data = pd.read_csv('housing_data.csv')
# Features and target variable
X = data[['area (in cm)', 'no of bedrooms']]
y = data['price in lakhs']
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Create, train, and evaluate the model
model = LinearRegression().fit(X_train, y_train)
mse = mean_squared_error(y_test, model.predict(X_test))
print(f'Mean Squared Error: {mse}')
# Plot actual vs predicted prices
plt.scatter(X_test['area (in cm)'], y_test, color='black', label='Actual Prices')
plt.scatter(X_test['area (in cm)'], model.predict(X_test), color='blue', label='Predicted Prices')
plt.xlabel('Area (in cm)')
plt.ylabel('Price (in lakhs)')
plt.title('Linear Regression: Actual vs Predicted Prices')
plt.legend()
plt.show()
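# Example prediction (a minimal sketch): the values 1500 and 3 below are made-up
# inputs for the 'area (in cm)' and 'no of bedrooms' columns.
new_house = pd.DataFrame({'area (in cm)': [1500], 'no of bedrooms': [3]})
print('Predicted price (in lakhs):', model.predict(new_house)[0])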
#P5 Multiple Linear Regression
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Load the dataset
data = pd.read_csv('Slip.csv')
# Features and target variable
# Replace 'feature1', 'feature2', 'feature3' with actual feature names or column indices
X = data[['feature1', 'feature2', 'feature3']]
y = data['target_variable']
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Check that the test set has at least two samples (r2_score needs two or more)
if len(y_test) < 2:
    print("Warning: Test set has fewer than two samples. R-squared score is not well-defined.")
else:
    # Create, train, and evaluate the model
    model = LinearRegression().fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # Evaluate the model
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    # Print results
    print(f'Mean Squared Error: {mse}')
    print(f'R-squared: {r2}')
    print('Coefficients:', model.coef_)
    print('Intercept:', model.intercept_)
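    # Example prediction (a minimal sketch): the feature names and values below are
    # placeholders; replace them with real columns and values from your dataset.
    new_sample = pd.DataFrame({'feature1': [1.0], 'feature2': [2.0], 'feature3': [3.0]})
    print('Prediction for new sample:', model.predict(new_sample)[0])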
#P6 Decision Tree
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Load the dataset from CSV (replace 'tennis.csv' with your actual filename)
tennis_data = pd.read_csv('tennis.csv')
# Convert categorical features (including the target) to numerical using one-hot encoding
tennis_data = pd.get_dummies(tennis_data, columns=['Outlook', 'Temperature', 'Humidity', 'Wind', 'PlayTennis'], drop_first=True)
# Features and target variable ('PlayTennis_Yes' is 1 for Yes, 0 for No)
X = tennis_data.drop('PlayTennis_Yes', axis=1)
y = tennis_data['PlayTennis_Yes']
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Create and train the Decision Tree model
model = DecisionTreeClassifier()
model.fit(X_train, y_train)
# Make predictions
y_pred = model.predict(X_test)
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
# Example prediction for new data
new_data = {
'Outlook_Overcast': [1],
'Temperature_Mild': [1],
'Humidity_Normal': [1],
'Wind_Weak': [1]
}
# The new sample is already one-hot encoded, so build a DataFrame from it directly
new_df = pd.DataFrame(new_data)
# Reorder columns to match those used during training (missing dummy columns are filled with 0)
new_df = new_df.reindex(columns=X.columns, fill_value=0)
# Now you can make predictions
prediction = model.predict(new_df)
print(f'Prediction for new data: {"Yes" if prediction[0] else "No"}')
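# Optional (a minimal sketch): print the learned tree as text rules for inspection.
from sklearn.tree import export_text
print(export_text(model, feature_names=list(X.columns)))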