MACHINE LEARNING PRACTICAL
1. Write a python program to Prepare Scatter Plot (Use Forge Dataset / Iris
Dataset)
Ans:
# Practical 1: scatter plot on the Iris dataset.
import pandas as pd
import matplotlib.pyplot as plt

iris = pd.read_csv('IRIS.csv')
print(iris.head(20))
# Line plot of sepal length against the row id.
# NOTE(review): assumes IRIS.csv has an 'id' column — confirm against the file.
plt.plot(iris.id, iris["sepal_length"], "r--")
plt.show()  # fixed: original `plt.show` (no parentheses) never displayed the figure
# Scatter plot of sepal length vs petal length.
iris.plot(kind="scatter", x="sepal_length", y="petal_length")
plt.show()
2. Write a python program to find all null values in a given data set and
remove them.
Ans:
# Practical 2: locate and handle null (NaN) values in a small dataset.
import numpy as np
import pandas as pd

# Named 'data' so the builtin `dict` type is not shadowed.
data = {"first score": [100, 90, np.nan, 95],
        "second score": [30, 45, 56, np.nan],
        "third score": [np.nan, 40, 80, 98]}
df = pd.DataFrame(data)
print(df)
x = df.isnull()    # True where a value is missing
print(x)
y = df.notnull()   # True where a value is present
print(y)
z = df.fillna(0)   # replace every NaN with a constant
print(z)
s = df.ffill()     # forward fill; fillna(method="pad") is deprecated
print(s)
a = df.bfill()     # backward fill; fillna(method="bfill") is deprecated
print(a)
b = df.replace(to_replace=np.nan, value=-99)  # sentinel replacement
print(b)
c = df.dropna()    # drop every row containing any NaN
print(c)
d=df.dropna(axis=1)
3. Write a python program to convert the Categorical values into numeric format
for a given dataset
Ans:
# Practical 3: turn the categorical target column into integer labels.
import pandas as pd
from sklearn.preprocessing import LabelEncoder

df = pd.read_csv("playtennis.csv")
print(df)

# Fit an encoder on the target column and show the integer codes.
encoder = LabelEncoder()
codes = encoder.fit_transform(df['Play Tennis'])
print(codes)

# Replace the original string column with its encoded version.
df.drop("Play Tennis", axis=1, inplace=True)
df["Play Tennis"] = codes
print(df)
print(df)
4. Write a python program to implement simple Linear Regression for
predicting house price
Ans:
# Practical 4: simple linear regression predicting house price (kc_house_data).
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_predict

data = pd.read_csv(r'kc_house_data.csv')
print(data.head(5))  # original bare `data.head(5)` printed nothing in a script
print(data.shape)
# Make a list of the important features to include in the training data.
f = ['price', 'bedrooms', 'bathrooms', 'sqft_living', 'floors', 'condition',
     'sqft_above', 'sqft_basement', 'yr_built',
     'yr_renovated']
data = data[f]
print(data.shape)
# Drop the missing values.
data = data.dropna()
print(data.shape)
# Show the statistical summary of the dataset.
print(data.describe())  # original bare `data.describe()` printed nothing
# Divide the dataset into independent (X) and dependent (y) variables.
X = data[f[1:]]
y = data['price']
# Split the dataset into training data and testing data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)
# Fit the regression model.
lr = LinearRegression()   # create object of linear regression class
lr.fit(X_train, y_train)  # fit training data
print(lr.coef_)
# Create the predictions on the held-out test set.
y_test_predict = lr.predict(X_test)
print(y_test_predict.shape)
# Plot the residual error for each test sample.
g = plt.plot((y_test - y_test_predict), marker='o', linestyle='')
plt.show()
5. Write a python program to implement multiple Linear Regression for a
given dataset.
Ans:
# Practical 5: multiple linear regression on the 50_Startups dataset.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Load the dataset.
dataset = pd.read_csv('50_Startups.csv')
# Extract features (x) and target variable (y).
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
# One-hot encode the categorical variable in column 3 (e.g. State).
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [3])],
                       remainder='passthrough')
x = np.array(ct.fit_transform(x))
# Split the dataset into training set and test set.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2,
                                                    random_state=0)
# Create and fit the multiple linear regression model.
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(x_train, y_train)
# Predict the test set results.
y_pred = regressor.predict(x_test)
# Compare real values and predicted values side by side.
df = pd.DataFrame({'Real Values': y_test, 'Predicted Values': y_pred})
print(df)
# Plot the real values against the predicted values.
plt.scatter(y_test, y_pred)
plt.xlabel('Real Values')
plt.ylabel('Predicted Values')
plt.title('Real Values vs Predicted Values')
plt.show()
6. Write a python program to implement Polynomial Regression for given
dataset.
Ans:
# Practical 6: polynomial regression on the Position_Salaries dataset.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

dataset = pd.read_csv('Position_Salaries.csv')
x = dataset.iloc[:, 1:-1].values  # position level
y = dataset.iloc[:, -1].values    # salary
print(dataset.head(5))

from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

# Expand the single feature into polynomial terms up to degree 4.
p_r = PolynomialFeatures(degree=4)
x_poly = p_r.fit_transform(x)
lin_reg = LinearRegression()
lin_reg.fit(x_poly, y)
# (removed a stray bare `LinearRegression()` line — leftover notebook echo)
y_pred = lin_reg.predict(x_poly)
df = pd.DataFrame({'Real Values': y, 'Predicted Values': y_pred})
print(df)

# Plot a smooth fitted curve on a fine grid of position levels.
x_grid = np.arange(min(x), max(x), 0.1)
x_grid = x_grid.reshape((len(x_grid), 1))
plt.scatter(x, y, color='yellow')
plt.scatter(x, y_pred, color='red')
plt.plot(x_grid, lin_reg.predict(p_r.fit_transform(x_grid)), color='black')
plt.title('Polynomial Regression')
plt.xlabel('position level')
plt.ylabel('Salary')
plt.show()
7. A).Write a python program to implement Logistic Regression for Iris
dataset
Ans:
# Practical 7A: logistic regression on Iris — detect virginica from petal width.
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt

iris = datasets.load_iris()
# What kind of data is present in the iris dataset.
print(list(iris.keys()))
# Print the data matrix and the class targets.
print(iris['data'])
print(iris['target'])
# Print the dataset description and the data shape.
print(iris['DESCR'])
print(iris['data'].shape)
# Keep only column index 3 for all rows, i.e. petal width.
x = iris["data"][:, 3:]
print(x)
# Binary target: True when the sample is virginica (class 2).
y = (iris["target"] == 2)
print(y)
y = (iris["target"] == 2).astype(np.int64)  # cast booleans to 0/1
print(y)
clf = LogisticRegression()
clf.fit(x, y)
# Check for iris virginica: 0 means no and 1 means yes.
example = clf.predict(([[1.6]]))
print(example)
example = clf.predict(([[2.6]]))
print(example)
# Use matplotlib to plot the predicted probability curve over petal widths 0-3.
x_new = np.linspace(0, 3, 1000).reshape(-1, 1)
print(x_new)
y_prob = clf.predict_proba(x_new)
print(y_prob)
plt.plot(x_new, y_prob[:, 1], "g-", label="verginica")
plt.show()
B).Write a python program to Implement Naïve Bayes
Ans:
# Practical 7B: Gaussian Naive Bayes on a tiny hand-made dataset.
import numpy as np
from sklearn.naive_bayes import GaussianNB

x_train = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
y_train = np.array([1, 1, 2, 2])
x_test = np.array([[9, 10], [11, 12], [13, 14], [15, 16]])

classifier = GaussianNB()  # fixed spelling of 'classifire'
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)
print(y_pred)
# NOTE(review): scoring the model against its OWN predictions is trivially 1.0;
# a real accuracy needs ground-truth labels for x_test (none exist here).
print("accuracy:", classifier.score(x_test, y_pred))
8. Write a python program to Implement Decision Tree whether or not to
play tennis
Ans:
# Practical 8: decision tree deciding whether or not to play tennis.
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import tree
from sklearn.preprocessing import LabelEncoder

df = pd.read_csv('PlayTennis.csv')
print(df)
# Fixed: original `df([...])` CALLS the DataFrame (TypeError); column
# selection requires double brackets.
# NOTE(review): column names copied from the original, including 'temprature'
# — confirm they match the CSV header.
x = df[['outlook', 'temprature', 'humidity', 'windy']]
y = df['Play']
# Encode string-valued feature columns — sklearn trees need numeric input.
x = x.apply(LabelEncoder().fit_transform)
clf = tree.DecisionTreeClassifier(criterion='entropy')
clf = clf.fit(x, y)
tree.plot_tree(clf)
plt.show()  # added so the tree figure is actually displayed from a script
9. Write a python program to Implement Random Forest for iris Dataset
Ans:
# Practical 9: Random Forest regression on the Salaries dataset.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
#import seaborn as sns
import sklearn
import warnings
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import KNNImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
from sklearn.ensemble import RandomForestRegressor  # duplicate import removed
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.tree import plot_tree

warnings.filterwarnings('ignore')

df = pd.read_csv('Salaries.csv')
print(df)
df.info()

# Feature and target.
# NOTE(review): assumes columns are (Position, Level, Salary) — confirm.
X = df.iloc[:, 1:2].values  # feature: Level
y = df.iloc[:, 2].values    # target: Salary

# Fit Random Forest regression to the dataset.
regressor = RandomForestRegressor(n_estimators=10, random_state=0,
                                  oob_score=True)
# Fixed: the original label-encoded EVERY column (including the target) into a
# 3-column matrix `x` and fit on that — leaking the target into the features
# and breaking the 1-column grid prediction below. Train on X so that
# regressor.predict(X_grid) is dimensionally consistent.
regressor.fit(X, y)

# Access the out-of-bag score.
oob_score = regressor.oob_score_
print(f'Out-of-Bag Score: {oob_score}')

# Predictions on the training data, then evaluate the model.
predictions = regressor.predict(X)
mse = mean_squared_error(y, predictions)
print(f'Mean Squared Error: {mse}')
r2 = r2_score(y, predictions)
print(f'R-squared: {r2}')

# Plot the fitted regression curve on a fine grid.
X_grid = np.arange(min(X), max(X), 0.01)
X_grid = X_grid.reshape(len(X_grid), 1)
plt.scatter(X, y, color='blue')                             # real points
plt.plot(X_grid, regressor.predict(X_grid), color='green')  # predicted curve
plt.title("Random Forest Regression Results")
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()  # fixed: was `plt.show` without parentheses

# Visualize one tree from the forest, e.g. the first tree (index 0).
tree_to_plot = regressor.estimators_[0]
plt.figure(figsize=(20, 10))
# The forest was trained on the single 'Level' feature, so pass one name
# (the original passed all df columns, which no longer match).
plot_tree(tree_to_plot, feature_names=['Level'], filled=True,
          rounded=True, fontsize=10)
plt.title("Decision Tree from Random Forest")
plt.show()
10. Write a python program to implement linear SVM.
Ans:
# Practical 10: linear SVM on four 2-D points, with the separating line drawn.
import numpy as np
from sklearn import svm
import matplotlib.pyplot as plt

x = np.array([[1, 2], [5, 8], [8, 8], [1, 0.6]])
y = [1, 0, 1, 0]

# Train a linear-kernel support vector classifier and classify one new point.
clf = svm.SVC(kernel='linear', C=1.0).fit(x, y)
print(clf.predict([[0.56, 0.76]]))

# The boundary satisfies w0*px + w1*py + b = 0; rewrite it as py = a*px + c.
w = clf.coef_[0]
print(w)
a = -w[0] / w[1]
xx = np.linspace(0, 12)
yy = a * xx - clf.intercept_[0] / w[1]

# Draw the decision line over the training points.
plt.plot(xx, yy, 'k-', label="non weighted div")
plt.scatter(x[:, 0], x[:, 1], c=y)
plt.legend()
plt.show()
11. Write a python program to find Decision boundary by using a neural
network with 10 hidden units on two moons dataset
Ans:
12. Write a python program to generate frequent itemsets and association rules
by applying the apriori algorithm on a Market basket dataset
Ans:
# Practical 12: frequent itemsets and association rules via Apriori (mlxtend).
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

transactions = [
    ['eggs', 'milk', 'bread'],
    ['eggs', 'apple'],
    ['milk', 'bread'],
    ['apple', 'milk'],
    ['milk', 'apple', 'bread'],
]

# One-hot encode the transaction list into a boolean DataFrame.
encoder = TransactionEncoder()
onehot = encoder.fit(transactions).transform(transactions)
df = pd.DataFrame(onehot, columns=encoder.columns_)
print("Encoded dataset")
print(df)

# Itemsets appearing in at least half of the transactions.
freq_items = apriori(df, min_support=0.5, use_colnames=True)
print("\n Frequent ItemSet")
print(freq_items)

# Derive rules filtered by support, then sort by support and confidence.
rules = association_rules(freq_items, metric="support", min_threshold=0.05)
rules = rules.sort_values(['support', 'confidence'], ascending=[False, False])
print("\n Association rules")
print(rules)
13. Write a python program to implement k-nearest Neighbours ML algorithm
to build prediction model (Use Forge Dataset)
Ans:
14. Write a python program to implement Agglomerative clustering on a
synthetic dataset
Ans:
# Practical 14: hierarchical-clustering preparation on credit-card data,
# visualized with a Ward-linkage dendrogram.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.cluster import AgglomerativeClustering
from sklearn.preprocessing import StandardScaler, normalize
import scipy.cluster.hierarchy as shc

# Load dataset.
X = pd.read_csv('CC GENERAL.csv')
# Drop the customer-id column — an identifier, not a feature.
X = X.drop('CUST_ID', axis=1)
# Handle missing values by forward fill (fillna(method='ffill') is deprecated).
X = X.ffill()
# Standardize columns, then scale each row to unit norm.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_normalized = normalize(X_scaled)
# Project to 2 principal components for visualization.
pca = PCA(n_components=2)
X_principal = pca.fit_transform(X_normalized)
# Convert to a DataFrame with named components.
X_principal = pd.DataFrame(X_principal)
X_principal.columns = ['p1', 'p2']
# Plot the dendrogram.
# NOTE(review): AgglomerativeClustering is imported but never fit in this
# snippet — the practical stops at the dendrogram.
plt.figure(figsize=(8, 8))
plt.title("Visualizing the Data")
dendrogram = shc.dendrogram(shc.linkage(X_principal, method='ward'))
plt.show()