OneCompiler

ALL<ML>

1712

MACHINE LEARNING PRACTICAL

  1. Write a python program to Prepare Scatter Plot (Use Forge Dataset / Iris
    Dataset)
    Ans:
    import pandas as pd
    import matplotlib.pyplot as plt
    iris=pd.read_csv('IRIS.csv')
    print(iris.head(20))
    plt.plot(iris.id,iris["sepal_length"],"r--")
    plt.show
    iris.plot(kind="scatter",x="sepal_length",y="petal_length")
    plt.show()
  2. Write a python program to find all null values in a given data set and
    remove them.
    Ans:
    import numpy as np
    import pandas as pd
    dict={"first score":[100,90,np.nan,95],"second score":[30,45,56,np.nan],"third
    score":[np.nan,40,80,98]}
    df=pd.DataFrame(dict)
    print(df)
    x=df.isnull()
    print(x)
    y=df.notnull()
    print(y)
    z=df.fillna(0)
    print(z)
    s=df.fillna(method="pad")
    print(s)
    a=df.fillna(method="bfill")
    print(a)
    b=df.replace(to_replace=np.nan,value=-99)
    print(b)
    c=df.dropna()
    print(c)
    d=df.dropna(axis=1)
  3. Write a python program to convert the categorical values into numeric
    format for a given dataset
    Ans:
    import pandas as pd
    df = pd.read_csv("playtennis.csv")
    print(df)
    from sklearn.preprocessing import LabelEncoder
    le = LabelEncoder()
    label = le.fit_transform(df['Play Tennis'])
    print(label)
    df.drop("Play Tennis" , axis = 1 , inplace = True)
    df["Play Tennis"] = label
    print(df)
  4. Write a python program to implement simple Linear Regression for
    predicting house price
    Ans:
    #simple Linear
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    from sklearn.linear_model import LinearRegression
    from sklearn.model_selection import train_test_split
    from sklearn.model_selection import cross_val_predict
    data = pd.read_csv(r'kc_house_data.csv')
    data.head(5)
    print(data.shape)

Make a list of importatnt feature which is needed to be incuding in training

data
f = ['price', 'bedrooms', 'bathrooms', 'sqft_living', 'floors', 'condition',
'sqft_above', 'sqft_basement', 'yr_built',
'yr_renovated']
data = data[f]
print(data.shape)

Drop the missing values

data = data.dropna()
print(data.shape)

Get the statictial information of the dataset

data.describe()

Now,Divide the dataset into two parts:independent variable and dependent

variable
X = data[f[1:]]
y = data['price']

Split the dataset into training data and testing data

X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2,
random_state=42)
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

Fit the regression model

lr = LinearRegression() # Create object of linear regression class
lr.fit(X_train,y_train) #fit training data
print(lr.coef_)

Create the Prediction

y_test_predict = lr.predict(X_test)
print(y_test_predict.shape)

Plot the error

g=plt.plot((y_test - y_test_predict),marker='o',linestyle='')
plt.show()
5. Write a python program to implement multiple Linear Regression for a
given dataset.
Ans:
# Q5: Multiple linear regression on the 50_Startups dataset.
# NOTE(review): bare prose lines in the original were comments that lost
# their '#' markers during text extraction; restored here. Mid-script
# imports hoisted to the top per convention.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Load the dataset.
dataset = pd.read_csv('50_Startups.csv')

# Extract features (x) and target variable (y = last column).
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# One-hot encode the categorical variable at column index 3.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [3])],
    remainder='passthrough')
x = np.array(ct.fit_transform(x))

# Split the dataset into training set and test set.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0)

# Create and fit the multiple linear regression model.
regressor = LinearRegression()
regressor.fit(x_train, y_train)

# Predict the test set results.
y_pred = regressor.predict(x_test)

# Compare real values against predicted values.
df = pd.DataFrame({'Real Values': y_test, 'Predicted Values': y_pred})
print(df)

# Plot the real values against the predicted values.
plt.scatter(y_test, y_pred)
plt.xlabel('Real Values')
plt.ylabel('Predicted Values')
plt.title('Real Values vs Predicted Values')
plt.show()
6. Write a python program to implement Polynomial Regression for given
dataset.
Ans:
# Q6: Polynomial regression on the Position_Salaries dataset.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

dataset = pd.read_csv('Position_Salaries.csv')
x = dataset.iloc[:, 1:-1].values   # position level (per xlabel below)
y = dataset.iloc[:, -1].values     # salary (per ylabel below)
print(dataset.head(5))

# Expand the single feature into polynomial terms up to degree 4.
p_r = PolynomialFeatures(degree=4)
x_poly = p_r.fit_transform(x)

lin_reg = LinearRegression()
lin_reg.fit(x_poly, y)
# BUG FIX: removed a stray no-op `LinearRegression()` expression that
# followed the fit in the original (a copied-in REPL echo).

y_pred = lin_reg.predict(x_poly)
df = pd.DataFrame({'Real Values': y, 'Predicted Values': y_pred})
print(df)

# Smooth curve over a fine grid of position levels.
x_grid = np.arange(min(x), max(x), 0.1)
x_grid = x_grid.reshape((len(x_grid), 1))
plt.scatter(x, y, color='yellow')
plt.scatter(x, y_pred, color='red')
# BUG FIX: use transform(), not fit_transform() — the feature expansion
# must not be refit on the plotting grid.
plt.plot(x_grid, lin_reg.predict(p_r.transform(x_grid)), color='black')
plt.title('Polynomial Regression')
plt.xlabel('position level')
plt.ylabel('Salary')
plt.show()
7. A).Write a python program to implement Logistic Regression for Iris
dataset
Ans:
# Q7A: Logistic regression on the iris dataset — binary "is it class 2?".
# NOTE(review): bare prose lines in the original were comments that lost
# their '#' markers during text extraction; restored here.
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt

iris = datasets.load_iris()
# What kind of data is present in the iris dataset bunch.
print(list(iris.keys()))
# Print the data and the corresponding targets.
print(iris['data'])
print(iris['target'])
print(iris['DESCR'])
print(iris['data'].shape)

# Keep only column 3 of every row, i.e. petal width, as the one feature.
x = iris["data"][:, 3:]
print(x)

# Binary target: 1 where the sample is class 2, else 0.
y = (iris["target"] == 2)
print(y)
y = (iris["target"] == 2).astype(np.int64)
print(y)

clf = LogisticRegression()
clf.fit(x, y)

# Check class-2 prediction: 0 means no and 1 means yes.
example = clf.predict([[1.6]])
print(example)
example = clf.predict([[2.6]])
print(example)

# Use matplotlib to plot the predicted probability over petal widths 0..3.
x_new = np.linspace(0, 3, 1000).reshape(-1, 1)
print(x_new)
y_prob = clf.predict_proba(x_new)
print(y_prob)
plt.plot(x_new, y_prob[:, 1], "g-", label="verginica")
plt.show()
B).Write a python program to Implement Naïve Bayes
Ans:
# Q7B: Gaussian Naive Bayes on a tiny hand-made dataset.
import numpy as np
from sklearn.naive_bayes import GaussianNB

x_train = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
y_train = np.array([1, 1, 2, 2])
x_test = np.array([[9, 10], [11, 12], [13, 14], [15, 16]])

classifier = GaussianNB()  # fixed spelling of "classifire"
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)
print(y_pred)

# BUG FIX: the original printed score(x_test, y_pred), scoring the test
# predictions against themselves — that is always 1.0 and meaningless.
# There are no true test labels here, so report training accuracy instead.
print("accuracy:", classifier.score(x_train, y_train))
8. Write a python program to Implement Decision Tree whether or not to
play tennis
Ans:
# Q8: Decision tree deciding whether or not to play tennis.
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import tree
from sklearn.preprocessing import LabelEncoder

df = pd.read_csv('PlayTennis.csv')
print(df)

# BUG FIX: the original wrote df(['outlook', ...]) — calling the
# DataFrame, which raises TypeError. Select the columns with df[[...]].
features = df[['outlook', 'temprature', 'humidity', 'windy']]

# BUG FIX: the feature columns are categorical strings; encode each one
# numerically (LabelEncoder was imported but never used in the original).
x = features.apply(LabelEncoder().fit_transform)
y = df['Play']

clf = tree.DecisionTreeClassifier(criterion='entropy')
clf = clf.fit(x, y)
tree.plot_tree(clf)
plt.show()  # render the tree figure
9. Write a python program to Implement Random Forest for iris Dataset
Ans:
# Q9: Random forest regression on the Salaries dataset.
# NOTE(review): bare prose lines in the original were comments that lost
# their '#' markers during text extraction; restored here. Duplicate and
# unused imports from the original were consolidated.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.tree import plot_tree

warnings.filterwarnings('ignore')

df = pd.read_csv('Salaries.csv')
print(df)
df.info()

# Single feature (column 1) and target variable (column 2) as arrays.
X = df.iloc[:, 1:2].values
y = df.iloc[:, 2].values

# Illustrative: numerically encode any categorical columns of df.
label_encoder = LabelEncoder()
x_categorical = df.select_dtypes(include=['object']).apply(label_encoder.fit_transform)
x_numerical = df.select_dtypes(exclude=['object']).values
x = pd.concat([pd.DataFrame(x_numerical), x_categorical], axis=1).values

# Fit the random forest (oob_score=True enables out-of-bag scoring).
# BUG FIX: the original fit on `x`, a concatenation of EVERY column of df
# (including the target), then predicted on a single-column grid below —
# a feature-count mismatch at predict time. Fit on the one feature X,
# consistent with the plots that follow.
regressor = RandomForestRegressor(n_estimators=10, random_state=0,
                                  oob_score=True)
regressor.fit(X, y)

# Out-of-bag score: a generalization estimate without a separate test set.
oob_score = regressor.oob_score_
print(f'Out-of-Bag Score: {oob_score}')

# Predictions on the training data.
predictions = regressor.predict(X)

# Evaluate the model.
mse = mean_squared_error(y, predictions)
print(f'Mean Squared Error: {mse}')
r2 = r2_score(y, predictions)
print(f'R-squared: {r2}')

# Plot predictions over a fine grid of the feature range.
X_grid = np.arange(min(X), max(X), 0.01)
X_grid = X_grid.reshape(len(X_grid), 1)
plt.scatter(X, y, color='blue')                              # real points
plt.plot(X_grid, regressor.predict(X_grid), color='green')   # predicted curve
plt.title("Random Forest Regression Results")
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()  # BUG FIX: original had `plt.show` without parentheses

# Pick one tree from the forest, e.g. the first (index 0), and plot it.
tree_to_plot = regressor.estimators_[0]
plt.figure(figsize=(20, 10))
# BUG FIX: feature_names must match the single feature the model was fit
# on; the original passed all of df's column names.
plot_tree(tree_to_plot, feature_names=df.columns[1:2].tolist(),
          filled=True, rounded=True, fontsize=10)
plt.title("Decision Tree from Random Forest")
plt.show()
10. Write a python program to implement linear SVM.
Ans:
# Q10: Linear SVM on four 2-D points, with the separating line plotted.
import numpy as np
from sklearn import svm
import matplotlib.pyplot as plt

points = np.array([[1, 2], [5, 8], [8, 8], [1, 0.6]])
labels = [1, 0, 1, 0]

# Fit a linear-kernel SVC and classify one new point.
model = svm.SVC(kernel='linear', C=1.0).fit(points, labels)
print(model.predict([[0.56, 0.76]]))

# Recover the decision boundary w0*x + w1*y + b = 0 as y = slope*x - b/w1.
weights = model.coef_[0]
print(weights)
slope = -weights[0] / weights[1]
line_x = np.linspace(0, 12)
line_y = slope * line_x - model.intercept_[0] / weights[1]

plt.plot(line_x, line_y, 'k-', label="non weighted div")
plt.scatter(points[:, 0], points[:, 1], c=labels)
plt.legend()
plt.show()

  1. Write a python program to find Decision boundary by using a neural
    network with 10 hidden units on two moons dataset
    Ans:
  2. Write a python program to generate frequent itemset and association rule
    by applying apriori algorithm on Market basket dataset
    Ans:
    import pandas as pd
    from mlxtend.frequent_patterns import apriori,association_rules
    from mlxtend.preprocessing import TransactionEncoder
    transactions=[['eggs','milk','bread'],
    ['eggs','apple'],
    ['milk','bread'],
    ['apple','milk'],
    ['milk','apple','bread']]
    te=TransactionEncoder()
    te_array=te.fit(transactions).transform(transactions)
    df=pd.DataFrame(te_array,columns=te.columns_)
    print("Encoded dataset")
    print(df)

freq_items=apriori(df,min_support=0.5,use_colnames=True)

print("\n Frequent ItemSet")
print(freq_items)

rules=association_rules(freq_items,metric="support",min_threshold=0.05)
rules=rules.sort_values(['support','confidence'],ascending=[False,False])
print("\n Association rules")
print(rules)
13. Write a python program to implement k-nearest Neighbours ML algorithm
to build prediction model (Use Forge Dataset)
Ans:
14. Write a python program to implement Agglomerative clustering on a
synthetic dataset
Ans:
# Q14: Agglomerative (hierarchical) clustering pipeline with dendrogram.
# NOTE(review): bare prose lines in the original were comments that lost
# their '#' markers during text extraction; restored here.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.cluster import AgglomerativeClustering
from sklearn.preprocessing import StandardScaler, normalize
import scipy.cluster.hierarchy as shc

# Load dataset.
X = pd.read_csv('CC GENERAL.csv')

# Drop the customer-id column — it is not a feature.
X = X.drop('CUST_ID', axis=1)

# Handle missing values by forward fill.
# BUG FIX: was X.fillna(method='ffill', inplace=True) — the `method=`
# keyword is deprecated in modern pandas; use ffill() directly.
X = X.ffill()

# Standardize the columns, then normalize each sample to unit norm.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_normalized = normalize(X_scaled)

# Project to 2 principal components for visualization.
pca = PCA(n_components=2)
X_principal = pca.fit_transform(X_normalized)

# Convert to a DataFrame with named components.
X_principal = pd.DataFrame(X_principal)
X_principal.columns = ['p1', 'p2']

# Ward-linkage dendrogram of the projected data.
plt.figure(figsize=(8, 8))
plt.title("Visualizing the Data")
dendrogram = shc.dendrogram(shc.linkage(X_principal, method='ward'))
plt.show()