# OneCompiler

# DevManus

# 348

# p1. Write a Python program to prepare a scatter plot on the Iris dataset.
import matplotlib.pyplot as plt
import pandas as pd

df = pd.read_csv('iris.csv')

# The same column names serve as both the plotted series and the axis labels.
x_col, y_col = 'SepalLengthCm', 'SepalWidthCm'
plt.scatter(df[x_col], df[y_col])
plt.xlabel(x_col)
plt.ylabel(y_col)
plt.show()

# p2. Write a Python program to find all null values in a given dataset and remove them.
import pandas as pd

df = pd.read_csv('iris_null_values.csv')
print(df)

# Number of missing values in each column.
print(df.isnull().sum())

# Rows in which a specific column is missing.
print(df[df['sepal.length'].isnull()])
print(df[df['sepal.width'].isnull()])

# dropna() returns a new DataFrame rather than modifying df in place, so the
# result has to be kept. (df.fillna(value) would substitute a placeholder
# instead of removing the rows.)
df_clean = df.dropna()
print(df_clean)

# p3. Write a Python program to find all null values in a user-defined dataset and remove them.
import numpy as np
import pandas as pd

# Named `data` rather than `dict` so the builtin dict type is not shadowed.
data = {
    'First Column': [100, 90, None, 95],
    'Second Column': [30, 45, 56, 67],
    'Third Column': [np.nan, 40, 80, 98],
}

df = pd.DataFrame(data)
print(df)

# Show, column by column, the rows in which that column is missing.
for column in df.columns:
    print(df[df[column].isnull()])

# dropna() returns a new DataFrame (df itself is left untouched), so keep
# and print the result instead of discarding it.
df_clean = df.dropna()
print(df_clean)

# p4. Write a Python program to convert the categorical values of a given
# dataset to numeric format (this part only loads and displays the data).
import numpy as np
import pandas as pd  # imported here so this program also runs on its own
from sklearn.preprocessing import LabelEncoder

df = pd.read_csv('categorical_data.csv')
print(df)

# p5. Write a Python program to convert the categorical values of a given
# dataset to numeric format.
import numpy as np
import pandas as pd  # imported here so this program also runs on its own
from sklearn.preprocessing import LabelEncoder

df = pd.read_csv('categorical_data.csv')

# Map each distinct value of the 'variety' column to an integer code.
label_encoder = LabelEncoder()
n = label_encoder.fit_transform(df['variety'])
print(n)

# p6. Write a Python program: Linear_Regression_SimpleData
# (least-squares fit of y = b0 + b1 * x on a small hard-coded sample).
import numpy as np

x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])

mean_x = np.mean(x)
mean_y = np.mean(y)
print(mean_x)
print(mean_y)
# 4.5
# 6.5

n = len(x)

# Slope = sum((x - mean_x) * (y - mean_y)) / sum((x - mean_x) ** 2),
# computed with array arithmetic instead of an element-by-element loop.
dx = x - mean_x
numerator = np.sum(dx * (y - mean_y))
denominator = np.sum(dx ** 2)
b1 = numerator / denominator
b0 = mean_y - (b1 * mean_x)

# Print coefficients
print(b0, b1)
# 1.2363636363636363 1.1696969696969697
import matplotlib.pyplot as plt

# Value of the fitted line b0 + b1 * x at every sample point.
q = b0 + b1 * x

# Plotting scatter points and the regression line
plt.scatter(x, y, c='red', label='Scatter Plot')
plt.plot(x, q, color='blue', label='Regression Line')

plt.xlabel('x values')
plt.ylabel('y values')
plt.legend()
plt.show()

# p6.1 Write a Python program: Linear_Regression_SKLearn
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

df = pd.read_csv('housing_data.csv')
print(df.head(10))

X = df['area (in cm)'].values
Y = df['price in lakhs'].values
n = len(X)

# scikit-learn expects a 2-D feature matrix of shape (n_samples, n_features).
X = X.reshape((n, 1))

# Fit and predict on the same ndarray; fitting on a DataFrame and predicting
# on a bare array makes scikit-learn warn about missing feature names.
reg = LinearRegression()
reg = reg.fit(X, Y)
Y_pred = reg.predict(X)
print(Y_pred)

# Calculating R2 score
r2_score = reg.score(X, Y)
print(r2_score)

# p6.2 Write a Python program: Linear_Regression_2
# (least-squares fit of Y = c + m * X computed by hand on the housing data).
import numpy as np
import pandas as pd

df = pd.read_csv('housing_data.csv')
print(df.head(10))

n = len(df)
print(n)

X = df['area (in cm)'].values
Y = df['price in lakhs'].values
mean_x = np.mean(X)
mean_y = np.mean(Y)
print(mean_x)
print(mean_y)

# Slope = sum((X - mean_x) * (Y - mean_y)) / sum((X - mean_x) ** 2),
# computed with array arithmetic instead of an element-by-element loop.
numerator = np.sum((X - mean_x) * (Y - mean_y))
denominator = np.sum((X - mean_x) ** 2)
m = numerator / denominator
c = mean_y - (m * mean_x)

# Print coefficients
print(m, c)

##########################################################
import matplotlib.pyplot as plt

# Plotting values and regression line.
# The line is drawn slightly beyond the observed range of X on both sides.
max_x = np.max(X) + 100
min_x = np.min(X) - 100

# Calculating line values x and y
x = np.linspace(min_x, max_x, 1000)
y = c + m * x

# Plotting line
plt.plot(x, y, color='blue', label='Regression Line')

# Plotting scatter points
plt.scatter(X, Y, c='red', label='Scatter Plot')

plt.xlabel('Area (in cm)')
plt.ylabel('Price in lakhs')
plt.legend()
plt.show()

# R2 = 1 - ss_r / ss_t, where ss_t sums the squared deviations of Y from its
# mean and ss_r sums the squared deviations of Y from the fitted line.
y_pred = c + m * X
ss_t = np.sum((Y - mean_y) ** 2)
ss_r = np.sum((Y - y_pred) ** 2)
r2 = 1 - (ss_r / ss_t)
print(r2)

# 7) Write a Python program to implement polynomial regression for a given dataset.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

df = pd.read_csv('employees.csv')
print(df)

# The 1:2 slice keeps the feature as a 2-D (n_samples, 1) array, which is the
# shape scikit-learn estimators expect; the target is column 2.
X = df.iloc[:, 1:2].values
y = df.iloc[:, 2].values
print(X, y)

# Fitting the polynomial regression model to the dataset:
# expand X into degree-4 polynomial terms, then fit an ordinary linear model
# on the expanded features.
poly_reg = PolynomialFeatures(degree=4)
X_poly = poly_reg.fit_transform(X)
lin_reg2 = LinearRegression()
lin_reg2.fit(X_poly, y)

# Visualising the polynomial regression model results.
# The curve is evaluated on a fine grid (step 0.1) so it is drawn smoothly;
# transform() reuses the already-fitted expansion instead of refitting it.
X_grid = np.arange(X.min(), X.max(), 0.1)
X_grid = X_grid.reshape((len(X_grid), 1))
plt.scatter(X, y, color='red')
plt.plot(X_grid, lin_reg2.predict(poly_reg.transform(X_grid)), color='blue')
plt.title('(Polynomial Regression)')
plt.xlabel('Position Level')
plt.ylabel('Salary')
plt.show()

import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# Straight-line model fitted on the same X and y, plotted for comparison.
lin_reg = LinearRegression()
lin_reg.fit(X, y)

plt.scatter(X, y, color='red')
plt.plot(X, lin_reg.predict(X), color='blue')
plt.title('(Linear Regression)')
plt.xlabel('Position Level')
plt.ylabel('Salary')
plt.show()

import numpy as np

# Predict for a single input of 6.5 with both models. The results are printed
# because a bare expression shows nothing when run as a script.
query = np.array([[6.5]])

print(lin_reg.predict(query))
# recorded output: array([330378.78787879])

# transform() applies the already-fitted polynomial expansion; fit_transform()
# would refit the transformer on the query point.
print(lin_reg2.predict(poly_reg.transform(query)))
# recorded output: array([158862.45265155])

# 8) Write a Python program to implement a decision tree for whether or not to play tennis.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn import preprocessing
from sklearn import tree

df = pd.read_csv('weather.csv')
print(df)
print(df.head())

# Transform categorical data into numerical form: the encoder is refitted on
# each column in turn, so every column gets its own integer codes.
string_to_int = preprocessing.LabelEncoder()
df = df.apply(string_to_int.fit_transform)
print(df)

X = df[['outlook', 'temperature', 'humidity', 'windy']]
y = df['play']

clf = tree.DecisionTreeClassifier(criterion='entropy')
clf = clf.fit(X, y)

# plot_tree only draws onto the current matplotlib figure; show() is needed
# to display it when running as a script.
tree.plot_tree(clf)
plt.show()

# 9) Write a Python program to implement a linear SVM.
import matplotlib.pyplot as plt
from matplotlib import style

# Sample points as (x, y) pairs, split into the two coordinate lists.
points = [(1, 2), (5, 8), (1.5, 1.8), (8, 8), (1, 0.6), (9, 11)]
x = [px for px, _ in points]
y = [py for _, py in points]

style.use("ggplot")
plt.scatter(x, y)
plt.show()

import matplotlib.pyplot as plt
import numpy as np
from sklearn import svm

# Six 2-D sample points with their class labels (0 or 1).
X = np.array([
    [1, 2],
    [5, 8],
    [1.5, 1.8],
    [8, 8],
    [1, 0.6],
    [9, 11],
])
y = [0, 1, 0, 1, 0, 1]

clf = svm.SVC(kernel='linear', C=1.0)
clf.fit(X, y)
print(clf.predict([[0.58, 0.76]]))
# recorded output: [0]

w = clf.coef_[0]
print(w)

# The separating line satisfies w[0]*x + w[1]*y + intercept = 0, i.e.
# y = a*x - intercept / w[1] with slope a = -w[0] / w[1].
a = -w[0] / w[1]

xx = np.linspace(0, 12)
yy = a * xx - clf.intercept_[0] / w[1]

h0 = plt.plot(xx, yy, 'k-', label="non weighted div")

plt.scatter(X[:, 0], X[:, 1], c=y)
plt.legend()
plt.show()