dhina
Importing the required packages
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
Data Import and Exploration
Function to import the dataset
def importdata():
balance_data = pd.read_csv(
'https://archive.ics.uci.edu/ml/machine-learning-' +
'databases/balance-scale/balance-scale.data',
sep=',', header=None)
Displaying dataset information
print("Dataset Length: ", len(balance_data))
print("Dataset Shape: ", balance_data.shape)
print("Dataset: ", balance_data.head())
return balance_data
Data Splitting
Function to split the dataset into features and target variables
def splitdataset(balance_data):
Separating the target variable
X = balance_data.values[:, 1:5]
Y = balance_data.values[:, 0]
Splitting the dataset into train and test
X_train, X_test, y_train, y_test = train_test_split(
X, Y, test_size=0.3, random_state=100)
return X, Y, X_train, X_test, y_train, y_test
Training with Gini Index:
def train_using_gini(X_train, X_test, y_train):
Creating the classifier object
clf_gini = DecisionTreeClassifier(criterion="gini",
random_state=100, max_depth=3, min_samples_leaf=5)
Performing training
clf_gini.fit(X_train, y_train)
return clf_gini
Training with Entropy:
def train_using_entropy(X_train, X_test, y_train):
Decision tree with entropy
clf_entropy = DecisionTreeClassifier(
criterion="entropy", random_state=100,
max_depth=3, min_samples_leaf=5)
Performing training
clf_entropy.fit(X_train, y_train)
return clf_entropy
Prediction and Evaluation:
Function to make predictions
def prediction(X_test, clf_object):
y_pred = clf_object.predict(X_test)
print("Predicted values:")
print(y_pred)
return y_pred
Placeholder function for cal_accuracy
def cal_accuracy(y_test, y_pred):
print("Confusion Matrix: ",
confusion_matrix(y_test, y_pred))
print("Accuracy : ",
accuracy_score(y_test, y_pred)*100)
print("Report : ",
classification_report(y_test, y_pred))
Plots the Decision Tree
from sklearn import tree
Function to plot the decision tree
def plot_decision_tree(clf_object, feature_names, class_names):
plt.figure(figsize=(15, 10))
plot_tree(clf_object, filled=True, feature_names=feature_names,
class_names=class_names, rounded=True)
plt.show()
if name == "main":
data = importdata()
X, Y, X_train, X_test, y_train, y_test = splitdataset(data)
clf_gini = train_using_gini(X_train, X_test, y_train)
clf_entropy = train_using_entropy(X_train, X_test, y_train)
Visualizing the Decision Trees
plot_decision_tree(clf_gini, ['X1', 'X2', 'X3', 'X4'], ['L', 'B', 'R'])
plot_decision_tree(clf_entropy, ['X1', 'X2', 'X3', 'X4'], ['L', 'B', 'R'])