# Importing the required packages

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt

# Data Import and Exploration

# Function to import the dataset

def importdata():
    """Download the UCI balance-scale dataset and print a short summary.

    The CSV is read with ``header=None``, so pandas labels the columns
    with integer positions rather than names.

    Returns:
        pandas.DataFrame: The dataset exactly as read from the CSV.
    """
    balance_data = pd.read_csv(
        'https://archive.ics.uci.edu/ml/machine-learning-' +
        'databases/balance-scale/balance-scale.data',
        sep=',', header=None)

    # Displaying dataset information
    print("Dataset Length: ", len(balance_data))
    print("Dataset Shape: ", balance_data.shape)
    print("Dataset: ", balance_data.head())
    return balance_data
# Data Splitting

# Function to split the dataset into features and target variables

def splitdataset(balance_data):
    """Separate features from the target and make a train/test split.

    Args:
        balance_data: DataFrame whose column 0 holds the target and whose
            columns 1 through 4 hold the features.

    Returns:
        tuple: ``(X, Y, X_train, X_test, y_train, y_test)`` where ``X`` and
        ``Y`` are the full feature/target arrays and the rest are the
        70/30 train/test partitions.
    """
    # Separating the target variable
    X = balance_data.values[:, 1:5]
    Y = balance_data.values[:, 0]

    # Splitting the dataset into train and test; the fixed random_state
    # makes the partition reproducible between runs.
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size=0.3, random_state=100)
    return X, Y, X_train, X_test, y_train, y_test

# Training with Gini Index:
def train_using_gini(X_train, X_test, y_train):
    """Fit a depth-limited decision tree using the Gini impurity criterion.

    Args:
        X_train: Training feature matrix.
        X_test: Unused; kept so existing callers that pass it keep working.
        y_train: Training target labels.

    Returns:
        DecisionTreeClassifier: The fitted classifier.
    """
    # Creating the classifier object
    clf_gini = DecisionTreeClassifier(
        criterion="gini", random_state=100,
        max_depth=3, min_samples_leaf=5)

    # Performing training
    clf_gini.fit(X_train, y_train)
    return clf_gini

# Training with Entropy:
def train_using_entropy(X_train, X_test, y_train):
    """Fit a depth-limited decision tree using the entropy criterion.

    Args:
        X_train: Training feature matrix.
        X_test: Unused; kept so existing callers that pass it keep working.
        y_train: Training target labels.

    Returns:
        DecisionTreeClassifier: The fitted classifier.
    """
    # Decision tree with entropy
    clf_entropy = DecisionTreeClassifier(
        criterion="entropy", random_state=100,
        max_depth=3, min_samples_leaf=5)

    # Performing training
    clf_entropy.fit(X_train, y_train)
    return clf_entropy

# Prediction and Evaluation:

# Function to make predictions

def prediction(X_test, clf_object):
    """Predict labels for ``X_test`` and print them.

    Args:
        X_test: Feature matrix to classify.
        clf_object: Any object exposing a ``predict(X)`` method.

    Returns:
        Whatever ``clf_object.predict(X_test)`` returns.
    """
    y_pred = clf_object.predict(X_test)
    print("Predicted values:")
    print(y_pred)
    return y_pred

# Function to report the confusion matrix, accuracy, and classification report

def cal_accuracy(y_test, y_pred):
    """Print the confusion matrix, accuracy and classification report.

    Args:
        y_test: True labels.
        y_pred: Predicted labels, in the same order as ``y_test``.

    Returns:
        None. The metrics are only printed.
    """
    print("Confusion Matrix: ",
          confusion_matrix(y_test, y_pred))
    # accuracy_score is multiplied by 100 so it prints as a percentage.
    print("Accuracy : ",
          accuracy_score(y_test, y_pred)*100)
    print("Report : ",
          classification_report(y_test, y_pred))

# Plotting the Decision Tree
from sklearn import tree

# Function to plot the decision tree

def plot_decision_tree(clf_object, feature_names, class_names):
    """Draw a fitted decision tree in a new matplotlib figure and show it.

    Args:
        clf_object: A fitted decision tree estimator.
        feature_names: Names for the feature columns, in column order.
        class_names: Names for the target classes, forwarded unchanged to
            ``tree.plot_tree``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    plt.figure(figsize=(15, 10))
    # Only the ``tree`` module is imported by this file, so plot_tree must
    # be reached through it; a bare ``plot_tree`` raises NameError.
    tree.plot_tree(clf_object, filled=True, feature_names=feature_names,
                   class_names=class_names, rounded=True)
    plt.show()
# Script entry point: load the data, train both trees, then plot them.
if __name__ == "__main__":
    data = importdata()
    X, Y, X_train, X_test, y_train, y_test = splitdataset(data)
    clf_gini = train_using_gini(X_train, X_test, y_train)
    clf_entropy = train_using_entropy(X_train, X_test, y_train)

    # Visualizing the Decision Trees
    feature_names = ['X1', 'X2', 'X3', 'X4']
    # Class names are taken from the fitted model rather than hard-coded:
    # the plot labels classes in the estimator's own (sorted) order, so a
    # hand-written ['L', 'B', 'R'] would swap the 'L' and 'B' labels.
    plot_decision_tree(clf_gini, feature_names,
                       [str(c) for c in clf_gini.classes_])
    plot_decision_tree(clf_entropy, feature_names,
                       [str(c) for c in clf_entropy.classes_])

# dhina

# Importing the required packages

# Function to import the dataset

# Displaying dataset information

# Function to split the dataset into features and target variables

# Separating the target variable

# Splitting the dataset into train and test

# Creating the classifier object

# Performing training

# Decision tree with entropy

# Performing training

# Function to make predictions

# Function to report the confusion matrix, accuracy, and classification report

# Function to plot the decision tree

# Visualizing the Decision Trees