import pandas as pd # data processing
	import numpy as np # working with arrays
	import matplotlib.pyplot as plt # visualization
	from termcolor import colored as cl # text customization
	import itertools # advanced tools
	
	from sklearn.preprocessing import StandardScaler # data normalization
	from sklearn.model_selection import train_test_split # data split
	from sklearn.tree import DecisionTreeClassifier # Decision tree algorithm
	from sklearn.neighbors import KNeighborsClassifier # KNN algorithm
	from sklearn.linear_model import LogisticRegression # Logistic regression algorithm
	from sklearn.svm import SVC # SVM algorithm
	from sklearn.ensemble import RandomForestClassifier # Random forest tree algorithm
	from xgboost import XGBClassifier # XGBoost algorithm
	
	from sklearn.metrics import confusion_matrix # evaluation metric
	from sklearn.metrics import accuracy_score # evaluation metric
	from sklearn.metrics import f1_score # evaluation metric
	
	# IMPORTING DATA
	
	df = pd.read_csv('creditcard.csv')
	df.drop('Time', axis = 1, inplace = True)
	
	print(df.head())
	
	# EDA
	
	# 1. Count & percentage
	
	cases = len(df)
	nonfraud_count = len(df[df.Class == 0])
	fraud_count = len(df[df.Class == 1])
	fraud_percentage = round(fraud_count/nonfraud_count*100, 2)
	
	print(cl('CASE COUNT', attrs = ['bold']))
	print(cl('--------------------------------------------', attrs = ['bold']))
	print(cl('Total number of cases are {}'.format(cases), attrs = ['bold']))
	print(cl('Number of Non-fraud cases are {}'.format(nonfraud_count), attrs = ['bold']))
	print(cl('Number of Non-fraud cases are {}'.format(fraud_count), attrs = ['bold']))
	print(cl('Percentage of fraud cases is {}'.format(fraud_percentage), attrs = ['bold']))
	print(cl('--------------------------------------------', attrs = ['bold']))
	
	# 2. Description
	
	nonfraud_cases = df[df.Class == 0]
	fraud_cases = df[df.Class == 1]
	
	print(cl('CASE AMOUNT STATISTICS', attrs = ['bold']))
	print(cl('--------------------------------------------', attrs = ['bold']))
	print(cl('NON-FRAUD CASE AMOUNT STATS', attrs = ['bold']))
	print(nonfraud_cases.Amount.describe())
	print(cl('--------------------------------------------', attrs = ['bold']))
	print(cl('FRAUD CASE AMOUNT STATS', attrs = ['bold']))
	print(fraud_cases.Amount.describe())
	print(cl('--------------------------------------------', attrs = ['bold']))
	
	# DATA SPLIT
	
	X = df.drop('Class', axis = 1).values
	y = df['Class'].values
	
	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
	
	print(cl('X_train samples : ', attrs = ['bold']), X_train[:1])
	print(cl('X_test samples : ', attrs = ['bold']), X_test[0:1])
	print(cl('y_train samples : ', attrs = ['bold']), y_train[0:10])
	print(cl('y_test samples : ', attrs = ['bold']), y_test[0:10])
	
	# MODELING
	
	# 1. Decision Tree
	
	tree_model = DecisionTreeClassifier(max_depth = 4, criterion = 'entropy')
	tree_model.fit(X_train, y_train)
	tree_yhat = tree_model.predict(X_test)
	
	# 2. K-Nearest Neighbors
	
	n = 5
	
	knn = KNeighborsClassifier(n_neighbors = n)
	knn.fit(X_train, y_train)
	knn_yhat = knn.predict(X_test)
	
	# 3. Logistic Regression
	
	lr = LogisticRegression()
	lr.fit(X_train, y_train)
	lr_yhat = lr.predict(X_test)
	
	# 4. SVM 
	
	svm = SVC()
	svm.fit(X_train, y_train)
	svm_yhat = svm.predict(X_test)
	
	# 5. Random Forest Tree
	
	rf = RandomForestClassifier(max_depth = 4)
	rf.fit(X_train, y_train)
	rf_yhat = rf.predict(X_test)
	
	# 6. XGBoost
	
	xgb = XGBClassifier(max_depth = 4)
	xgb.fit(X_train, y_train)
	xgb_yhat = xgb.predict(X_test)
	
	# EVALUATION
	
	# 1. Accuracy score
	
	print(cl('ACCURACY SCORE', attrs = ['bold']))
	print(cl('------------------------------------------------------------------------', attrs = ['bold']))
	print(cl('Accuracy score of the Decision Tree model is {}'.format(accuracy_score(y_test, tree_yhat)), attrs = ['bold']))
	print(cl('------------------------------------------------------------------------', attrs = ['bold']))
	print(cl('Accuracy score of the KNN model is {}'.format(accuracy_score(y_test, knn_yhat)), attrs = ['bold'], color = 'green'))
	print(cl('------------------------------------------------------------------------', attrs = ['bold']))
	print(cl('Accuracy score of the Logistic Regression model is {}'.format(accuracy_score(y_test, lr_yhat)), attrs = ['bold'], color = 'red'))
	print(cl('------------------------------------------------------------------------', attrs = ['bold']))
	print(cl('Accuracy score of the SVM model is {}'.format(accuracy_score(y_test, svm_yhat)), attrs = ['bold']))
	print(cl('------------------------------------------------------------------------', attrs = ['bold']))
	print(cl('Accuracy score of the Random Forest Tree model is {}'.format(accuracy_score(y_test, rf_yhat)), attrs = ['bold']))
	print(cl('------------------------------------------------------------------------', attrs = ['bold']))
	print(cl('Accuracy score of the XGBoost model is {}'.format(accuracy_score(y_test, xgb_yhat)), attrs = ['bold']))
	print(cl('------------------------------------------------------------------------', attrs = ['bold']))
	
	# 2. F1 score
	
	print(cl('F1 SCORE', attrs = ['bold']))
	print(cl('------------------------------------------------------------------------', attrs = ['bold']))
	print(cl('F1 score of the Decision Tree model is {}'.format(f1_score(y_test, tree_yhat)), attrs = ['bold']))
	print(cl('------------------------------------------------------------------------', attrs = ['bold']))
	print(cl('F1 score of the KNN model is {}'.format(f1_score(y_test, knn_yhat)), attrs = ['bold'], color = 'green'))
	print(cl('------------------------------------------------------------------------', attrs = ['bold']))
	print(cl('F1 score of the Logistic Regression model is {}'.format(f1_score(y_test, lr_yhat)), attrs = ['bold'], color = 'red'))
	print(cl('------------------------------------------------------------------------', attrs = ['bold']))
	print(cl('F1 score of the SVM model is {}'.format(f1_score(y_test, svm_yhat)), attrs = ['bold']))
	print(cl('------------------------------------------------------------------------', attrs = ['bold']))
	print(cl('F1 score of the Random Forest Tree model is {}'.format(f1_score(y_test, rf_yhat)), attrs = ['bold']))
	print(cl('------------------------------------------------------------------------', attrs = ['bold']))
	print(cl('F1 score of the XGBoost model is {}'.format(f1_score(y_test, xgb_yhat)), attrs = ['bold']))
	print(cl('------------------------------------------------------------------------', attrs = ['bold']))
	
	# 3. Confusion Matrix
	
	# defining the plot function
	
	def plot_confusion_matrix(cm, classes, title, normalize = False, cmap = plt.cm.Blues):
	    title = 'Confusion Matrix of {}'.format(title)
	    if normalize:
	        cm = cm.astype(float) / cm.sum(axis=1)[:, np.newaxis]
	
	    plt.imshow(cm, interpolation = 'nearest', cmap = cmap)
	    plt.title(title)
	    plt.colorbar()
	    tick_marks = np.arange(len(classes))
	    plt.xticks(tick_marks, classes, rotation = 45)
	    plt.yticks(tick_marks, classes)
	
	    fmt = '.2f' if normalize else 'd'
	    thresh = cm.max() / 2.
	    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
	        plt.text(j, i, format(cm[i, j], fmt),
	                 horizontalalignment = 'center',
	                 color = 'white' if cm[i, j] > thresh else 'black')
	
	    plt.tight_layout()
	    plt.ylabel('True label')
	    plt.xlabel('Predicted label')
	
	# Compute confusion matrix for the models
	
	tree_matrix = confusion_matrix(y_test, tree_yhat, labels = [0, 1]) # Decision Tree
	knn_matrix = confusion_matrix(y_test, knn_yhat, labels = [0, 1]) # K-Nearest Neighbors
	lr_matrix = confusion_matrix(y_test, lr_yhat, labels = [0, 1]) # Logistic Regression
	svm_matrix = confusion_matrix(y_test, svm_yhat, labels = [0, 1]) # Support Vector Machine
	rf_matrix = confusion_matrix(y_test, rf_yhat, labels = [0, 1]) # Random Forest Tree
	xgb_matrix = confusion_matrix(y_test, xgb_yhat, labels = [0, 1]) # XGBoost
	
	# Plot the confusion matrix
	
	plt.rcParams['figure.figsize'] = (6, 6)
	
	# 1. Decision tree
	
	tree_cm_plot = plot_confusion_matrix(tree_matrix, 
	                                classes = ['Non-Default(0)','Default(1)'], 
	                                normalize = False, title = 'Decision Tree')
	plt.savefig('tree_cm_plot.png')
	plt.show()
	
	# 2. K-Nearest Neighbors
	
	knn_cm_plot = plot_confusion_matrix(knn_matrix, 
	                                classes = ['Non-Default(0)','Default(1)'], 
	                                normalize = False, title = 'KNN')
	plt.savefig('knn_cm_plot.png')
	plt.show()
	
	# 3. Logistic regression
	
	lr_cm_plot = plot_confusion_matrix(lr_matrix, 
	                                classes = ['Non-Default(0)','Default(1)'], 
	                                normalize = False, title = 'Logistic Regression')
	plt.savefig('lr_cm_plot.png')
	plt.show()
	
	# 4. Support Vector Machine
	
	svm_cm_plot = plot_confusion_matrix(svm_matrix, 
	                                classes = ['Non-Default(0)','Default(1)'], 
	                                normalize = False, title = 'SVM')
	plt.savefig('svm_cm_plot.png')
	plt.show()
	
	# 5. Random forest tree
	
	rf_cm_plot = plot_confusion_matrix(rf_matrix, 
	                                classes = ['Non-Default(0)','Default(1)'], 
	                                normalize = False, title = 'Random Forest Tree')
	plt.savefig('rf_cm_plot.png')
	plt.show()
	
	# 6. XGBoost
	
	xgb_cm_plot = plot_confusion_matrix(xgb_matrix, 
	                                classes = ['Non-Default(0)','Default(1)'], 
	                                normalize = False, title = 'XGBoost')
	plt.savefig('xgb_cm_plot.png')
	plt.show()

 

Python Online Compiler

Write, run & share Python code online using OneCompiler's free online Python compiler. It is one of the most robust, feature-rich online compilers for the Python language, supporting both Python 3 and Python 2.7. Getting started with OneCompiler's Python editor is easy and fast. The editor shows sample boilerplate code when you choose Python or Python2 as the language and start coding.

Taking inputs (stdin)

OneCompiler's python online editor supports stdin and users can give inputs to programs using the STDIN textbox under the I/O tab. Following is a sample python program which takes name as input and print your name with hello.

import sys
name = sys.stdin.readline()
print("Hello "+ name)

About Python

Python is a very popular general-purpose programming language which was created by Guido van Rossum and released in 1991. It is very popular for web development, and you can build almost anything with it — mobile apps, web apps, tools, data analytics, machine learning, etc. It is designed to be simple and easy, like the English language. It is highly productive and efficient, making it a very popular language.

Tutorial & Syntax help

Control statements and loops

1. If-Else:

Whenever you want to perform a set of operations based on a condition, IF-ELSE is used.

if conditional-expression:
    #code
elif conditional-expression:
    #code
else:
    #code

Note:

Indentation is very important in Python, make sure the indentation is followed correctly

2. For:

For loop is used to iterate over arrays(list, tuple, set, dictionary) or strings.

Example:

mylist=("Iphone","Pixel","Samsung")
for i in mylist:
    print(i)

3. While:

While is also used to iterate a set of statements based on a condition. Usually while is preferred when number of iterations are not known in advance.

while condition:
    #code

Collections

There are four types of collections in Python.

1. List:

List is a collection which is ordered and can be changed. Lists are specified in square brackets.

Example:

mylist=["iPhone","Pixel","Samsung"]
print(mylist)

2. Tuple:

Tuple is a collection which is ordered and can not be changed. Tuples are specified in round brackets.

Example:

myTuple=("iPhone","Pixel","Samsung")
print(myTuple)

Below throws an error if you assign another value to tuple again.

myTuple=("iPhone","Pixel","Samsung")
print(myTuple)
myTuple[1]="onePlus"
print(myTuple)

3. Set:

Set is a collection which is unordered and unindexed. Sets are specified in curly brackets.

Example:

myset = {"iPhone","Pixel","Samsung"}
print(myset)

4. Dictionary:

Dictionary is a collection of key-value pairs which can be changed and is indexed by keys (insertion order is preserved in Python 3.7+). Dictionaries are written in curly brackets as key-value pairs.

Example:

mydict = {
    "brand" :"iPhone",
    "model": "iPhone 11"
}
print(mydict)

Supported Libraries

Following are the libraries supported by OneCompiler's Python compiler

Name — Description
NumPy — helps users work on arrays with ease
SciPy — a scientific computation library which depends on NumPy for convenient and fast N-dimensional array manipulation
SKLearn/Scikit-learn — the most useful library for machine learning in Python
Pandas — the most efficient Python library for data manipulation and analysis
DOcplex — IBM Decision Optimization CPLEX Modeling for Python, a library composed of Mathematical Programming Modeling and Constraint Programming Modeling