# import all necessary packages
import numpy as np
from statistics import mean
from copy import deepcopy
from random import random, randint, seed
import matplotlib.pyplot as plt
import math
from sklearn.cluster.tests.test_k_means import n_samples
from gplearn.genetic import SymbolicRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor

# Number of individuals bred for every generation.
population_size = 60
# Upper bound on the number of generations evolved by main().
max_generations = 100
# Maximum depth used when generating trees.
max_depth = 5
# Probability that crossover() actually exchanges a subtree.
crossover_rate = 0.9
# Probability, tested at a visited node, that mutation() regrows the subtree there.
mutation_rate = 0.2
# Number of randomly drawn contestants in one tournament.
tournament_size = 5


# binary functions available to the function set
def add(x, y):
    """Return the sum of x and y."""
    total = x + y
    return total


def sub(x, y):
    """Return x minus y."""
    difference = x - y
    return difference


def mul(x, y):
    """Return the product of x and y."""
    product = x * y
    return product


# Primitive set the trees are built from: the two-argument functions are used
# for internal nodes, the terminals for leaves. The terminal 'x' stands for
# the input variable (see Tree.compute); the numbers are constants.
function_set = [add, sub, mul]
terminals_set = ['x', 2, 1, -1, -2]


# target function: the relationship the evolved programs are scored against
def target_function(x):
    """Return the target value for input x (currently x + x)."""
    doubled = x + x
    return doubled
#   alternative target kept for experiments:
#     return (x ** 5) + (4 * x ** 4) + x ** 2


# generate the 100 datapoints of the dataset from the target function
def gen_dataset():
    """Return a list of [x, target_function(x)] pairs.

    x takes every second integer from -100 up to (not including) 100,
    divided by 100, i.e. -1.0, -0.98, ..., 0.98.
    """
    scaled_inputs = (step / 100 for step in range(-100, 100, 2))
    # first value: the input 'x', second value: the target function's result
    return [[value, target_function(value)] for value in scaled_inputs]


# Build the first dataset ([x, target] pairs) when the module runs and print it.
dataset_1 = gen_dataset()
print('First dataset: ')
print(dataset_1)

# Inputs for the library regressors further down: values from -1.0 up to
# (not including) 1.0 in steps of 0.02, reshaped into a single column.
d = np.arange(-1.0, 1.0, 0.02).reshape(-1, 1)


# target values for every row of d
def data():
    """Return a list of one-element lists holding target_function for each row of d."""
    # each row of d is a length-1 array, so *row unpacks it into the single argument
    return [[target_function(*row)] for row in d]


# Build the second dataset (target values only, one per row of d) and print it.
dataset_2 = data()
print('Second dataset: ')
print(dataset_2)


class Tree:
    """Binary expression tree representing one evolved program.

    A node's ``data`` is either a function taken from ``function_set`` (an
    internal node with a ``left`` and a ``right`` operand) or a terminal
    taken from ``terminals_set`` (a leaf without children).
    """

    def __init__(self, data=None, left=None, right=None):
        """Create a node holding ``data`` with optional child subtrees."""
        self.data = data
        self.left = left
        self.right = right

    def _is_function(self):
        """Return True when this node holds a member of ``function_set``."""
        # Terminals ('x' and the numeric constants) are never callable, so
        # they are answered without scanning function_set.
        return callable(self.data) and self.data in function_set

    def Nodes(self, data=None):
        """Return the label used when drawing this node.

        Functions are shown by their ``__name__``, terminals by ``str()``.
        ``data`` is ignored; it is kept only so existing calls that pass an
        argument keep working.
        """
        if self._is_function():
            return self.data.__name__
        return str(self.data)

    def display_tree(self):
        """Print the tree as ASCII art, one text row per line."""
        # print_tree also returns width, height and middle; only the rows are needed
        rows, *_ = self.print_tree(prefix="")
        for row in rows:
            print(row)

    def print_tree(self, prefix=""):
        """Build the ASCII drawing of the subtree rooted at this node.

        Returns a tuple ``(rows, width, height, middle)``: the text rows, the
        width of the drawing, the number of rows and the column used to
        attach this subtree to its parent. ``prefix`` does not influence the
        result and is kept for interface compatibility.
        """
        label = self.Nodes()
        label_width = len(label)

        # No children: the drawing is just the label.
        if self.right is None and self.left is None:
            return [label], label_width, 1, label_width // 2

        # Only a left child.
        if self.right is None:
            rows, width, height, mid = self.left.print_tree(prefix="")
            root_row = (mid + 1) * ' ' + (width - mid - 1) * '_' + label
            branch_row = mid * ' ' + '/' + (width - mid - 1 + label_width) * ' '
            # pad every child row with spaces so all rows share the reported width
            padded = [row + label_width * ' ' for row in rows]
            return ([root_row, branch_row] + padded,
                    width + label_width,
                    height + 2,
                    width + label_width // 2)

        # Only a right child.
        if self.left is None:
            rows, width, height, mid = self.right.print_tree(prefix="")
            root_row = label + mid * '_' + (width - mid) * ' '
            branch_row = (label_width + mid) * ' ' + '\\' + (width - mid - 1) * ' '
            padded = [label_width * ' ' + row for row in rows]
            return ([root_row, branch_row] + padded,
                    width + label_width,
                    height + 2,
                    label_width // 2)

        # Both children.
        left_rows, left_width, left_height, left_mid = self.left.print_tree(prefix + " ")
        right_rows, right_width, right_height, right_mid = self.right.print_tree(prefix + " ")
        root_row = ((left_mid + 1) * ' ' + (left_width - left_mid - 1) * '_' + label
                    + right_mid * '_' + (right_width - right_mid) * ' ')
        branch_row = (left_mid * ' ' + '/'
                      + (left_width - left_mid - 1 + label_width + right_mid) * ' '
                      + '\\' + (right_width - right_mid - 1) * ' ')
        # pad the shorter side with blank rows so both sides can be zipped
        if left_height < right_height:
            left_rows += [left_width * ' '] * (right_height - left_height)
        elif right_height < left_height:
            right_rows += [right_width * ' '] * (left_height - right_height)
        joined = [lhs + label_width * ' ' + rhs for lhs, rhs in zip(left_rows, right_rows)]
        return ([root_row, branch_row] + joined,
                left_width + right_width + label_width,
                max(left_height, right_height) + 2,
                left_width + label_width // 2)

    def generate_tree(self, grow, max_depth, depth=0):
        """Replace this node by a randomly generated subtree.

        grow=False is the "full" method: functions at every level above
        ``max_depth``. grow=True is the "grow" method: above ``max_depth``
        each node is a function or a terminal with equal chance. At
        ``max_depth`` a terminal is always chosen.
        """
        if depth >= max_depth:
            use_function = False
        elif not grow:
            use_function = True
        else:
            # intermediate depth with the grow method: coin flip
            use_function = random() <= 0.5

        pool = function_set if use_function else terminals_set
        self.data = pool[randint(0, len(pool) - 1)]

        if use_function:
            self.left = Tree()
            self.left.generate_tree(grow, max_depth, depth=depth + 1)
            self.right = Tree()
            self.right.generate_tree(grow, max_depth, depth=depth + 1)
        else:
            # A terminal is a leaf: drop children left over from a previous
            # shape (mutation regrows nodes in place).
            self.left = None
            self.right = None

    def compute(self, x):
        """Evaluate the program for input ``x`` and return the result."""
        if self._is_function():
            return self.data(self.left.compute(x), self.right.compute(x))
        if self.data == 'x':
            return x
        return self.data

    def subtree(self):
        """Return a new Tree that copies this node and everything below it."""
        clone = Tree()
        clone.data = self.data
        if self.left is not None:
            clone.left = self.left.subtree()
        if self.right is not None:
            clone.right = self.right.subtree()
        return clone

    def size(self):
        """Return the number of nodes in the subtree rooted at this node."""
        # Counted structurally so the result matches the nodes scan_tree visits.
        left_size = self.left.size() if self.left is not None else 0
        right_size = self.right.size() if self.right is not None else 0
        return 1 + left_size + right_size

    def mutation(self):
        """Regrow this node with probability ``mutation_rate``, else try the children."""
        if random() < mutation_rate:
            self.generate_tree(grow=True, max_depth=max_depth)
            return
        # Visit BOTH children; an elif chain would never reach the right one.
        if self.left is not None:
            self.left.mutation()
        if self.right is not None:
            self.right.mutation()

    # scan the tree for the crossover point (inspired by tiny_gp - bibliography)
    def scan_tree(self, count, second):
        """Walk the tree in pre-order until the counter ``count[0]`` runs out.

        ``count`` is a one-element list so the counter is shared by all
        recursive calls. When ``second`` is None a copy of the subtree at the
        selected node is returned; otherwise the selected node is overwritten
        with ``second``'s data and children and None is returned.
        """
        count[0] -= 1
        if count[0] <= 1:
            if second is None:
                return self.subtree()
            # attach the donor subtree at this node
            self.data = second.data
            self.left = second.left
            self.right = second.right
            return None

        found = None
        if self.left is not None and count[0] > 1:
            found = self.left.scan_tree(count, second)
        if self.right is not None and count[0] > 1:
            found = self.right.scan_tree(count, second)
        return found

    def crossover(self, other):
        """With probability ``crossover_rate`` replace a random subtree of this
        tree by a copy of a random subtree of ``other``."""
        if random() < crossover_rate:
            donor = other.scan_tree([randint(1, other.size())], None)
            self.scan_tree([randint(1, self.size())], donor)
 
 
# ramped half-and-half
def initialise_population():
    """Create the initial population with the ramped half-and-half method.

    For every maximum depth from 3 up to ``max_depth`` an equal number of
    trees is built with the grow method and with the full method. The group
    size is derived from the number of ramped depths, and any shortfall left
    by integer division is topped up, so the result always holds exactly
    ``population_size`` trees (main() relies on that).
    """
    depths = range(3, max_depth + 1)
    # two groups (grow and full) per ramped depth
    group_count = 2 * len(depths)
    per_group = population_size // group_count if group_count else 0

    population = []
    for maxdepth in depths:
        # grow first, then full, for every depth
        for use_grow in (True, False):
            for _ in range(per_group):
                individual = Tree()
                individual.generate_tree(grow=use_grow, max_depth=maxdepth)
                population.append(individual)

    # Top up when population_size is not a multiple of the group count (or
    # no depth could be ramped), alternating between grow and full.
    while len(population) < population_size:
        individual = Tree()
        individual.generate_tree(grow=len(population) % 2 == 0, max_depth=max_depth)
        population.append(individual)
    return population

    
# fitness function based on the mean absolute error (MAE)
def fitness(individual, dataset):
    """Return 1 / (1 + MAE) of ``individual`` over ``dataset``.

    Every dataset entry holds the input at index 0 and the expected output
    at index 1. A perfect program has an error of 0 and therefore scores 1.
    """
    absolute_errors = []
    for sample in dataset:
        predicted = individual.compute(sample[0])
        absolute_errors.append(abs(predicted - sample[1]))
    mean_error = mean(absolute_errors)
    return 1 / (1 + mean_error)


# tournament selection
def tournament_selection(population, fitnesses):
    """Return a deep copy of the fittest of ``tournament_size`` random contestants.

    Contestants are drawn with replacement; on equal fitness the contestant
    drawn first wins.
    """
    last_index = len(population) - 1
    entrants = [randint(0, last_index) for _ in range(tournament_size)]
    # max() keeps the first entrant among those with the highest fitness
    winner = max(entrants, key=lambda entrant: fitnesses[entrant])
    return deepcopy(population[winner])
    

# function to run the code
def main():
    """Evolve programs towards the target function and print the best ones.

    Each generation breeds ``population_size`` children: a tournament winner
    is mutated and then crossed with a second tournament winner. Whenever a
    generation beats the best fitness so far, its best tree is printed. The
    run stops after ``max_generations`` generations or as soon as a tree
    reaches the optimal fitness of 1.
    """
    seed()
    dataset = gen_dataset()
    population = initialise_population()
    # iterate the population itself rather than indexing by population_size
    fitnesses = [fitness(individual, dataset) for individual in population]
    best_tree = None
    best_of_generation_fitness = 0
    best_generation = 0

    # Evolving the programs
    for generation in range(max_generations):
        next_generation = []
        for _ in range(population_size):
            # first parent: tournament winner, then mutated
            parent1 = tournament_selection(population, fitnesses)
            parent1.mutation()
            # second parent donates a subtree to the first one
            parent2 = tournament_selection(population, fitnesses)
            parent1.crossover(parent2)
            next_generation.append(parent1)
        population = next_generation
        fitnesses = [fitness(individual, dataset) for individual in population]

        # a generation that beats the best score so far becomes the new best
        generation_best = max(fitnesses)
        if generation_best > best_of_generation_fitness:
            best_of_generation_fitness = generation_best
            best_generation = generation
            best_tree = deepcopy(population[fitnesses.index(generation_best)])
            print("________________________")
            print("Generation:", generation, "\nBest Individual Fitness Score:", round(generation_best, 3), "\nTree:")
            best_tree.display_tree()
        # terminate when fitness is at its optimal 1.0 value
        if best_of_generation_fitness == 1:
            break

    # then print the best performing generation of the run
    print("\n\n_________________________________________________\nTERMINATION\nBest fitness score achieved at generation " + str(best_generation) + \
          " and has fitness of = " + str(round(best_of_generation_fitness, 3)))
    # best_tree is still None when no generation was evolved (max_generations == 0)
    if best_tree is not None:
        best_tree.display_tree()


if __name__ == "__main__":
    main()
    
# table of results using gplearn and sklearn
# The estimators expect a 1-D target, so flatten the column of targets once,
# before any model is fitted. reshape(-1) infers the length from the data
# instead of relying on a constant borrowed from a scikit-learn test module.
dataset_2 = np.array(dataset_2).reshape(-1)

symbRegressor = SymbolicRegressor(verbose=1)
symbRegressor.fit(d, dataset_2)
_predict_ = symbRegressor.predict(d)
# each score is printed under the name of the model that produced it
print('GPlearn score:', symbRegressor.score(d, dataset_2))

decision_tree = DecisionTreeRegressor(max_depth=5)
decision_tree.fit(d, dataset_2)
print('Decision Tree score:', decision_tree.score(d, dataset_2))

rndForest = RandomForestRegressor(n_estimators=100, max_depth=2)
rndForest.fit(d, dataset_2)
print('Random Forest score: ', rndForest.score(d, dataset_2))

# Plot diagram of data: targets as green points, symbolic regressor as red line
fig = plt.figure(1, figsize=(8, 6))
plt.scatter(d, dataset_2, color='green')
plt.plot(d, _predict_, color='red', linewidth=2)
plt.xlabel('X')
plt.ylabel('Y')
plt.show()

# Comparison of the symbolic regressor's predictions (blue points) with the
# series named in each subplot title (red points)

# Gplearn graph
y_gp = symbRegressor.predict(d)
score_gp = symbRegressor.score(d, dataset_2)
# Decision tree graph
y_dt = decision_tree.predict(d)
score_dt = decision_tree.score(d, dataset_2)
# Random forest graph
y_rf = rndForest.predict(d)
score_rf = rndForest.score(d, dataset_2)

# Visualising them together on one figure
# Top left shows the original data.
fig = plt.figure(figsize=(9, 7))
panels = [(dataset_2, None, "Original data"),
          (y_gp, score_gp, "Symbolic Regressor"),
          (y_dt, score_dt, "Decision Tree Regressor"),
          (y_rf, score_rf, "Random Forest Regressor")]
# use a dedicated loop variable so dataset_2 is not overwritten by the loop
for i, (series, score, title) in enumerate(panels):
    ax = fig.add_subplot(2, 2, i + 1)
    points = ax.scatter(d, _predict_, color='blue', linewidth=2)
    test = ax.scatter(d, series, color='red', linewidth=0.1)
    plt.title(title)
plt.show()
 

Python Online Compiler

Write, run and share Python code online for free using OneCompiler's Python online compiler. It is a robust, feature-rich online compiler for the Python language that supports both Python 3 and Python 2.7. Getting started with OneCompiler's Python editor is easy and fast: the editor shows sample boilerplate code when you choose Python or Python2 as the language, so you can start coding right away.

Taking inputs (stdin)

OneCompiler's Python online editor supports stdin: users can give input to their programs through the STDIN textbox under the I/O tab. The following sample Python program reads a name from the input and prints it after "Hello".

import sys
name = sys.stdin.readline()
print("Hello "+ name)

About Python

Python is a very popular general-purpose programming language that was created by Guido van Rossum and released in 1991. It is widely used for web development, and you can build almost anything with it: mobile apps, web apps, tools, data analytics, machine learning and more. It is designed to be simple and easy to read, much like the English language. It is highly productive and efficient, which makes it a very popular language.

Tutorial & Syntax help

Loops

1. If-Else:

Whenever you want to perform a set of operations based on a condition, IF-ELSE is used.

if conditional-expression:
    #code
elif conditional-expression:
    #code
else:
    #code

Note:

Indentation is very important in Python, make sure the indentation is followed correctly

2. For:

A for loop is used to iterate over collections (list, tuple, set, dictionary) or strings.

Example:

mylist=("Iphone","Pixel","Samsung")
for i in mylist:
    print(i)

3. While:

While is also used to iterate a set of statements based on a condition. Usually while is preferred when number of iterations are not known in advance.

while condition:
    #code

Collections

There are four types of collections in Python.

1. List:

List is a collection which is ordered and can be changed. Lists are specified in square brackets.

Example:

mylist=["iPhone","Pixel","Samsung"]
print(mylist)

2. Tuple:

Tuple is a collection which is ordered and can not be changed. Tuples are specified in round brackets.

Example:

myTuple=("iPhone","Pixel","Samsung")
print(myTuple)

The code below throws an error, because it tries to assign a new value to an element of a tuple.

myTuple=("iPhone","Pixel","Samsung")
print(myTuple)
myTuple[1]="onePlus"
print(myTuple)

3. Set:

Set is a collection which is unordered and unindexed. Sets are specified in curly brackets.

Example:

myset = {"iPhone","Pixel","Samsung"}
print(myset)

4. Dictionary:

Dictionary is a collection of key value pairs which is unordered, can be changed, and indexed. They are written in curly brackets with key - value pairs.

Example:

mydict = {
    "brand" :"iPhone",
    "model": "iPhone 11"
}
print(mydict)

Supported Libraries

Following are the libraries supported by OneCompiler's Python compiler

Name | Description
NumPy | The NumPy Python library helps users work with arrays with ease
SciPy | SciPy is a scientific computation library which depends on NumPy for convenient and fast N-dimensional array manipulation
SKLearn/Scikit-learn | Scikit-learn is the most useful library for machine learning in Python
Pandas | Pandas is the most efficient Python library for data manipulation and analysis
DOcplex | DOcplex, IBM Decision Optimization CPLEX Modeling for Python, is a library composed of Mathematical Programming Modeling and Constraint Programming Modeling