up

2020-12-25 17:54:27 +03:30 · 2020-12-25 17:54:27 +03:30 · de656ad6b2
parent bec8fe4850
commit de656ad6b2
39 changed files with 15 additions and 1446 deletions
--- a/.history/app_20201225152542.py
+++ b/.history/app_20201225152542.py
--- a/.history/app_20201225152608.py
+++ b/.history/app_20201225152608.py
@ -1,21 +0,0 @@
 def gini_index(groups, classes):
 	# count all samples at split point
 	n_instances = float(sum([len(group) for group in groups]))
 	# sum weighted Gini index for each group
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# avoid divide by zero
 		if size == 0:
 			continue
 		score = 0.0
 		# score the group based on the score for each class
 		for class_val in classes:
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
 		# weight the group score by its relative size
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
 print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225152810.py
+++ b/.history/app_20201225152810.py
@ -1,21 +0,0 @@
 def gini_index(groups, classes):
 	# count all samples at split point
 	n_instances = float(sum([len(group) for group in groups]))
 	# sum weighted Gini index for each group
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# avoid divide by zero
 		if size == 0:
 			continue
 		score = 0.0
 		# score the group based on the score for each class
 		for class_val in classes:
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
 		# weight the group score by its relative size
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
 print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225152934.py
+++ b/.history/app_20201225152934.py
@ -1,22 +0,0 @@
 def gini_index(groups, classes):
 	# count all samples at split point
    print(classes)
 	n_instances = float(sum([len(group) for group in groups]))
 	# sum weighted Gini index for each group
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# avoid divide by zero
 		if size == 0:
 			continue
 		score = 0.0
 		# score the group based on the score for each class
 		for class_val in classes:
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
 		# weight the group score by its relative size
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
 print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225153009.py
+++ b/.history/app_20201225153009.py
@ -1,22 +0,0 @@
 def gini_index(groups, classes):
    print(classes)
 	# count all samples at split point
 	n_instances = float(sum([len(group) for group in groups]))
 	# sum weighted Gini index for each group
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# avoid divide by zero
 		if size == 0:
 			continue
 		score = 0.0
 		# score the group based on the score for each class
 		for class_val in classes:
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
 		# weight the group score by its relative size
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
 print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225153016.py
+++ b/.history/app_20201225153016.py
@ -1,22 +0,0 @@
 def gini_index(groups, classes):
 	# count all samples at split point
 	n_instances = float(sum([len(group) for group in groups]))
 	# sum weighted Gini index for each group
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# avoid divide by zero
 		if size == 0:
 			continue
 		score = 0.0
 		# score the group based on the score for each class
 		for class_val in classes:
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
 		# weight the group score by its relative size
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
 print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225153034.py
+++ b/.history/app_20201225153034.py
@ -1,23 +0,0 @@
 def gini_index(groups, classes):
    print(groups)
    print(classes)
 	# count all samples at split point
 	n_instances = float(sum([len(group) for group in groups]))
 	# sum weighted Gini index for each group
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# avoid divide by zero
 		if size == 0:
 			continue
 		score = 0.0
 		# score the group based on the score for each class
 		for class_val in classes:
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
 		# weight the group score by its relative size
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
 print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225153041.py
+++ b/.history/app_20201225153041.py
@ -1,24 +0,0 @@
 def gini_index(groups, classes):
 	# count all samples at split point
 	n_instances = float(sum([len(group) for group in groups]))
 	# sum weighted Gini index for each group
 	gini = 0.0
    print(groups)
    print(classes)
 	for group in groups:
 		size = float(len(group))
 		# avoid divide by zero
 		if size == 0:
 			continue
 		score = 0.0
 		# score the group based on the score for each class
 		for class_val in classes:
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
 		# weight the group score by its relative size
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
 print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225153057.py
+++ b/.history/app_20201225153057.py
@ -1,24 +0,0 @@
 def gini_index(groups, classes):
 	# count all samples at split point
 	n_instances = float(sum([len(group) for group in groups]))
 	# sum weighted Gini index for each group
 	gini = 0.0
    print(groups)
     print(classes)
 	for group in groups:
 		size = float(len(group))
 		# avoid divide by zero
 		if size == 0:
 			continue
 		score = 0.0
 		# score the group based on the score for each class
 		for class_val in classes:
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
 		# weight the group score by its relative size
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
 print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225153107.py
+++ b/.history/app_20201225153107.py
@ -1,24 +0,0 @@
 def gini_index(groups, classes):
 	# count all samples at split point
 	n_instances = float(sum([len(group) for group in groups]))
 	# sum weighted Gini index for each group
 	gini = 0.0
    print(groups)
    print(classes)
 	for group in groups:
 		size = float(len(group))
 		# avoid divide by zero
 		if size == 0:
 			continue
 		score = 0.0
 		# score the group based on the score for each class
 		for class_val in classes:
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
 		# weight the group score by its relative size
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
 print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225153134.py
+++ b/.history/app_20201225153134.py
@ -1,24 +0,0 @@
 def gini_index(groups, classes):
 	# count all samples at split point
 	n_instances = float(sum([len(group) for group in groups]))
 	# sum weighted Gini index for each group
 	gini = 0.0
    print(groups)
    print(classes) 
 	for group in groups:
 		size = float(len(group))
 		# avoid divide by zero
 		if size == 0:
 			continue
 		score = 0.0
 		# score the group based on the score for each class
 		for class_val in classes:
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
 		# weight the group score by its relative size
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
 print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225153219.py
+++ b/.history/app_20201225153219.py
@ -1,22 +0,0 @@
 def gini_index(groups, classes):
 	# count all samples at split point
 	n_instances = float(sum([len(group) for group in groups]))
 	# sum weighted Gini index for each group
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# avoid divide by zero
 		if size == 0:
 			continue
 		score = 0.0
 		# score the group based on the score for each class
 		for class_val in classes:
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
 		# weight the group score by its relative size
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
 print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225153710.py
+++ b/.history/app_20201225153710.py
@ -1,24 +0,0 @@
 def gini_index(groups, classes):
 	# count all samples at split point
 	n_instances = float(sum([len(group) for group in groups]))
 	# sum weighted Gini index for each group
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# avoid divide by zero
 		if size == 0:
 			continue
 		score = 0.0
 		# score the group based on the score for each class
 		for class_val in classes:
            for row in group:
                print(row[-1])
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
 		# weight the group score by its relative size
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
 print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225153815.py
+++ b/.history/app_20201225153815.py
@ -1,24 +0,0 @@
 def gini_index(groups, classes):
 	# count all samples at split point
 	n_instances = float(sum([len(group) for group in groups]))
 	# sum weighted Gini index for each group
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# avoid divide by zero
 		if size == 0:
 			continue
 		score = 0.0
 		# score the group based on the score for each class
 		for class_val in classes:
            for row in group:
                print(row[-1])
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
 		# weight the group score by its relative size
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
 print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225153906.py
+++ b/.history/app_20201225153906.py
@ -1,22 +0,0 @@
 def gini_index(groups, classes):
 	# count all samples at split point
 	n_instances = float(sum([len(group) for group in groups]))
 	# sum weighted Gini index for each group
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# avoid divide by zero
 		if size == 0:
 			continue
 		score = 0.0
 		# score the group based on the score for each class
 		for class_val in classes:
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
 		# weight the group score by its relative size
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 # test Gini values
 print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
 print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225153913.py
+++ b/.history/app_20201225153913.py
@ -1,23 +0,0 @@
 def gini_index(groups, classes):
 	# count all samples at split point
 	n_instances = float(sum([len(group) for group in groups]))
 	# sum weighted Gini index for each group
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# avoid divide by zero
 		if size == 0:
 			continue
 		score = 0.0
 		# score the group based on the score for each class
 		for class_val in classes:
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
 		# weight the group score by its relative size
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 # test Gini values
 print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
 print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225153932.py
+++ b/.history/app_20201225153932.py
@ -1,24 +0,0 @@
 def gini_index(groups, classes):
 	# count all samples at split point
 	n_instances = float(sum([len(group) for group in groups]))
 	# sum weighted Gini index for each group
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# avoid divide by zero
 		if size == 0:
 			continue
 		score = 0.0
 		# score the group based on the score for each class
 		for class_val in classes:
            for row in group:
                print(row[-1])
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
 		# weight the group score by its relative size
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 # test Gini values
 print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
 print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225154020.py
+++ b/.history/app_20201225154020.py
@ -1,24 +0,0 @@
 def gini_index(groups, classes):
 	# count all samples at split point
 	n_instances = float(sum([len(group) for group in groups]))
 	# sum weighted Gini index for each group
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# avoid divide by zero
 		if size == 0:
 			continue
 		score = 0.0
 		# score the group based on the score for each class
 		for class_val in classes:
            for row in group:
                print(row[-1])
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
 		# weight the group score by its relative size
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 # test Gini values
 print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
 print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225154028.py
+++ b/.history/app_20201225154028.py
@ -1,24 +0,0 @@
 def gini_index(groups, classes):
 	# count all samples at split point
 	n_instances = float(sum([len(group) for group in groups]))
 	# sum weighted Gini index for each group
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# avoid divide by zero
 		if size == 0:
 			continue
 		score = 0.0
 		# score the group based on the score for each class
 		for class_val in classes:
             for row in group:
                print(row[-1])
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
 		# weight the group score by its relative size
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 # test Gini values
 print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
 print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225154033.py
+++ b/.history/app_20201225154033.py
@ -1,24 +0,0 @@
 def gini_index(groups, classes):
 	# count all samples at split point
 	n_instances = float(sum([len(group) for group in groups]))
 	# sum weighted Gini index for each group
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# avoid divide by zero
 		if size == 0:
 			continue
 		score = 0.0
 		# score the group based on the score for each class
 		for class_val in classes:
            for row in group:
                print(row[-1])
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
 		# weight the group score by its relative size
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 # test Gini values
 print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
 print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225154115.py
+++ b/.history/app_20201225154115.py
@ -1,24 +0,0 @@
 def gini_index(groups, classes):
 	# count all samples at split point
 	n_instances = float(sum([len(group) for group in groups]))
 	# sum weighted Gini index for each group
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# avoid divide by zero
 		if size == 0:
 			continue
 		score = 0.0
 		# score the group based on the score for each class
 		for class_val in classes:
            for row in group: 
                print(row[-1])
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
 		# weight the group score by its relative size
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 # test Gini values
 print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
 print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225154141.py
+++ b/.history/app_20201225154141.py
@ -1,24 +0,0 @@
 def gini_index(groups, classes):
 	# count all samples at split point
 	n_instances = float(sum([len(group) for group in groups]))
 	# sum weighted Gini index for each group
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# avoid divide by zero
 		if size == 0:
 			continue
 		score = 0.0
 		# score the group based on the score for each class
 		for class_val in classes:
 			for row in group:
 				print(row[-1])
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
 		# weight the group score by its relative size
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 # test Gini values
 print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
 print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225154223.py
+++ b/.history/app_20201225154223.py
@ -1,23 +0,0 @@
 def gini_index(groups, classes):
 	# count all samples at split point
 	n_instances = float(sum([len(group) for group in groups]))
 	# sum weighted Gini index for each group
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# avoid divide by zero
 		if size == 0:
 			continue
 		score = 0.0
 		# score the group based on the score for each class
 		for class_val in classes:
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
 		# weight the group score by its relative size
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 # test Gini values
 print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
 print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225154225.py
+++ b/.history/app_20201225154225.py
@ -1,22 +0,0 @@
 def gini_index(groups, classes):
 	# count all samples at split point
 	n_instances = float(sum([len(group) for group in groups]))
 	# sum weighted Gini index for each group
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# avoid divide by zero
 		if size == 0:
 			continue
 		score = 0.0
 		# score the group based on the score for each class
 		for class_val in classes:
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
 		# weight the group score by its relative size
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 # test Gini values
 print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
 print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225154640.py
+++ b/.history/app_20201225154640.py
@ -1,54 +0,0 @@
 def test_split(index, value, dataset):
 	left, right = list(), list()
 	for row in dataset:
 		if row[index] < value:
 			left.append(row)
 		else:
 			right.append(row)
 	return left, right
 # Calculate the Gini index for a split dataset
 def gini_index(groups, classes):
 	# count all samples at split point
 	n_instances = float(sum([len(group) for group in groups]))
 	# sum weighted Gini index for each group
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# avoid divide by zero
 		if size == 0:
 			continue
 		score = 0.0
 		# score the group based on the score for each class
 		for class_val in classes:
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
 		# weight the group score by its relative size
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 # Select the best split point for a dataset
 def get_split(dataset):
 	class_values = list(set(row[-1] for row in dataset))
 	b_index, b_value, b_score, b_groups = 999, 999, 999, None
 	for index in range(len(dataset[0])-1):
 		for row in dataset:
 			groups = test_split(index, row[index], dataset)
 			gini = gini_index(groups, class_values)
 			print('X%d < %.3f Gini=%.3f' % ((index+1), row[index], gini))
 			if gini < b_score:
 				b_index, b_value, b_score, b_groups = index, row[index], gini, groups
 	return {'index':b_index, 'value':b_value, 'groups':b_groups}
 dataset = [[2.771244718,1.784783929,0],
 	[1.728571309,1.169761413,0],
 	[3.678319846,2.81281357,0],
 	[3.961043357,2.61995032,0],
 	[2.999208922,2.209014212,0],
 	[7.497545867,3.162953546,1],
 	[9.00220326,3.339047188,1],
 	[7.444542326,0.476683375,1],
 	[10.12493903,3.234550982,1],
 	[6.642287351,3.319983761,1]]
 split = get_split(dataset)
 print('Split: [X%d < %.3f]' % ((split['index']+1), split['value']))
--- a/.history/app_20201225161344.py
+++ b/.history/app_20201225161344.py
@ -1,48 +0,0 @@
 import pandas as pd
 def test_split(index, value, dataset):
 	left, right = list(), list()
 	for row in dataset:
 		if row[index] < value:
 			left.append(row)
 		else:
 			right.append(row)
 	return left, right
 # Calculate the Gini index for a split dataset
 def gini_index(groups, classes):
 	# count all samples at split point
 	n_instances = float(sum([len(group) for group in groups]))
 	# sum weighted Gini index for each group
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# avoid divide by zero
 		if size == 0:
 			continue
 		score = 0.0
 		# score the group based on the score for each class
 		for class_val in classes:
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
 		# weight the group score by its relative size
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 # Select the best split point for a dataset
 def get_split(dataset):
 	class_values = list(set(row[-1] for row in dataset))
 	b_index, b_value, b_score, b_groups = 999, 999, 999, None
 	for index in range(len(dataset[0])-1):
 		for row in dataset:
 			groups = test_split(index, row[index], dataset)
 			gini = gini_index(groups, class_values)
 			print('X%d < %.3f Gini=%.3f' % ((index+1), row[index], gini))
 			if gini < b_score:
 				b_index, b_value, b_score, b_groups = index, row[index], gini, groups
 	return {'index':b_index, 'value':b_value, 'groups':b_groups}
 dataset = pd.read_excel('dataset2.xls', sheet_name="forestfires").to_numpy()
 split = get_split(dataset)
 print('Split: [X%d < %.3f]' % ((split['index']+1), split['value']))
--- a/.history/app_20201225161529.py
+++ b/.history/app_20201225161529.py
@ -1,48 +0,0 @@
 import pandas as pd
 def test_split(index, value, dataset):
 	left, right = list(), list()
 	for row in dataset:
 		if row[index] < value:
 			left.append(row)
 		else:
 			right.append(row)
 	return left, right
 # Calculate the Gini index for a split dataset
 def gini_index(groups, classes):
 	# count all samples at split point
 	n_instances = float(sum([len(group) for group in groups]))
 	# sum weighted Gini index for each group
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# avoid divide by zero
 		if size == 0:
 			continue
 		score = 0.0
 		# score the group based on the score for each class
 		for class_val in classes:
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
 		# weight the group score by its relative size
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 # Select the best split point for a dataset
 def get_split(dataset):
 	class_values = list(set(row[-1] for row in dataset))
 	b_index, b_value, b_score, b_groups = 999, 999, 999, None
 	for index in range(len(dataset[0])-1):
 		for row in dataset:
 			groups = test_split(index, row[index], dataset)
 			gini = gini_index(groups, class_values)
 			print('X%d < %.3f Gini=%.3f' % ((index+1), row[index], gini))
 			if gini < b_score:
 				b_index, b_value, b_score, b_groups = index, row[index], gini, groups
 	return {'index':b_index, 'value':b_value, 'groups':b_groups}
 dataset = pd.read_excel('train.xls', sheet_name="Sheet1").to_numpy()
 split = get_split(dataset)
 print('Split: [X%d < %.3f]' % ((split['index']+1), split['value']))
--- a/.history/app_20201225161542.py
+++ b/.history/app_20201225161542.py
@ -1,48 +0,0 @@
 import pandas as pd
 def test_split(index, value, dataset):
 	left, right = list(), list()
 	for row in dataset:
 		if row[index] < value:
 			left.append(row)
 		else:
 			right.append(row)
 	return left, right
 # Calculate the Gini index for a split dataset
 def gini_index(groups, classes):
 	# count all samples at split point
 	n_instances = float(sum([len(group) for group in groups]))
 	# sum weighted Gini index for each group
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# avoid divide by zero
 		if size == 0:
 			continue
 		score = 0.0
 		# score the group based on the score for each class
 		for class_val in classes:
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
 		# weight the group score by its relative size
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 # Select the best split point for a dataset
 def get_split(dataset):
 	class_values = list(set(row[-1] for row in dataset))
 	b_index, b_value, b_score, b_groups = 999, 999, 999, None
 	for index in range(len(dataset[0])-1):
 		for row in dataset:
 			groups = test_split(index, row[index], dataset)
 			gini = gini_index(groups, class_values)
 			print('X%d < %.3f Gini=%.3f' % ((index+1), row[index], gini))
 			if gini < b_score:
 				b_index, b_value, b_score, b_groups = index, row[index], gini, groups
 	return {'index':b_index, 'value':b_value, 'groups':b_groups}
 dataset = pd.read_excel('Train.xlsx', sheet_name="Sheet1").to_numpy()
 split = get_split(dataset)
 print('Split: [X%d < %.3f]' % ((split['index']+1), split['value']))
--- a/.history/gini_20201225161914.py
+++ b/.history/gini_20201225161914.py
@ -1,48 +0,0 @@
 import pandas as pd
 def test_split(index, value, dataset):
 	left, right = list(), list()
 	for row in dataset:
 		if row[index] < value:
 			left.append(row)
 		else:
 			right.append(row)
 	return left, right
 # Calculate the Gini index for a split dataset
 def gini_index(groups, classes):
 	# count all samples at split point
 	n_instances = float(sum([len(group) for group in groups]))
 	# sum weighted Gini index for each group
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# avoid divide by zero
 		if size == 0:
 			continue
 		score = 0.0
 		# score the group based on the score for each class
 		for class_val in classes:
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
 		# weight the group score by its relative size
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 # Select the best split point for a dataset
 def get_split(dataset):
 	class_values = list(set(row[-1] for row in dataset))
 	b_index, b_value, b_score, b_groups = 999, 999, 999, None
 	for index in range(len(dataset[0])-1):
 		for row in dataset:
 			groups = test_split(index, row[index], dataset)
 			gini = gini_index(groups, class_values)
 			print('X%d < %.3f Gini=%.3f' % ((index+1), row[index], gini))
 			if gini < b_score:
 				b_index, b_value, b_score, b_groups = index, row[index], gini, groups
 	return {'index':b_index, 'value':b_value, 'groups':b_groups}
 dataset = pd.read_excel('Train.xlsx', sheet_name="Sheet1").to_numpy()
 split = get_split(dataset)
 print('Split: [X%d < %.3f]' % ((split['index']+1), split['value']))
--- a/.history/gini_20201225162123.py
+++ b/.history/gini_20201225162123.py
@ -1,90 +0,0 @@
 import pandas as pd
 def test_split(index, value, dataset):
 	left, right = list(), list()
 	for row in dataset:
 		if row[index] < value:
 			left.append(row)
 		else:
 			right.append(row)
 	return left, right
 # Calculate the Gini index for a split dataset
 def gini_index(groups, classes):
 	# count all samples at split point
 	n_instances = float(sum([len(group) for group in groups]))
 	# sum weighted Gini index for each group
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# avoid divide by zero
 		if size == 0:
 			continue
 		score = 0.0
 		# score the group based on the score for each class
 		for class_val in classes:
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
 		# weight the group score by its relative size
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 # Select the best split point for a dataset
 def get_split(dataset):
 	class_values = list(set(row[-1] for row in dataset))
 	b_index, b_value, b_score, b_groups = 999, 999, 999, None
 	for index in range(len(dataset[0])-1):
 		for row in dataset:
 			groups = test_split(index, row[index], dataset)
 			gini = gini_index(groups, class_values)
 			if gini < b_score:
 				b_index, b_value, b_score, b_groups = index, row[index], gini, groups
 	return {'index':b_index, 'value':b_value, 'groups':b_groups}
 # Create a terminal node value
 def to_terminal(group):
 	outcomes = [row[-1] for row in group]
 	return max(set(outcomes), key=outcomes.count)
 # Create child splits for a node or make terminal
 def split(node, max_depth, min_size, depth):
 	left, right = node['groups']
 	del(node['groups'])
 	# check for a no split
 	if not left or not right:
 		node['left'] = node['right'] = to_terminal(left + right)
 		return
 	# check for max depth
 	if depth >= max_depth:
 		node['left'], node['right'] = to_terminal(left), to_terminal(right)
 		return
 	# process left child
 	if len(left) <= min_size:
 		node['left'] = to_terminal(left)
 	else:
 		node['left'] = get_split(left)
 		split(node['left'], max_depth, min_size, depth+1)
 	# process right child
 	if len(right) <= min_size:
 		node['right'] = to_terminal(right)
 	else:
 		node['right'] = get_split(right)
 		split(node['right'], max_depth, min_size, depth+1)
 # Build a decision tree
 def build_tree(train, max_depth, min_size):
 	root = get_split(train)
 	split(root, max_depth, min_size, 1)
 	return root
 # Print a decision tree
 def print_tree(node, depth=0):
 	if isinstance(node, dict):
 		print('%s[X%d < %.3f]' % ((depth*' ', (node['index']+1), node['value'])))
 		print_tree(node['left'], depth+1)
 		print_tree(node['right'], depth+1)
 	else:
 		print('%s[%s]' % ((depth*' ', node)))
 dataset = pd.read_excel('Train.xlsx', sheet_name="Sheet1").to_numpy()
 tree = build_tree(dataset, 1, 1)
 print_tree(tree)
--- a/.history/gini_20201225162607.py
+++ b/.history/gini_20201225162607.py
@ -1,90 +0,0 @@
 import pandas as pd
 def test_split(index, value, dataset):
 	left, right = list(), list()
 	for row in dataset:
 		if row[index] < value:
 			left.append(row)
 		else:
 			right.append(row)
 	return left, right
 # Calculate the Gini index for a split dataset
 def gini_index(groups, classes):
 	"""Return the size-weighted Gini impurity of a candidate split.
 	``groups`` is a sequence of row groups (e.g. the left/right pair of a
 	split); the last element of each row is read as its class label.
 	``classes`` lists the labels to score. The result is the sum over groups
 	of ``1 - sum(p_c ** 2)`` weighted by each group's share of all rows, and
 	is 0.0 when every non-empty group holds a single class from ``classes``.
 	"""
 	# Total number of rows across all groups at this split point.
 	n_instances = float(sum(len(group) for group in groups))
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# Empty groups contribute nothing and would divide by zero below.
 		if size == 0:
 			continue
 		# Extract the labels once per group rather than once per class.
 		labels = [row[-1] for row in group]
 		score = 0.0
 		for class_val in classes:
 			p = labels.count(class_val) / size
 			score += p * p
 		# Weight the group's impurity by its relative size.
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 # Select the best split point for a dataset
 def get_split(dataset):
 	"""Search every feature column and row value for the lowest-Gini split.
 	Each row's value in each non-label column is tried as a threshold via
 	``test_split`` and scored with ``gini_index``; the first candidate with the
 	strictly lowest score wins. Returns a dict with keys ``'index'``,
 	``'value'`` and ``'groups'`` (the winning left/right partition).
 	"""
 	labels = list(set(record[-1] for record in dataset))
 	# The 999 placeholders survive only if no candidate beats the initial score.
 	best = {'index': 999, 'value': 999, 'groups': None}
 	lowest = 999
 	for column in range(len(dataset[0]) - 1):
 		for record in dataset:
 			threshold = record[column]
 			candidate = test_split(column, threshold, dataset)
 			impurity = gini_index(candidate, labels)
 			if impurity < lowest:
 				lowest = impurity
 				best = {'index': column, 'value': threshold, 'groups': candidate}
 	return best
 # Create a terminal node value
 def to_terminal(group):
 	"""Return the most frequent class label (last element) among the rows."""
 	labels = [record[-1] for record in group]
 	distinct = set(labels)
 	return max(distinct, key=lambda label: labels.count(label))
 # Create child splits for a node or make terminal
 def split(node, max_depth, min_size, depth):
 	"""Grow the subtree under ``node`` in place, recursing into each child.
 	``node`` must carry a ``'groups'`` pair (left rows, right rows); that key is
 	removed and ``'left'``/``'right'`` are set to either a class label from
 	``to_terminal`` or a nested node dict from ``get_split``. Children become
 	leaves when the split is one-sided, when ``depth >= max_depth``, or when a
 	side holds ``min_size`` rows or fewer. Returns None.
 	"""
 	left, right = node['groups']
 	del node['groups']
 	# One-sided split: both children share the majority label of all rows.
 	if not (left and right):
 		leaf = to_terminal(left + right)
 		node['left'] = leaf
 		node['right'] = leaf
 		return
 	# Depth budget exhausted: close both sides as leaves.
 	if depth >= max_depth:
 		left_leaf = to_terminal(left)
 		right_leaf = to_terminal(right)
 		node['left'] = left_leaf
 		node['right'] = right_leaf
 		return
 	# Otherwise handle the left child, then the right, with the same rule.
 	for side, rows in (('left', left), ('right', right)):
 		if len(rows) <= min_size:
 			node[side] = to_terminal(rows)
 			continue
 		node[side] = get_split(rows)
 		split(node[side], max_depth, min_size, depth+1)
 # Build a decision tree
 def build_tree(train, max_depth, min_size):
 	"""Fit a decision tree on ``train`` and return its root node dict.
 	The root comes from ``get_split``; ``split`` then expands it in place,
 	starting at depth 1, under the given ``max_depth`` and ``min_size`` limits.
 	"""
 	tree = get_split(train)
 	split(tree, max_depth, min_size, depth=1)
 	return tree
 # Print a decision tree
 def print_tree(node, depth=0):
 	"""Print the tree rooted at ``node``, one line per node, depth-indented.
 	Dict nodes print as ``[X<column> < <threshold>]`` (column shown 1-based)
 	followed by their left then right subtree; anything else prints as a leaf
 	``[label]``. Each level of depth adds one leading space.
 	"""
 	indent = depth*' '
 	# Leaves are any non-dict value; handle them first and stop.
 	if not isinstance(node, dict):
 		print('%s[%s]' % (indent, node))
 		return
 	print('%s[X%d < %.3f]' % (indent, node['index']+1, node['value']))
 	for side in ('left', 'right'):
 		print_tree(node[side], depth+1)
 # Script driver: load the "Sheet1" worksheet of Train.xlsx as a NumPy array,
 # fit a tree with max_depth=5 and min_size=1, and print it. There is no
 # __main__ guard, so this also runs whenever the module is imported.
 dataset = pd.read_excel('Train.xlsx', sheet_name="Sheet1").to_numpy()
 tree = build_tree(dataset, 5, 1)
 print_tree(tree)
--- a/.history/gini_20201225162804.py
+++ b/.history/gini_20201225162804.py
@ -1,90 +0,0 @@
 import pandas as pd
 def test_split(index, value, dataset):
 	left, right = list(), list()
 	for row in dataset:
 		if row[index] < value:
 			left.append(row)
 		else:
 			right.append(row)
 	return left, right
 # Calculate the Gini index for a split dataset
 def gini_index(groups, classes):
 	"""Return the size-weighted Gini impurity of a candidate split.
 	``groups`` is a sequence of row groups (e.g. the left/right pair of a
 	split); the last element of each row is read as its class label.
 	``classes`` lists the labels to score. The result is the sum over groups
 	of ``1 - sum(p_c ** 2)`` weighted by each group's share of all rows, and
 	is 0.0 when every non-empty group holds a single class from ``classes``.
 	"""
 	# Total number of rows across all groups at this split point.
 	n_instances = float(sum(len(group) for group in groups))
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# Empty groups contribute nothing and would divide by zero below.
 		if size == 0:
 			continue
 		# Extract the labels once per group rather than once per class.
 		labels = [row[-1] for row in group]
 		score = 0.0
 		for class_val in classes:
 			p = labels.count(class_val) / size
 			score += p * p
 		# Weight the group's impurity by its relative size.
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 # Select the best split point for a dataset
 def get_split(dataset):
 	"""Search every feature column and row value for the lowest-Gini split.
 	Each row's value in each non-label column is tried as a threshold via
 	``test_split`` and scored with ``gini_index``; the first candidate with the
 	strictly lowest score wins. Returns a dict with keys ``'index'``,
 	``'value'`` and ``'groups'`` (the winning left/right partition).
 	"""
 	labels = list(set(record[-1] for record in dataset))
 	# The 999 placeholders survive only if no candidate beats the initial score.
 	best = {'index': 999, 'value': 999, 'groups': None}
 	lowest = 999
 	for column in range(len(dataset[0]) - 1):
 		for record in dataset:
 			threshold = record[column]
 			candidate = test_split(column, threshold, dataset)
 			impurity = gini_index(candidate, labels)
 			if impurity < lowest:
 				lowest = impurity
 				best = {'index': column, 'value': threshold, 'groups': candidate}
 	return best
 # Create a terminal node value
 def to_terminal(group):
 	"""Return the most frequent class label (last element) among the rows."""
 	labels = [record[-1] for record in group]
 	distinct = set(labels)
 	return max(distinct, key=lambda label: labels.count(label))
 # Create child splits for a node or make terminal
 def split(node, max_depth, min_size, depth):
 	"""Grow the subtree under ``node`` in place, recursing into each child.
 	``node`` must carry a ``'groups'`` pair (left rows, right rows); that key is
 	removed and ``'left'``/``'right'`` are set to either a class label from
 	``to_terminal`` or a nested node dict from ``get_split``. Children become
 	leaves when the split is one-sided, when ``depth >= max_depth``, or when a
 	side holds ``min_size`` rows or fewer. Returns None.
 	"""
 	left, right = node['groups']
 	del node['groups']
 	# One-sided split: both children share the majority label of all rows.
 	if not (left and right):
 		leaf = to_terminal(left + right)
 		node['left'] = leaf
 		node['right'] = leaf
 		return
 	# Depth budget exhausted: close both sides as leaves.
 	if depth >= max_depth:
 		left_leaf = to_terminal(left)
 		right_leaf = to_terminal(right)
 		node['left'] = left_leaf
 		node['right'] = right_leaf
 		return
 	# Otherwise handle the left child, then the right, with the same rule.
 	for side, rows in (('left', left), ('right', right)):
 		if len(rows) <= min_size:
 			node[side] = to_terminal(rows)
 			continue
 		node[side] = get_split(rows)
 		split(node[side], max_depth, min_size, depth+1)
 # Build a decision tree
 def build_tree(train, max_depth, min_size):
 	"""Fit a decision tree on ``train`` and return its root node dict.
 	The root comes from ``get_split``; ``split`` then expands it in place,
 	starting at depth 1, under the given ``max_depth`` and ``min_size`` limits.
 	"""
 	tree = get_split(train)
 	split(tree, max_depth, min_size, depth=1)
 	return tree
 # Print a decision tree
 def print_tree(node, depth=0):
 	"""Print the tree rooted at ``node``, one line per node, depth-indented.
 	Dict nodes print as ``[X<column> < <threshold>]`` (column shown 1-based)
 	followed by their left then right subtree; anything else prints as a leaf
 	``[label]``. Each level of depth adds one leading space.
 	"""
 	indent = depth*' '
 	# Leaves are any non-dict value; handle them first and stop.
 	if not isinstance(node, dict):
 		print('%s[%s]' % (indent, node))
 		return
 	print('%s[X%d < %.3f]' % (indent, node['index']+1, node['value']))
 	for side in ('left', 'right'):
 		print_tree(node[side], depth+1)
 # Script driver: load the "Sheet1" worksheet of Train.xlsx as a NumPy array,
 # fit a tree with max_depth=5 and min_size=1, and print it. There is no
 # __main__ guard, so this also runs whenever the module is imported.
 dataset = pd.read_excel('Train.xlsx', sheet_name="Sheet1").to_numpy()
 tree = build_tree(dataset, 5, 1)
 print_tree(tree)
--- a/.history/gini_20201225162859.py
+++ b/.history/gini_20201225162859.py
@ -1,90 +0,0 @@
 import pandas as pd
 def test_split(index, value, dataset):
 	left, right = list(), list()
 	for row in dataset:
 		if row[index] < value:
 			left.append(row)
 		else:
 			right.append(row)
 	return left, right
 # Calculate the Gini index for a split dataset
 def gini_index(groups, classes):
 	"""Return the size-weighted Gini impurity of a candidate split.
 	``groups`` is a sequence of row groups (e.g. the left/right pair of a
 	split); the last element of each row is read as its class label.
 	``classes`` lists the labels to score. The result is the sum over groups
 	of ``1 - sum(p_c ** 2)`` weighted by each group's share of all rows, and
 	is 0.0 when every non-empty group holds a single class from ``classes``.
 	"""
 	# Total number of rows across all groups at this split point.
 	n_instances = float(sum(len(group) for group in groups))
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# Empty groups contribute nothing and would divide by zero below.
 		if size == 0:
 			continue
 		# Extract the labels once per group rather than once per class.
 		labels = [row[-1] for row in group]
 		score = 0.0
 		for class_val in classes:
 			p = labels.count(class_val) / size
 			score += p * p
 		# Weight the group's impurity by its relative size.
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 # Select the best split point for a dataset
 def get_split(dataset):
 	"""Search every feature column and row value for the lowest-Gini split.
 	Each row's value in each non-label column is tried as a threshold via
 	``test_split`` and scored with ``gini_index``; the first candidate with the
 	strictly lowest score wins. Returns a dict with keys ``'index'``,
 	``'value'`` and ``'groups'`` (the winning left/right partition).
 	"""
 	labels = list(set(record[-1] for record in dataset))
 	# The 999 placeholders survive only if no candidate beats the initial score.
 	best = {'index': 999, 'value': 999, 'groups': None}
 	lowest = 999
 	for column in range(len(dataset[0]) - 1):
 		for record in dataset:
 			threshold = record[column]
 			candidate = test_split(column, threshold, dataset)
 			impurity = gini_index(candidate, labels)
 			if impurity < lowest:
 				lowest = impurity
 				best = {'index': column, 'value': threshold, 'groups': candidate}
 	return best
 # Create a terminal node value
 def to_terminal(group):
 	"""Return the most frequent class label (last element) among the rows."""
 	labels = [record[-1] for record in group]
 	distinct = set(labels)
 	return max(distinct, key=lambda label: labels.count(label))
 # Create child splits for a node or make terminal
 def split(node, max_depth, min_size, depth):
 	"""Grow the subtree under ``node`` in place, recursing into each child.
 	``node`` must carry a ``'groups'`` pair (left rows, right rows); that key is
 	removed and ``'left'``/``'right'`` are set to either a class label from
 	``to_terminal`` or a nested node dict from ``get_split``. Children become
 	leaves when the split is one-sided, when ``depth >= max_depth``, or when a
 	side holds ``min_size`` rows or fewer. Returns None.
 	"""
 	left, right = node['groups']
 	del node['groups']
 	# One-sided split: both children share the majority label of all rows.
 	if not (left and right):
 		leaf = to_terminal(left + right)
 		node['left'] = leaf
 		node['right'] = leaf
 		return
 	# Depth budget exhausted: close both sides as leaves.
 	if depth >= max_depth:
 		left_leaf = to_terminal(left)
 		right_leaf = to_terminal(right)
 		node['left'] = left_leaf
 		node['right'] = right_leaf
 		return
 	# Otherwise handle the left child, then the right, with the same rule.
 	for side, rows in (('left', left), ('right', right)):
 		if len(rows) <= min_size:
 			node[side] = to_terminal(rows)
 			continue
 		node[side] = get_split(rows)
 		split(node[side], max_depth, min_size, depth+1)
 # Build a decision tree
 def build_tree(train, max_depth, min_size):
 	"""Fit a decision tree on ``train`` and return its root node dict.
 	The root comes from ``get_split``; ``split`` then expands it in place,
 	starting at depth 1, under the given ``max_depth`` and ``min_size`` limits.
 	"""
 	tree = get_split(train)
 	split(tree, max_depth, min_size, depth=1)
 	return tree
 # Print a decision tree
 def print_tree(node, depth=0):
 	"""Print the tree rooted at ``node``, one line per node, depth-indented.
 	Dict nodes print as ``[X<column> < <threshold>]`` (column shown 1-based)
 	followed by their left then right subtree; anything else prints as a leaf
 	``[label]``. Each level of depth adds one leading space.
 	"""
 	indent = depth*' '
 	# Leaves are any non-dict value; handle them first and stop.
 	if not isinstance(node, dict):
 		print('%s[%s]' % (indent, node))
 		return
 	print('%s[X%d < %.3f]' % (indent, node['index']+1, node['value']))
 	for side in ('left', 'right'):
 		print_tree(node[side], depth+1)
 # Script driver: load the "Sheet1" worksheet of Train.xlsx as a NumPy array,
 # fit a tree with max_depth=5 and min_size=0, and print it. There is no
 # __main__ guard, so this also runs whenever the module is imported.
 dataset = pd.read_excel('Train.xlsx', sheet_name="Sheet1").to_numpy()
 tree = build_tree(dataset, 5, 0)
 print_tree(tree)
--- a/.history/gini_20201225163004.py
+++ b/.history/gini_20201225163004.py
@ -1,92 +0,0 @@
 import pandas as pd
 def test_split(index, value, dataset):
 	left, right = list(), list()
 	for row in dataset:
 		if row[index] < value:
 			left.append(row)
 		else:
 			right.append(row)
 	return left, right
 # Calculate the Gini index for a split dataset
 def gini_index(groups, classes):
 	"""Return the size-weighted Gini impurity of a candidate split.
 	``groups`` is a sequence of row groups (e.g. the left/right pair of a
 	split); the last element of each row is read as its class label.
 	``classes`` lists the labels to score. The result is the sum over groups
 	of ``1 - sum(p_c ** 2)`` weighted by each group's share of all rows, and
 	is 0.0 when every non-empty group holds a single class from ``classes``.
 	"""
 	# Total number of rows across all groups at this split point.
 	n_instances = float(sum(len(group) for group in groups))
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# Empty groups contribute nothing and would divide by zero below.
 		if size == 0:
 			continue
 		# Extract the labels once per group rather than once per class.
 		labels = [row[-1] for row in group]
 		score = 0.0
 		for class_val in classes:
 			p = labels.count(class_val) / size
 			score += p * p
 		# Weight the group's impurity by its relative size.
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 # Select the best split point for a dataset
 def get_split(dataset):
 	"""Search every feature column and row value for the lowest-Gini split.
 	Each row's value in each non-label column is tried as a threshold via
 	``test_split`` and scored with ``gini_index``; the first candidate with the
 	strictly lowest score wins. Returns a dict with keys ``'index'``,
 	``'value'`` and ``'groups'`` (the winning left/right partition).
 	"""
 	labels = list(set(record[-1] for record in dataset))
 	# The 999 placeholders survive only if no candidate beats the initial score.
 	best = {'index': 999, 'value': 999, 'groups': None}
 	lowest = 999
 	for column in range(len(dataset[0]) - 1):
 		for record in dataset:
 			threshold = record[column]
 			candidate = test_split(column, threshold, dataset)
 			impurity = gini_index(candidate, labels)
 			if impurity < lowest:
 				lowest = impurity
 				best = {'index': column, 'value': threshold, 'groups': candidate}
 	return best
 # Create a terminal node value
 def to_terminal(group):
 	"""Return the most frequent class label (last element) among the rows."""
 	labels = [record[-1] for record in group]
 	distinct = set(labels)
 	return max(distinct, key=lambda label: labels.count(label))
 # Create child splits for a node or make terminal
 def split(node, max_depth, min_size, depth):
 	"""Grow the subtree under ``node`` in place, recursing into each child.
 	``node`` must carry a ``'groups'`` pair (left rows, right rows); that key is
 	removed and ``'left'``/``'right'`` are set to either a class label from
 	``to_terminal`` or a nested node dict from ``get_split``. Children become
 	leaves when the split is one-sided, when ``depth >= max_depth``, or when a
 	side holds ``min_size`` rows or fewer. Returns None.
 	"""
 	left, right = node['groups']
 	del node['groups']
 	# One-sided split: both children share the majority label of all rows.
 	if not (left and right):
 		leaf = to_terminal(left + right)
 		node['left'] = leaf
 		node['right'] = leaf
 		return
 	# Depth budget exhausted: close both sides as leaves.
 	if depth >= max_depth:
 		left_leaf = to_terminal(left)
 		right_leaf = to_terminal(right)
 		node['left'] = left_leaf
 		node['right'] = right_leaf
 		return
 	# Otherwise handle the left child, then the right, with the same rule.
 	for side, rows in (('left', left), ('right', right)):
 		if len(rows) <= min_size:
 			node[side] = to_terminal(rows)
 			continue
 		node[side] = get_split(rows)
 		split(node[side], max_depth, min_size, depth+1)
 # Build a decision tree
 def build_tree(train, max_depth, min_size):
 	"""Fit a decision tree on ``train`` and return its root node dict.
 	The root comes from ``get_split``; ``split`` then expands it in place,
 	starting at depth 1, under the given ``max_depth`` and ``min_size`` limits.
 	"""
 	tree = get_split(train)
 	split(tree, max_depth, min_size, depth=1)
 	return tree
 # Print a decision tree
 def print_tree(node, depth=0):
 	"""Print the tree rooted at ``node``, one line per node, depth-indented.
 	Dict nodes print as ``[X<column> < <threshold>]`` (column shown 1-based)
 	followed by their left then right subtree; anything else prints as a leaf
 	``[label]``. Each level of depth adds one leading space.
 	"""
 	indent = depth*' '
 	# Leaves are any non-dict value; handle them first and stop.
 	if not isinstance(node, dict):
 		print('%s[%s]' % (indent, node))
 		return
 	print('%s[X%d < %.3f]' % (indent, node['index']+1, node['value']))
 	for side in ('left', 'right'):
 		print_tree(node[side], depth+1)
 # Run the demo only when executed as a script, not on import.
 if __name__ == "__main__":
 	# Load the "Sheet1" worksheet of Train.xlsx as a NumPy array of rows.
 	dataset = pd.read_excel('Train.xlsx', sheet_name="Sheet1").to_numpy()
 	# Fit with max_depth=5 and min_size=0, then print the resulting tree.
 	tree = build_tree(dataset, 5, 0)
 	print_tree(tree)
--- a/.history/gini_20201225163146.py
+++ b/.history/gini_20201225163146.py
@ -1,92 +0,0 @@
 import pandas as pd
 def test_split(index, value, dataset):
 	left, right = list(), list()
 	for row in dataset:
 		if row[index] < value:
 			left.append(row)
 		else:
 			right.append(row)
 	return left, right
 # Calculate the Gini index for a split dataset
 def gini_index(groups, classes):
 	"""Return the size-weighted Gini impurity of a candidate split.
 	``groups`` is a sequence of row groups (e.g. the left/right pair of a
 	split); the last element of each row is read as its class label.
 	``classes`` lists the labels to score. The result is the sum over groups
 	of ``1 - sum(p_c ** 2)`` weighted by each group's share of all rows, and
 	is 0.0 when every non-empty group holds a single class from ``classes``.
 	"""
 	# Total number of rows across all groups at this split point.
 	n_instances = float(sum(len(group) for group in groups))
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# Empty groups contribute nothing and would divide by zero below.
 		if size == 0:
 			continue
 		# Extract the labels once per group rather than once per class.
 		labels = [row[-1] for row in group]
 		score = 0.0
 		for class_val in classes:
 			p = labels.count(class_val) / size
 			score += p * p
 		# Weight the group's impurity by its relative size.
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 # Select the best split point for a dataset
 def get_split(dataset):
 	"""Search every feature column and row value for the lowest-Gini split.
 	Each row's value in each non-label column is tried as a threshold via
 	``test_split`` and scored with ``gini_index``; the first candidate with the
 	strictly lowest score wins. Returns a dict with keys ``'index'``,
 	``'value'`` and ``'groups'`` (the winning left/right partition).
 	"""
 	labels = list(set(record[-1] for record in dataset))
 	# The 999 placeholders survive only if no candidate beats the initial score.
 	best = {'index': 999, 'value': 999, 'groups': None}
 	lowest = 999
 	for column in range(len(dataset[0]) - 1):
 		for record in dataset:
 			threshold = record[column]
 			candidate = test_split(column, threshold, dataset)
 			impurity = gini_index(candidate, labels)
 			if impurity < lowest:
 				lowest = impurity
 				best = {'index': column, 'value': threshold, 'groups': candidate}
 	return best
 # Create a terminal node value
 def to_terminal(group):
 	"""Return the most frequent class label (last element) among the rows."""
 	labels = [record[-1] for record in group]
 	distinct = set(labels)
 	return max(distinct, key=lambda label: labels.count(label))
 # Create child splits for a node or make terminal
 def split(node, max_depth, min_size, depth):
 	"""Grow the subtree under ``node`` in place, recursing into each child.
 	``node`` must carry a ``'groups'`` pair (left rows, right rows); that key is
 	removed and ``'left'``/``'right'`` are set to either a class label from
 	``to_terminal`` or a nested node dict from ``get_split``. Children become
 	leaves when the split is one-sided, when ``depth >= max_depth``, or when a
 	side holds ``min_size`` rows or fewer. Returns None.
 	"""
 	left, right = node['groups']
 	del node['groups']
 	# One-sided split: both children share the majority label of all rows.
 	if not (left and right):
 		leaf = to_terminal(left + right)
 		node['left'] = leaf
 		node['right'] = leaf
 		return
 	# Depth budget exhausted: close both sides as leaves.
 	if depth >= max_depth:
 		left_leaf = to_terminal(left)
 		right_leaf = to_terminal(right)
 		node['left'] = left_leaf
 		node['right'] = right_leaf
 		return
 	# Otherwise handle the left child, then the right, with the same rule.
 	for side, rows in (('left', left), ('right', right)):
 		if len(rows) <= min_size:
 			node[side] = to_terminal(rows)
 			continue
 		node[side] = get_split(rows)
 		split(node[side], max_depth, min_size, depth+1)
 # Build a decision tree
 def build_tree(train, max_depth, min_size):
 	"""Fit a decision tree on ``train`` and return its root node dict.
 	The root comes from ``get_split``; ``split`` then expands it in place,
 	starting at depth 1, under the given ``max_depth`` and ``min_size`` limits.
 	"""
 	tree = get_split(train)
 	split(tree, max_depth, min_size, depth=1)
 	return tree
 # Print a decision tree
 def print_tree(node, depth=0):
 	"""Print the tree rooted at ``node``, one line per node, depth-indented.
 	Dict nodes print as ``[X<column> < <threshold>]`` (column shown 1-based)
 	followed by their left then right subtree; anything else prints as a leaf
 	``[label]``. Each level of depth adds one leading space.
 	"""
 	indent = depth*' '
 	# Leaves are any non-dict value; handle them first and stop.
 	if not isinstance(node, dict):
 		print('%s[%s]' % (indent, node))
 		return
 	print('%s[X%d < %.3f]' % (indent, node['index']+1, node['value']))
 	for side in ('left', 'right'):
 		print_tree(node[side], depth+1)
 # Run the demo only when executed as a script, not on import.
 if __name__ == "__main__":
 	# Load the "Sheet1" worksheet of Train.xlsx as a NumPy array of rows.
 	dataset = pd.read_excel('Train.xlsx', sheet_name="Sheet1").to_numpy()
 	# Fit with max_depth=1 and min_size=1, then print the resulting tree.
 	tree = build_tree(dataset, 1, 1)
 	print_tree(tree)
--- a/.history/gini_20201225163343.py
+++ b/.history/gini_20201225163343.py
@ -1,92 +0,0 @@
 import pandas as pd
 def test_split(index, value, dataset):
 	left, right = list(), list()
 	for row in dataset:
 		if row[index] < value:
 			left.append(row)
 		else:
 			right.append(row)
 	return left, right
 # Calculate the Gini index for a split dataset
 def gini_index(groups, classes):
 	"""Return the size-weighted Gini impurity of a candidate split.
 	``groups`` is a sequence of row groups (e.g. the left/right pair of a
 	split); the last element of each row is read as its class label.
 	``classes`` lists the labels to score. The result is the sum over groups
 	of ``1 - sum(p_c ** 2)`` weighted by each group's share of all rows, and
 	is 0.0 when every non-empty group holds a single class from ``classes``.
 	"""
 	# Total number of rows across all groups at this split point.
 	n_instances = float(sum(len(group) for group in groups))
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
 		# Empty groups contribute nothing and would divide by zero below.
 		if size == 0:
 			continue
 		# Extract the labels once per group rather than once per class.
 		labels = [row[-1] for row in group]
 		score = 0.0
 		for class_val in classes:
 			p = labels.count(class_val) / size
 			score += p * p
 		# Weight the group's impurity by its relative size.
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
 # Select the best split point for a dataset
 def get_split(dataset):
 	"""Search every feature column and row value for the lowest-Gini split.
 	Each row's value in each non-label column is tried as a threshold via
 	``test_split`` and scored with ``gini_index``; the first candidate with the
 	strictly lowest score wins. Returns a dict with keys ``'index'``,
 	``'value'`` and ``'groups'`` (the winning left/right partition).
 	"""
 	labels = list(set(record[-1] for record in dataset))
 	# The 999 placeholders survive only if no candidate beats the initial score.
 	best = {'index': 999, 'value': 999, 'groups': None}
 	lowest = 999
 	for column in range(len(dataset[0]) - 1):
 		for record in dataset:
 			threshold = record[column]
 			candidate = test_split(column, threshold, dataset)
 			impurity = gini_index(candidate, labels)
 			if impurity < lowest:
 				lowest = impurity
 				best = {'index': column, 'value': threshold, 'groups': candidate}
 	return best
 # Create a terminal node value
 def to_terminal(group):
 	"""Return the most frequent class label (last element) among the rows."""
 	labels = [record[-1] for record in group]
 	distinct = set(labels)
 	return max(distinct, key=lambda label: labels.count(label))
 # Create child splits for a node or make terminal
 def split(node, max_depth, min_size, depth):
 	"""Grow the subtree under ``node`` in place, recursing into each child.
 	``node`` must carry a ``'groups'`` pair (left rows, right rows); that key is
 	removed and ``'left'``/``'right'`` are set to either a class label from
 	``to_terminal`` or a nested node dict from ``get_split``. Children become
 	leaves when the split is one-sided, when ``depth >= max_depth``, or when a
 	side holds ``min_size`` rows or fewer. Returns None.
 	"""
 	left, right = node['groups']
 	del node['groups']
 	# One-sided split: both children share the majority label of all rows.
 	if not (left and right):
 		leaf = to_terminal(left + right)
 		node['left'] = leaf
 		node['right'] = leaf
 		return
 	# Depth budget exhausted: close both sides as leaves.
 	if depth >= max_depth:
 		left_leaf = to_terminal(left)
 		right_leaf = to_terminal(right)
 		node['left'] = left_leaf
 		node['right'] = right_leaf
 		return
 	# Otherwise handle the left child, then the right, with the same rule.
 	for side, rows in (('left', left), ('right', right)):
 		if len(rows) <= min_size:
 			node[side] = to_terminal(rows)
 			continue
 		node[side] = get_split(rows)
 		split(node[side], max_depth, min_size, depth+1)
 # Build a decision tree
 def build_tree(train, max_depth, min_size):
 	"""Fit a decision tree on ``train`` and return its root node dict.
 	The root comes from ``get_split``; ``split`` then expands it in place,
 	starting at depth 1, under the given ``max_depth`` and ``min_size`` limits.
 	"""
 	tree = get_split(train)
 	split(tree, max_depth, min_size, depth=1)
 	return tree
 # Print a decision tree
 def print_tree(node, depth=0):
 	"""Print the tree rooted at ``node``, one line per node, depth-indented.
 	Dict nodes print as ``[X<column> < <threshold>]`` (column shown 1-based)
 	followed by their left then right subtree; anything else prints as a leaf
 	``[label]``. Each level of depth adds one leading space.
 	"""
 	indent = depth*' '
 	# Leaves are any non-dict value; handle them first and stop.
 	if not isinstance(node, dict):
 		print('%s[%s]' % (indent, node))
 		return
 	print('%s[X%d < %.3f]' % (indent, node['index']+1, node['value']))
 	for side in ('left', 'right'):
 		print_tree(node[side], depth+1)
 # Run the demo only when executed as a script, not on import.
 if __name__ == "__main__":
 	# Load the "Sheet1" worksheet of Train.xlsx as a NumPy array of rows.
 	dataset = pd.read_excel('Train.xlsx', sheet_name="Sheet1").to_numpy()
 	# Fit with max_depth=1 and min_size=1, then print the resulting tree.
 	tree = build_tree(dataset, 1, 1)
 	print_tree(tree)
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@ -1,15 +0,0 @@
 {
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Python: Current File",
            "type": "python",
            "request": "launch",
            "program": "${file}",
            "console": "integratedTerminal"
        }
    ]
 }
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@ -1,3 +0,0 @@
 {
    "python.pythonPath": "C:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python38-32\\python.exe"
 }
--- a/gini.py
+++ b/gini.py
@ -8,27 +8,27 @@ def test_split(index, value, dataset):
 			right.append(row)
 	return left, right
-# Calculate the Gini index for a split dataset
+
 def gini_index(groups, classes):
-	# count all samples at split point
+	
 	n_instances = float(sum([len(group) for group in groups]))
-	# sum weighted Gini index for each group
+	
 	gini = 0.0
 	for group in groups:
 		size = float(len(group))
-		# avoid divide by zero
+		
 		if size == 0:
 			continue
 		score = 0.0
-		# score the group based on the score for each class
+		
 		for class_val in classes:
 			p = [row[-1] for row in group].count(class_val) / size
 			score += p * p
-		# weight the group score by its relative size
+		
 		gini += (1.0 - score) * (size / n_instances)
 	return gini
-# Select the best split point for a dataset
+
 def get_split(dataset):
 	class_values = list(set(row[-1] for row in dataset))
 	b_index, b_value, b_score, b_groups = 999, 999, 999, None
@ -40,43 +40,43 @@ def get_split(dataset):
 				b_index, b_value, b_score, b_groups = index, row[index], gini, groups
 	return {'index':b_index, 'value':b_value, 'groups':b_groups}
-# Create a terminal node value
+
 def to_terminal(group):
 	"""Return the most frequent class label (last element) among the rows."""
 	labels = [record[-1] for record in group]
 	distinct = set(labels)
 	return max(distinct, key=lambda label: labels.count(label))
-# Create child splits for a node or make terminal
+
 def split(node, max_depth, min_size, depth):
 	left, right = node['groups']
 	del(node['groups'])
-	# check for a no split
+	
 	if not left or not right:
 		node['left'] = node['right'] = to_terminal(left + right)
 		return
-	# check for max depth
+	
 	if depth >= max_depth:
 		node['left'], node['right'] = to_terminal(left), to_terminal(right)
 		return
-	# process left child
+	
 	if len(left) <= min_size:
 		node['left'] = to_terminal(left)
 	else:
 		node['left'] = get_split(left)
 		split(node['left'], max_depth, min_size, depth+1)
-	# process right child
+	
 	if len(right) <= min_size:
 		node['right'] = to_terminal(right)
 	else:
 		node['right'] = get_split(right)
 		split(node['right'], max_depth, min_size, depth+1)
-# Build a decision tree
+
 def build_tree(train, max_depth, min_size):
 	"""Fit a decision tree on ``train`` and return its root node dict.
 	The root comes from ``get_split``; ``split`` then expands it in place,
 	starting at depth 1, under the given ``max_depth`` and ``min_size`` limits.
 	"""
 	tree = get_split(train)
 	split(tree, max_depth, min_size, depth=1)
 	return tree
-# Print a decision tree
+
 def print_tree(node, depth=0):
 	if isinstance(node, dict):
 		print('%s[X%d < %.3f]' % ((depth*' ', (node['index']+1), node['value'])))