Initial Commit

2020-12-25 16:33:49 +03:30 · 2020-12-25 16:33:49 +03:30 · 51f4670f60
commit 51f4670f60
40 changed files with 1523 additions and 0 deletions
--- a/.history/app_20201225152542.py
+++ b/.history/app_20201225152542.py
--- a/.history/app_20201225152608.py
+++ b/.history/app_20201225152608.py
@ -0,0 +1,21 @@
+def gini_index(groups, classes):
+	# count all samples at split point
+	n_instances = float(sum([len(group) for group in groups]))
+	# sum weighted Gini index for each group
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		score = 0.0
+		# score the group based on the score for each class
+		for class_val in classes:
+			p = [row[-1] for row in group].count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+    
+print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
+print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225152810.py
+++ b/.history/app_20201225152810.py
@ -0,0 +1,21 @@
+def gini_index(groups, classes):
+	# count all samples at split point
+	n_instances = float(sum([len(group) for group in groups]))
+	# sum weighted Gini index for each group
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		score = 0.0
+		# score the group based on the score for each class
+		for class_val in classes:
+			p = [row[-1] for row in group].count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+
+print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
+print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225152934.py
+++ b/.history/app_20201225152934.py
@ -0,0 +1,22 @@
+def gini_index(groups, classes):
+	# count all samples at split point
+    print(classes)
+	n_instances = float(sum([len(group) for group in groups]))
+	# sum weighted Gini index for each group
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		score = 0.0
+		# score the group based on the score for each class
+		for class_val in classes:
+			p = [row[-1] for row in group].count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+
+print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
+print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225153009.py
+++ b/.history/app_20201225153009.py
@ -0,0 +1,22 @@
+def gini_index(groups, classes):
+    print(classes)
+	# count all samples at split point
+	n_instances = float(sum([len(group) for group in groups]))
+	# sum weighted Gini index for each group
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		score = 0.0
+		# score the group based on the score for each class
+		for class_val in classes:
+			p = [row[-1] for row in group].count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+
+print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
+print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225153016.py
+++ b/.history/app_20201225153016.py
@ -0,0 +1,22 @@
+def gini_index(groups, classes):
+   
+	# count all samples at split point
+	n_instances = float(sum([len(group) for group in groups]))
+	# sum weighted Gini index for each group
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		score = 0.0
+		# score the group based on the score for each class
+		for class_val in classes:
+			p = [row[-1] for row in group].count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+
+print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
+print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225153034.py
+++ b/.history/app_20201225153034.py
@ -0,0 +1,23 @@
+def gini_index(groups, classes):
+    print(groups)
+    print(classes)
+	# count all samples at split point
+	n_instances = float(sum([len(group) for group in groups]))
+	# sum weighted Gini index for each group
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		score = 0.0
+		# score the group based on the score for each class
+		for class_val in classes:
+			p = [row[-1] for row in group].count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+
+print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
+print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225153041.py
+++ b/.history/app_20201225153041.py
@ -0,0 +1,24 @@
+def gini_index(groups, classes):
+    
+	# count all samples at split point
+	n_instances = float(sum([len(group) for group in groups]))
+	# sum weighted Gini index for each group
+	gini = 0.0
+    print(groups)
+    print(classes)
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		score = 0.0
+		# score the group based on the score for each class
+		for class_val in classes:
+			p = [row[-1] for row in group].count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+
+print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
+print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225153057.py
+++ b/.history/app_20201225153057.py
@ -0,0 +1,24 @@
+def gini_index(groups, classes):
+    
+	# count all samples at split point
+	n_instances = float(sum([len(group) for group in groups]))
+	# sum weighted Gini index for each group
+	gini = 0.0
+    print(groups)
+     print(classes)
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		score = 0.0
+		# score the group based on the score for each class
+		for class_val in classes:
+			p = [row[-1] for row in group].count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+
+print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
+print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225153107.py
+++ b/.history/app_20201225153107.py
@ -0,0 +1,24 @@
+def gini_index(groups, classes):
+    
+	# count all samples at split point
+	n_instances = float(sum([len(group) for group in groups]))
+	# sum weighted Gini index for each group
+	gini = 0.0
+    print(groups)
+    print(classes)
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		score = 0.0
+		# score the group based on the score for each class
+		for class_val in classes:
+			p = [row[-1] for row in group].count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+
+print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
+print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225153134.py
+++ b/.history/app_20201225153134.py
@ -0,0 +1,24 @@
+def gini_index(groups, classes):
+    
+	# count all samples at split point
+	n_instances = float(sum([len(group) for group in groups]))
+	# sum weighted Gini index for each group
+	gini = 0.0
+    print(groups)
+    print(classes) 
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		score = 0.0
+		# score the group based on the score for each class
+		for class_val in classes:
+			p = [row[-1] for row in group].count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+
+print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
+print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225153219.py
+++ b/.history/app_20201225153219.py
@ -0,0 +1,22 @@
+def gini_index(groups, classes):
+    
+	# count all samples at split point
+	n_instances = float(sum([len(group) for group in groups]))
+	# sum weighted Gini index for each group
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		score = 0.0
+		# score the group based on the score for each class
+		for class_val in classes:
+			p = [row[-1] for row in group].count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+
+print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
+print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225153710.py
+++ b/.history/app_20201225153710.py
@ -0,0 +1,24 @@
+def gini_index(groups, classes):
+    
+	# count all samples at split point
+	n_instances = float(sum([len(group) for group in groups]))
+	# sum weighted Gini index for each group
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		score = 0.0
+		# score the group based on the score for each class
+		for class_val in classes:
+            for row in group:
+                print(row[-1])
+			p = [row[-1] for row in group].count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+
+print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
+print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225153815.py
+++ b/.history/app_20201225153815.py
@ -0,0 +1,24 @@
+def gini_index(groups, classes):
+    
+	# count all samples at split point
+	n_instances = float(sum([len(group) for group in groups]))
+	# sum weighted Gini index for each group
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		score = 0.0
+		# score the group based on the score for each class
+		for class_val in classes:
+            for row in group:
+                print(row[-1])
+			p = [row[-1] for row in group].count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+
+print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
+print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225153906.py
+++ b/.history/app_20201225153906.py
@ -0,0 +1,22 @@
+def gini_index(groups, classes):
+	# count all samples at split point
+	n_instances = float(sum([len(group) for group in groups]))
+	# sum weighted Gini index for each group
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		score = 0.0
+		# score the group based on the score for each class
+		for class_val in classes:
+			p = [row[-1] for row in group].count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+ 
+# test Gini values
+print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
+print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225153913.py
+++ b/.history/app_20201225153913.py
@ -0,0 +1,23 @@
+def gini_index(groups, classes):
+	# count all samples at split point
+	n_instances = float(sum([len(group) for group in groups]))
+	# sum weighted Gini index for each group
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		score = 0.0
+		# score the group based on the score for each class
+		for class_val in classes:
+            
+			p = [row[-1] for row in group].count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+ 
+# test Gini values
+print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
+print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225153932.py
+++ b/.history/app_20201225153932.py
@ -0,0 +1,24 @@
+def gini_index(groups, classes):
+	# count all samples at split point
+	n_instances = float(sum([len(group) for group in groups]))
+	# sum weighted Gini index for each group
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		score = 0.0
+		# score the group based on the score for each class
+		for class_val in classes:
+            for row in group:
+                print(row[-1])
+			p = [row[-1] for row in group].count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+ 
+# test Gini values
+print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
+print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225154020.py
+++ b/.history/app_20201225154020.py
@ -0,0 +1,24 @@
+def gini_index(groups, classes):
+	# count all samples at split point
+	n_instances = float(sum([len(group) for group in groups]))
+	# sum weighted Gini index for each group
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		score = 0.0
+		# score the group based on the score for each class
+		for class_val in classes:
+            for row in group:
+                print(row[-1])
+			p = [row[-1] for row in group].count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+ 
+# test Gini values
+print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
+print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225154028.py
+++ b/.history/app_20201225154028.py
@ -0,0 +1,24 @@
+def gini_index(groups, classes):
+	# count all samples at split point
+	n_instances = float(sum([len(group) for group in groups]))
+	# sum weighted Gini index for each group
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		score = 0.0
+		# score the group based on the score for each class
+		for class_val in classes:
+             for row in group:
+                print(row[-1])
+			p = [row[-1] for row in group].count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+ 
+# test Gini values
+print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
+print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225154033.py
+++ b/.history/app_20201225154033.py
@ -0,0 +1,24 @@
+def gini_index(groups, classes):
+	# count all samples at split point
+	n_instances = float(sum([len(group) for group in groups]))
+	# sum weighted Gini index for each group
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		score = 0.0
+		# score the group based on the score for each class
+		for class_val in classes:
+            for row in group:
+                print(row[-1])
+			p = [row[-1] for row in group].count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+ 
+# test Gini values
+print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
+print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225154115.py
+++ b/.history/app_20201225154115.py
@ -0,0 +1,24 @@
+def gini_index(groups, classes):
+	# count all samples at split point
+	n_instances = float(sum([len(group) for group in groups]))
+	# sum weighted Gini index for each group
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		score = 0.0
+		# score the group based on the score for each class
+		for class_val in classes:
+            for row in group: 
+                print(row[-1])
+			p = [row[-1] for row in group].count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+ 
+# test Gini values
+print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
+print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225154141.py
+++ b/.history/app_20201225154141.py
@ -0,0 +1,24 @@
+def gini_index(groups, classes):
+	# count all samples at split point
+	n_instances = float(sum([len(group) for group in groups]))
+	# sum weighted Gini index for each group
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		score = 0.0
+		# score the group based on the score for each class
+		for class_val in classes:
+			for row in group:
+				print(row[-1])
+			p = [row[-1] for row in group].count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+ 
+# test Gini values
+print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
+print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225154223.py
+++ b/.history/app_20201225154223.py
@ -0,0 +1,23 @@
+def gini_index(groups, classes):
+	# count all samples at split point
+	n_instances = float(sum([len(group) for group in groups]))
+	# sum weighted Gini index for each group
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		score = 0.0
+		# score the group based on the score for each class
+		for class_val in classes:
+			
+			p = [row[-1] for row in group].count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+ 
+# test Gini values
+print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
+print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225154225.py
+++ b/.history/app_20201225154225.py
@ -0,0 +1,22 @@
+def gini_index(groups, classes):
+	# count all samples at split point
+	n_instances = float(sum([len(group) for group in groups]))
+	# sum weighted Gini index for each group
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		score = 0.0
+		# score the group based on the score for each class
+		for class_val in classes:
+			p = [row[-1] for row in group].count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+ 
+# test Gini values
+print(gini_index([[[1, 1], [1, 0]], [[1, 1], [1, 0]]], [0, 1]))
+print(gini_index([[[1, 0], [1, 0]], [[1, 1], [1, 1]]], [0, 1]))
--- a/.history/app_20201225154640.py
+++ b/.history/app_20201225154640.py
@ -0,0 +1,54 @@
+def test_split(index, value, dataset):
+	left, right = list(), list()
+	for row in dataset:
+		if row[index] < value:
+			left.append(row)
+		else:
+			right.append(row)
+	return left, right
+ 
+# Calculate the Gini index for a split dataset
+def gini_index(groups, classes):
+	# count all samples at split point
+	n_instances = float(sum([len(group) for group in groups]))
+	# sum weighted Gini index for each group
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		score = 0.0
+		# score the group based on the score for each class
+		for class_val in classes:
+			p = [row[-1] for row in group].count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+ 
+# Select the best split point for a dataset
+def get_split(dataset):
+	class_values = list(set(row[-1] for row in dataset))
+	b_index, b_value, b_score, b_groups = 999, 999, 999, None
+	for index in range(len(dataset[0])-1):
+		for row in dataset:
+			groups = test_split(index, row[index], dataset)
+			gini = gini_index(groups, class_values)
+			print('X%d < %.3f Gini=%.3f' % ((index+1), row[index], gini))
+			if gini < b_score:
+				b_index, b_value, b_score, b_groups = index, row[index], gini, groups
+	return {'index':b_index, 'value':b_value, 'groups':b_groups}
+ 
+dataset = [[2.771244718,1.784783929,0],
+	[1.728571309,1.169761413,0],
+	[3.678319846,2.81281357,0],
+	[3.961043357,2.61995032,0],
+	[2.999208922,2.209014212,0],
+	[7.497545867,3.162953546,1],
+	[9.00220326,3.339047188,1],
+	[7.444542326,0.476683375,1],
+	[10.12493903,3.234550982,1],
+	[6.642287351,3.319983761,1]]
+split = get_split(dataset)
+print('Split: [X%d < %.3f]' % ((split['index']+1), split['value']))
--- a/.history/app_20201225161344.py
+++ b/.history/app_20201225161344.py
@ -0,0 +1,48 @@
+import pandas as pd
+
+
+def test_split(index, value, dataset):
+	left, right = list(), list()
+	for row in dataset:
+		if row[index] < value:
+			left.append(row)
+		else:
+			right.append(row)
+	return left, right
+ 
+# Calculate the Gini index for a split dataset
+def gini_index(groups, classes):
+	# count all samples at split point
+	n_instances = float(sum([len(group) for group in groups]))
+	# sum weighted Gini index for each group
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		score = 0.0
+		# score the group based on the score for each class
+		for class_val in classes:
+			p = [row[-1] for row in group].count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+ 
+# Select the best split point for a dataset
+def get_split(dataset):
+	class_values = list(set(row[-1] for row in dataset))
+	b_index, b_value, b_score, b_groups = 999, 999, 999, None
+	for index in range(len(dataset[0])-1):
+		for row in dataset:
+			groups = test_split(index, row[index], dataset)
+			gini = gini_index(groups, class_values)
+			print('X%d < %.3f Gini=%.3f' % ((index+1), row[index], gini))
+			if gini < b_score:
+				b_index, b_value, b_score, b_groups = index, row[index], gini, groups
+	return {'index':b_index, 'value':b_value, 'groups':b_groups}
+ 
+dataset = pd.read_excel('dataset2.xls', sheet_name="forestfires").to_numpy()
+split = get_split(dataset)
+print('Split: [X%d < %.3f]' % ((split['index']+1), split['value']))
--- a/.history/app_20201225161529.py
+++ b/.history/app_20201225161529.py
@ -0,0 +1,48 @@
+import pandas as pd
+
+
+def test_split(index, value, dataset):
+	left, right = list(), list()
+	for row in dataset:
+		if row[index] < value:
+			left.append(row)
+		else:
+			right.append(row)
+	return left, right
+ 
+# Calculate the Gini index for a split dataset
+def gini_index(groups, classes):
+	# count all samples at split point
+	n_instances = float(sum([len(group) for group in groups]))
+	# sum weighted Gini index for each group
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		score = 0.0
+		# score the group based on the score for each class
+		for class_val in classes:
+			p = [row[-1] for row in group].count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+ 
+# Select the best split point for a dataset
+def get_split(dataset):
+	class_values = list(set(row[-1] for row in dataset))
+	b_index, b_value, b_score, b_groups = 999, 999, 999, None
+	for index in range(len(dataset[0])-1):
+		for row in dataset:
+			groups = test_split(index, row[index], dataset)
+			gini = gini_index(groups, class_values)
+			print('X%d < %.3f Gini=%.3f' % ((index+1), row[index], gini))
+			if gini < b_score:
+				b_index, b_value, b_score, b_groups = index, row[index], gini, groups
+	return {'index':b_index, 'value':b_value, 'groups':b_groups}
+ 
+dataset = pd.read_excel('train.xls', sheet_name="Sheet1").to_numpy()
+split = get_split(dataset)
+print('Split: [X%d < %.3f]' % ((split['index']+1), split['value']))
--- a/.history/app_20201225161542.py
+++ b/.history/app_20201225161542.py
@ -0,0 +1,48 @@
+import pandas as pd
+
+
+def test_split(index, value, dataset):
+	left, right = list(), list()
+	for row in dataset:
+		if row[index] < value:
+			left.append(row)
+		else:
+			right.append(row)
+	return left, right
+ 
+# Calculate the Gini index for a split dataset
+def gini_index(groups, classes):
+	# count all samples at split point
+	n_instances = float(sum([len(group) for group in groups]))
+	# sum weighted Gini index for each group
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		score = 0.0
+		# score the group based on the score for each class
+		for class_val in classes:
+			p = [row[-1] for row in group].count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+ 
+# Select the best split point for a dataset
+def get_split(dataset):
+	class_values = list(set(row[-1] for row in dataset))
+	b_index, b_value, b_score, b_groups = 999, 999, 999, None
+	for index in range(len(dataset[0])-1):
+		for row in dataset:
+			groups = test_split(index, row[index], dataset)
+			gini = gini_index(groups, class_values)
+			print('X%d < %.3f Gini=%.3f' % ((index+1), row[index], gini))
+			if gini < b_score:
+				b_index, b_value, b_score, b_groups = index, row[index], gini, groups
+	return {'index':b_index, 'value':b_value, 'groups':b_groups}
+ 
+dataset = pd.read_excel('Train.xlsx', sheet_name="Sheet1").to_numpy()
+split = get_split(dataset)
+print('Split: [X%d < %.3f]' % ((split['index']+1), split['value']))
--- a/.history/gini_20201225161914.py
+++ b/.history/gini_20201225161914.py
@ -0,0 +1,48 @@
+import pandas as pd
+
+
+def test_split(index, value, dataset):
+	left, right = list(), list()
+	for row in dataset:
+		if row[index] < value:
+			left.append(row)
+		else:
+			right.append(row)
+	return left, right
+ 
+# Calculate the Gini index for a split dataset
+def gini_index(groups, classes):
+	# count all samples at split point
+	n_instances = float(sum([len(group) for group in groups]))
+	# sum weighted Gini index for each group
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		score = 0.0
+		# score the group based on the score for each class
+		for class_val in classes:
+			p = [row[-1] for row in group].count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+ 
+# Select the best split point for a dataset
+def get_split(dataset):
+	class_values = list(set(row[-1] for row in dataset))
+	b_index, b_value, b_score, b_groups = 999, 999, 999, None
+	for index in range(len(dataset[0])-1):
+		for row in dataset:
+			groups = test_split(index, row[index], dataset)
+			gini = gini_index(groups, class_values)
+			print('X%d < %.3f Gini=%.3f' % ((index+1), row[index], gini))
+			if gini < b_score:
+				b_index, b_value, b_score, b_groups = index, row[index], gini, groups
+	return {'index':b_index, 'value':b_value, 'groups':b_groups}
+ 
+dataset = pd.read_excel('Train.xlsx', sheet_name="Sheet1").to_numpy()
+split = get_split(dataset)
+print('Split: [X%d < %.3f]' % ((split['index']+1), split['value']))
--- a/.history/gini_20201225162123.py
+++ b/.history/gini_20201225162123.py
@ -0,0 +1,90 @@
+import pandas as pd
+def test_split(index, value, dataset):
+	left, right = list(), list()
+	for row in dataset:
+		if row[index] < value:
+			left.append(row)
+		else:
+			right.append(row)
+	return left, right
+ 
+# Calculate the Gini index for a split dataset
+def gini_index(groups, classes):
+	# count all samples at split point
+	n_instances = float(sum([len(group) for group in groups]))
+	# sum weighted Gini index for each group
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		score = 0.0
+		# score the group based on the score for each class
+		for class_val in classes:
+			p = [row[-1] for row in group].count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+ 
+# Select the best split point for a dataset
+def get_split(dataset):
+	class_values = list(set(row[-1] for row in dataset))
+	b_index, b_value, b_score, b_groups = 999, 999, 999, None
+	for index in range(len(dataset[0])-1):
+		for row in dataset:
+			groups = test_split(index, row[index], dataset)
+			gini = gini_index(groups, class_values)
+			if gini < b_score:
+				b_index, b_value, b_score, b_groups = index, row[index], gini, groups
+	return {'index':b_index, 'value':b_value, 'groups':b_groups}
+ 
+# Create a terminal node value
+def to_terminal(group):
+	outcomes = [row[-1] for row in group]
+	return max(set(outcomes), key=outcomes.count)
+ 
+# Create child splits for a node or make terminal
+def split(node, max_depth, min_size, depth):
+	left, right = node['groups']
+	del(node['groups'])
+	# check for a no split
+	if not left or not right:
+		node['left'] = node['right'] = to_terminal(left + right)
+		return
+	# check for max depth
+	if depth >= max_depth:
+		node['left'], node['right'] = to_terminal(left), to_terminal(right)
+		return
+	# process left child
+	if len(left) <= min_size:
+		node['left'] = to_terminal(left)
+	else:
+		node['left'] = get_split(left)
+		split(node['left'], max_depth, min_size, depth+1)
+	# process right child
+	if len(right) <= min_size:
+		node['right'] = to_terminal(right)
+	else:
+		node['right'] = get_split(right)
+		split(node['right'], max_depth, min_size, depth+1)
+ 
+# Build a decision tree
+def build_tree(train, max_depth, min_size):
+	root = get_split(train)
+	split(root, max_depth, min_size, 1)
+	return root
+ 
+# Print a decision tree
+def print_tree(node, depth=0):
+	if isinstance(node, dict):
+		print('%s[X%d < %.3f]' % ((depth*' ', (node['index']+1), node['value'])))
+		print_tree(node['left'], depth+1)
+		print_tree(node['right'], depth+1)
+	else:
+		print('%s[%s]' % ((depth*' ', node)))
+ 
+dataset = pd.read_excel('Train.xlsx', sheet_name="Sheet1").to_numpy()
+tree = build_tree(dataset, 1, 1)
+print_tree(tree)
--- a/.history/gini_20201225162607.py
+++ b/.history/gini_20201225162607.py
@ -0,0 +1,90 @@
+import pandas as pd
+def test_split(index, value, dataset):
+	"""Partition the rows of ``dataset`` on one column.
+
+	Rows whose ``row[index]`` is strictly less than ``value`` go to the first
+	list; every other row goes to the second. Row order is preserved.
+	Returns the pair ``(left, right)``.
+	"""
+	below, rest = [], []
+	for record in dataset:
+		# pick the destination bucket, then append once
+		bucket = below if record[index] < value else rest
+		bucket.append(record)
+	return below, rest
+ 
+def gini_index(groups, classes):
+	"""Calculate the size-weighted Gini index for a split dataset.
+
+	``groups`` is a sequence of row lists; the class label of a row is its
+	last element. ``classes`` lists the labels to score. Empty groups
+	contribute nothing to the result.
+	"""
+	# count all samples at split point
+	n_instances = float(sum(len(group) for group in groups))
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		# extract the labels once per group instead of once per class
+		labels = [row[-1] for row in group]
+		score = 0.0
+		for class_val in classes:
+			p = labels.count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+ 
+def get_split(dataset):
+	"""Select the best split point for a dataset.
+
+	Every distinct value of every feature column (all columns but the last,
+	which holds the class label) is tried as a threshold with test_split(),
+	and the candidate with the lowest gini_index() wins; ties keep the first
+	one found. Returns a dict with keys 'index', 'value' and 'groups'.
+	"""
+	class_values = list(set(row[-1] for row in dataset))
+	# the 999 placeholders survive only when there is no feature column to try
+	b_index, b_value, b_score, b_groups = 999, 999, float('inf'), None
+	for index in range(len(dataset[0])-1):
+		# equal thresholds give identical splits, so score each value once
+		tried = set()
+		for row in dataset:
+			value = row[index]
+			if value in tried:
+				continue
+			tried.add(value)
+			groups = test_split(index, value, dataset)
+			gini = gini_index(groups, class_values)
+			if gini < b_score:
+				b_index, b_value, b_score, b_groups = index, value, gini, groups
+	return {'index':b_index, 'value':b_value, 'groups':b_groups}
+ 
+def to_terminal(group):
+	"""Create a terminal node value: the most common class label in ``group``.
+
+	The label of a row is its last element. Ties go to the label that
+	appears first in ``group``, so the result does not depend on set/hash
+	ordering. Raises ValueError if ``group`` is empty.
+	"""
+	# dicts keep insertion order, and max() returns the first maximal key
+	counts = {}
+	for row in group:
+		counts[row[-1]] = counts.get(row[-1], 0) + 1
+	return max(counts, key=counts.get)
+ 
+def split(node, max_depth, min_size, depth):
+	"""Create child splits for a node or make terminal, in place.
+
+	``node`` is a dict from get_split(); its 'groups' entry is consumed and
+	replaced by 'left' and 'right', each either a class label from
+	to_terminal() or a further split node. ``depth`` is the depth of
+	``node``; no child node is created once it reaches ``max_depth`` or for a
+	group of at most ``min_size`` rows. Returns None.
+	"""
+	left, right = node['groups']
+	del node['groups']
+	# one side empty: nothing was separated, so both children share one label
+	if not (left and right):
+		node['left'] = node['right'] = to_terminal(left + right)
+		return
+	# depth limit reached: close both children
+	if depth >= max_depth:
+		node['left'], node['right'] = to_terminal(left), to_terminal(right)
+		return
+	# left child first, then right, each grown recursively when large enough
+	for side, rows in (('left', left), ('right', right)):
+		if len(rows) <= min_size:
+			node[side] = to_terminal(rows)
+			continue
+		node[side] = get_split(rows)
+		split(node[side], max_depth, min_size, depth+1)
+ 
+def build_tree(train, max_depth, min_size):
+	"""Build a decision tree from the rows in ``train``.
+
+	The root comes from get_split() and is then expanded in place by split(),
+	starting at depth 1. Returns the root node dict.
+	"""
+	tree = get_split(train)
+	split(tree, max_depth, min_size, depth=1)
+	return tree
+ 
+def print_tree(node, depth=0):
+	"""Print a decision tree, one line per node, indented one space per level.
+
+	Split nodes (dicts) print as ``[X<column> < <threshold>]`` with a 1-based
+	column number, followed by their left and then right subtree; anything
+	else is treated as a leaf and printed as ``[<label>]``.
+	"""
+	indent = depth*' '
+	if not isinstance(node, dict):
+		print('%s[%s]' % (indent, node))
+		return
+	column = node['index']+1
+	print('%s[X%d < %.3f]' % (indent, column, node['value']))
+	for side in ('left', 'right'):
+		print_tree(node[side], depth+1)
+ 
+# Script driver: read sheet "Sheet1" of Train.xlsx into a NumPy array,
+# build a tree with max_depth=5 and min_size=1, and print it.
+# NOTE: there is no __main__ guard here, so this also runs on import.
+dataset = pd.read_excel('Train.xlsx', sheet_name="Sheet1").to_numpy()
+tree = build_tree(dataset, 5, 1)
+print_tree(tree)
--- a/.history/gini_20201225162804.py
+++ b/.history/gini_20201225162804.py
@ -0,0 +1,90 @@
+import pandas as pd
+def test_split(index, value, dataset):
+	"""Partition the rows of ``dataset`` on one column.
+
+	Rows whose ``row[index]`` is strictly less than ``value`` go to the first
+	list; every other row goes to the second. Row order is preserved.
+	Returns the pair ``(left, right)``.
+	"""
+	below, rest = [], []
+	for record in dataset:
+		# pick the destination bucket, then append once
+		bucket = below if record[index] < value else rest
+		bucket.append(record)
+	return below, rest
+ 
+def gini_index(groups, classes):
+	"""Calculate the size-weighted Gini index for a split dataset.
+
+	``groups`` is a sequence of row lists; the class label of a row is its
+	last element. ``classes`` lists the labels to score. Empty groups
+	contribute nothing to the result.
+	"""
+	# count all samples at split point
+	n_instances = float(sum(len(group) for group in groups))
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		# extract the labels once per group instead of once per class
+		labels = [row[-1] for row in group]
+		score = 0.0
+		for class_val in classes:
+			p = labels.count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+ 
+def get_split(dataset):
+	"""Select the best split point for a dataset.
+
+	Every distinct value of every feature column (all columns but the last,
+	which holds the class label) is tried as a threshold with test_split(),
+	and the candidate with the lowest gini_index() wins; ties keep the first
+	one found. Returns a dict with keys 'index', 'value' and 'groups'.
+	"""
+	class_values = list(set(row[-1] for row in dataset))
+	# the 999 placeholders survive only when there is no feature column to try
+	b_index, b_value, b_score, b_groups = 999, 999, float('inf'), None
+	for index in range(len(dataset[0])-1):
+		# equal thresholds give identical splits, so score each value once
+		tried = set()
+		for row in dataset:
+			value = row[index]
+			if value in tried:
+				continue
+			tried.add(value)
+			groups = test_split(index, value, dataset)
+			gini = gini_index(groups, class_values)
+			if gini < b_score:
+				b_index, b_value, b_score, b_groups = index, value, gini, groups
+	return {'index':b_index, 'value':b_value, 'groups':b_groups}
+ 
+def to_terminal(group):
+	"""Create a terminal node value: the most common class label in ``group``.
+
+	The label of a row is its last element. Ties go to the label that
+	appears first in ``group``, so the result does not depend on set/hash
+	ordering. Raises ValueError if ``group`` is empty.
+	"""
+	# dicts keep insertion order, and max() returns the first maximal key
+	counts = {}
+	for row in group:
+		counts[row[-1]] = counts.get(row[-1], 0) + 1
+	return max(counts, key=counts.get)
+ 
+def split(node, max_depth, min_size, depth):
+	"""Create child splits for a node or make terminal, in place.
+
+	``node`` is a dict from get_split(); its 'groups' entry is consumed and
+	replaced by 'left' and 'right', each either a class label from
+	to_terminal() or a further split node. ``depth`` is the depth of
+	``node``; no child node is created once it reaches ``max_depth`` or for a
+	group of at most ``min_size`` rows. Returns None.
+	"""
+	left, right = node['groups']
+	del node['groups']
+	# one side empty: nothing was separated, so both children share one label
+	if not (left and right):
+		node['left'] = node['right'] = to_terminal(left + right)
+		return
+	# depth limit reached: close both children
+	if depth >= max_depth:
+		node['left'], node['right'] = to_terminal(left), to_terminal(right)
+		return
+	# left child first, then right, each grown recursively when large enough
+	for side, rows in (('left', left), ('right', right)):
+		if len(rows) <= min_size:
+			node[side] = to_terminal(rows)
+			continue
+		node[side] = get_split(rows)
+		split(node[side], max_depth, min_size, depth+1)
+ 
+def build_tree(train, max_depth, min_size):
+	"""Build a decision tree from the rows in ``train``.
+
+	The root comes from get_split() and is then expanded in place by split(),
+	starting at depth 1. Returns the root node dict.
+	"""
+	tree = get_split(train)
+	split(tree, max_depth, min_size, depth=1)
+	return tree
+ 
+def print_tree(node, depth=0):
+	"""Print a decision tree, one line per node, indented one space per level.
+
+	Split nodes (dicts) print as ``[X<column> < <threshold>]`` with a 1-based
+	column number, followed by their left and then right subtree; anything
+	else is treated as a leaf and printed as ``[<label>]``.
+	"""
+	indent = depth*' '
+	if not isinstance(node, dict):
+		print('%s[%s]' % (indent, node))
+		return
+	column = node['index']+1
+	print('%s[X%d < %.3f]' % (indent, column, node['value']))
+	for side in ('left', 'right'):
+		print_tree(node[side], depth+1)
+ 
+# Script driver: read sheet "Sheet1" of Train.xlsx into a NumPy array,
+# build a tree with max_depth=5 and min_size=1, and print it.
+# NOTE: there is no __main__ guard here, so this also runs on import.
+dataset = pd.read_excel('Train.xlsx', sheet_name="Sheet1").to_numpy()
+tree = build_tree(dataset, 5, 1)
+print_tree(tree)
--- a/.history/gini_20201225162859.py
+++ b/.history/gini_20201225162859.py
@ -0,0 +1,90 @@
+import pandas as pd
+def test_split(index, value, dataset):
+	"""Partition the rows of ``dataset`` on one column.
+
+	Rows whose ``row[index]`` is strictly less than ``value`` go to the first
+	list; every other row goes to the second. Row order is preserved.
+	Returns the pair ``(left, right)``.
+	"""
+	below, rest = [], []
+	for record in dataset:
+		# pick the destination bucket, then append once
+		bucket = below if record[index] < value else rest
+		bucket.append(record)
+	return below, rest
+ 
+def gini_index(groups, classes):
+	"""Calculate the size-weighted Gini index for a split dataset.
+
+	``groups`` is a sequence of row lists; the class label of a row is its
+	last element. ``classes`` lists the labels to score. Empty groups
+	contribute nothing to the result.
+	"""
+	# count all samples at split point
+	n_instances = float(sum(len(group) for group in groups))
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		# extract the labels once per group instead of once per class
+		labels = [row[-1] for row in group]
+		score = 0.0
+		for class_val in classes:
+			p = labels.count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+ 
+def get_split(dataset):
+	"""Select the best split point for a dataset.
+
+	Every distinct value of every feature column (all columns but the last,
+	which holds the class label) is tried as a threshold with test_split(),
+	and the candidate with the lowest gini_index() wins; ties keep the first
+	one found. Returns a dict with keys 'index', 'value' and 'groups'.
+	"""
+	class_values = list(set(row[-1] for row in dataset))
+	# the 999 placeholders survive only when there is no feature column to try
+	b_index, b_value, b_score, b_groups = 999, 999, float('inf'), None
+	for index in range(len(dataset[0])-1):
+		# equal thresholds give identical splits, so score each value once
+		tried = set()
+		for row in dataset:
+			value = row[index]
+			if value in tried:
+				continue
+			tried.add(value)
+			groups = test_split(index, value, dataset)
+			gini = gini_index(groups, class_values)
+			if gini < b_score:
+				b_index, b_value, b_score, b_groups = index, value, gini, groups
+	return {'index':b_index, 'value':b_value, 'groups':b_groups}
+ 
+def to_terminal(group):
+	"""Create a terminal node value: the most common class label in ``group``.
+
+	The label of a row is its last element. Ties go to the label that
+	appears first in ``group``, so the result does not depend on set/hash
+	ordering. Raises ValueError if ``group`` is empty.
+	"""
+	# dicts keep insertion order, and max() returns the first maximal key
+	counts = {}
+	for row in group:
+		counts[row[-1]] = counts.get(row[-1], 0) + 1
+	return max(counts, key=counts.get)
+ 
+def split(node, max_depth, min_size, depth):
+	"""Create child splits for a node or make terminal, in place.
+
+	``node`` is a dict from get_split(); its 'groups' entry is consumed and
+	replaced by 'left' and 'right', each either a class label from
+	to_terminal() or a further split node. ``depth`` is the depth of
+	``node``; no child node is created once it reaches ``max_depth`` or for a
+	group of at most ``min_size`` rows. Returns None.
+	"""
+	left, right = node['groups']
+	del node['groups']
+	# one side empty: nothing was separated, so both children share one label
+	if not (left and right):
+		node['left'] = node['right'] = to_terminal(left + right)
+		return
+	# depth limit reached: close both children
+	if depth >= max_depth:
+		node['left'], node['right'] = to_terminal(left), to_terminal(right)
+		return
+	# left child first, then right, each grown recursively when large enough
+	for side, rows in (('left', left), ('right', right)):
+		if len(rows) <= min_size:
+			node[side] = to_terminal(rows)
+			continue
+		node[side] = get_split(rows)
+		split(node[side], max_depth, min_size, depth+1)
+ 
+def build_tree(train, max_depth, min_size):
+	"""Build a decision tree from the rows in ``train``.
+
+	The root comes from get_split() and is then expanded in place by split(),
+	starting at depth 1. Returns the root node dict.
+	"""
+	tree = get_split(train)
+	split(tree, max_depth, min_size, depth=1)
+	return tree
+ 
+def print_tree(node, depth=0):
+	"""Print a decision tree, one line per node, indented one space per level.
+
+	Split nodes (dicts) print as ``[X<column> < <threshold>]`` with a 1-based
+	column number, followed by their left and then right subtree; anything
+	else is treated as a leaf and printed as ``[<label>]``.
+	"""
+	indent = depth*' '
+	if not isinstance(node, dict):
+		print('%s[%s]' % (indent, node))
+		return
+	column = node['index']+1
+	print('%s[X%d < %.3f]' % (indent, column, node['value']))
+	for side in ('left', 'right'):
+		print_tree(node[side], depth+1)
+ 
+# Script driver: read sheet "Sheet1" of Train.xlsx into a NumPy array,
+# build a tree with max_depth=5 and min_size=0, and print it.
+# NOTE: there is no __main__ guard here, so this also runs on import.
+dataset = pd.read_excel('Train.xlsx', sheet_name="Sheet1").to_numpy()
+tree = build_tree(dataset, 5, 0)
+print_tree(tree)
--- a/.history/gini_20201225163004.py
+++ b/.history/gini_20201225163004.py
@ -0,0 +1,92 @@
+import pandas as pd
+def test_split(index, value, dataset):
+	"""Partition the rows of ``dataset`` on one column.
+
+	Rows whose ``row[index]`` is strictly less than ``value`` go to the first
+	list; every other row goes to the second. Row order is preserved.
+	Returns the pair ``(left, right)``.
+	"""
+	below, rest = [], []
+	for record in dataset:
+		# pick the destination bucket, then append once
+		bucket = below if record[index] < value else rest
+		bucket.append(record)
+	return below, rest
+ 
+def gini_index(groups, classes):
+	"""Calculate the size-weighted Gini index for a split dataset.
+
+	``groups`` is a sequence of row lists; the class label of a row is its
+	last element. ``classes`` lists the labels to score. Empty groups
+	contribute nothing to the result.
+	"""
+	# count all samples at split point
+	n_instances = float(sum(len(group) for group in groups))
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		# extract the labels once per group instead of once per class
+		labels = [row[-1] for row in group]
+		score = 0.0
+		for class_val in classes:
+			p = labels.count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+ 
+def get_split(dataset):
+	"""Select the best split point for a dataset.
+
+	Every distinct value of every feature column (all columns but the last,
+	which holds the class label) is tried as a threshold with test_split(),
+	and the candidate with the lowest gini_index() wins; ties keep the first
+	one found. Returns a dict with keys 'index', 'value' and 'groups'.
+	"""
+	class_values = list(set(row[-1] for row in dataset))
+	# the 999 placeholders survive only when there is no feature column to try
+	b_index, b_value, b_score, b_groups = 999, 999, float('inf'), None
+	for index in range(len(dataset[0])-1):
+		# equal thresholds give identical splits, so score each value once
+		tried = set()
+		for row in dataset:
+			value = row[index]
+			if value in tried:
+				continue
+			tried.add(value)
+			groups = test_split(index, value, dataset)
+			gini = gini_index(groups, class_values)
+			if gini < b_score:
+				b_index, b_value, b_score, b_groups = index, value, gini, groups
+	return {'index':b_index, 'value':b_value, 'groups':b_groups}
+ 
+def to_terminal(group):
+	"""Create a terminal node value: the most common class label in ``group``.
+
+	The label of a row is its last element. Ties go to the label that
+	appears first in ``group``, so the result does not depend on set/hash
+	ordering. Raises ValueError if ``group`` is empty.
+	"""
+	# dicts keep insertion order, and max() returns the first maximal key
+	counts = {}
+	for row in group:
+		counts[row[-1]] = counts.get(row[-1], 0) + 1
+	return max(counts, key=counts.get)
+ 
+def split(node, max_depth, min_size, depth):
+	"""Create child splits for a node or make terminal, in place.
+
+	``node`` is a dict from get_split(); its 'groups' entry is consumed and
+	replaced by 'left' and 'right', each either a class label from
+	to_terminal() or a further split node. ``depth`` is the depth of
+	``node``; no child node is created once it reaches ``max_depth`` or for a
+	group of at most ``min_size`` rows. Returns None.
+	"""
+	left, right = node['groups']
+	del node['groups']
+	# one side empty: nothing was separated, so both children share one label
+	if not (left and right):
+		node['left'] = node['right'] = to_terminal(left + right)
+		return
+	# depth limit reached: close both children
+	if depth >= max_depth:
+		node['left'], node['right'] = to_terminal(left), to_terminal(right)
+		return
+	# left child first, then right, each grown recursively when large enough
+	for side, rows in (('left', left), ('right', right)):
+		if len(rows) <= min_size:
+			node[side] = to_terminal(rows)
+			continue
+		node[side] = get_split(rows)
+		split(node[side], max_depth, min_size, depth+1)
+ 
+def build_tree(train, max_depth, min_size):
+	"""Build a decision tree from the rows in ``train``.
+
+	The root comes from get_split() and is then expanded in place by split(),
+	starting at depth 1. Returns the root node dict.
+	"""
+	tree = get_split(train)
+	split(tree, max_depth, min_size, depth=1)
+	return tree
+ 
+def print_tree(node, depth=0):
+	"""Print a decision tree, one line per node, indented one space per level.
+
+	Split nodes (dicts) print as ``[X<column> < <threshold>]`` with a 1-based
+	column number, followed by their left and then right subtree; anything
+	else is treated as a leaf and printed as ``[<label>]``.
+	"""
+	indent = depth*' '
+	if not isinstance(node, dict):
+		print('%s[%s]' % (indent, node))
+		return
+	column = node['index']+1
+	print('%s[X%d < %.3f]' % (indent, column, node['value']))
+	for side in ('left', 'right'):
+		print_tree(node[side], depth+1)
+ 
+
+if __name__ == "__main__":
+	# Script driver (only when run directly): read sheet "Sheet1" of
+	# Train.xlsx into a NumPy array, build a tree with max_depth=5 and
+	# min_size=0, and print it.
+	dataset = pd.read_excel('Train.xlsx', sheet_name="Sheet1").to_numpy()
+	tree = build_tree(dataset, 5, 0)
+	print_tree(tree)
--- a/.history/gini_20201225163146.py
+++ b/.history/gini_20201225163146.py
@ -0,0 +1,92 @@
+import pandas as pd
+def test_split(index, value, dataset):
+	"""Partition the rows of ``dataset`` on one column.
+
+	Rows whose ``row[index]`` is strictly less than ``value`` go to the first
+	list; every other row goes to the second. Row order is preserved.
+	Returns the pair ``(left, right)``.
+	"""
+	below, rest = [], []
+	for record in dataset:
+		# pick the destination bucket, then append once
+		bucket = below if record[index] < value else rest
+		bucket.append(record)
+	return below, rest
+ 
+def gini_index(groups, classes):
+	"""Calculate the size-weighted Gini index for a split dataset.
+
+	``groups`` is a sequence of row lists; the class label of a row is its
+	last element. ``classes`` lists the labels to score. Empty groups
+	contribute nothing to the result.
+	"""
+	# count all samples at split point
+	n_instances = float(sum(len(group) for group in groups))
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		# extract the labels once per group instead of once per class
+		labels = [row[-1] for row in group]
+		score = 0.0
+		for class_val in classes:
+			p = labels.count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+ 
+def get_split(dataset):
+	"""Select the best split point for a dataset.
+
+	Every distinct value of every feature column (all columns but the last,
+	which holds the class label) is tried as a threshold with test_split(),
+	and the candidate with the lowest gini_index() wins; ties keep the first
+	one found. Returns a dict with keys 'index', 'value' and 'groups'.
+	"""
+	class_values = list(set(row[-1] for row in dataset))
+	# the 999 placeholders survive only when there is no feature column to try
+	b_index, b_value, b_score, b_groups = 999, 999, float('inf'), None
+	for index in range(len(dataset[0])-1):
+		# equal thresholds give identical splits, so score each value once
+		tried = set()
+		for row in dataset:
+			value = row[index]
+			if value in tried:
+				continue
+			tried.add(value)
+			groups = test_split(index, value, dataset)
+			gini = gini_index(groups, class_values)
+			if gini < b_score:
+				b_index, b_value, b_score, b_groups = index, value, gini, groups
+	return {'index':b_index, 'value':b_value, 'groups':b_groups}
+ 
+def to_terminal(group):
+	"""Create a terminal node value: the most common class label in ``group``.
+
+	The label of a row is its last element. Ties go to the label that
+	appears first in ``group``, so the result does not depend on set/hash
+	ordering. Raises ValueError if ``group`` is empty.
+	"""
+	# dicts keep insertion order, and max() returns the first maximal key
+	counts = {}
+	for row in group:
+		counts[row[-1]] = counts.get(row[-1], 0) + 1
+	return max(counts, key=counts.get)
+ 
+def split(node, max_depth, min_size, depth):
+	"""Create child splits for a node or make terminal, in place.
+
+	``node`` is a dict from get_split(); its 'groups' entry is consumed and
+	replaced by 'left' and 'right', each either a class label from
+	to_terminal() or a further split node. ``depth`` is the depth of
+	``node``; no child node is created once it reaches ``max_depth`` or for a
+	group of at most ``min_size`` rows. Returns None.
+	"""
+	left, right = node['groups']
+	del node['groups']
+	# one side empty: nothing was separated, so both children share one label
+	if not (left and right):
+		node['left'] = node['right'] = to_terminal(left + right)
+		return
+	# depth limit reached: close both children
+	if depth >= max_depth:
+		node['left'], node['right'] = to_terminal(left), to_terminal(right)
+		return
+	# left child first, then right, each grown recursively when large enough
+	for side, rows in (('left', left), ('right', right)):
+		if len(rows) <= min_size:
+			node[side] = to_terminal(rows)
+			continue
+		node[side] = get_split(rows)
+		split(node[side], max_depth, min_size, depth+1)
+ 
+def build_tree(train, max_depth, min_size):
+	"""Build a decision tree from the rows in ``train``.
+
+	The root comes from get_split() and is then expanded in place by split(),
+	starting at depth 1. Returns the root node dict.
+	"""
+	tree = get_split(train)
+	split(tree, max_depth, min_size, depth=1)
+	return tree
+ 
+def print_tree(node, depth=0):
+	"""Print a decision tree, one line per node, indented one space per level.
+
+	Split nodes (dicts) print as ``[X<column> < <threshold>]`` with a 1-based
+	column number, followed by their left and then right subtree; anything
+	else is treated as a leaf and printed as ``[<label>]``.
+	"""
+	indent = depth*' '
+	if not isinstance(node, dict):
+		print('%s[%s]' % (indent, node))
+		return
+	column = node['index']+1
+	print('%s[X%d < %.3f]' % (indent, column, node['value']))
+	for side in ('left', 'right'):
+		print_tree(node[side], depth+1)
+ 
+
+if __name__ == "__main__":
+	# Script driver (only when run directly): read sheet "Sheet1" of
+	# Train.xlsx into a NumPy array, build a tree with max_depth=1 and
+	# min_size=1, and print it.
+	dataset = pd.read_excel('Train.xlsx', sheet_name="Sheet1").to_numpy()
+	tree = build_tree(dataset, 1, 1)
+	print_tree(tree)
--- a/.history/gini_20201225163343.py
+++ b/.history/gini_20201225163343.py
@ -0,0 +1,92 @@
+import pandas as pd
+def test_split(index, value, dataset):
+	"""Partition the rows of ``dataset`` on one column.
+
+	Rows whose ``row[index]`` is strictly less than ``value`` go to the first
+	list; every other row goes to the second. Row order is preserved.
+	Returns the pair ``(left, right)``.
+	"""
+	below, rest = [], []
+	for record in dataset:
+		# pick the destination bucket, then append once
+		bucket = below if record[index] < value else rest
+		bucket.append(record)
+	return below, rest
+ 
+def gini_index(groups, classes):
+	"""Calculate the size-weighted Gini index for a split dataset.
+
+	``groups`` is a sequence of row lists; the class label of a row is its
+	last element. ``classes`` lists the labels to score. Empty groups
+	contribute nothing to the result.
+	"""
+	# count all samples at split point
+	n_instances = float(sum(len(group) for group in groups))
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		# extract the labels once per group instead of once per class
+		labels = [row[-1] for row in group]
+		score = 0.0
+		for class_val in classes:
+			p = labels.count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+ 
+def get_split(dataset):
+	"""Select the best split point for a dataset.
+
+	Every distinct value of every feature column (all columns but the last,
+	which holds the class label) is tried as a threshold with test_split(),
+	and the candidate with the lowest gini_index() wins; ties keep the first
+	one found. Returns a dict with keys 'index', 'value' and 'groups'.
+	"""
+	class_values = list(set(row[-1] for row in dataset))
+	# the 999 placeholders survive only when there is no feature column to try
+	b_index, b_value, b_score, b_groups = 999, 999, float('inf'), None
+	for index in range(len(dataset[0])-1):
+		# equal thresholds give identical splits, so score each value once
+		tried = set()
+		for row in dataset:
+			value = row[index]
+			if value in tried:
+				continue
+			tried.add(value)
+			groups = test_split(index, value, dataset)
+			gini = gini_index(groups, class_values)
+			if gini < b_score:
+				b_index, b_value, b_score, b_groups = index, value, gini, groups
+	return {'index':b_index, 'value':b_value, 'groups':b_groups}
+ 
+def to_terminal(group):
+	"""Create a terminal node value: the most common class label in ``group``.
+
+	The label of a row is its last element. Ties go to the label that
+	appears first in ``group``, so the result does not depend on set/hash
+	ordering. Raises ValueError if ``group`` is empty.
+	"""
+	# dicts keep insertion order, and max() returns the first maximal key
+	counts = {}
+	for row in group:
+		counts[row[-1]] = counts.get(row[-1], 0) + 1
+	return max(counts, key=counts.get)
+ 
+def split(node, max_depth, min_size, depth):
+	"""Create child splits for a node or make terminal, in place.
+
+	``node`` is a dict from get_split(); its 'groups' entry is consumed and
+	replaced by 'left' and 'right', each either a class label from
+	to_terminal() or a further split node. ``depth`` is the depth of
+	``node``; no child node is created once it reaches ``max_depth`` or for a
+	group of at most ``min_size`` rows. Returns None.
+	"""
+	left, right = node['groups']
+	del node['groups']
+	# one side empty: nothing was separated, so both children share one label
+	if not (left and right):
+		node['left'] = node['right'] = to_terminal(left + right)
+		return
+	# depth limit reached: close both children
+	if depth >= max_depth:
+		node['left'], node['right'] = to_terminal(left), to_terminal(right)
+		return
+	# left child first, then right, each grown recursively when large enough
+	for side, rows in (('left', left), ('right', right)):
+		if len(rows) <= min_size:
+			node[side] = to_terminal(rows)
+			continue
+		node[side] = get_split(rows)
+		split(node[side], max_depth, min_size, depth+1)
+ 
+def build_tree(train, max_depth, min_size):
+	"""Build a decision tree from the rows in ``train``.
+
+	The root comes from get_split() and is then expanded in place by split(),
+	starting at depth 1. Returns the root node dict.
+	"""
+	tree = get_split(train)
+	split(tree, max_depth, min_size, depth=1)
+	return tree
+ 
+def print_tree(node, depth=0):
+	"""Print a decision tree, one line per node, indented one space per level.
+
+	Split nodes (dicts) print as ``[X<column> < <threshold>]`` with a 1-based
+	column number, followed by their left and then right subtree; anything
+	else is treated as a leaf and printed as ``[<label>]``.
+	"""
+	indent = depth*' '
+	if not isinstance(node, dict):
+		print('%s[%s]' % (indent, node))
+		return
+	column = node['index']+1
+	print('%s[X%d < %.3f]' % (indent, column, node['value']))
+	for side in ('left', 'right'):
+		print_tree(node[side], depth+1)
+ 
+
+if __name__ == "__main__":
+	# Script driver (only when run directly): read sheet "Sheet1" of
+	# Train.xlsx into a NumPy array, build a tree with max_depth=1 and
+	# min_size=1, and print it.
+	dataset = pd.read_excel('Train.xlsx', sheet_name="Sheet1").to_numpy()
+	tree = build_tree(dataset, 1, 1)
+	print_tree(tree)
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@ -0,0 +1,15 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Python: Current File",
+            "type": "python",
+            "request": "launch",
+            "program": "${file}",
+            "console": "integratedTerminal"
+        }
+    ]
+}
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@ -0,0 +1,3 @@
+{
+    "python.pythonPath": "C:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python38-32\\python.exe"
+}
--- a/Train.xlsx
+++ b/Train.xlsx
--- a/gini.py
+++ b/gini.py
@ -0,0 +1,92 @@
+import pandas as pd
+def test_split(index, value, dataset):
+	"""Partition the rows of ``dataset`` on one column.
+
+	Rows whose ``row[index]`` is strictly less than ``value`` go to the first
+	list; every other row goes to the second. Row order is preserved.
+	Returns the pair ``(left, right)``.
+	"""
+	below, rest = [], []
+	for record in dataset:
+		# pick the destination bucket, then append once
+		bucket = below if record[index] < value else rest
+		bucket.append(record)
+	return below, rest
+ 
+def gini_index(groups, classes):
+	"""Calculate the size-weighted Gini index for a split dataset.
+
+	``groups`` is a sequence of row lists; the class label of a row is its
+	last element. ``classes`` lists the labels to score. Empty groups
+	contribute nothing to the result.
+	"""
+	# count all samples at split point
+	n_instances = float(sum(len(group) for group in groups))
+	gini = 0.0
+	for group in groups:
+		size = float(len(group))
+		# avoid divide by zero
+		if size == 0:
+			continue
+		# extract the labels once per group instead of once per class
+		labels = [row[-1] for row in group]
+		score = 0.0
+		for class_val in classes:
+			p = labels.count(class_val) / size
+			score += p * p
+		# weight the group score by its relative size
+		gini += (1.0 - score) * (size / n_instances)
+	return gini
+ 
+def get_split(dataset):
+	"""Select the best split point for a dataset.
+
+	Every distinct value of every feature column (all columns but the last,
+	which holds the class label) is tried as a threshold with test_split(),
+	and the candidate with the lowest gini_index() wins; ties keep the first
+	one found. Returns a dict with keys 'index', 'value' and 'groups'.
+	"""
+	class_values = list(set(row[-1] for row in dataset))
+	# the 999 placeholders survive only when there is no feature column to try
+	b_index, b_value, b_score, b_groups = 999, 999, float('inf'), None
+	for index in range(len(dataset[0])-1):
+		# equal thresholds give identical splits, so score each value once
+		tried = set()
+		for row in dataset:
+			value = row[index]
+			if value in tried:
+				continue
+			tried.add(value)
+			groups = test_split(index, value, dataset)
+			gini = gini_index(groups, class_values)
+			if gini < b_score:
+				b_index, b_value, b_score, b_groups = index, value, gini, groups
+	return {'index':b_index, 'value':b_value, 'groups':b_groups}
+ 
+def to_terminal(group):
+	"""Create a terminal node value: the most common class label in ``group``.
+
+	The label of a row is its last element. Ties go to the label that
+	appears first in ``group``, so the result does not depend on set/hash
+	ordering. Raises ValueError if ``group`` is empty.
+	"""
+	# dicts keep insertion order, and max() returns the first maximal key
+	counts = {}
+	for row in group:
+		counts[row[-1]] = counts.get(row[-1], 0) + 1
+	return max(counts, key=counts.get)
+ 
+def split(node, max_depth, min_size, depth):
+	"""Create child splits for a node or make terminal, in place.
+
+	``node`` is a dict from get_split(); its 'groups' entry is consumed and
+	replaced by 'left' and 'right', each either a class label from
+	to_terminal() or a further split node. ``depth`` is the depth of
+	``node``; no child node is created once it reaches ``max_depth`` or for a
+	group of at most ``min_size`` rows. Returns None.
+	"""
+	left, right = node['groups']
+	del node['groups']
+	# one side empty: nothing was separated, so both children share one label
+	if not (left and right):
+		node['left'] = node['right'] = to_terminal(left + right)
+		return
+	# depth limit reached: close both children
+	if depth >= max_depth:
+		node['left'], node['right'] = to_terminal(left), to_terminal(right)
+		return
+	# left child first, then right, each grown recursively when large enough
+	for side, rows in (('left', left), ('right', right)):
+		if len(rows) <= min_size:
+			node[side] = to_terminal(rows)
+			continue
+		node[side] = get_split(rows)
+		split(node[side], max_depth, min_size, depth+1)
+ 
+def build_tree(train, max_depth, min_size):
+	"""Build a decision tree from the rows in ``train``.
+
+	The root comes from get_split() and is then expanded in place by split(),
+	starting at depth 1. Returns the root node dict.
+	"""
+	tree = get_split(train)
+	split(tree, max_depth, min_size, depth=1)
+	return tree
+ 
+def print_tree(node, depth=0):
+	"""Print a decision tree, one line per node, indented one space per level.
+
+	Split nodes (dicts) print as ``[X<column> < <threshold>]`` with a 1-based
+	column number, followed by their left and then right subtree; anything
+	else is treated as a leaf and printed as ``[<label>]``.
+	"""
+	indent = depth*' '
+	if not isinstance(node, dict):
+		print('%s[%s]' % (indent, node))
+		return
+	column = node['index']+1
+	print('%s[X%d < %.3f]' % (indent, column, node['value']))
+	for side in ('left', 'right'):
+		print_tree(node[side], depth+1)
+ 
+
+if __name__ == "__main__":
+	# Script driver (only when run directly): read sheet "Sheet1" of
+	# Train.xlsx into a NumPy array, build a tree with max_depth=1 and
+	# min_size=1, and print it.
+	dataset = pd.read_excel('Train.xlsx', sheet_name="Sheet1").to_numpy()
+	tree = build_tree(dataset, 1, 1)
+	print_tree(tree)