diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1b0e92f --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.vscode +.history +.__pycache__ \ No newline at end of file diff --git a/.history/app_20201231195755.py b/.history/app_20201231195755.py deleted file mode 100644 index e69de29..0000000 diff --git a/.history/app_20201231200318.py b/.history/app_20201231200318.py deleted file mode 100644 index 0377641..0000000 --- a/.history/app_20201231200318.py +++ /dev/null @@ -1,103 +0,0 @@ -import numpy as np -import pandas as pd -from sklearn.metrics import confusion_matrix -from sklearn.cross_validation import train_test_split -from sklearn.tree import DecisionTreeClassifier -from sklearn.metrics import accuracy_score -from sklearn.metrics import classification_report - -# Function importing Dataset -def importdata(): - balance_data = pd.read_excel('Train.xlsx', sheet_name="Sheet1").to_numpy() - - # Printing the dataswet shape - print ("Dataset Length: ", len(balance_data)) - print ("Dataset Shape: ", balance_data.shape) - - # Printing the dataset obseravtions - print ("Dataset: ",balance_data.head()) - return balance_data - -# Function to split the dataset -def splitdataset(balance_data): - - # Separating the target variable - X = balance_data.values[:, 1:5] - Y = balance_data.values[:, 0] - - # Splitting the dataset into train and test - X_train, X_test, y_train, y_test = train_test_split( - X, Y, test_size = 0.3, random_state = 100) - - return X, Y, X_train, X_test, y_train, y_test - -# Function to perform training with giniIndex. -def train_using_gini(X_train, X_test, y_train): - - # Creating the classifier object - clf_gini = DecisionTreeClassifier(criterion = "gini", - random_state = 100,max_depth=3, min_samples_leaf=5) - - # Performing training - clf_gini.fit(X_train, y_train) - return clf_gini - -# Function to perform training with entropy. -def tarin_using_entropy(X_train, X_test, y_train): - - # Decision tree with entropy - clf_entropy = DecisionTreeClassifier( - criterion = "entropy", random_state = 100, - max_depth = 3, min_samples_leaf = 5) - - # Performing training - clf_entropy.fit(X_train, y_train) - return clf_entropy - - -# Function to make predictions -def prediction(X_test, clf_object): - - # Predicton on test with giniIndex - y_pred = clf_object.predict(X_test) - print("Predicted values:") - print(y_pred) - return y_pred - -# Function to calculate accuracy -def cal_accuracy(y_test, y_pred): - - print("Confusion Matrix: ", - confusion_matrix(y_test, y_pred)) - - print ("Accuracy : ", - accuracy_score(y_test,y_pred)*100) - - print("Report : ", - classification_report(y_test, y_pred)) - -# Driver code -def main(): - - # Building Phase - data = importdata() - X, Y, X_train, X_test, y_train, y_test = splitdataset(data) - clf_gini = train_using_gini(X_train, X_test, y_train) - clf_entropy = tarin_using_entropy(X_train, X_test, y_train) - - # Operational Phase - print("Results Using Gini Index:") - - # Prediction using gini - y_pred_gini = prediction(X_test, clf_gini) - cal_accuracy(y_test, y_pred_gini) - - print("Results Using Entropy:") - # Prediction using entropy - y_pred_entropy = prediction(X_test, clf_entropy) - cal_accuracy(y_test, y_pred_entropy) - - -# Calling main function -if __name__=="__main__": - main() \ No newline at end of file diff --git a/.history/app_20201231201128.py b/.history/app_20201231201128.py deleted file mode 100644 index 735243c..0000000 --- a/.history/app_20201231201128.py +++ /dev/null @@ -1,103 +0,0 @@ -import numpy as np -import pandas as pd -from sklearn.metrics import confusion_matrix -from sklearn.model_selection import train_test_split -from sklearn.tree import DecisionTreeClassifier -from sklearn.metrics import accuracy_score -from sklearn.metrics import classification_report - -# Function importing Dataset -def importdata(): - balance_data = pd.read_excel('Train.xlsx', sheet_name="Sheet1").to_numpy() - - # Printing the dataswet shape - print ("Dataset Length: ", len(balance_data)) - print ("Dataset Shape: ", balance_data.shape) - - # Printing the dataset obseravtions - print ("Dataset: ",balance_data.head()) - return balance_data - -# Function to split the dataset -def splitdataset(balance_data): - - # Separating the target variable - X = balance_data.values[:, 1:5] - Y = balance_data.values[:, 0] - - # Splitting the dataset into train and test - X_train, X_test, y_train, y_test = train_test_split( - X, Y, test_size = 0.3, random_state = 100) - - return X, Y, X_train, X_test, y_train, y_test - -# Function to perform training with giniIndex. -def train_using_gini(X_train, X_test, y_train): - - # Creating the classifier object - clf_gini = DecisionTreeClassifier(criterion = "gini", - random_state = 100,max_depth=3, min_samples_leaf=5) - - # Performing training - clf_gini.fit(X_train, y_train) - return clf_gini - -# Function to perform training with entropy. -def tarin_using_entropy(X_train, X_test, y_train): - - # Decision tree with entropy - clf_entropy = DecisionTreeClassifier( - criterion = "entropy", random_state = 100, - max_depth = 3, min_samples_leaf = 5) - - # Performing training - clf_entropy.fit(X_train, y_train) - return clf_entropy - - -# Function to make predictions -def prediction(X_test, clf_object): - - # Predicton on test with giniIndex - y_pred = clf_object.predict(X_test) - print("Predicted values:") - print(y_pred) - return y_pred - -# Function to calculate accuracy -def cal_accuracy(y_test, y_pred): - - print("Confusion Matrix: ", - confusion_matrix(y_test, y_pred)) - - print ("Accuracy : ", - accuracy_score(y_test,y_pred)*100) - - print("Report : ", - classification_report(y_test, y_pred)) - -# Driver code -def main(): - - # Building Phase - data = importdata() - X, Y, X_train, X_test, y_train, y_test = splitdataset(data) - clf_gini = train_using_gini(X_train, X_test, y_train) - clf_entropy = tarin_using_entropy(X_train, X_test, y_train) - - # Operational Phase - print("Results Using Gini Index:") - - # Prediction using gini - y_pred_gini = prediction(X_test, clf_gini) - cal_accuracy(y_test, y_pred_gini) - - print("Results Using Entropy:") - # Prediction using entropy - y_pred_entropy = prediction(X_test, clf_entropy) - cal_accuracy(y_test, y_pred_entropy) - - -# Calling main function -if __name__=="__main__": - main() \ No newline at end of file diff --git a/.history/app_20201231201209.py b/.history/app_20201231201209.py deleted file mode 100644 index c5ce610..0000000 --- a/.history/app_20201231201209.py +++ /dev/null @@ -1,96 +0,0 @@ -import numpy as np -import pandas as pd -from sklearn.metrics import confusion_matrix -from sklearn.model_selection import train_test_split -from sklearn.tree import DecisionTreeClassifier -from sklearn.metrics import accuracy_score -from sklearn.metrics import classification_report - -# Function importing Dataset -def importdata(): - balance_data = pd.read_excel('Train.xlsx', sheet_name="Sheet1").to_numpy() - return balance_data - -# Function to split the dataset -def splitdataset(balance_data): - - # Separating the target variable - X = balance_data.values[:, 1:5] - Y = balance_data.values[:, 0] - - # Splitting the dataset into train and test - X_train, X_test, y_train, y_test = train_test_split( - X, Y, test_size = 0.3, random_state = 100) - - return X, Y, X_train, X_test, y_train, y_test - -# Function to perform training with giniIndex. -def train_using_gini(X_train, X_test, y_train): - - # Creating the classifier object - clf_gini = DecisionTreeClassifier(criterion = "gini", - random_state = 100,max_depth=3, min_samples_leaf=5) - - # Performing training - clf_gini.fit(X_train, y_train) - return clf_gini - -# Function to perform training with entropy. -def tarin_using_entropy(X_train, X_test, y_train): - - # Decision tree with entropy - clf_entropy = DecisionTreeClassifier( - criterion = "entropy", random_state = 100, - max_depth = 3, min_samples_leaf = 5) - - # Performing training - clf_entropy.fit(X_train, y_train) - return clf_entropy - - -# Function to make predictions -def prediction(X_test, clf_object): - - # Predicton on test with giniIndex - y_pred = clf_object.predict(X_test) - print("Predicted values:") - print(y_pred) - return y_pred - -# Function to calculate accuracy -def cal_accuracy(y_test, y_pred): - - print("Confusion Matrix: ", - confusion_matrix(y_test, y_pred)) - - print ("Accuracy : ", - accuracy_score(y_test,y_pred)*100) - - print("Report : ", - classification_report(y_test, y_pred)) - -# Driver code -def main(): - - # Building Phase - data = importdata() - X, Y, X_train, X_test, y_train, y_test = splitdataset(data) - clf_gini = train_using_gini(X_train, X_test, y_train) - clf_entropy = tarin_using_entropy(X_train, X_test, y_train) - - # Operational Phase - print("Results Using Gini Index:") - - # Prediction using gini - y_pred_gini = prediction(X_test, clf_gini) - cal_accuracy(y_test, y_pred_gini) - - print("Results Using Entropy:") - # Prediction using entropy - y_pred_entropy = prediction(X_test, clf_entropy) - cal_accuracy(y_test, y_pred_entropy) - - -# Calling main function -if __name__=="__main__": - main() \ No newline at end of file diff --git a/.history/app_20201231201258.py b/.history/app_20201231201258.py deleted file mode 100644 index 011819d..0000000 --- a/.history/app_20201231201258.py +++ /dev/null @@ -1,96 +0,0 @@ -import numpy as np -import pandas as pd -from sklearn.metrics import confusion_matrix -from sklearn.model_selection import train_test_split -from sklearn.tree import DecisionTreeClassifier -from sklearn.metrics import accuracy_score -from sklearn.metrics import classification_report - -# Function importing Dataset -def importdata(): - dataset = pd.read_excel('Train.xlsx', sheet_name="Sheet1").to_numpy() - return dataset - -# Function to split the dataset -def splitdataset(dataset): - - # Separating the target variable - X = dataset.values[:, 1:5] - Y = dataset.values[:, 0] - - # Splitting the dataset into train and test - X_train, X_test, y_train, y_test = train_test_split( - X, Y, test_size = 0.3, random_state = 100) - - return X, Y, X_train, X_test, y_train, y_test - -# Function to perform training with giniIndex. -def train_using_gini(X_train, X_test, y_train): - - # Creating the classifier object - clf_gini = DecisionTreeClassifier(criterion = "gini", - random_state = 100,max_depth=3, min_samples_leaf=5) - - # Performing training - clf_gini.fit(X_train, y_train) - return clf_gini - -# Function to perform training with entropy. -def tarin_using_entropy(X_train, X_test, y_train): - - # Decision tree with entropy - clf_entropy = DecisionTreeClassifier( - criterion = "entropy", random_state = 100, - max_depth = 3, min_samples_leaf = 5) - - # Performing training - clf_entropy.fit(X_train, y_train) - return clf_entropy - - -# Function to make predictions -def prediction(X_test, clf_object): - - # Predicton on test with giniIndex - y_pred = clf_object.predict(X_test) - print("Predicted values:") - print(y_pred) - return y_pred - -# Function to calculate accuracy -def cal_accuracy(y_test, y_pred): - - print("Confusion Matrix: ", - confusion_matrix(y_test, y_pred)) - - print ("Accuracy : ", - accuracy_score(y_test,y_pred)*100) - - print("Report : ", - classification_report(y_test, y_pred)) - -# Driver code -def main(): - - # Building Phase - data = importdata() - X, Y, X_train, X_test, y_train, y_test = splitdataset(data) - clf_gini = train_using_gini(X_train, X_test, y_train) - clf_entropy = tarin_using_entropy(X_train, X_test, y_train) - - # Operational Phase - print("Results Using Gini Index:") - - # Prediction using gini - y_pred_gini = prediction(X_test, clf_gini) - cal_accuracy(y_test, y_pred_gini) - - print("Results Using Entropy:") - # Prediction using entropy - y_pred_entropy = prediction(X_test, clf_entropy) - cal_accuracy(y_test, y_pred_entropy) - - -# Calling main function -if __name__=="__main__": - main() \ No newline at end of file diff --git a/.history/app_20201231201711.py b/.history/app_20201231201711.py deleted file mode 100644 index 93e927b..0000000 --- a/.history/app_20201231201711.py +++ /dev/null @@ -1,96 +0,0 @@ -import numpy as np -import pandas as pd -from sklearn.metrics import confusion_matrix -from sklearn.model_selection import train_test_split -from sklearn.tree import DecisionTreeClassifier -from sklearn.metrics import accuracy_score -from sklearn.metrics import classification_report - -# Function importing Dataset -def importdata(): - balance_data = pd.read_csv( 'Train.csv',sep= ',', header = None) - return dataset - -# Function to split the dataset -def splitdataset(dataset): - - # Separating the target variable - X = dataset.values[:, 1:5] - Y = dataset.values[:, 0] - - # Splitting the dataset into train and test - X_train, X_test, y_train, y_test = train_test_split( - X, Y, test_size = 0.3, random_state = 100) - - return X, Y, X_train, X_test, y_train, y_test - -# Function to perform training with giniIndex. -def train_using_gini(X_train, X_test, y_train): - - # Creating the classifier object - clf_gini = DecisionTreeClassifier(criterion = "gini", - random_state = 100,max_depth=3, min_samples_leaf=5) - - # Performing training - clf_gini.fit(X_train, y_train) - return clf_gini - -# Function to perform training with entropy. -def tarin_using_entropy(X_train, X_test, y_train): - - # Decision tree with entropy - clf_entropy = DecisionTreeClassifier( - criterion = "entropy", random_state = 100, - max_depth = 3, min_samples_leaf = 5) - - # Performing training - clf_entropy.fit(X_train, y_train) - return clf_entropy - - -# Function to make predictions -def prediction(X_test, clf_object): - - # Predicton on test with giniIndex - y_pred = clf_object.predict(X_test) - print("Predicted values:") - print(y_pred) - return y_pred - -# Function to calculate accuracy -def cal_accuracy(y_test, y_pred): - - print("Confusion Matrix: ", - confusion_matrix(y_test, y_pred)) - - print ("Accuracy : ", - accuracy_score(y_test,y_pred)*100) - - print("Report : ", - classification_report(y_test, y_pred)) - -# Driver code -def main(): - - # Building Phase - data = importdata() - X, Y, X_train, X_test, y_train, y_test = splitdataset(data) - clf_gini = train_using_gini(X_train, X_test, y_train) - clf_entropy = tarin_using_entropy(X_train, X_test, y_train) - - # Operational Phase - print("Results Using Gini Index:") - - # Prediction using gini - y_pred_gini = prediction(X_test, clf_gini) - cal_accuracy(y_test, y_pred_gini) - - print("Results Using Entropy:") - # Prediction using entropy - y_pred_entropy = prediction(X_test, clf_entropy) - cal_accuracy(y_test, y_pred_entropy) - - -# Calling main function -if __name__=="__main__": - main() \ No newline at end of file diff --git a/.history/app_20201231201714.py b/.history/app_20201231201714.py deleted file mode 100644 index dadd4e6..0000000 --- a/.history/app_20201231201714.py +++ /dev/null @@ -1,96 +0,0 @@ -import numpy as np -import pandas as pd -from sklearn.metrics import confusion_matrix -from sklearn.model_selection import train_test_split -from sklearn.tree import DecisionTreeClassifier -from sklearn.metrics import accuracy_score -from sklearn.metrics import classification_report - -# Function importing Dataset -def importdata(): - dataset = pd.read_csv( 'Train.csv',sep= ',', header = None) - return dataset - -# Function to split the dataset -def splitdataset(dataset): - - # Separating the target variable - X = dataset.values[:, 1:5] - Y = dataset.values[:, 0] - - # Splitting the dataset into train and test - X_train, X_test, y_train, y_test = train_test_split( - X, Y, test_size = 0.3, random_state = 100) - - return X, Y, X_train, X_test, y_train, y_test - -# Function to perform training with giniIndex. -def train_using_gini(X_train, X_test, y_train): - - # Creating the classifier object - clf_gini = DecisionTreeClassifier(criterion = "gini", - random_state = 100,max_depth=3, min_samples_leaf=5) - - # Performing training - clf_gini.fit(X_train, y_train) - return clf_gini - -# Function to perform training with entropy. -def tarin_using_entropy(X_train, X_test, y_train): - - # Decision tree with entropy - clf_entropy = DecisionTreeClassifier( - criterion = "entropy", random_state = 100, - max_depth = 3, min_samples_leaf = 5) - - # Performing training - clf_entropy.fit(X_train, y_train) - return clf_entropy - - -# Function to make predictions -def prediction(X_test, clf_object): - - # Predicton on test with giniIndex - y_pred = clf_object.predict(X_test) - print("Predicted values:") - print(y_pred) - return y_pred - -# Function to calculate accuracy -def cal_accuracy(y_test, y_pred): - - print("Confusion Matrix: ", - confusion_matrix(y_test, y_pred)) - - print ("Accuracy : ", - accuracy_score(y_test,y_pred)*100) - - print("Report : ", - classification_report(y_test, y_pred)) - -# Driver code -def main(): - - # Building Phase - data = importdata() - X, Y, X_train, X_test, y_train, y_test = splitdataset(data) - clf_gini = train_using_gini(X_train, X_test, y_train) - clf_entropy = tarin_using_entropy(X_train, X_test, y_train) - - # Operational Phase - print("Results Using Gini Index:") - - # Prediction using gini - y_pred_gini = prediction(X_test, clf_gini) - cal_accuracy(y_test, y_pred_gini) - - print("Results Using Entropy:") - # Prediction using entropy - y_pred_entropy = prediction(X_test, clf_entropy) - cal_accuracy(y_test, y_pred_entropy) - - -# Calling main function -if __name__=="__main__": - main() \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index c8a63ca..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "python.pythonPath": "C:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python38-32\\python.exe" -} \ No newline at end of file