diff --git a/.gitignore b/.gitignore
index 32378b8..2d4aa43 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,7 +8,7 @@ __pycache__/
 *.py[cod]
 .history
 *$py.class
-eculidean.*
+*.xls
 
 # C extensions
 *.so
diff --git a/co.py b/co.py
new file mode 100644
index 0000000..a0758c5
--- /dev/null
+++ b/co.py
@@ -0,0 +1,89 @@
+import pandas as pd
+import xlsxwriter
+from random import uniform
+import math
+import numpy as np
+
+
+def calucateMean(df) -> tuple:
+    mean = []
+    maximum = []
+    minimum = []
+    for col in range(df.shape[1]):  # 0 ta 10
+        mean.append(0)
+        maximum.append(0)
+        minimum.append(math.inf)
+        for row in df:
+            if (row[col] > maximum[col]):
+                maximum[col] = row[col]
+            if (row[col] < minimum[col]):
+                minimum[col] = row[col]
+            mean[col] += row[col]
+        mean[col] = ((int)((mean[col] / len(df))*10000))/10000
+    v = []
+    for i in range(df.shape[1]):
+        v.append(uniform(minimum[i], maximum[i]))
+    vm = []
+    for j in range(df.shape[1]):
+        vm.append(mean[j] - v[j])
+    return (mean, vm)
+
+
+def matrixToxls(matrix, filename):
+    workbook = xlsxwriter.Workbook(filename)
+    worksheet = workbook.add_worksheet()
+    row = 0
+    for col, data in enumerate(matrix):
+        worksheet.write_column(row, col, data)
+    workbook.close()
+
+
+def variance(col) -> list:
+    return ((int)(sum([cell ** 2 for cell in matrixSubs[col]])*10000))/10000
+
+
+def covariance(col1, col2):
+    return ((int)(sum([p*q for p, q in zip(matrixSubs[col1], matrixSubs[col2])])*10000))/10000
+
+
+def matrixCov():
+    matrix = []
+    for col1 in range(len(matrixSubs)):
+        tmp = []
+        for col2 in range(len(matrixSubs)):
+            if (col1 == col2):
+                tmp.append(variance(col1))
+            else:
+                tmp.append(covariance(col1, col2))
+        matrix.append(tmp)
+    return matrix
+
+
+def corrMatrix(matrix):
+    matrixCorr = []
+    for col1 in range(len(matrix)):
+        tmp = []
+        for col2 in range(len(matrix)):
+            if (col1 == col2):
+                tmp.append(1)
+            else:
+                tmp.append((matrix[col1][col2]) /
+                           ((matrix[col1][col1]*matrix[col2][col2])**0.5))
+        matrixCorr.append(tmp)
+    return matrixCorr
+
+
+if __name__ == "__main__":
+    df = pd.read_excel('dataset2.xls', sheet_name="forestfires").to_numpy()
+    (mean, vm) = calucateMean(df)
+    # print(mean)
+    matrixSubs = []
+    for col in range(df.shape[1]):
+        tmp = []
+        for row in df:
+            tmp.append(row[col] - mean[col])
+        matrixSubs.append(tmp)
+    matrix = matrixCov()
+    #
+    CorrelationMatrix = corrMatrix(matrix)
+    matrixToxls(CorrelationMatrix, "CorrelationMatrix.xls")
diff --git a/entropy.py b/entropy.py
new file mode 100644
index 0000000..ade16c2
--- /dev/null
+++ b/entropy.py
@@ -0,0 +1,53 @@
+import pandas as pd
+import xlsxwriter
+from random import uniform
+import math
+import numpy as np
+
+
+def calucateMean(df) -> tuple:
+    mean = []
+    maximum = []
+    minimum = []
+    for col in range(df.shape[1]):  # 0 ta 10
+        mean.append(0)
+        maximum.append(0)
+        minimum.append(math.inf)
+        for row in df:
+            if (row[col] > maximum[col]):
+                maximum[col] = row[col]
+            if (row[col] < minimum[col]):
+                minimum[col] = row[col]
+            mean[col] += row[col]
+        mean[col] = ((int)((mean[col] / len(df))*10000))/10000
+    v = []
+    for i in range(df.shape[1]):
+        v.append(uniform(minimum[i], maximum[i]))
+    vm = []
+    for j in range(df.shape[1]):
+        vm.append(mean[j] - v[j])
+    return (mean, vm)
+
+
+def matrixToxls(matrix, filename):
+    workbook = xlsxwriter.Workbook(filename)
+    worksheet = workbook.add_worksheet()
+    row = 0
+    for col, data in enumerate(matrix):
+        worksheet.write_column(row, col, data)
+    workbook.close()
+
+
+def entropy(col):
+    total = sum([row[col] for row in df])
+    print(total)
+    return [-(math.fabs(row[col]) / total*100) * math.log((math.fabs(row[col]) / total*100), 2) for row in df]
+
+
+if __name__ == "__main__":
+    df = pd.read_excel('dataset2.xls', sheet_name="forestfires").to_numpy()
+    (mean, vm) = calucateMean(df)
+    entropyMatrix = []
+    for col in range(df.shape[1]):
+        entropyMatrix.append(entropy(col))
+    matrixToxls(entropyMatrix, "entropiesMatrix.xls")