diff --git a/dataset2.xls b/dataset2.xls index 57175ea..839d5bc 100644 Binary files a/dataset2.xls and b/dataset2.xls differ diff --git a/exec.py b/exec.py index 7a70819..92fdefb 100644 --- a/exec.py +++ b/exec.py @@ -2,13 +2,6 @@ import pandas as pd import xlsxwriter -def exulidean(row1, row2): - s = 0 - for p, q in zip(row1, row2): - s += ((p - q) ** 2) ** 0.5 # 421.30 - return s - - def matrixToxls(matrix, filename): workbook = xlsxwriter.Workbook(filename) worksheet = workbook.add_worksheet() @@ -18,14 +11,19 @@ def matrixToxls(matrix, filename): workbook.close() -resultEculidean = [] +def exulidean(row1, row2): + s = 0 + for p, q in zip(row1, row2): + s += ((p - q) ** 2) + return s ** 0.5 + if __name__ == "__main__": df = pd.read_excel('dataset2.xls', sheet_name="forestfires").to_numpy() + resultEculidean = [] for row in df: tmp = [] for row2 in df: - # [12312,3123,21321,3543,667,313,21,4,346,54,745,6........] - tmp.append(exulidean(row, row2)) + tmp.append(exulidean(row, row2)) # [0,x, ] resultEculidean.append(tmp) matrixToxls(resultEculidean, "eculidean.xls") diff --git a/mahal.py b/mahal.py new file mode 100644 index 0000000..299ea72 --- /dev/null +++ b/mahal.py @@ -0,0 +1,82 @@ +import pandas as pd +import xlsxwriter +from random import uniform +import math +import numpy as np + + +def calucateMean(df) -> tuple: + mean = [] + maximum = [] + minimum = [] + for col in range(df.shape[1]): # 0 ta 10 + mean.append(0) + maximum.append(0) + minimum.append(math.inf) + for row in df: + if (row[col] > maximum[col]): + maximum[col] = row[col] + if (row[col] < minimum[col]): + minimum[col] = row[col] + mean[col] += row[col] + mean[col] = mean[col] / len(df) + v = [] + for i in range(df.shape[1]): + v.append(uniform(minimum[i], maximum[i])) + vm = [] + for j in range(df.shape[1]): + vm.append(mean[j] - v[j]) + return (mean, vm) + + +def variance(col, mean) -> list: + var = 0 + for row in df: + var += (row[col] - mean[col]) ** 2 + var /= (df.shape[0]-1) + return var + + 
def covariance(col1, col2, mean, data=None):
    """Return the sample covariance between two columns of a dataset.

    Args:
        col1: Index of the first column.
        col2: Index of the second column.
        mean: Per-column means, indexable by column index.
        data: Rows of the dataset (each row indexable by column index).
            When omitted, the module-level ``df`` assigned by the script
            entry point is used, so the original three-argument call keeps
            working.

    Returns:
        The sum over all rows of the product of the two columns' deviations
        from their means, divided by ``n - 1`` (``n`` = number of rows).
    """
    # Fall back to the global only when no dataset is supplied; ``df`` is not
    # evaluated otherwise, so the function is usable when imported.
    rows = df if data is None else data
    total = sum(
        (row[col1] - mean[col1]) * (row[col2] - mean[col2]) for row in rows
    )
    return total / (len(rows) - 1)


def matrixCov(df, mean):
    """Build the sample covariance matrix of ``df``.

    Args:
        df: 2-D array exposing ``shape`` (rows x columns) and row iteration.
        mean: Per-column means, indexable by column index.

    Returns:
        A list of row lists; entry ``[a][b]`` is the sample covariance of
        columns ``a`` and ``b``.  Diagonal entries are the column variances,
        since the variance of a column is its covariance with itself.
    """
    n_cols = df.shape[1]
    # Pass ``df`` explicitly so the result describes the dataset given to
    # this function rather than whatever the module-level ``df`` holds.
    return [
        [covariance(a, b, mean, df) for b in range(n_cols)]
        for a in range(n_cols)
    ]


def multiply(matrix1, matrix2):
    """Return the row-vector by matrix product ``matrix1 @ matrix2``.

    Args:
        matrix1: Sequence of numbers (the row vector).
        matrix2: 2-D matrix given as a sequence of rows (a NumPy array or a
            list of lists); it must have ``len(matrix1)`` rows.

    Returns:
        A list whose ``j``-th entry is
        ``sum(matrix1[i] * matrix2[i][j] for i in rows)``.  An empty vector
        yields an empty list.

    Raises:
        ValueError: If the vector length differs from the number of rows.
    """
    if len(matrix1) != len(matrix2):
        raise ValueError(
            "vector length %d does not match matrix row count %d"
            % (len(matrix1), len(matrix2))
        )
    if len(matrix1) == 0:
        return []
    n_cols = len(matrix2[0])
    # Every output entry accumulates over ALL rows of the matrix; keeping a
    # single term per entry would discard the off-diagonal contributions.
    return [
        sum(value * row[j] for value, row in zip(matrix1, matrix2))
        for j in range(n_cols)
    ]


def multiplyTwoList(list1, list2):
    """Return the dot product of two equally indexed sequences.

    Pairs are formed with ``zip``, so extra trailing items of the longer
    sequence are ignored.
    """
    return sum(i * j for i, j in zip(list1, list2))


if __name__ == "__main__":
    # ``df`` is deliberately module-level: ``covariance`` falls back to it
    # when called without an explicit dataset.
    df = pd.read_excel('dataset2.xls', sheet_name="forestfires").to_numpy()
    # ``calucateMean`` is defined earlier in this file.
    (mean, vm) = calucateMean(df)
    matrix = matrixCov(df, mean)
    invc_matrix = np.linalg.inv(matrix)
    # Quadratic form vm . inv(cov) . vm, then its square root.
    res = multiply(vm, invc_matrix)
    res = multiplyTwoList(res, vm)
    print(res**0.5)