"""Build a correlation matrix for a numeric dataset and export it to Excel.

Run as a script, this reads the ``forestfires`` sheet of ``dataset2.xls``,
centres every column on its (truncated) mean, derives the matrix of summed
deviation products, converts it to a correlation matrix and writes the result
to ``CorrelationMatrix.xls``.
"""
import math
from random import uniform

import numpy as np
import pandas as pd

try:
    import xlsxwriter
except ImportError:
    # Only matrixToxls() needs the Excel writer; keep the statistics helpers
    # importable without it. matrixToxls() reports the missing package.
    xlsxwriter = None


def _truncate4(value) -> float:
    """Truncate *value* toward zero to four decimal places."""
    return int(value * 10000) / 10000


def calucateMean(df) -> tuple:
    """Return per-column means and their offsets from a random in-range value.

    Args:
        df: Two-dimensional numeric array exposing ``shape`` and yielding
            indexable rows when iterated (the script passes a NumPy array).

    Returns:
        A ``(mean, vm)`` tuple of lists with one entry per column. ``mean``
        holds the column means truncated to four decimals. ``vm`` holds
        ``mean - v`` where ``v`` is drawn uniformly between the column's
        minimum and maximum, so ``vm`` differs from run to run.

    Raises:
        ValueError: If ``df`` has no rows.
    """
    n_rows, n_cols = df.shape[0], df.shape[1]
    if n_rows == 0:
        raise ValueError("calucateMean() needs at least one row of data")

    mean = []
    minimum = []
    maximum = []
    for col in range(n_cols):
        column = [row[col] for row in df]
        # Use the true column extrema: seeding the maximum with 0 would
        # report a wrong upper bound for columns that are entirely negative.
        minimum.append(min(column))
        maximum.append(max(column))
        mean.append(_truncate4(sum(column) / n_rows))

    vm = [m - uniform(lo, hi) for m, lo, hi in zip(mean, minimum, maximum)]
    return (mean, vm)


def matrixToxls(matrix, filename):
    """Write *matrix* to the first worksheet of a new workbook at *filename*.

    Each inner sequence of ``matrix`` is written as one worksheet column,
    starting at row 0.

    Raises:
        ImportError: If the ``xlsxwriter`` package is not installed.
    """
    if xlsxwriter is None:
        raise ImportError("matrixToxls() requires the 'xlsxwriter' package")

    # The context manager closes (and thereby saves) the workbook even when a
    # write fails. 'nan_inf_to_errors' lets undefined correlations (NaN) be
    # written as Excel error cells instead of aborting the export.
    with xlsxwriter.Workbook(filename, {"nan_inf_to_errors": True}) as workbook:
        worksheet = workbook.add_worksheet()
        for col, data in enumerate(matrix):
            worksheet.write_column(0, col, data)


def variance(col, data=None) -> float:
    """Return the sum of squared deviations of column *col*, truncated to 4 decimals.

    The sum is not divided by the number of observations; corrMatrix() only
    uses ratios of these values, so the missing factor cancels there.

    Args:
        col: Index of the column inside ``data``.
        data: Sequence of mean-centred columns. Defaults to the module-level
            ``matrixSubs`` that the script entry point builds.
    """
    if data is None:
        data = matrixSubs
    return _truncate4(sum([cell ** 2 for cell in data[col]]))


def covariance(col1, col2, data=None):
    """Return the sum of deviation products of two columns, truncated to 4 decimals.

    Like variance(), the sum is left unnormalised.

    Args:
        col1: Index of the first column inside ``data``.
        col2: Index of the second column inside ``data``.
        data: Sequence of mean-centred columns. Defaults to the module-level
            ``matrixSubs`` that the script entry point builds.
    """
    if data is None:
        data = matrixSubs
    return _truncate4(sum([p * q for p, q in zip(data[col1], data[col2])]))


def matrixCov(data=None):
    """Return the square matrix of summed deviation products for all columns.

    Diagonal entries come from variance(), off-diagonal ones from covariance().

    Args:
        data: Sequence of mean-centred columns. Defaults to the module-level
            ``matrixSubs`` that the script entry point builds.
    """
    if data is None:
        data = matrixSubs
    size = len(data)
    return [
        [
            variance(i, data) if i == j else covariance(i, j, data)
            for j in range(size)
        ]
        for i in range(size)
    ]


def corrMatrix(matrix):
    """Convert a covariance-style square matrix into a correlation matrix.

    Args:
        matrix: Square matrix whose diagonal holds each column's variance term
            (as produced by matrixCov()).

    Returns:
        A list of rows. Diagonal entries are ``1``; every other entry is
        ``matrix[i][j] / sqrt(matrix[i][i] * matrix[j][j])``, or ``math.nan``
        when one of the two variance terms is zero and the correlation is
        therefore undefined.
    """
    size = len(matrix)
    result = []
    for i in range(size):
        row = []
        for j in range(size):
            if i == j:
                row.append(1)
                continue
            denominator = (matrix[i][i] * matrix[j][j]) ** 0.5
            # A constant column has zero variance; report NaN rather than
            # failing with ZeroDivisionError.
            row.append(matrix[i][j] / denominator if denominator else math.nan)
        result.append(row)
    return result


if __name__ == "__main__":
    df = pd.read_excel('dataset2.xls', sheet_name="forestfires").to_numpy()
    mean, vm = calucateMean(df)

    # Kept at module level on purpose: variance(), covariance() and
    # matrixCov() fall back to this name when no data is passed explicitly.
    matrixSubs = [
        [row[col] - mean[col] for row in df]
        for col in range(df.shape[1])
    ]

    matrix = matrixCov(matrixSubs)
    CorrelationMatrix = corrMatrix(matrix)
    # NOTE(review): XlsxWriter produces XLSX content; the '.xls' name is kept
    # unchanged here, but Excel may warn about the extension mismatch.
    matrixToxls(CorrelationMatrix, "CorrelationMatrix.xls")