90 lines
2.4 KiB
Python
90 lines
2.4 KiB
Python
import pandas as pd
|
|
import xlsxwriter
|
|
from random import uniform
|
|
import math
|
|
import numpy as np
|
|
|
|
|
|
def calucateMean(df) -> tuple:
    """Compute truncated column means and a randomly offset copy of them.

    Args:
        df: 2-D array-like that exposes ``shape`` and yields indexable rows
            when iterated (the script entry point passes a NumPy array).

    Returns:
        A ``(mean, vm)`` tuple of two lists with one entry per column.
        ``mean[c]`` is the arithmetic mean of column ``c`` truncated (not
        rounded) to four decimal places. ``vm[c]`` is ``mean[c]`` minus one
        value drawn uniformly between that column's minimum and maximum.

    Raises:
        ZeroDivisionError: if ``df`` has columns but no rows.
    """
    row_count = len(df)
    mean = []
    minimum = []
    maximum = []
    for col in range(df.shape[1]):
        column = [row[col] for row in df]
        # int() truncates toward zero, keeping four decimal places.
        mean.append(int(sum(column) / row_count * 10000) / 10000)
        # Take the bounds from the data itself: seeding the maximum with 0
        # would report a wrong upper bound for an all-negative column.
        minimum.append(min(column))
        maximum.append(max(column))

    # One random draw per column, in column order.
    vm = [
        centre - uniform(low, high)
        for centre, low, high in zip(mean, minimum, maximum)
    ]
    return (mean, vm)
|
|
|
|
|
|
def matrixToxls(matrix, filename):
    """Write ``matrix`` to the first worksheet of a new workbook.

    Args:
        matrix: iterable of sequences. Each inner sequence is written as one
            worksheet column starting at row 0, so ``matrix[i]`` fills
            column ``i`` from top to bottom.
        filename: path of the workbook file to create.
    """
    first_row = 0
    # The context manager guarantees the workbook is closed (and the file
    # finalised) even when one of the writes raises.
    with xlsxwriter.Workbook(filename) as workbook:
        worksheet = workbook.add_worksheet()
        for col, data in enumerate(matrix):
            worksheet.write_column(first_row, col, data)
|
|
|
|
|
|
def variance(col) -> float:
    """Return the sum of squared values of column ``col`` of ``matrixSubs``.

    ``matrixSubs`` is a module-level list built by the script entry point; it
    holds one list of mean-centred values per data column. The sum is NOT
    divided by the number of observations, and the result is truncated (not
    rounded) to four decimal places.

    Args:
        col: index into ``matrixSubs``.

    Returns:
        The truncated sum of squares as a float.
    """
    squared_sum = sum(cell ** 2 for cell in matrixSubs[col])
    # int() truncates toward zero, keeping four decimal places.
    return int(squared_sum * 10000) / 10000
|
|
|
|
|
|
def covariance(col1, col2):
    """Return the sum of pairwise products of two ``matrixSubs`` columns.

    Both columns are read from the module-level ``matrixSubs`` list and are
    paired element by element with ``zip``. The sum is not divided by the
    number of observations, and it is truncated (not rounded) to four
    decimal places.
    """
    first = matrixSubs[col1]
    second = matrixSubs[col2]
    cross_sum = sum(a * b for a, b in zip(first, second))
    # Scale, truncate toward zero, then scale back: keeps four decimals.
    scaled = int(cross_sum * 10000)
    return scaled / 10000
|
|
|
|
|
|
def matrixCov():
    """Build the square matrix of ``variance``/``covariance`` values.

    The matrix has one row and one column per entry of the module-level
    ``matrixSubs`` list. Diagonal cells come from ``variance(i)`` and every
    other cell from ``covariance(i, j)``.

    Returns:
        A list of row lists.
    """
    size = len(matrixSubs)
    return [
        [variance(i) if i == j else covariance(i, j) for j in range(size)]
        for i in range(size)
    ]
|
|
|
|
|
|
def corrMatrix(matrix):
    """Convert a square covariance-style matrix into a correlation matrix.

    Diagonal cells are set to the integer ``1``. Every other cell is
    ``matrix[i][j] / sqrt(matrix[i][i] * matrix[j][j])``, so a zero on the
    diagonal of ``matrix`` makes the denominator zero for that row and
    column.

    Args:
        matrix: square list of lists indexed as ``matrix[i][j]``.

    Returns:
        A new list of row lists with the same dimensions as ``matrix``.
    """
    size = len(matrix)
    return [
        [
            1
            if i == j
            else matrix[i][j] / ((matrix[i][i] * matrix[j][j]) ** 0.5)
            for j in range(size)
        ]
        for i in range(size)
    ]
|
|
|
|
|
|
if __name__ == "__main__":
    # Load the "forestfires" sheet as a plain 2-D array, one row per record.
    df = pd.read_excel('dataset2.xls', sheet_name="forestfires").to_numpy()
    mean, vm = calucateMean(df)

    # Centre every column on its mean. ``matrixSubs`` has to remain a
    # module-level name: variance(), covariance() and matrixCov() read it
    # as a global.
    matrixSubs = [
        [record[idx] - mean[idx] for record in df]
        for idx in range(df.shape[1])
    ]

    matrix = matrixCov()
    CorrelationMatrix = corrMatrix(matrix)

    # NOTE(review): xlsxwriter produces XLSX-format files; confirm whether
    # the ".xls" extension here is intentional.
    matrixToxls(CorrelationMatrix, "CorrelationMatrix.xls")
|