exercise-2/co.py

90 lines
2.4 KiB
Python

import pandas as pd
import xlsxwriter
from random import uniform
import math
import numpy as np
def calucateMean(df) -> tuple:
    """Compute per-column means and their offsets from a random in-range value.

    For every column of ``df`` the function tracks the smallest and largest
    value, accumulates the sum, and stores the mean truncated (not rounded)
    to four decimal places.  It then draws one value per column uniformly
    between that column's minimum and maximum and subtracts it from the mean.

    Args:
        df: Two-dimensional array exposing ``shape``, ``len()`` and row-wise
            iteration with ``row[col]`` indexing (the script passes the
            result of ``DataFrame.to_numpy()``).

    Returns:
        A ``(mean, vm)`` tuple of lists with one entry per column:
        ``mean[c]`` is the truncated column mean and ``vm[c]`` is
        ``mean[c]`` minus a random draw between the column's extremes.

    Raises:
        ZeroDivisionError: If ``df`` has columns but no rows.
    """
    row_count = len(df)
    mean = []
    vm = []
    for col in range(df.shape[1]):
        total = 0
        # Start from -inf / +inf so a column holding only negative values
        # still reports its true maximum (a 0 seed would win every compare).
        highest = -math.inf
        lowest = math.inf
        for row in df:
            value = row[col]
            if value > highest:
                highest = value
            if value < lowest:
                lowest = value
            total += value
        # int() truncates toward zero, keeping four decimal places.
        col_mean = int((total / row_count) * 10000) / 10000
        mean.append(col_mean)
        # One draw per column, in column order.
        vm.append(col_mean - uniform(lowest, highest))
    return (mean, vm)
def matrixToxls(matrix, filename):
    """Write ``matrix`` to the first worksheet of a newly created workbook.

    Each element of ``matrix`` is written as one worksheet column starting
    at the top row, i.e. ``matrix[i]`` fills column ``i`` downwards.

    Args:
        matrix: Iterable of sequences of cell values.
        filename: Path of the workbook file to create.
    """
    first_row = 0
    # The context manager closes the workbook even when a write raises, so
    # the underlying file is not left open on error.
    with xlsxwriter.Workbook(filename) as workbook:
        worksheet = workbook.add_worksheet()
        for col, data in enumerate(matrix):
            worksheet.write_column(first_row, col, data)
def variance(col) -> float:
    """Return the truncated sum of squared entries of ``matrixSubs[col]``.

    ``matrixSubs`` is a module-level list of columns that must exist before
    this function is called.  The sum is NOT divided by the number of
    samples, and the result is truncated (not rounded) to four decimals.

    Args:
        col: Index of the column inside ``matrixSubs``.

    Returns:
        The sum of squares of the column, truncated to four decimal places.
    """
    squares_total = sum(cell ** 2 for cell in matrixSubs[col])
    # int() truncates toward zero, keeping four decimal places.
    return int(squares_total * 10000) / 10000
def covariance(col1, col2):
    """Return the truncated sum of element-wise products of two columns.

    Both columns are read from the module-level ``matrixSubs`` list.  Pairs
    are formed with ``zip``, so extra entries of a longer column are
    ignored.  The sum is not divided by the sample count and is truncated
    (not rounded) to four decimal places.

    Args:
        col1: Index of the first column inside ``matrixSubs``.
        col2: Index of the second column inside ``matrixSubs``.

    Returns:
        The sum of products, truncated to four decimal places.
    """
    left = matrixSubs[col1]
    right = matrixSubs[col2]
    cross_total = sum(a * b for a, b in zip(left, right))
    scaled = int(cross_total * 10000)
    return scaled / 10000
def matrixCov():
    """Build the square matrix of column-against-column sums of products.

    The matrix has one row and one column per entry of the module-level
    ``matrixSubs`` list.  Diagonal cells come from ``variance`` and every
    other cell from ``covariance``.

    Returns:
        A list of lists where cell ``[i][j]`` relates columns ``i`` and ``j``.
    """
    size = len(matrixSubs)
    return [
        [variance(i) if i == j else covariance(i, j) for j in range(size)]
        for i in range(size)
    ]
def corrMatrix(matrix):
    """Normalise a square matrix into a correlation-style matrix.

    Every diagonal cell becomes the integer ``1``.  Every off-diagonal cell
    ``[i][j]`` becomes ``matrix[i][j]`` divided by the square root of the
    product of the two matching diagonal entries.

    Args:
        matrix: Square list of lists (as produced by ``matrixCov``).

    Returns:
        A new list of lists of the same size; ``matrix`` is not modified.
    """
    size = len(matrix)

    def _cell(i, j):
        # The diagonal is fixed to 1 without touching the input values.
        if i == j:
            return 1
        numerator = matrix[i][j]
        scale = (matrix[i][i] * matrix[j][j]) ** 0.5
        return numerator / scale

    return [[_cell(i, j) for j in range(size)] for i in range(size)]
if __name__ == "__main__":
    # Load the "forestfires" sheet and work on it as a plain array.
    df = pd.read_excel('dataset2.xls', sheet_name="forestfires").to_numpy()
    mean, vm = calucateMean(df)

    # Centre every column on its mean.  variance() and covariance() read
    # this module-level list, so it must be bound before matrixCov() runs.
    matrixSubs = [
        [row[col] - mean[col] for row in df]
        for col in range(df.shape[1])
    ]

    matrix = matrixCov()
    CorrelationMatrix = corrMatrix(matrix)
    # NOTE(review): xlsxwriter emits the xlsx format; the ".xls" extension
    # may make Excel warn on open — confirm the intended extension.
    matrixToxls(CorrelationMatrix, "CorrelationMatrix.xls")