exercise-2/mahal.py

83 lines
2.0 KiB
Python

import pandas as pd
import xlsxwriter
from random import uniform
import math
import numpy as np
def calucateMean(df) -> tuple:
    """Compute per-column means and their offsets from a random reference point.

    For every column the arithmetic mean, the minimum and the maximum are
    gathered in a single pass over the rows.  One reference value per column
    is then drawn uniformly at random between that column's minimum and
    maximum, and the difference ``mean - reference`` is recorded.

    Args:
        df: Two-dimensional array-like that exposes ``shape`` and whose
            iteration yields rows indexable by column position (the script
            passes a NumPy array).

    Returns:
        A ``(mean, vm)`` tuple of two lists with one entry per column:
        ``mean[c]`` is the column mean and ``vm[c]`` is ``mean[c]`` minus the
        random reference value drawn for column ``c``.

    Raises:
        ZeroDivisionError: If ``df`` has columns but no rows.
    """
    n_rows = len(df)
    mean = []
    minimum = []
    maximum = []
    for col in range(df.shape[1]):
        total = 0
        lowest = math.inf
        # Seed with -inf rather than 0 so that a column holding only negative
        # values still reports its true maximum.
        highest = -math.inf
        for row in df:
            value = row[col]
            if value > highest:
                highest = value
            if value < lowest:
                lowest = value
            total += value
        mean.append(total / n_rows)
        minimum.append(lowest)
        maximum.append(highest)

    # One random draw per column, in column order, inside the observed range.
    vm = [
        col_mean - uniform(low, high)
        for col_mean, low, high in zip(mean, minimum, maximum)
    ]
    return (mean, vm)
def variance(col, mean, data=None) -> float:
    """Return the sample variance (``n - 1`` denominator) of one column.

    Args:
        col: Index of the column to analyse.
        mean: Sequence of per-column means; ``mean[col]`` is used as the
            centre of the deviations.
        data: Sized iterable of rows, each indexable by column position.
            When omitted, the module-level ``df`` is used, which is what the
            original two-argument form relied on implicitly.

    Returns:
        The sum of squared deviations from ``mean[col]`` divided by the
        number of rows minus one.
    """
    if data is None:
        # Backward-compatible fallback: existing callers pass only
        # (col, mean) and expect the script's global dataset.
        data = df
    centre = mean[col]
    squared_dev = sum((row[col] - centre) ** 2 for row in data)
    return squared_dev / (len(data) - 1)
def covariance(col1, col2, mean, data=None):
    """Return the sample covariance (``n - 1`` denominator) of two columns.

    Args:
        col1: Index of the first column.
        col2: Index of the second column.
        mean: Sequence of per-column means; ``mean[col1]`` and ``mean[col2]``
            are used as the centres of the deviations.
        data: Sized iterable of rows, each indexable by column position.
            When omitted, the module-level ``df`` is used, which is what the
            original three-argument form relied on implicitly.

    Returns:
        The sum over all rows of the product of the two columns' deviations
        from their means, divided by the number of rows minus one.
    """
    if data is None:
        # Backward-compatible fallback: existing callers pass only
        # (col1, col2, mean) and expect the script's global dataset.
        data = df
    centre1 = mean[col1]
    centre2 = mean[col2]
    cross_dev = sum((row[col1] - centre1) * (row[col2] - centre2) for row in data)
    return cross_dev / (len(data) - 1)
def matrixCov(df, mean):
    """Build the sample covariance matrix of the columns of ``df``.

    The matrix is computed from the ``df`` argument itself, centred on the
    supplied ``mean`` values, using an ``n - 1`` denominator.  Diagonal
    entries are the column variances, off-diagonal entries the pairwise
    covariances.

    Args:
        df: Two-dimensional numeric array-like with one row per observation
            and one column per variable.
        mean: Sequence with one centre value per column of ``df``.

    Returns:
        A square list of lists of floats with one row and one column per
        column of ``df``.
    """
    # Centre every column once; the product of the centred data with its own
    # transpose yields all variance/covariance sums in one step.
    centered = np.asarray(df, dtype=float) - np.asarray(mean, dtype=float)
    scatter = centered.T @ centered
    return (scatter / (centered.shape[0] - 1)).tolist()
def multiply(matrix1, matrix2):
    """Return the vector-matrix product ``matrix1 @ matrix2`` as a list.

    Entry ``j`` of the result is the sum over ``i`` of
    ``matrix1[i] * matrix2[i][j]``.  The previous implementation kept only
    ``matrix1[i] * matrix2[i][i]``, i.e. it ignored every off-diagonal entry
    of ``matrix2``.

    Args:
        matrix1: One-dimensional sequence of numbers (a row vector).
        matrix2: Two-dimensional array-like with ``len(matrix1)`` rows.

    Returns:
        A list with one entry per column of ``matrix2``.

    Raises:
        ValueError: If the length of ``matrix1`` does not match the number of
            rows of ``matrix2`` (raised by NumPy).
    """
    vector = np.asarray(matrix1)
    table = np.asarray(matrix2)
    return (vector @ table).tolist()
def multiplyTwoList(list1, list2):
    """Return the dot product of two sequences.

    Elements are paired positionally; if the sequences differ in length the
    surplus elements of the longer one are ignored.  Two empty sequences
    yield ``0``.
    """
    # A generator avoids materialising a temporary list just to sum it.
    return sum(left * right for left, right in zip(list1, list2))
if __name__ == "__main__":
    # ``df`` must remain a module-level name: variance() and covariance()
    # look it up as a global when they are called without a dataset.
    df = pd.read_excel('dataset2.xls', sheet_name="forestfires").to_numpy()

    # Column means plus the offsets between each mean and a random reference.
    col_means, offsets = calucateMean(df)

    # Invert the covariance matrix of the columns.
    inv_cov = np.linalg.inv(matrixCov(df, col_means))

    # Combine offsets and inverse covariance into one scalar and print its
    # square root (presumably the Mahalanobis distance, given the file name).
    weighted = multiply(offsets, inv_cov)
    scalar = multiplyTwoList(weighted, offsets)
    print(scalar ** 0.5)