corr
parent 1c8a83fdad
commit 6196f7e42e
@@ -8,7 +8,7 @@ __pycache__/
*.py[cod]
.history
*$py.class
eculidean.*
*.xls

# C extensions
*.so
@@ -0,0 +1,89 @@
import math
from random import uniform

import pandas as pd
import xlsxwriter


def calucateMean(df) -> tuple:
    # Column-wise means (truncated to 4 decimal places), plus vm, the offset of
    # each mean from a random value drawn uniformly from that column's range.
    mean = []
    maximum = []
    minimum = []
    for col in range(df.shape[1]):  # columns 0 .. df.shape[1]-1
        mean.append(0)
        maximum.append(-math.inf)  # -inf so columns with negative values are handled
        minimum.append(math.inf)
        for row in df:
            if row[col] > maximum[col]:
                maximum[col] = row[col]
            if row[col] < minimum[col]:
                minimum[col] = row[col]
            mean[col] += row[col]
        # truncate to 4 decimal places
        mean[col] = int((mean[col] / len(df)) * 10000) / 10000
    v = [uniform(minimum[i], maximum[i]) for i in range(df.shape[1])]
    vm = [mean[j] - v[j] for j in range(df.shape[1])]
    return (mean, vm)


def matrixToxls(matrix, filename):
    # Write each inner list of `matrix` as one worksheet column.
    workbook = xlsxwriter.Workbook(filename)
    worksheet = workbook.add_worksheet()
    row = 0
    for col, data in enumerate(matrix):
        worksheet.write_column(row, col, data)
    workbook.close()


def variance(col) -> float:
    # Sum of squared deviations of one column, truncated to 4 decimal places.
    # Uses the module-level matrixSubs built in __main__.
    return int(sum(cell ** 2 for cell in matrixSubs[col]) * 10000) / 10000


def covariance(col1, col2):
    # Sum of products of deviations of two columns, truncated to 4 decimal places.
    return int(sum(p * q for p, q in zip(matrixSubs[col1], matrixSubs[col2])) * 10000) / 10000


def matrixCov():
    # Covariance-style matrix: variance on the diagonal, covariance elsewhere.
    matrix = []
    for col1 in range(len(matrixSubs)):
        tmp = []
        for col2 in range(len(matrixSubs)):
            if col1 == col2:
                tmp.append(variance(col1))
            else:
                tmp.append(covariance(col1, col2))
        matrix.append(tmp)
    return matrix


def corrMatrix(matrix):
    # Normalise the covariance matrix into a correlation matrix.
    matrixCorr = []
    for col1 in range(len(matrix)):
        tmp = []
        for col2 in range(len(matrix)):
            if col1 == col2:
                tmp.append(1)
            else:
                tmp.append(matrix[col1][col2] /
                           (matrix[col1][col1] * matrix[col2][col2]) ** 0.5)
        matrixCorr.append(tmp)
    return matrixCorr


if __name__ == "__main__":
    df = pd.read_excel('dataset2.xls', sheet_name="forestfires").to_numpy()
    (mean, vm) = calucateMean(df)
    # matrixSubs[col] holds every value of that column minus the column mean.
    matrixSubs = []
    for col in range(df.shape[1]):
        tmp = []
        for row in df:
            tmp.append(row[col] - mean[col])
        matrixSubs.append(tmp)
    matrix = matrixCov()
    CorrelationMatrix = corrMatrix(matrix)
    matrixToxls(CorrelationMatrix, "CorrelationMatrix.xls")
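As a sanity check on the hand-rolled covariance/correlation code above, the result can be compared against numpy. A minimal sketch, assuming the "forestfires" sheet holds only numeric columns; agreement is only approximate because the script truncates to 4 decimal places:

import numpy as np
import pandas as pd

# Same load as in the script; assumes every column is numeric.
df = pd.read_excel('dataset2.xls', sheet_name="forestfires").to_numpy()

# np.corrcoef treats each row as a variable, so transpose to correlate columns.
reference = np.corrcoef(df.T.astype(float))

# `reference` should match corrMatrix(matrixCov()): the 1/n factor missing from
# variance/covariance above cancels when the ratio is taken.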
@@ -0,0 +1,53 @@
import math
from random import uniform

import pandas as pd
import xlsxwriter


def calucateMean(df) -> tuple:
    # Same helper as in the correlation script: column means (truncated to
    # 4 decimal places) and vm, the offset from a random in-range value.
    mean = []
    maximum = []
    minimum = []
    for col in range(df.shape[1]):
        mean.append(0)
        maximum.append(-math.inf)
        minimum.append(math.inf)
        for row in df:
            if row[col] > maximum[col]:
                maximum[col] = row[col]
            if row[col] < minimum[col]:
                minimum[col] = row[col]
            mean[col] += row[col]
        mean[col] = int((mean[col] / len(df)) * 10000) / 10000
    v = [uniform(minimum[i], maximum[i]) for i in range(df.shape[1])]
    vm = [mean[j] - v[j] for j in range(df.shape[1])]
    return (mean, vm)


def matrixToxls(matrix, filename):
    # Write each inner list of `matrix` as one worksheet column.
    workbook = xlsxwriter.Workbook(filename)
    worksheet = workbook.add_worksheet()
    row = 0
    for col, data in enumerate(matrix):
        worksheet.write_column(row, col, data)
    workbook.close()


def entropy(col):
    # Per-row entropy terms for one column, with p = |value| / total * 100.
    # Note: a zero cell makes math.log raise a ValueError.
    total = sum(row[col] for row in df)
    print(total)
    return [-(math.fabs(row[col]) / total * 100) *
            math.log(math.fabs(row[col]) / total * 100, 2)
            for row in df]


if __name__ == "__main__":
    df = pd.read_excel('dataset2.xls', sheet_name="forestfires").to_numpy()
    (mean, vm) = calucateMean(df)
    entropyMatrix = []
    for col in range(df.shape[1]):
        entropyMatrix.append(entropy(col))
    matrixToxls(entropyMatrix, "entropiesMatrix.xls")
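The entropy() above scales each probability by 100 and breaks on zero cells. For comparison only, a minimal sketch of a conventional per-column Shannon entropy (probabilities normalised to sum to 1, zero cells skipped); column_entropy is a hypothetical helper, not part of this commit:

import math

def column_entropy(values):
    # Conventional Shannon entropy of one column:
    # p_i = |x_i| / sum(|x|), with 0 * log(0) taken as 0 (zero cells skipped).
    total = sum(abs(v) for v in values)
    probs = [abs(v) / total for v in values if v != 0]
    return -sum(p * math.log2(p) for p in probs)

# e.g. entropies = [column_entropy(df[:, col]) for col in range(df.shape[1])]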