54 lines
1.5 KiB
Python
54 lines
1.5 KiB
Python
import pandas as pd
|
|
import xlsxwriter
|
|
from random import uniform
|
|
import math
|
|
import numpy as np
|
|
|
|
|
|
def calucateMean(df) -> tuple:
    """Compute truncated column means and their offsets from a random draw.

    For every column of ``df`` the minimum, maximum and mean are computed,
    one value is drawn uniformly between that column's minimum and maximum,
    and the difference ``mean - draw`` is recorded.

    Args:
        df: Two-dimensional array-like that exposes ``shape`` and yields
            indexable rows when iterated (the script passes the NumPy array
            returned by ``DataFrame.to_numpy()``).

    Returns:
        A tuple ``(mean, vm)`` of two lists with one entry per column.
        ``mean`` holds each column mean truncated (not rounded) to four
        decimal places; ``vm`` holds that truncated mean minus the random
        draw for the column.

    Raises:
        ValueError: If ``df`` has columns but no rows.
    """
    row_count = len(df)
    column_count = df.shape[1]
    if column_count and not row_count:
        raise ValueError("calucateMean requires at least one row")

    mean = []
    vm = []
    for col in range(column_count):
        values = [row[col] for row in df]

        # Take the extremes from the data itself: seeding the running maximum
        # with 0 would report a maximum of 0 for an all-negative column and
        # widen the sampling interval beyond the observed values.
        lowest = min(values)
        highest = max(values)

        # int() truncates toward zero, keeping four decimal places.
        truncated_mean = int((sum(values) / row_count) * 10000) / 10000
        mean.append(truncated_mean)

        # One draw per column, in column order.
        vm.append(truncated_mean - uniform(lowest, highest))

    return (mean, vm)
|
|
|
|
|
|
def matrixToxls(matrix, filename):
    """Write ``matrix`` into the first worksheet of a new workbook.

    Each element of ``matrix`` becomes one worksheet column: element ``i`` is
    written to column ``i``, starting at the first row.

    Args:
        matrix: Iterable of sequences, one sequence per worksheet column.
        filename: Path handed to ``xlsxwriter.Workbook``.

    NOTE(review): XlsxWriter produces the .xlsx format, while the script in
    this file passes a name ending in ".xls" -- confirm the intended extension.
    """
    # The context manager closes (and thereby saves) the workbook even when a
    # write raises, so the file handle is never left dangling.
    with xlsxwriter.Workbook(filename) as workbook:
        worksheet = workbook.add_worksheet()
        for col, data in enumerate(matrix):
            worksheet.write_column(0, col, data)
|
|
|
|
|
|
def entropy(col, data=None):
    """Return the per-row entropy-style terms for column ``col``.

    Each row's absolute value is expressed as a percentage of the column sum,
    ``share = |x| / total * 100``, and contributes ``-share * log2(share)``.
    A zero cell contributes ``0.0`` (the usual ``0 * log 0 = 0`` convention)
    instead of raising a math domain error.

    Args:
        col: Index of the column to process.
        data: Optional sequence of indexable rows. When omitted, the
            module-level ``df`` is used.

    Returns:
        A list with one value per row, in row order.

    Raises:
        ZeroDivisionError: If the column sum is zero while a cell is non-zero.

    NOTE(review): the share is scaled by 100 and the denominator is the raw
    (signed) column sum rather than the sum of absolute values -- confirm both
    are intended.
    """
    rows = df if data is None else data
    total = sum([row[col] for row in rows])
    print(total)

    terms = []
    for row in rows:
        magnitude = math.fabs(row[col])
        if magnitude == 0:
            # log(0) is undefined; the limit of p * log(p) as p -> 0 is 0.
            terms.append(0.0)
            continue
        share = magnitude / total * 100
        terms.append(-share * math.log(share, 2))
    return terms
|
|
|
|
|
|
if __name__ == "__main__":
    # Load the "forestfires" sheet as a NumPy array; entropy() reads this
    # module-level ``df`` when it is called with only a column index.
    df = pd.read_excel('dataset2.xls', sheet_name="forestfires").to_numpy()

    mean, vm = calucateMean(df)

    # One list of entropy terms per column, written out as worksheet columns.
    entropyMatrix = [entropy(col) for col in range(df.shape[1])]
    matrixToxls(entropyMatrix, "entropiesMatrix.xls")
|