#import libs
import csv
#don't forget to install matplotlib
import matplotlib.pyplot as plt
#don't forget to install numpy
import numpy as np
import scipy
import math
from scipy import stats
#statistics helpers (moments, ECDF, median, skewness); the function that imports data from csv (importData) is defined further below
def RawMoment(xs, k):
    """Return the k-th raw moment of xs.

    Every element is raised to the power k and the results are averaged
    by dividing their sum by len(xs). With k == 1 this is the arithmetic
    mean. An empty xs raises ZeroDivisionError.
    """
    powered = [value ** k for value in xs]
    return sum(powered) / len(xs)
def find_middle(lst):
    """Return the element of lst at index len(lst) // 2.

    For an even number of elements this is the upper of the two central
    elements. An empty sequence raises IndexError.
    """
    half, _remainder = divmod(len(lst), 2)
    return lst[half]
def CentralMoment(xs, k):
    """Return the k-th central moment of xs.

    The mean (first raw moment) is subtracted from every element, each
    deviation is raised to the power k, and the results are averaged by
    dividing their sum by len(xs). With k == 2 this is the variance
    computed with a divisor of len(xs).
    """
    center = RawMoment(xs, 1)
    deviations = [(value - center) ** k for value in xs]
    return sum(deviations) / len(xs)
def StandardizedMoment(xs, k):
    """Return the k-th standardized moment of xs.

    This is the k-th central moment divided by the standard deviation
    raised to the power k, where the standard deviation is the square
    root of the second central moment.
    """
    sigma = math.sqrt(CentralMoment(xs, 2))
    kth_central = CentralMoment(xs, k)
    return kth_central / sigma ** k
def Skewness(xs):
    """Return the skewness of xs, computed as its third standardized moment."""
    third_standardized = StandardizedMoment(xs, 3)
    return third_standardized
def ecdf(a):
    """Return the empirical cumulative distribution of a.

    Returns a pair (values, fractions): the sorted distinct values found
    in a, and for each of them the cumulative count of observations up to
    and including that value divided by the total count.
    """
    values, multiplicity = np.unique(a, return_counts=True)
    running_total = multiplicity.cumsum()
    sample_size = running_total[-1]
    return values, running_total / sample_size
def Median(xs):
    """Return the median of xs.

    The previous implementation picked the middle entry of the *distinct*
    sorted values, which ignores how often each value occurs (for
    [1, 1, 1, 1, 2, 3] it returned 2 instead of 1). The median is now
    computed over all observations; for an even number of observations it
    is the average of the two central values.

    Args:
        xs: Sequence (or array) of numeric observations.

    Returns:
        The median as a NumPy float.

    Raises:
        ValueError: If xs contains no observations.
    """
    values = np.asarray(xs, dtype=float)
    if values.size == 0:
        # np.median would only warn and return nan; fail loudly instead.
        raise ValueError("Median() requires at least one data point")
    return np.median(values)
def PearsonMedianSkewness(xs):
    """Return Pearson's median skewness coefficient of xs.

    Computed as 3 * (mean - median) / std, where the median comes from
    Median(xs), the mean is the first raw moment and std is the square
    root of the second central moment.
    """
    middle = Median(xs)
    average = RawMoment(xs, 1)
    spread = math.sqrt(CentralMoment(xs, 2))
    return 3 * (average - middle) / spread

def importData(filename, column, *, skip_rows=4):
    """Read one numeric column from a CSV file.

    The first ``skip_rows`` rows are ignored (4 by default, matching the
    previously hard-coded header size). From every remaining non-blank row
    the field at the 1-based position ``column`` is taken, a decimal comma
    is replaced by a decimal point, and the text is parsed as a float.

    Args:
        filename: Path of the CSV file to read.
        column: 1-based index of the column to extract. Note that 0 selects
            the last field of each row because of negative indexing.
        skip_rows: Number of leading rows to skip before reading values.

    Returns:
        A list of floats, one per data row, in file order.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a data row has fewer than ``column`` fields.
        ValueError: If a selected field cannot be parsed as a number.
    """
    field_index = column - 1
    values = []
    # newline='' is what the csv module requires so that it can handle line
    # endings (including newlines embedded in quoted fields) itself.
    with open(filename, 'r', newline='') as file:
        for row_number, row in enumerate(csv.reader(file)):
            # Skip the header rows and completely blank lines, which the
            # csv reader reports as empty lists.
            if row_number < skip_rows or not row:
                continue
            values.append(float(row[field_index].replace(',', '.')))
    return values

def filter_zeros(array):
    """Return a new list holding the elements of array that are not equal to 0.

    The input is not modified and the relative order of the kept elements
    is preserved. Any iterable is accepted, not only indexable sequences.

    Args:
        array: Iterable of values comparable with 0.

    Returns:
        A list of the elements for which ``element != 0`` is true.
    """
    return [value for value in array if value != 0]
# Script section: load one CSV column, drop its zero entries, and compare the
# estimated density of the data with that of a normal sample that has the
# same mean, standard deviation and size.
# Disabled alternative input (column 16 instead of column 9):
#dataArray1=importData('./datapv.csv',16)
# Column 9 of ./datapv.csv with all entries equal to zero removed.
dataArray1=filter_zeros(importData('./datapv.csv',9))
mu, sigma = np.mean(dataArray1), np.std(dataArray1) # mean and standard deviation
# Reference sample drawn from a normal distribution with the data's mean and
# standard deviation; no seed is set here, so the draw is not reproducible.
dataArray2 = np.random.normal(mu, sigma, len(dataArray1))
# x positions covering the observed data range (np.linspace's default count),
# shared by both density curves.
eval_points = np.linspace(np.min(dataArray1), np.max(dataArray1))
# Gaussian kernel density estimate of the measured data.
kde1=scipy.stats.gaussian_kde(dataArray1)
# Report the Pearson median skewness of the measured data on stdout.
print (PearsonMedianSkewness(dataArray1))
y1 = kde1.pdf(eval_points)
# Same kind of estimate for the normal reference sample, evaluated at the
# same x positions so the two curves can be compared directly.
kde2=scipy.stats.gaussian_kde(dataArray2)
y2 = kde2.pdf(eval_points)
# Draw both curves on one set of axes: data first, normal reference second.
plt.plot(eval_points, y1)
plt.plot(eval_points, y2)
plt.grid(True)
plt.show()
# Disabled percentile analysis of column 16; get_percentiles is not defined
# in the visible part of this file.
#dataArray=filter_zeros(importData('./datapv.csv',16))
#get_percentiles(dataArray)