Python 练习代码:
```
# -*- coding: utf-8 -*-
import sys
from collections import Counter
from functools import reduce

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas import DataFrame as df
from scipy import stats
def load_data_from_csv(csv_file_path):
    """Read a CSV file into a pandas DataFrame.

    Args:
        csv_file_path: Path of the CSV file, or any other source accepted
            by ``pandas.read_csv`` (for example an open text buffer).

    Returns:
        The ``DataFrame`` produced by ``pandas.read_csv`` with its default
        options.
    """
    return pd.read_csv(csv_file_path)
# 算术平均数
def cal_arithmatic_mean(series):
    """Return the arithmetic mean of ``series``.

    Args:
        series: Object exposing a ``mean()`` method; the script passes a
            pandas Series.

    Returns:
        Whatever ``series.mean()`` returns.
    """
    return series.mean()
# 几何平均数
def get_geometric_mean(series):
    """Return the geometric mean of ``series``.

    The result is computed as ``exp(mean(log(x)))`` instead of taking the
    n-th root of the running product, so long inputs cannot overflow (or
    underflow) the intermediate product.

    Args:
        series: One-dimensional collection of numbers.

    Returns:
        The geometric mean as a ``float``. The result is ``0.0`` when the
        input contains a zero and no negative value, and ``nan`` when it
        contains any negative value.

    Raises:
        ValueError: If ``series`` is empty.
    """
    values = np.asarray(series, dtype=float)
    if values.size == 0:
        raise ValueError("geometric mean requires at least one value")
    # log(0) -> -inf and log(negative) -> nan are intended results here, so
    # silence the corresponding floating-point warnings.
    with np.errstate(divide="ignore", invalid="ignore"):
        return float(np.exp(np.mean(np.log(values))))
# 加权平均数
def get_weighted_average(series, weights):
    """Return the weighted average of ``series``.

    Args:
        series: Values to average.
        weights: Weight of each value; passed to ``numpy.average`` as its
            ``weights`` argument.

    Returns:
        The result of ``numpy.average(series, weights=weights)``.
    """
    return np.average(series, weights=weights)
# 中位数
def get_median(series):
    """Return the median of ``series`` as computed by ``numpy.median``.

    Args:
        series: Collection of numbers.

    Returns:
        The median value.
    """
    return np.median(series)
# 分位数
def get_percentile(series, percentiles):
    """Return the requested percentiles of ``series``.

    Args:
        series: Collection of numbers.
        percentiles: A percentile or sequence of percentiles; forwarded
            unchanged to ``numpy.percentile``.

    Returns:
        The result of ``numpy.percentile(series, percentiles)``.
    """
    return np.percentile(series, percentiles)
# 众数
def get_mode(series):
    """Return the most frequent value in ``series``.

    When several values share the highest frequency, the one encountered
    first while iterating ``series`` is returned.

    Args:
        series: Iterable of hashable values.

    Returns:
        The value with the highest occurrence count.

    Raises:
        ValueError: If ``series`` is empty.
    """
    counts = Counter(series)
    if not counts:
        raise ValueError("mode requires at least one value")
    value, _ = counts.most_common(1)[0]
    return value
# 极差
def get_range(series):
    """Return the range of ``series``: its maximum minus its minimum.

    Args:
        series: Non-empty iterable of mutually comparable numbers.

    Returns:
        ``max(series) - min(series)``.
    """
    highest = max(series)
    lowest = min(series)
    return highest - lowest
# 中程数
def get_midrange(series):
    """Return the mid-range of ``series``: the mean of its extremes.

    Args:
        series: Non-empty iterable of mutually comparable numbers.

    Returns:
        ``numpy.mean`` of the maximum and the minimum of ``series``.
    """
    extremes = [max(series), min(series)]
    return np.mean(extremes)
# 方差
def get_variance(series, ddof):
    """Return the variance of ``series``.

    Args:
        series: Collection of numbers.
        ddof: Delta degrees of freedom passed to ``numpy.var``:
            0 gives the population variance, 1 the sample variance.

    Returns:
        The result of ``numpy.var(series, ddof=ddof)``.
    """
    return np.var(series, ddof=ddof)
# 标准差
def get_std_deviation(series, ddof):
    """Return the standard deviation of ``series``.

    Args:
        series: Collection of numbers.
        ddof: Delta degrees of freedom passed to ``numpy.std``:
            0 gives the population standard deviation, 1 the sample
            standard deviation.

    Returns:
        The result of ``numpy.std(series, ddof=ddof)``.
    """
    return np.std(series, ddof=ddof)
# 平均差
def get_avg_deviation(series):
    """Return the mean absolute deviation of ``series`` around its mean.

    Args:
        series: One-dimensional collection of numbers.

    Returns:
        The average of ``|x - mean|`` over all values, as a ``float``.

    Raises:
        ValueError: If ``series`` is empty.
    """
    values = np.asarray(series, dtype=float)
    if values.size == 0:
        raise ValueError("average deviation requires at least one value")
    return float(np.mean(np.abs(values - values.mean())))
# 四分位差
def get_4percentile_deviation(series):
    """Return the interquartile range of ``series``.

    Args:
        series: Collection of numbers.

    Returns:
        The 75th percentile minus the 25th percentile, both obtained from
        ``get_percentile``.
    """
    lower_quartile, upper_quartile = get_percentile(series, [25, 75])
    return upper_quartile - lower_quartile
# 异众比率
def get_variation_ratio(series):
    """Return the variation ratio of ``series``.

    The variation ratio is the proportion of observations that do not
    belong to the most frequent value.

    Args:
        series: Iterable of hashable values.

    Returns:
        ``(total - mode_count) / total`` as a ``float``.

    Raises:
        ValueError: If ``series`` is empty.
    """
    counts = Counter(series)
    if not counts:
        raise ValueError("variation ratio requires at least one value")
    total = sum(counts.values())
    _, mode_count = counts.most_common(1)[0]
    # float() keeps the division fractional regardless of integer-division
    # semantics, as the original "* 1.0" did.
    return float(total - mode_count) / total
# 离散系数
def get_variation_coefficient(series):
    """Return the coefficient of variation of ``series``.

    Args:
        series: Collection of numbers.

    Returns:
        The sample standard deviation (``ddof=1``, via
        ``get_std_deviation``) divided by ``numpy.mean(series)``.
    """
    sample_std = get_std_deviation(series, 1)
    return sample_std / np.mean(series)
# 偏态系数
def get_skew(series):
    """Return the skewness of ``series``.

    Args:
        series: Object exposing a ``skew()`` method; the script passes a
            pandas Series, whose ``skew()`` is documented by pandas as the
            unbiased skew normalized by N-1.

    Returns:
        Whatever ``series.skew()`` returns.
    """
    return series.skew()
# 峰度系数
def get_kurt(series):
    """Return the kurtosis of ``series``.

    Args:
        series: Object exposing a ``kurt()`` method; the script passes a
            pandas Series, whose ``kurt()`` is documented by pandas as the
            unbiased kurtosis under Fisher's definition (a normal
            distribution scores 0.0).

    Returns:
        Whatever ``series.kurt()`` returns.
    """
    return series.kurt()
# Script body: load column 'num6' from the CSV named on the command line and
# print its descriptive statistics.
if len(sys.argv) < 2:
    sys.exit('usage: python %s <csv_file_path>' % sys.argv[0])
csv_file_path = sys.argv[1]
data = load_data_from_csv(csv_file_path)
series = data['num6']
# Random demonstration weights. They start at 1 so the weights can never all
# be zero, which would make the weighted average undefined.
weights = np.random.randint(1, 10, size=len(series))

# Central tendency.
print('******数据的集中趋势******')
arithmatic_mean = cal_arithmatic_mean(series)
print('算术平均数: %f' % arithmatic_mean)
geometric_mean = get_geometric_mean(series)
print('几何平均数: %f' % geometric_mean)
weighted_average = get_weighted_average(series, weights)
print('加权平均数: %f' % weighted_average)
median = get_median(series)
print('中位数: %f' % median)
percentiles = [25, 50, 75]
percentiles_nums = get_percentile(series, percentiles)
print('四分位数: %s' % percentiles_nums)
mode = get_mode(series)
print('众数: %f' % mode)
# Named value_range so the builtin range() is not shadowed.
value_range = get_range(series)
print('极差: %f' % value_range)
midrange = get_midrange(series)
print('中程数: %f' % midrange)

# Dispersion.
print('******数据的离中趋势******')
variance_total = get_variance(series, 0)
variance_sample = get_variance(series, 1)
print('总体方差: %f,样本方差: %f' % (variance_total, variance_sample))
std_deviation_total = get_std_deviation(series, 0)
std_deviation_sample = get_std_deviation(series, 1)
print('总体标准差: %f, 样本标准差: %f' % (std_deviation_total, std_deviation_sample))
avg_deviation = get_avg_deviation(series)
# This value is the average (mean absolute) deviation, so it is labelled as
# such rather than as the standard deviation.
print('平均差: %f' % avg_deviation)
percentile4_deviation = get_4percentile_deviation(series)
print('四分位差: %f' % percentile4_deviation)
variation_ratio = get_variation_ratio(series)
print('异众比率: %f' % variation_ratio)

# Relative dispersion.
print('******数据的相对离散程度******')
variation_coefficient = get_variation_coefficient(series)
print('离散系数: %f' % variation_coefficient)

# Shape of the distribution.
print('******数据的分布形状******')
skew = get_skew(series)
print('偏态系数: %f' % skew)
kurt = get_kurt(series)
print('峰度系数: %f' % kurt)
```