import pandas as pd
# Series: a one-dimensional labelled array.
pd1 = pd.Series(data=[1, 2, 3])
print(pd1)
series_total = pd1.sum()  # sum of all elements
print(series_total)
series_spread = pd1.std()  # standard deviation (pandas defaults to the sample form, ddof=1)
print(series_spread)
# Without an explicit index the labels default to 0, 1, 2;
# here custom labels are supplied instead.
p1 = pd.Series(data=[1, 2, 3], index=list("abc"))
print(p1)
# DataFrame: a two-dimensional labelled table, built here from a dict of columns.
p1 = pd.DataFrame(
    data={
        "name": ["Jack", "Lucy", "Coke"],
        "age": [18, 19, 21],
    }
)
print(p1)
# Expected output (pandas >= 0.23 on Python >= 3.6 keeps the dict's insertion
# order; older versions sorted the columns alphabetically, i.e. "age name"):
#    name  age
# 0  Jack   18
# 1  Lucy   19
# 2  Coke   21

# Select a single column; each selection is a Series.
name_column = p1["name"]
age_column = p1["age"]
print(name_column)
# 0    Jack
# 1    Lucy
# 2    Coke
# Name: name, dtype: object   (the dtype label for strings may differ on newer pandas)
print(age_column)
# 0    18
# 1    19
# 2    21
# Name: age, dtype: int64
# Pick one cell: column first, then the row labelled 0.
print(name_column[0])

# Select a single row by its integer position.
first_row = p1.iloc[0]
print(first_row)
# --apply: wrap the per-element logic in a function
def func(value):
    """Return ``value`` multiplied by three.

    Written as a plain one-argument callable so it can be handed to
    ``Series.apply``, which calls it once per element.

    Args:
        value: Any object that supports ``* 3`` (numbers are scaled,
            sequences such as strings are repeated).

    Returns:
        The result of ``value * 3``.
    """
    return value * 3
# Run func on every element of the Series and print the resulting Series.
pd1 = pd.Series(data=[1, 2, 5])
applied = pd1.apply(func)
print(applied)
# Aggregate across rows or down columns.
pd2 = pd.DataFrame(
    data={
        "weight": [120, 130, 150],
        "age": [18, 19, 21],
    }
)
# axis=1 ('columns'): add the values within each row -> one total per row.
row_totals = pd2.sum(axis=1)
print(row_totals)
# axis=0 ('index'): add the values within each column -> one total per column.
column_totals = pd2.sum(axis=0)
print(column_totals)
# Grouping
pd2 = pd.DataFrame(
    data={
        "name": ["Jack", "Lucy", "Coke", "Pol", "Tude"],
        "age": [18, 19, 21, 21, 19],
    }
)
# Group the rows by age; .groups maps each distinct age to the row labels holding it.
by_age = pd2.groupby(by="age")
print(by_age.groups)
# Vectorised arithmetic: values are paired up by index label, not by position.
pd1 = pd.Series(data=[1, 2, 3], index=list("abc"))
pd2 = pd.Series(data=[1, 2, 3], index=list("acd"))
# Plain "+" yields NaN for labels present in only one operand ('b' and 'd').
aligned_sum = pd1 + pd2
print(aligned_sum)
# fill_value=0 substitutes 0 for the missing side, so every label gets a number.
filled_sum = pd1.add(pd2, fill_value=0)
print(filled_sum)