pandas学习笔记
1.安装
brew install python3
wget -qO- https://bootstrap.pypa.io/get-pip.py | python3
pip3 install pandas
2.学习代码-Series
- code 1
from pandas import Series, DataFrame
import pandas as pd
s = Series([100, 'python', 'soochow', 'qiwsir'])
print(s)
0 100
1 python
2 soochow
3 qiwsir
dtype: object
print(s.values)
[100 'python' 'soochow' 'qiwsir']
print(s.index)
RangeIndex(start=0, stop=4, step=1)
s2 = Series([100, 'python', 'soochow', 'qiwsir'], index=['mark', 'title', 'university', 'name'])
print(s2)
mark 100
title python
university soochow
name qiwsir
dtype: object
print(s2.index)
Index(['mark', 'title', 'university', 'name'], dtype='object')
print(s2['name'])
qiwsir
s2['name'] = 'aoi'
print(s2)
mark 100
title python
university soochow
name aoi
dtype: object
sd = {
'python': 8000,
'c': 8100,
'php': 4000
}
s4 = Series(sd)
print(s4)
c 8100
php 4000
python 8000
dtype: int64
s6 = Series(sd, index=['java', 'python', 'c', 'php'])
print(s6)
java NaN
python 8000.0
c 8100.0
php 4000.0
dtype: float64
alist = ['java', 'perl']
s5 = Series(sd, index=alist)
print(s5)
java NaN
perl NaN
dtype: float64
print(pd.isnull(s6))
java True
python False
c False
php False
dtype: bool
print(pd.notnull(s6))
java False
python True
c True
php True
dtype: bool
print(s6.isnull())
java True
python False
c False
php False
dtype: bool
s6.index = ['p1', 'p2', 'p3', 'p4']
print(s6)
p1 NaN
p2 8000.0
p3 8100.0
p4 4000.0
dtype: float64
s3 = Series([3, 9, 4, 7], index = ['a', 'b', 'c', 'd'])
print(s3)
a 3
b 9
c 4
d 7
dtype: int64
print(s3[s3 > 5])
b 9
d 7
dtype: int64
print(s3 * 5)
a 15
b 45
c 20
d 35
dtype: int64
print (s5 + s6)
java NaN
p1 NaN
p2 NaN
p3 NaN
p4 NaN
perl NaN
dtype: float64
3.学习代码-DataFrame
import pandas as pd
from pandas import Series, DataFrame
data = {
'name': ['yahoo', 'google', 'facebook'],
'marks': [200, 400, 800],
'price': [9, 3, 7]
}
f1 = DataFrame(data)
print (f1)
marks name price
0 200 yahoo 9
1 400 google 3
2 800 facebook 7
f2 = DataFrame(data, columns=['name', 'price', 'marks'])
print (f2)
name price marks
0 yahoo 9 200
1 google 3 400
2 facebook 7 800
f3 = DataFrame(data, columns=['name', 'price', 'marks', 'debt'], index=['a', 'b', 'c'])
print (f3)
name price marks debt
a yahoo 9 200 NaN
b google 3 400 NaN
c facebook 7 800 NaN
new_data = {
'lang': {
'firstline': 'python',
'secondline': 'java'
},
'price': {
'firstline': 8000
}
}
f4 = DataFrame(new_data)
print (f4)
lang price
firstline python 8000.0
secondline java NaN
f4 = DataFrame(new_data, index=['firstline', 'secondline', 'thirdline'])
print (f4)
lang price
firstline python 8000.0
secondline java NaN
thirdline NaN NaN
print (f3.columns)
Index(['name', 'price', 'marks', 'debt'], dtype='object')
print (f3['name'])
a yahoo
b google
c facebook
Name: name, dtype: object
f3['debt'] = 80.1
print (f3)
name price marks debt
a yahoo 9 200 80.1
b google 3 400 80.1
c facebook 7 800 80.1
sdebt = Series([2.2, 3.3], index=['a', 'c'])
f3['debt'] = sdebt
print (f3)
name price marks debt
a yahoo 9 200 2.2
b google 3 400 NaN
c facebook 7 800 3.3
f3['price']['c'] = 300  # chained assignment; prefer f3.loc['c', 'price'] = 300
print (f3)
name price marks debt
a yahoo 9 200 2.2
b google 3 400 NaN
c facebook 300 800 3.3
4.pandas使用
- data (marks.csv)
name,physics,python,math,english
Google,100,100,25,12
Facebook,45,54,44,88
Twitter,54,76,13,91
Yahoo,54,452,26,100
import pandas as pd
# 1
marks = pd.read_csv('marks.csv')
print (marks)
# 2
marks2 = pd.read_table('marks.csv', sep=',')
print (marks2)
name physics python math english
0 Google 100 100 25 12
1 Facebook 45 54 44 88
2 Twitter 54 76 13 91
3 Yahoo 54 452 26 100
print (marks.index)
RangeIndex(start=0, stop=4, step=1)
print (marks.columns)
Index(['name', 'physics', 'python', 'math', 'english'], dtype='object')
print (marks['name'][1])
Facebook
print (marks[:1])
name physics python math english
0 Google 100 100 25 12
print (marks[1:2])
name physics python math english
1 Facebook 45 54 44 88
print (marks['physics'])
0 100
1 45
2 54
3 54
Name: physics, dtype: int64