map-apply-applymap
In [1]:
# Notebook setup: imports, warning suppression, pandas display limits and
# matplotlib styling / font configuration.
import warnings
import math

import pandas as pd
import numpy as np
import matplotlib
# Imported explicitly: `matplotlib.font_manager` is used below and should not
# depend on pyplot having imported the submodule as a side effect.
import matplotlib.font_manager

# Silence all warnings for the rest of the session.
warnings.filterwarnings('ignore')

# Show up to 100 rows / 100 columns and up to 500 characters per cell.
pd.options.display.max_rows = 100
pd.options.display.max_columns = 100
pd.options.display.max_colwidth = 500

# `InteractiveShell.magic()` is deprecated; `run_line_magic` is the supported
# way to invoke `%matplotlib inline` programmatically.
get_ipython().run_line_magic('matplotlib', 'inline')
matplotlib.style.use('ggplot')

# Kept after the inline-backend magic, matching the original statement order.
from matplotlib import pyplot as plt

# Use the SimHei font and keep the minus sign rendering as a plain hyphen.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# Font handle loaded from 'simsun.ttc' at size 14.
myfont = matplotlib.font_manager.FontProperties(fname='simsun.ttc', size=14)
# In [11]:
# Build a 100-row demo frame of random integer columns.
# A fixed seed makes the frame identical on every Restart & Run All. The
# original cell drew from the unseeded global NumPy state, so the outputs
# recorded in this notebook come from a different draw.
SEED = 42
N_ROWS = 100
rng = np.random.default_rng(SEED)

# Generator.integers samples from the half-open range [low, high), the same
# convention as np.random.randint.
data = pd.DataFrame({
    'age': rng.integers(15, 100, N_ROWS),
    'height': rng.integers(140, 180, N_ROWS),
    'weight': rng.integers(40, 80, N_ROWS),
    'gender': rng.integers(0, 2, N_ROWS),
    'salary': rng.integers(3000, 30000, N_ROWS),
})
data.head()
# Out[11]:
| | age | height | weight | gender | salary |
---|---|---|---|---|---|
0 | 68 | 140 | 58 | 0 | 15988 |
1 | 22 | 160 | 59 | 0 | 5807 |
2 | 90 | 160 | 60 | 0 | 21741 |
3 | 71 | 178 | 45 | 0 | 14133 |
4 | 61 | 165 | 59 | 1 | 14346 |
# Series.map: translate each value through a lookup table.
# NOTE: the column is overwritten in place, so running this cell a second
# time looks the string labels up in the same dict and produces NaN.
gender_labels = {0: 'man', 1: 'women'}
data['gender'] = data['gender'].map(gender_labels)
data.head()
# Out[12]:
| | age | height | weight | gender | salary |
---|---|---|---|---|---|
0 | 68 | 140 | 58 | man | 15988 |
1 | 22 | 160 | 59 | man | 5807 |
2 | 90 | 160 | 60 | man | 21741 |
3 | 71 | 178 | 45 | man | 14133 |
4 | 61 | 165 | 59 | women | 14346 |
# Series.map also accepts a callable; here the bound `str.format` of a
# template string is applied to every value.
greeting = 'i am a {} '
data['gender'].map(greeting.format).head()
# Out[14]:
0    i am a man
1    i am a man
2    i am a man
3    i am a man
4    i am a women
Name: gender, dtype: object
In [19]:
# Series.apply: run a function on every value of the series.
# NOTE: the result is written back to `salary`, so each execution of this
# cell multiplies the column by 1.3 again. The recorded output below equals
# the raw salary times 1.3 ** 3, i.e. the cell had been run three times.
data['salary'] = data['salary'].apply(lambda pay: pay * 1.3)
data.head()
# Out[19]:
| | age | height | weight | gender | salary |
---|---|---|---|---|---|
0 | 68 | 140 | 58 | man | 35125.636 |
1 | 22 | 160 | 59 | man | 12757.979 |
2 | 90 | 160 | 60 | man | 47764.977 |
3 | 71 | 178 | 45 | man | 31050.201 |
4 | 61 | 165 | 59 | women | 31518.162 |
# Series.apply: extra positional arguments for the function are forwarded
# through `args`.
def fun1(x, bias):
    """Return ``x`` multiplied by ``bias``."""
    scaled = x * bias
    return scaled


# NOTE: overwrites `salary`, so re-running the cell scales it again.
data['salary'] = data['salary'].apply(fun1, args=(1.3,))
data.head()
# Out[33]:
| | age | height | weight | gender | salary |
---|---|---|---|---|---|
0 | 68 | 140 | 58 | man | 45663.3268 |
1 | 22 | 160 | 59 | man | 16585.3727 |
2 | 90 | 160 | 60 | man | 62094.4701 |
3 | 71 | 178 | 45 | man | 40365.2613 |
4 | 61 | 165 | 59 | women | 40973.6106 |
# Series.apply: keyword arguments given to `apply` are forwarded to the
# function on every call.
def fun2(x, **kwargs):
    """Add 100 times each keyword value to ``x`` and return the total.

    The keyword names themselves are not used, only their values.
    """
    # Accumulate one keyword at a time, in the order they were passed.
    for amount in kwargs.values():
        x += amount * 100
    return x


# NOTE: overwrites `salary`, so re-running the cell adds the amounts again.
data['salary'] = data['salary'].apply(fun2, june=30, july=20, august=25)
data.head()
# Out[36]:
| | age | height | weight | gender | salary |
---|---|---|---|---|---|
0 | 68 | 140 | 58 | man | 53238.3268 |
1 | 22 | 160 | 59 | man | 24160.3727 |
2 | 90 | 160 | 60 | man | 69669.4701 |
3 | 71 | 178 | 45 | man | 47940.2613 |
4 | 61 | 165 | 59 | women | 48548.6106 |
# DataFrame.apply: the function is called once per column (default axis).
# np.sqrt works element-wise, so the result keeps the frame's shape.
numeric_cols = ['age', 'height', 'weight']
data[numeric_cols].apply(np.sqrt).head()
# Out[28]:
| | age | height | weight |
---|---|---|---|
0 | 8.246211 | 11.832160 | 7.615773 |
1 | 4.690416 | 12.649111 | 7.681146 |
2 | 9.486833 | 12.649111 | 7.745967 |
3 | 8.426150 | 13.341664 | 6.708204 |
4 | 7.810250 | 12.845233 | 7.681146 |
# DataFrame.apply with a reducing function: np.sum collapses each column to
# a single total, so the result is a Series indexed by column name.
numeric_cols = ['age', 'height', 'weight']
data[numeric_cols].apply(np.sum).head()
# Out[37]:
age        5818
height    16225
weight     6081
dtype: int64
In [29]:
# DataFrame.apply with axis=1: the function receives one row at a time, which
# allows calculations that combine several columns.
def BMI(series):
    """Return weight divided by the square of (height / 100) for one row.

    ``series`` must provide 'height' and 'weight' entries.
    """
    height_scaled = series['height'] / 100.0
    return series['weight'] / height_scaled ** 2


data.apply(BMI, axis=1).head()
# Out[29]:
0    29.591837
1    23.046875
2    23.437500
3    14.202752
4    21.671258
dtype: float64
In [43]:
# Show the untouched numeric columns for comparison with the next cell.
data.loc[:, ['age', 'height', 'weight']].head()
# Out[43]:
| | age | height | weight |
---|---|---|---|
0 | 68 | 140 | 58 |
1 | 22 | 160 | 59 |
2 | 90 | 160 | 60 |
3 | 71 | 178 | 45 |
4 | 61 | 165 | 59 |
# Element-wise DataFrame transform: the function is called on every single
# cell of the selected columns.
numeric_part = data[['age', 'height', 'weight']]

# DataFrame.applymap was deprecated in pandas 2.1 in favour of DataFrame.map.
# Use `map` when this pandas version provides it and fall back to `applymap`
# on older versions, so the cell runs on both.
if hasattr(pd.DataFrame, 'map'):
    elementwise = numeric_part.map
else:
    elementwise = numeric_part.applymap

elementwise(lambda x: x - 2).head()
# Out[44]:
| | age | height | weight |
---|---|---|---|
0 | 66 | 138 | 56 |
1 | 20 | 158 | 57 |
2 | 88 | 158 | 58 |
3 | 69 | 176 | 43 |
4 | 59 | 163 | 57 |

更多精彩