In [46]:
import numpy as np
import pandas as pd

# Quick reference: NumPy functions and the Python operators they implement.
# pandas objects expose matching methods for the arithmetic ones,
# e.g. df.add() / df.subtract().
#
#   np.add()           addition                         +
#   np.subtract()      subtraction                      -
#   np.negative()      unary negation                   -
#   np.multiply()      multiplication                   *
#   np.divide()        division                         /
#   np.floor_divide()  floor division                   //
#   np.power()         exponentiation                   **
#   np.mod()           remainder                        %
#   np.abs()           absolute value
#   np.sin(), np.cos(), np.tan()   sine, cosine, tangent
#   np.exp(x)          e raised to the power x
#   np.exp2(x)         2 raised to the power x
#   np.log(x)          natural (base-e) logarithm of x
#   np.log2(x)         base-2 logarithm of x
#
# Aggregations (axis=0 works down each column, axis=1 across each row):
#   np.sum(), np.min(), np.max()   sum, minimum, maximum
#   np.prod()          product of the elements
#   np.mean()          mean of the elements
#   np.std()           standard deviation of the elements
#   np.var()           variance of the elements
#   np.argmin()        index of the minimum value
#   np.median()        median of the elements
#   np.any()           whether at least one element is true
#   np.all()           whether every element is true
Out[46]:
In [2]:
# A Series is a one-dimensional array with an index attached.
data = pd.Series([0.25, 0.5, 0.75, 1.0])
# Bare last expression so the notebook displays the Series.
data
Out[2]:
In [3]:
# Both parts of a Series are accessible: its labels via .index and its data via .values.
data.index
Out[3]:
In [4]:
# The data stored in the Series, without the index.
data.values
Out[4]:
In [5]:
# A Series can be indexed (and sliced) with []; this selects the entry at index 0.
# NOTE(review): assumes `data` still has its default integer index at this point — confirm.
data[0]
Out[5]:
In [6]:
# The index can be replaced with custom labels; this mutates `data` in place.
data.index = ["a", "b", "c", "d"]
# Bare last expression so the notebook displays the relabelled Series.
data
Out[6]:
In [7]:
# State -> population (U.S. Census Bureau 2013 estimates).
# The previous values were an exact copy of the area figures used for the
# area Series, which made the "population" column meaningless.
# NOTE(review): "tllinois" looks like a misspelling of "illinois"; the key is
# kept as-is because the area dictionary uses the same spelling and the two
# Series are aligned on these labels.
population_dic = {
    "california": 38332521,
    "texas": 26448193,
    "new york": 19651127,
    "florida": 19552860,
    "tllinois": 12882135,
}
# A Series built from a dict uses the keys as its index.
population = pd.Series(population_dic)
population
Out[7]:
In [8]:
# State -> area.
# NOTE(review): "tllinois" looks like a misspelling of "illinois"; kept because
# the population dictionary uses the same key and the Series align on it.
area_dict = {
    "california": 423967,
    "texas": 695663,
    "new york": 141297,
    "florida": 170312,
    "tllinois": 149995,
}
# NOTE(review): `aeea` is presumably a typo for `area`; the name is kept
# because other cells refer to it.
aeea = pd.Series(area_dict)
aeea
Out[8]:
In [9]:
# A DataFrame behaves like a two-dimensional dict: each column name maps to
# one column of data, so it can be built from a dict of Series.
states = pd.DataFrame({"population": population, "aeea": aeea})
# Bare last expression so the notebook displays the table.
states
Out[9]:
In [10]:
# Row labels of the DataFrame.
states.index
Out[10]:
In [11]:
# Column labels of the DataFrame.
states.columns
Out[11]:
In [12]:
# Dictionary-style access returns the named column.
# NOTE(review): "aeea" looks like a typo for "area", but it matches the column
# name used when `states` was built.
states["aeea"]
Out[12]:
In [13]:
# Build a single-column DataFrame from the population Series.
pd.DataFrame(population,columns=["population"])
Out[13]:
In [14]:
# Build a DataFrame from a list of dicts: each dict is one row and the keys
# become the column names.
data = [{"a": i, "b": i * 2} for i in range(5)]
pd.DataFrame(data)
Out[14]:
In [15]:
# Build a DataFrame from a 2-D NumPy array with explicit row and column labels.
# A seeded generator is used so the displayed values are the same on every run.
pd.DataFrame(
    np.random.RandomState(42).rand(3, 2),
    index=["a", "b", "c"],
    columns=["foo", "bar"],
)
Out[15]:
In [16]:
# A Series is a one-dimensional array with an index; here the labels are given explicitly.
data = pd.Series([0.25, 0.5, 0.75, 1.0], index=["a", "b", "c", "d"])
# Bare last expression so the notebook displays the Series.
data
Out[16]:
In [17]:
# Assigning to a label that does not exist yet appends a new entry (in place).
data["e"] = 1.25
# Bare last expression so the notebook displays the extended Series.
data
Out[17]:
In [18]:
# Slicing examples.
# Slice by explicit index labels; with labels the end point ("c") is included.
data["a":"c"]
Out[18]:
In [19]:
# Slice by implicit integer position; the stop position is excluded.
data[0:2]
Out[19]:
In [20]:
# Boolean masking: keep only the entries strictly between 0.3 and 0.8.
data[data.gt(0.3) & data.lt(0.8)]
Out[20]:
In [21]:
# A Series whose explicit integer labels (1, 3, 5) differ from the implicit
# positions (0, 1, 2); used to contrast .loc and .iloc.
data = pd.Series(["a", "b", "c"], index=[1, 3, 5])
data
Out[21]:
In [22]:
# .loc looks up by the explicit (visible) index label.
data.loc[1]
Out[22]:
In [23]:
# Label-based slice with .loc; both end labels are included.
data.loc[1:3]
Out[23]:
In [24]:
# .iloc looks up by the implicit (hidden) integer position.
data.iloc[1]
Out[24]:
In [25]:
# Position-based slice with .iloc; the stop position is excluded.
data.iloc[1:3]
Out[25]:
In [26]:
# Data selection on a DataFrame: build a small table of state areas and populations.
area = pd.Series({
    "california": 423967,
    "texas": 695663,
    "new york": 141297,
    "florida": 170312,
    "illinois": 149995,
})
pop = pd.Series({
    "california": 33323967,
    "texas": 33395663,
    "new york": 11111297,
    "florida": 22220312,
    "illinois": 22229995,
})
# Both Series share the same labels, so they line up row by row.
data = pd.DataFrame({"area": area, "pop": pop})
data
Out[26]:
In [27]:
# Dictionary-style access to the "area" column.
data["area"]
Out[27]:
In [28]:
# Attribute-style access to the same column. This only works when the column
# name is a valid identifier that does not clash with a DataFrame attribute.
data.area
Out[28]:
In [29]:
# Add a derived column: density = pop / area (assigned in place).
data["density"] = data["pop"] / data["area"]
# Bare last expression so the notebook displays the updated table.
data
Out[29]:
In [30]:
# View the underlying data of the DataFrame, without row or column labels.
data.values
Out[30]:
In [31]:
# Transpose: swap rows and columns.
data.T
Out[31]:
In [32]:
# Combine a row mask (density above 100) with a column selection in one .loc call.
data.loc[data["density"] > 100, ["pop", "density"]]
Out[32]:
In [33]:
# Modify a single value in place by position: row 0, column 2.
# NOTE(review): column 2 is presumably "density" (third column after "area"
# and "pop") — confirm against the frame as built.
data.iloc[0, 2] = 90
# Bare last expression so the notebook displays the modified table.
data
Out[33]:
In [34]:
# Mask filtering: keep only the rows whose density exceeds 100.
data[data["density"] > 100]
Out[34]:
In [35]:
# Numerical operations on pandas objects.
# These repeat the imports of the notebook's first cell so that this section
# can be run on its own.
import numpy as np
import pandas as pd
In [36]:
# Seeded generator so the random integers are the same on every run.
rng = np.random.RandomState(42)
# Four random integers drawn from [0, 10).
ser = pd.Series(rng.randint(0, 10, 4))
ser
Out[36]:
In [38]:
# 3x4 table of random integers in [0, 10), drawn from the generator `rng`
# created in an earlier cell.
df = pd.DataFrame(rng.randint(0, 10, (3, 4)), columns=["A", "B", "C", "D"])
df
Out[38]:
In [39]:
# Apply a NumPy function element-wise to a Series: e raised to each value.
np.exp(ser)
Out[39]:
In [40]:
# Element-wise on a DataFrame: sine of each value multiplied by pi / 4.
np.sin(df*np.pi/4)
Out[40]:
In [45]:
# Floor-divide every element of the Series by 2.
ser//2
Out[45]: