Índices y selección de datos
Selección de datos en Series
¶
Series
como diccionario¶
In [1]:
import pandas as pd
# Four float values, each stored under an explicit single-letter label.
data = pd.Series(
    data=[0.25, 0.5, 0.75, 1.0],
    index=list('abcd'),
)
data
Out[1]:
In [2]:
# dictionary-style lookup: fetch the value stored under label 'b'
data['b']
Out[2]:
In [3]:
# membership test: `in` checks the index labels (the "keys"), not the values
'a' in data
Out[3]:
In [4]:
# like dict.keys(): returns the index of the Series
data.keys()
Out[4]:
In [5]:
# like dict.items(): materialize the (label, value) pairs as a list
list(data.items())
Out[5]:
In [6]:
# assigning to a label that does not exist yet extends the Series in place,
# just as assigning to a new key extends a dictionary
data['e'] = 1.25
data
Out[6]:
Series
como un array unidimensional¶
In [7]:
# slicing by explicit index: with labels, the end label ('c') is INCLUDED
data['a':'c']
Out[7]:
In [8]:
# slicing by implicit integer index: positional, so the end position (2) is
# EXCLUDED, as in ordinary Python slicing
data[0:2]
Out[8]:
In [9]:
# masking: keep only the entries whose value lies strictly between 0.3 and 0.8.
# Each comparison needs its own parentheses because `&` binds more tightly
# than `>` and `<`.
data[(data > 0.3) & (data < 0.8)]
Out[9]:
In [10]:
# fancy indexing: pass a list of labels to pick several entries at once
data[['a', 'e']]
Out[10]:
"Indexadores": loc
, iloc
y ix (ix fue eliminado en pandas 1.0)
¶
In [11]:
# A Series whose explicit labels are themselves integers (1, 3, 5), so that
# label-based and position-based access give different answers.
data = pd.Series(
    data=list('abc'),
    index=[1, 3, 5],
)
data
Out[11]:
In [12]:
# explicit index when indexing: 1 is looked up as a LABEL, not as a position
data[1]
Out[12]:
In [13]:
# implicit index when slicing: 1:3 is read as POSITIONS 1 and 2, even though
# 1 and 3 also happen to be labels of this Series
data[1:3]
Out[13]:
In [14]:
# .loc always uses the explicit index: the entry labelled 1
data.loc[1]
Out[14]:
In [15]:
# .loc slice over labels: from label 1 through label 3, end label included
data.loc[1:3]
Out[15]:
In [16]:
# .iloc always uses the implicit positional index: the second entry
data.iloc[1]
Out[16]:
In [17]:
# .iloc slice over positions: positions 1 and 2, end position excluded
data.iloc[1:3]
Out[17]:
Selección de datos en un DataFrame
¶
DataFrame
como un diccionario¶
In [18]:
# Two Series keyed by state name: one with each state's area, one with its
# population.
area = pd.Series({
    'California': 423967,
    'Texas': 695662,
    'New York': 141297,
    'Florida': 170312,
    'Illinois': 149995,
})
pop = pd.Series({
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
    'Illinois': 12882135,
})
# Combine both Series into one table; each Series becomes a column and the
# rows are aligned on the shared state labels.
data = pd.DataFrame(dict(area=area, pop=pop))
data
Out[18]:
In [19]:
# dictionary-style access: a column is retrieved by its name
data['area']
Out[19]:
In [20]:
# attribute-style access to the same column (works when the column name is a
# valid identifier that does not clash with a DataFrame attribute)
data.area
Out[20]:
In [21]:
# compares attribute-style and key-style access to the 'area' column by identity
# NOTE(review): `is` tests object identity, and whether both accesses return the
# very same object may depend on pandas' internal caching — confirm on the
# pandas version in use
data.area is data['area']
Out[21]:
In [22]:
# pitfall of attribute-style access: `data.pop` resolves to the DataFrame.pop()
# method, not to the 'pop' column, so the two operands are different objects
data.pop is data['pop']
Out[22]:
In [23]:
# dictionary-style assignment adds a new column: element-wise ratio of the
# 'pop' and 'area' columns, stored under the name 'density'
data['density'] = data['pop'] / data['area']
data
Out[23]:
DataFrame
como un array bidimensional¶
In [24]:
# the table's contents as a plain two-dimensional array (labels are dropped)
data.values
Out[24]:
In [25]:
# transpose: swap rows and columns
data.T
Out[25]:
In [26]:
# on the underlying array, a single index selects a ROW (here the first one)
data.values[0]
Out[26]:
In [27]:
# on the DataFrame itself, a single key selects a COLUMN instead
data['area']
Out[27]:
In [28]:
# positional indexing: first three rows, first two columns (ends excluded)
data.iloc[:3, :2]
Out[28]:
In [29]:
# label indexing: rows up to and including 'Illinois', columns up to and
# including 'pop'
data.loc[:'Illinois', :'pop']
Out[29]:
In [30]:
# Hybrid lookup: rows chosen by position, columns chosen by label.
# The `.ix` indexer that used to allow this was deprecated in pandas 0.20 and
# removed in pandas 1.0, so the same selection is written with `.loc`: the
# first three index labels pick the rows, and the label slice picks every
# column up to and including 'pop'.
data.loc[data.index[:3], :'pop']
Out[30]:
In [31]:
# masking and fancy indexing combined in .loc: rows whose density exceeds 100,
# restricted to the 'pop' and 'density' columns
data.loc[data.density > 100, ['pop', 'density']]
Out[31]:
In [32]:
# indexers can also be used to assign: overwrite, in place, the cell at row
# position 0 and column position 2 (the 'density' column added earlier)
data.iloc[0, 2] = 90
data
Out[32]:
Convenciones adicionales sobre el indexado¶
In [33]:
# unlike a single key (which selects a column), a SLICE on the DataFrame
# selects rows; with labels the end label is included
data['Florida':'Illinois']
Out[33]:
In [34]:
# an integer slice also selects rows, by position (end position excluded)
data[1:3]
Out[34]:
In [35]:
# a boolean mask applied directly is likewise interpreted row-wise: keep the
# rows whose density exceeds 100
data[data.density > 100]
Out[35]: