This is an excerpt from the Python Data Science Handbook by Jake VanderPlas; Jupyter notebooks are available on GitHub.

The text is released under the CC-BY-NC-ND license, and code is released under the MIT license. If you find this content useful, please consider supporting the work by buying the book!

Ordenando elementos de un arreglo

In [1]:
import numpy as np

def selection_sort(x):
    """Sort ``x`` in place using selection sort and return it.

    On every pass the smallest element of the still-unsorted tail is found
    with ``np.argmin`` and swapped to the front of that tail.

    Parameters
    ----------
    x : mutable sequence (e.g. 1-D ``ndarray`` or ``list``)
        The values to sort; modified in place.

    Returns
    -------
    The same object ``x``, now in ascending order.
    """
    front = 0
    n = len(x)
    while front < n:
        # argmin is relative to the tail slice, so shift it by `front`
        tail_offset = np.argmin(x[front:])
        smallest = front + tail_offset
        x[front], x[smallest] = x[smallest], x[front]
        front += 1
    return x
In [2]:
# Sort a small sample array with selection_sort; the array is sorted in
# place and the same array is returned, so it is displayed as the cell output.
x = np.array([2, 1, 4, 3, 5])
selection_sort(x)
Out[2]:
array([1, 2, 3, 4, 5])
In [3]:
def bogosort(x):
    """Sort ``x`` in place by shuffling it until it happens to be ordered.

    This is a deliberately terrible algorithm (expected running time grows
    factorially with the length of ``x``); it exists only for illustration.

    Parameters
    ----------
    x : 1-D ``ndarray`` or ``list``
        The values to sort; shuffled in place using NumPy's global random
        state (``np.random.shuffle``).

    Returns
    -------
    The same object ``x``, now in ascending order.
    """
    def is_unsorted(seq):
        # Compare through an array so the adjacent-pair test is elementwise.
        # On a plain list, `seq[:-1] > seq[1:]` is a single lexicographic
        # comparison, which can report "sorted" for data that is not.
        # For ndarray input np.asarray returns the array itself (no copy).
        values = np.asarray(seq)
        return np.any(values[:-1] > values[1:])

    while is_unsorted(x):
        np.random.shuffle(x)
    return x
In [4]:
# Same sample array, this time sorted by repeated random shuffling.
x = np.array([2, 1, 4, 3, 5])
bogosort(x)
Out[4]:
array([1, 2, 3, 4, 5])

Ordenando con NumPy: np.sort y np.argsort

In [5]:
# np.sort returns a sorted copy of the array; x itself is left unchanged.
x = np.array([2, 1, 4, 3, 5])
np.sort(x)
Out[5]:
array([1, 2, 3, 4, 5])
In [6]:
# The ndarray.sort method sorts the array in place and returns None,
# so x is printed afterwards to show the result.
x.sort()
print(x)
[1 2 3 4 5]
In [7]:
# np.argsort returns the indices that would sort x rather than the sorted
# values: the first index points at the smallest element, and so on.
x = np.array([2, 1, 4, 3, 5])
i = np.argsort(x)
print(i)
[1 0 3 2 4]
In [8]:
# Index x with the argsort result (fancy indexing) to build the sorted array.
x[i]
Out[8]:
array([1, 2, 3, 4, 5])

Ordenando sobre renglones o columnas

In [9]:
# Seeded generator so the random values in this notebook are reproducible.
rand = np.random.RandomState(42)
# 4x6 matrix of random integers drawn from [0, 10).
X = rand.randint(0, 10, (4, 6))
print(X)
[[6 3 7 4 6 9]
 [2 6 7 4 3 7]
 [7 2 5 4 1 7]
 [5 1 4 0 9 5]]
In [10]:
# sort each column of X independently (axis=0 orders values down the rows);
# relationships between the values of an original row are not preserved
np.sort(X, axis=0)
Out[10]:
array([[2, 1, 4, 0, 1, 5],
       [5, 2, 5, 4, 3, 7],
       [6, 3, 7, 4, 6, 7],
       [7, 6, 7, 4, 9, 9]])
In [11]:
# sort each row of X independently (axis=1 orders values across the columns);
# relationships between the values of an original column are not preserved
np.sort(X, axis=1)
Out[11]:
array([[3, 4, 6, 6, 7, 9],
       [2, 3, 4, 6, 7, 7],
       [1, 2, 4, 5, 7, 7],
       [0, 1, 4, 5, 5, 9]])

Orden parcial: particiones

In [12]:
# Partial sort: the value that belongs at index 3 is put in its sorted
# position, with the three smallest values to its left and the larger
# values to its right. The order within each side is arbitrary.
x = np.array([7, 2, 3, 1, 6, 5, 4])
np.partition(x, 3)
Out[12]:
array([2, 1, 3, 4, 6, 5, 7])
In [13]:
# Partition every row of X: the first two slots of each row hold that row's
# two smallest values, and the remaining values fill the rest of the row.
np.partition(X, 2, axis=1)
Out[13]:
array([[3, 4, 6, 7, 6, 9],
       [2, 3, 4, 7, 6, 7],
       [1, 2, 4, 5, 7, 7],
       [0, 1, 4, 5, 9, 5]])

Ejemplo: K vecinos más cercanos (K-Nearest Neighbors)

In [14]:
# 10 random points in the plane (one row per point, columns are the two
# coordinates), drawn uniformly from [0, 1) with the seeded generator.
# Note: this rebinds X, which previously held the 4x6 integer matrix.
X = rand.rand(10, 2)
In [15]:
# Render figures inline in the notebook output.
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn; seaborn.set() # Plot styling
# Scatter the points: column 0 is the horizontal coordinate, column 1 the
# vertical one. The trailing semicolon suppresses the text repr of the
# object returned by plt.scatter.
plt.scatter(X[:, 0], X[:, 1], s=100);
In [16]:
# Squared Euclidean distance between every pair of points in one expression:
# broadcast X against itself to get all pairwise coordinate differences,
# square them, and sum over the last (coordinate) axis.
dist_sq = np.sum((X[:, np.newaxis, :] - X[np.newaxis, :, :]) ** 2, axis=-1)
In [17]:
# for each pair of points, compute differences in their coordinates:
# the inserted axes let the (10, 2) array broadcast against itself, giving
# one coordinate-difference vector per (point, point) pair
differences = X[:, np.newaxis, :] - X[np.newaxis, :, :]
differences.shape
Out[17]:
(10, 10, 2)
In [18]:
# square the coordinate differences (elementwise, so the shape is unchanged)
sq_differences = differences ** 2
sq_differences.shape
Out[18]:
(10, 10, 2)
In [19]:
# sum the squared coordinate differences over the last axis to get the
# squared distance between every pair of points
dist_sq = sq_differences.sum(-1)
dist_sq.shape
Out[19]:
(10, 10)
In [20]:
# Sanity check: the diagonal holds each point's squared distance to itself,
# which should be zero everywhere.
dist_sq.diagonal()
Out[20]:
array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])
In [21]:
# Argsort each row of the distance matrix: row i lists the indices of all
# points ordered from nearest to farthest from point i. The first column is
# the point itself, because its distance to itself is zero.
nearest = np.argsort(dist_sq, axis=1)
print(nearest)
[[0 3 9 7 1 4 2 5 6 8]
 [1 4 7 9 3 6 8 5 0 2]
 [2 1 4 6 3 0 8 9 7 5]
 [3 9 7 0 1 4 5 8 6 2]
 [4 1 8 5 6 7 9 3 0 2]
 [5 8 6 4 1 7 9 3 2 0]
 [6 8 5 4 1 7 9 3 2 0]
 [7 9 3 1 4 0 5 8 6 2]
 [8 5 6 4 1 7 9 3 2 0]
 [9 7 3 0 1 4 5 8 6 2]]
In [22]:
# Number of nearest neighbors wanted for each point.
K = 2
# A full sort is not needed: partitioning each row at position K + 1 is
# enough to place the K + 1 smallest distances (the point itself plus its
# K nearest neighbors) in the first K + 1 columns, in no particular order.
nearest_partition = np.argpartition(dist_sq, K + 1, axis=1)
In [23]:
# number of neighbors to connect to each point
K = 2

plt.scatter(X[:, 0], X[:, 1], s=100)

# Connect every point to the entries in the first K + 1 columns of its
# partitioned row: the point itself (a zero-length segment) and its K
# nearest neighbors.
for i in range(len(X)):
    for j in nearest_partition[i, :K + 1]:
        # segment from X[j] to X[i], given as x-coordinates then y-coordinates
        plt.plot([X[j, 0], X[i, 0]], [X[j, 1], X[i, 1]], color='black')