Numpy#

Why Numpy Array?#

  • Compared with built-in data structures (e.g., list), a NumPy array is more memory-efficient and faster in computation.

## Basics
import numpy as np
## memory size and data type
x = np.array([1,2,3], dtype=np.float32)
print(x)
print(x.itemsize) # bytes per element (4 for float32)
print(x.nbytes) # total bytes = itemsize * number of elements
[1. 2. 3.]
4
12
print(x.ndim) ## get num of dimensions
print(x.shape) ## shape
print(x.dtype) ## data type
print(x.size) ## num of elements
1
(3,)
float32
3
%%time
## unary function
np.sin(x)
CPU times: user 18 µs, sys: 5 µs, total: 23 µs
Wall time: 26 µs
array([0.84147096, 0.9092974 , 0.14112   ], dtype=float32)
%%time
## math equivalent
## we have to use list comprehension
from math import sin
[sin(i) for i in x]
CPU times: user 57 µs, sys: 4 µs, total: 61 µs
Wall time: 65.1 µs
[0.8414709848078965, 0.9092974268256817, 0.1411200080598672]
## Multidimensional array
## np supports arrays with at most 32 dimensions
x = np.array([range(10),range(10)]) # two identical rows 0..9
print(x)
print(x.shape)
[[0 1 2 3 4 5 6 7 8 9]
 [0 1 2 3 4 5 6 7 8 9]]
(2, 10)

Array Properties#

  • arr.size: Return number of elements in the arr

  • arr.shape: Return dimensions of array (rows, columns)

  • arr.dtype: Return type of elements in arr

  • arr.astype(dtype): Convert arr elements to type dtype

  • arr.tolist(): Convert arr into a list

Subsetting and Slicing#

  • arr[START:END:STEP]: Slicing elements

  • arr[4,4]: Indexing specific element by (row, column)

## Subsetting
print(x[:,0]) # first column
print(x[:,1]) # second column
print(x[0,:]) # first row

## Subsetting sections of array
print(x[:, 1:]) # all rows, columns from the second to the last
print(x[:, ::2]) # all rows, every other column
print(x[:, ::-1]) # all rows, reversed columns
print(x[:, 5:9:2]) # [, StartIndex:EndIndex:StepSize]; EndIndex is not included
print(x[::-1,:]) ## all columns, reversed rows
[0 0]
[1 1]
[0 1 2 3 4 5 6 7 8 9]
[[1 2 3 4 5 6 7 8 9]
 [1 2 3 4 5 6 7 8 9]]
[[0 2 4 6 8]
 [0 2 4 6 8]]
[[9 8 7 6 5 4 3 2 1 0]
 [9 8 7 6 5 4 3 2 1 0]]
[[5 7]
 [5 7]]
[[0 1 2 3 4 5 6 7 8 9]
 [0 1 2 3 4 5 6 7 8 9]]
## Subsetting 3D Array
## Principle: From outside in! (indices go from the outermost axis to the innermost)
x = np.array([[[1,2],[3,4]],[[5,6],[7,8]]])
print(x)
print(x[0,1,1]) # first 2x2 sub-array, second row, second column: should be 4
y = np.copy(x) # work on a copy so that x itself is left unchanged
y[:,1,:]=[99,99] # replace the second row of every 2x2 sub-array
print(y)
[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]
4
[[[ 1  2]
  [99 99]]

 [[ 5  6]
  [99 99]]]
## Pass-by-reference
## `y = x` does not copy the data: both names refer to the same array object
x = np.ones((2,3))
y = x
print(x)
print(y)
x[1,1]=2

## Note that the change made through x is also visible through y
print(x)
print(y)
[[1. 1. 1.]
 [1. 1. 1.]]
[[1. 1. 1.]
 [1. 1. 1.]]
[[1. 1. 1.]
 [1. 2. 1.]]
[[1. 1. 1.]
 [1. 2. 1.]]

Creating Arrays#

  • np.array([1,2,3]): 1-D array

  • np.array([[1,2,3],[4,5,6]]): 2-D array

  • np.zeros((3,4)): 3x4 array with all values 0

  • np.ones((3,4)): 3x4 array with all values 1

  • np.eye(5): 5x5 array of 0 with 1 on diagonal (identity matrix)

  • np.linspace(0, 100, 6): Array of 6 evenly spaced values from 0 to 100 (both ends included)

  • np.arange(0, 10, 3): Array of values from 0 to less than 10 with step 3

  • np.full((2,3), 8): 2x3 array with all values 8

  • np.random.rand(6,7)*100: 6x7 array of random floats between 0-100

  • np.random.randint(5, size=(2,3)): 2x3 array with random ints between 0 and 4 (the upper bound 5 is excluded)

Note

In Python, the indices (esp. the closing indices) are often NOT inclusive.

## Initialize different types of Arrays

print(np.zeros((2,3)))
print(np.ones((2,3)))
print(np.full((2,3),99)) # create an array with self-defined default
x = np.array([[1,2,3],[4,5,6]])
print(x)
print(np.full_like(x,99)) # copy an array with default values

print(np.random.rand(4,3)) # random decimal numbers
print(np.random.randint(-10,10, size=(3,3))) ## random integer values
print(np.identity(5))
x1 = np.array([[1,2,3]])
x2 = np.array([1,2,3])
print(np.repeat(x1, 4, axis=0))
print(np.repeat(x2, 4, axis=0))
print(x1.shape)
print(x2.shape)
[[0. 0. 0.]
 [0. 0. 0.]]
[[1. 1. 1.]
 [1. 1. 1.]]
[[99 99 99]
 [99 99 99]]
[[1 2 3]
 [4 5 6]]
[[99 99 99]
 [99 99 99]]
[[0.06702106 0.11582396 0.86849828]
 [0.66777362 0.46304232 0.72715271]
 [0.36493309 0.97211601 0.11689888]
 [0.93841395 0.75317111 0.673672  ]]
[[  6   4   6]
 [  9  -2  -1]
 [-10   1   5]]
[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]
[[1 2 3]
 [1 2 3]
 [1 2 3]
 [1 2 3]]
[1 1 1 1 2 2 2 2 3 3 3 3]
(1, 3)
(3,)
## Indexing with a list of indices returns a new array, not an alias of x
x = np.ones((2,3))
y = x[:,[0,1,2]]
print(x)
print(y)
x[1,1] = 99
## Note that only x is altered. y still holds the original values!!!
print(x)
print(y)
[[1. 1. 1.]
 [1. 1. 1.]]
[[1. 1. 1.]
 [1. 1. 1.]]
[[ 1.  1.  1.]
 [ 1. 99.  1.]]
[[1. 1. 1.]
 [1. 1. 1.]]
## To explicitly create a copy of an array
x = np.ones((2,3))
y = x.copy()
print(x)
print(y)
x[1,1]=99 # only x changes; y is an independent copy
print(x)
print(y)
[[1. 1. 1.]
 [1. 1. 1.]]
[[1. 1. 1.]
 [1. 1. 1.]]
[[ 1.  1.  1.]
 [ 1. 99.  1.]]
[[1. 1. 1.]
 [1. 1. 1.]]
## Numpy Broadcasting
X, Y = np.meshgrid(np.arange(2), np.arange(2))
print(X)
print(Y)
X + Y
[[0 1]
 [0 1]]
[[0 0]
 [1 1]]
array([[0, 1],
       [1, 2]])
x = np.array([0,1])
y = np.array([0,1])
print(x+y) # same shapes: plain element-wise addition
print(x + y[:,np.newaxis]) # np.newaxis (None) adds a length-1 axis, turning y into a (2,1) column that is broadcast against x
[0 2]
[[0 1]
 [1 2]]

Adding/Removing Elements#

  • np.append(arr, values)

  • np.insert(arr, 2, values): Insert values into arr before index 2

  • np.delete(arr, 3, axis=0): Delete row (axis=0) on index 3 of arr

  • np.delete(arr, 3, axis=1): Delete column (axis=1) on index 3 of arr

  • np.repeat()

np.repeat(3, 4)
np.repeat([2,8],[2,5])
array([2, 2, 8, 8, 8, 8, 8])

Concatenating/Splitting Arrays#

  • np.concatenate((arr1, arr2), axis=0): Row-bind arrays

  • np.concatenate((arr1, arr2), axis=1): Column-bind arrays

  • np.split(arr, 3): Split arr into 3 sub-arrays based on rows

  • np.hsplit(arr, 3): Split arr into 3 equal-sized sub-arrays based on the columns

x = np.random.randint(0,100,size=(3,4))
print(x)
print(np.split(x,3))
print(np.hsplit(x,2))
[[90 72 35 23]
 [13 26 63 79]
 [89 85 73 89]]
[array([[90, 72, 35, 23]]), array([[13, 26, 63, 79]]), array([[89, 85, 73, 89]])]
[array([[90, 72],
       [13, 26],
       [89, 85]]), array([[35, 23],
       [63, 79],
       [73, 89]])]

Masked Array#

## Masked Array
from numpy import ma
x = np.arange(10)
y = ma.masked_array(x , x<5) # mask the entries where x<5; copy=False (the default)
print(y)
print(y.shape)
x[6]=99
print(x)
print(y)
## The above shows that masked_array does not force an implicit copy operation:
## the change made to x is also visible in y
[-- -- -- -- -- 5 6 7 8 9]
(10,)
[ 0  1  2  3  4  5 99  7  8  9]
[-- -- -- -- -- 5 99 7 8 9]

Linear Algebra#

  • np.add(arr, 2)

  • np.subtract(arr, 2)

  • np.multiply(arr, 2)

  • np.divide(arr, 2)

  • np.power(arr, 2)

  • np.array_equal(arr1, arr2)

  • np.sqrt()

  • np.sin()

  • np.log()

  • np.abs()

  • np.ceil(): Round up to the nearest int

  • np.floor()

  • np.round()

## Linear Algebra

## Matrix Multiplication
a = np.ones((2,3))
print(a)
b = np.full((3,2),2)
print(b)
print(np.matmul(a,b))

## Find the determinant
x = np.identity(3)
np.linalg.det(x)
[[1. 1. 1.]
 [1. 1. 1.]]
[[2 2]
 [2 2]
 [2 2]]
[[6. 6.]
 [6. 6.]]
1.0

Statistics#

  • np.mean(arr)

  • arr.sum()

  • arr.max()

  • arr.max(axis=0): Return the max value of each column

  • arr.max(axis=1): Return the max value of each row

  • arr.var():

  • arr.std()

  • np.corrcoef(arr): Returns the correlation coefficient matrix of the array

  • np.where(arr==2): Return the indices of the elements in arr that are equal to 2

  • np.argmin(arr): Return the index of the min value of arr

  • np.argmax(arr): Return the index of the max value of arr

## Statistics
x = np.random.randint(0,100, size=(2,3))
print(x)
print(np.min(x, axis=0)) # min of each column
print(np.min(x, axis=1)) # min of each row
## For a 2D array, axis=0 runs down the rows, so reducing over it gives one value per column
print(np.sum(x, axis=0)) # sum of each column
[[63 89 86]
 [96 63 92]]
[63 63 86]
[63 63]
[159 152 178]
## Reorganizing Arrays
x = np.array([range(4),range(4)])
print(x)
y = x.reshape((4,2)) # same 8 elements, read row by row, laid out as 4 rows x 2 columns
print(y)

# Stacking arrays
x = np.full((3,),3)
y = np.full((3,),6)
print(x)
print(y)
print(np.vstack([x,y])) # stack as rows -> shape (2,3)
print(np.hstack([x,y])) # join end to end -> shape (6,)
[[0 1 2 3]
 [0 1 2 3]]
[[0 1]
 [2 3]
 [0 1]
 [2 3]]
[3 3 3]
[6 6 6]
[[3 3 3]
 [6 6 6]]
[3 3 3 6 6 6]
  • Find which element has a specific value

## Search Elements in array
x = [1,2,3,4,0,1,2,3,4,11] 
x=np.array(x)
np.where(x == 2)
(array([1, 6]),)
  • Identify the first index of the element that is of the specific value

np.min(np.where(x==2))
1
  • Find the index of the MIN/MAX

np.argmin(x)
np.argmax(x)
9

Load from File#

## Read comma-separated numeric data from a text file (put the file path in the first argument)
filedata = np.genfromtxt('', delimiter=',')
## genfromtxt produces floats by default; convert the values to 32-bit integers
filedata = filedata.astype('int32')
print(filedata)

Requirements#

numpy==1.18.1

References#