Numpy

Numpy#

Why Numpy Array?#

Compared to built-in data structures (e.g., lists), NumPy arrays are more memory-efficient and faster in computation.

## Basics
import numpy as np
## memory size and data type
x = np.array([1,2,3], dtype=np.float32)
print(x)
print(x.itemsize) # four bytes per item (float32)
print(x.nbytes)

[1. 2. 3.]
4
12

print(x.ndim) ## get num of dimensions
print(x.shape) ## shape
print(x.dtype) ## data type
print(x.size) ## num of elements

1
(3,)
float32
3

%%time
## unary function
np.sin(x)

CPU times: user 18 µs, sys: 5 µs, total: 23 µs
Wall time: 26 µs

array([0.84147096, 0.9092974 , 0.14112   ], dtype=float32)

%%time
## math equivalent
## we have to use list comprehension
from math import sin
[sin(i) for i in x]

CPU times: user 57 µs, sys: 4 µs, total: 61 µs
Wall time: 65.1 µs

[0.8414709848078965, 0.9092974268256817, 0.1411200080598672]

## Multidimensional array
## np supports at maximum 32-dimension array
x = np.array([range(10),range(10)])
print(x)
print(x.shape)

[[0 1 2 3 4 5 6 7 8 9]
 [0 1 2 3 4 5 6 7 8 9]]
(2, 10)

Array Properties#

arr.size: Return number of elements in the arr
arr.shape: Return dimensions of array (rows, columns)
arr.dtype: Return type of elements in arr
arr.astype(dtype): Convert arr elements to type dtype
arr.tolist(): Convert arr into a list

Subsetting and Slicing#

arr[START:END:STEP]: Slicing elements
arr[4,4]: Indexing specific element by (row, column)

## Subsetting
print(x[:,0]) # first column
print(x[:,1]) # second column
print(x[0,:]) # first row

## Subsetting sections of array
print(x[:, 1:]) # all rows, columns from the second (index 1) to the last
print(x[:, ::2]) # all rows, every other column
print(x[:, ::-1]) # all rows, reversed columns
print(x[:, 5:9:2]) # [, StartIndex:EndIndex:StepSize]
print(x[::-1,:]) ## all columns, reversed rows

[0 0]
[1 1]
[0 1 2 3 4 5 6 7 8 9]
[[1 2 3 4 5 6 7 8 9]
 [1 2 3 4 5 6 7 8 9]]
[[0 2 4 6 8]
 [0 2 4 6 8]]
[[9 8 7 6 5 4 3 2 1 0]
 [9 8 7 6 5 4 3 2 1 0]]
[[5 7]
 [5 7]]
[[0 1 2 3 4 5 6 7 8 9]
 [0 1 2 3 4 5 6 7 8 9]]

## Subsetting 3D Array
## Principle: From outside in!
x = np.array([[[1,2],[3,4]],[[5,6],[7,8]]])
print(x)
print(x[0,1,1]) # should be 4
y = np.copy(x)
y[:,1,:]=[99,99]
print(y)

[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]
4
[[[ 1  2]
  [99 99]]

 [[ 5  6]
  [99 99]]]

## Pass-by-reference
x = np.ones((2,3))
y = x
print(x)
print(y)
x[1,1]=2

## Note that both x and y objects are altered
print(x)
print(y)

[[1. 1. 1.]
 [1. 1. 1.]]
[[1. 1. 1.]
 [1. 1. 1.]]
[[1. 1. 1.]
 [1. 2. 1.]]
[[1. 1. 1.]
 [1. 2. 1.]]

Creating Arrays#

np.array([1,2,3]): 1-D array
np.array([[1,2,3],[4,5,6]]): 2-D array
np.zeros((3,4)): 3x4 array with all values 0
np.ones((3,4)): 3x4 array with all values 1
np.eye(5): 5x5 array of 0 with 1 on diagonal (identity matrix)
np.linspace(0, 100, 6): Array of 6 evenly spaced values from 0 to 100 (both ends included)
np.arange(0, 10, 3): Array of values from 0 to less than 10 with step 3
np.full((2,3), 8): 2x3 array with all values 8
np.random.rand(6,7)*100: 6x7 array of random floats between 0-100
np.random.randint(5, size=(2,3)): 2x3 array with random ints between 0-4

Note

In Python, the indices (esp. the closing indices) are often NOT inclusive.

## Initialize different types of Arrays

print(np.zeros((2,3)))
print(np.ones((2,3)))
print(np.full((2,3),99)) # create an array with self-defined default
x = np.array([[1,2,3],[4,5,6]])
print(x)
print(np.full_like(x,99)) # new array with the same shape and dtype as x, filled with 99

print(np.random.rand(4,3)) # random decimal numbers
print(np.random.randint(-10,10, size=(3,3))) ## random integer values
print(np.identity(5))
x1 = np.array([[1,2,3]])
x2 = np.array([1,2,3])
print(np.repeat(x1, 4, axis=0))
print(np.repeat(x2, 4, axis=0))
print(x1.shape)
print(x2.shape)

[[0. 0. 0.]
 [0. 0. 0.]]
[[1. 1. 1.]
 [1. 1. 1.]]
[[99 99 99]
 [99 99 99]]
[[1 2 3]
 [4 5 6]]
[[99 99 99]
 [99 99 99]]
[[0.06702106 0.11582396 0.86849828]
 [0.66777362 0.46304232 0.72715271]
 [0.36493309 0.97211601 0.11689888]
 [0.93841395 0.75317111 0.673672  ]]
[[  6   4   6]
 [  9  -2  -1]
 [-10   1   5]]
[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]
[[1 2 3]
 [1 2 3]
 [1 2 3]
 [1 2 3]]
[1 1 1 1 2 2 2 2 3 3 3 3]
(1, 3)
(3,)

## 
x = np.ones((2,3))
y = x[:,[0,1,2]]
print(x)
print(y)
x[1,1] = 99
## Note that only the x object is altered. y is still the original,
## because indexing with a list (fancy indexing) returns a copy, not a view.
print(x)
print(y)

[[1. 1. 1.]
 [1. 1. 1.]]
[[1. 1. 1.]
 [1. 1. 1.]]
[[ 1.  1.  1.]
 [ 1. 99.  1.]]
[[1. 1. 1.]
 [1. 1. 1.]]

## To explicitly create a copy of an array
x = np.ones((2,3))
y = x.copy()
print(x)
print(y)
x[1,1]=99
print(x)
print(y)

[[1. 1. 1.]
 [1. 1. 1.]]
[[1. 1. 1.]
 [1. 1. 1.]]
[[ 1.  1.  1.]
 [ 1. 99.  1.]]
[[1. 1. 1.]
 [1. 1. 1.]]

## Numpy Broadcasting
X, Y = np.meshgrid(np.arange(2), np.arange(2))
print(X)
print(Y)
X + Y

[[0 1]
 [0 1]]
[[0 0]
 [1 1]]

array([[0, 1],
       [1, 2]])

x = np.array([0,1])
y = np.array([0,1])
print(x+y)
print(x + y[:,np.newaxis]) # np.newaxis (None) adds a new axis, turning y into a (2,1) column that is broadcast against x

[0 2]
[[0 1]
 [1 2]]

Adding/Removing Elements#

np.append(arr, values)
np.insert(arr, 2, values): Insert values into arr before index 2
np.delete(arr, 3, axis=0): Delete row (axis=0) on index 3 of arr
np.delete(arr, 3, axis=1): Delete column (axis=1) on index 3 of arr
np.repeat()

np.repeat(3, 4)
np.repeat([2,8],[2,5])

array([2, 2, 8, 8, 8, 8, 8])

Concatenating/Splitting Arrays#

np.concatenate((arr1, arr2), axis=0): Row-bind arrays
np.concatenate((arr1, arr2), axis=1): Column-bind arrays
np.split(arr, 3): Split arr into 3 sub-arrays based on rows
np.hsplit(arr, 3): Split arr into 3 equal-sized sub-arrays based on the columns

x = np.random.randint(0,100,size=(3,4))
print(x)
print(np.split(x,3))
print(np.hsplit(x,2))

[[90 72 35 23]
 [13 26 63 79]
 [89 85 73 89]]
[array([[90, 72, 35, 23]]), array([[13, 26, 63, 79]]), array([[89, 85, 73, 89]])]
[array([[90, 72],
       [13, 26],
       [89, 85]]), array([[35, 23],
       [63, 79],
       [73, 89]])]

Masked Array#

## Masked Array
from numpy import ma
x = np.arange(10)
y = ma.masked_array(x , x<5) # copy=False
print(y)
print(y.shape)
x[6]=99
print(x)
print(y)
## The above shows that masked_array does not force an implicit copy operation

[-- -- -- -- -- 5 6 7 8 9]
(10,)
[ 0  1  2  3  4  5 99  7  8  9]
[-- -- -- -- -- 5 99 7 8 9]

Linear Algebra#

np.add(arr, 2)
np.subtract(arr, 2)
np.multiply(arr, 2)
np.divide(arr, 2)
np.power(arr, 2)
np.array_equal(arr1, arr2)
np.sqrt()
np.sin()
np.log()
np.abs()
np.ceil(): Round up to the nearest int
np.floor()
np.round()

## Linear Algebra

## Matrix Multiplication
a = np.ones((2,3))
print(a)
b = np.full((3,2),2)
print(b)
print(np.matmul(a,b))

## Find the determinant
x = np.identity(3)
np.linalg.det(x)

[[1. 1. 1.]
 [1. 1. 1.]]
[[2 2]
 [2 2]
 [2 2]]
[[6. 6.]
 [6. 6.]]

1.0

Statistics#

np.mean(arr)
arr.sum()
arr.max()
arr.max(axis=0): Return the max value of each column (reduces along the rows)
arr.max(axis=1): Return the max value of each row (reduces along the columns)
arr.var():
arr.std()
np.corrcoef(arr): Returns the correlation coefficient matrix of arr
np.where(arr==2): Return the indices of the elements in arr that are equal to 2
np.argmin(arr): Return the index of the min value of arr
np.argmax(arr): Return the index of the max value of arr

## Statistics
x = np.random.randint(0,100, size=(2,3))
print(x)
print(np.min(x, axis=0)) # min of each column
print(np.min(x, axis=1)) # min of each row
## For a 2D array, axis=0 runs down the rows, so reducing along it gives one value per column
print(np.sum(x, axis=0)) # sum of each column

[[63 89 86]
 [96 63 92]]
[63 63 86]
[63 63]
[159 152 178]

## Reorganizing Arrays
x = np.array([range(4),range(4)])
print(x)
y = x.reshape((4,2))
print(y)

# Stacking arrays
x = np.full((3,),3)
y = np.full((3,),6)
print(x)
print(y)
print(np.vstack([x,y]))
print(np.hstack([x,y]))

[[0 1 2 3]
 [0 1 2 3]]
[[0 1]
 [2 3]
 [0 1]
 [2 3]]
[3 3 3]
[6 6 6]
[[3 3 3]
 [6 6 6]]
[3 3 3 6 6 6]

Find which element has a specific value

## Search Elements in array
x = [1,2,3,4,0,1,2,3,4,11] 
x=np.array(x)
np.where(x == 2)

(array([1, 6]),)

Identify the first index at which the element equals the specific value

np.min(np.where(x==2))

Find the index of the MIN/MAX

np.argmin(x)
np.argmax(x)

Load from File#

filedata = np.genfromtxt('', delimiter=',')  # replace '' with the path to your data file
filedata = filedata.astype('int32')
print(filedata)

Requirements#

numpy==1.18.1

Numpy

Contents

Numpy#

Why Numpy Array?#

Array Properties#

Subsetting and Slicing#

Creating Arrays#

Adding/Removing Elements#

Concatenating/Splitting Arrays#

Masked Array#

Linear Algebra#

Statistics#

Load from File#

Requirements#

References#