Numpy#
Why Numpy Array?#
Compared to built-in data structures (e.g., list), a NumPy array is more memory-efficient and faster in computation.
## Basics
import numpy as np
## memory size and data type
x = np.array([1,2,3], dtype=np.float32)
print(x)
print(x.itemsize) # size of one item in bytes (4 for float32)
print(x.nbytes)
[1. 2. 3.]
4
12
print(x.ndim) ## get num of dimensions
print(x.shape) ## shape
print(x.dtype) ## data type
print(x.size) ## num of elements
1
(3,)
float32
3
%%time
## unary function
np.sin(x)
CPU times: user 18 µs, sys: 5 µs, total: 23 µs
Wall time: 26 µs
array([0.84147096, 0.9092974 , 0.14112 ], dtype=float32)
%%time
## math equivalent
## we have to use list comprehension
from math import sin
[sin(i) for i in x]
CPU times: user 57 µs, sys: 4 µs, total: 61 µs
Wall time: 65.1 µs
[0.8414709848078965, 0.9092974268256817, 0.1411200080598672]
## Multidimensional array
## NumPy supports arrays with at most 32 dimensions
x = np.array([range(10),range(10)])
print(x)
print(x.shape)
[[0 1 2 3 4 5 6 7 8 9]
[0 1 2 3 4 5 6 7 8 9]]
(2, 10)
Array Properties#
arr.size
: Return the number of elements in arr
arr.shape
: Return the dimensions of arr as a tuple, e.g. (rows, columns)
arr.dtype
: Return the type of the elements in arr
arr.astype(dtype)
: Convert the elements of arr to type dtype
arr.tolist()
: Convert arr into a list
Subsetting and Slicing#
arr[START:END:STEP]
: Slicing elements
arr[4,4]
: Indexing a specific element by (row, column)
## Subsetting
print(x[:,0]) # first column
print(x[:,1]) # second column
print(x[0,:]) # first row
## Subsetting sections of array
print(x[:, 1:]) # all rows, columns from index 1 (the second column) to the last
print(x[:, ::2]) # all rows, every other column
print(x[:, ::-1]) # all rows, reversed columns
print(x[:, 5:9:2]) # [, StartIndex:EndIndex:StepSize]
print(x[::-1,:]) ## all columns, reversed rows
[0 0]
[1 1]
[0 1 2 3 4 5 6 7 8 9]
[[1 2 3 4 5 6 7 8 9]
[1 2 3 4 5 6 7 8 9]]
[[0 2 4 6 8]
[0 2 4 6 8]]
[[9 8 7 6 5 4 3 2 1 0]
[9 8 7 6 5 4 3 2 1 0]]
[[5 7]
[5 7]]
[[0 1 2 3 4 5 6 7 8 9]
[0 1 2 3 4 5 6 7 8 9]]
## Subsetting 3D Array
## Principle: From outside in!
x = np.array([[[1,2],[3,4]],[[5,6],[7,8]]])
print(x)
print(x[0,1,1]) # should be 4
y = np.copy(x)
y[:,1,:]=[99,99]
print(y)
[[[1 2]
[3 4]]
[[5 6]
[7 8]]]
4
[[[ 1 2]
[99 99]]
[[ 5 6]
[99 99]]]
## Assignment creates a reference (alias), not a copy
x = np.ones((2,3))
y = x
print(x)
print(y)
x[1,1]=2
## Note that x and y are two names for the same array object, so the change is visible through both
print(x)
print(y)
[[1. 1. 1.]
[1. 1. 1.]]
[[1. 1. 1.]
[1. 1. 1.]]
[[1. 1. 1.]
[1. 2. 1.]]
[[1. 1. 1.]
[1. 2. 1.]]
Creating Arrays#
np.array([1,2,3])
: 1-D array
np.array([[1,2,3],[4,5,6]])
: 2-D array
np.zeros((3,4))
: 3x4 array with all values 0
np.ones((3,4))
: 3x4 array with all values 1
np.eye(5)
: 5x5 array of 0 with 1 on diagonal (identity matrix)
np.linspace(0, 100, 6)
: Array of 6 evenly divided values from 0 to 100
np.arange(0, 10, 3)
: Array of values from 0 to less than 10 with step 3
np.full((2,3), 8)
: 2x3 array with all values 8
np.random.rand(6,7)*100
: 6x7 array of random floats between 0-100
np.random.randint(5, size=(2,3))
: 2x3 array with random ints between 0-4
Note
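In Python, the end index of a slice or range is usually NOT inclusive (e.g., a slice 0:5 covers indices 0 to 4, and the upper bound passed to a random-integer function is never returned).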
## Initialize different types of Arrays
print(np.zeros((2,3)))
print(np.ones((2,3)))
print(np.full((2,3),99)) # create an array with self-defined default
x = np.array([[1,2,3],[4,5,6]])
print(x)
print(np.full_like(x,99)) # copy an array with default values
print(np.random.rand(4,3)) # random decimal numbers
print(np.random.randint(-10,10, size=(3,3))) ## random integer values
print(np.identity(5))
x1 = np.array([[1,2,3]])
x2 = np.array([1,2,3])
print(np.repeat(x1, 4, axis=0))
print(np.repeat(x2, 4, axis=0))
print(x1.shape)
print(x2.shape)
[[0. 0. 0.]
[0. 0. 0.]]
[[1. 1. 1.]
[1. 1. 1.]]
[[99 99 99]
[99 99 99]]
[[1 2 3]
[4 5 6]]
[[99 99 99]
[99 99 99]]
[[0.06702106 0.11582396 0.86849828]
[0.66777362 0.46304232 0.72715271]
[0.36493309 0.97211601 0.11689888]
[0.93841395 0.75317111 0.673672 ]]
[[ 6 4 6]
[ 9 -2 -1]
[-10 1 5]]
[[1. 0. 0. 0. 0.]
[0. 1. 0. 0. 0.]
[0. 0. 1. 0. 0.]
[0. 0. 0. 1. 0.]
[0. 0. 0. 0. 1.]]
[[1 2 3]
[1 2 3]
[1 2 3]
[1 2 3]]
[1 1 1 1 2 2 2 2 3 3 3 3]
(1, 3)
(3,)
##
x = np.ones((2,3))
y = x[:,[0,1,2]]
print(x)
print(y)
x[1,1] = 99
## Note that only x is altered. y is still the original, because indexing with a list (fancy indexing) returns a copy!!!
print(x)
print(y)
[[1. 1. 1.]
[1. 1. 1.]]
[[1. 1. 1.]
[1. 1. 1.]]
[[ 1. 1. 1.]
[ 1. 99. 1.]]
[[1. 1. 1.]
[1. 1. 1.]]
## To explicitly create a copy of an array
x = np.ones((2,3))
y = x.copy()
print(x)
print(y)
x[1,1]=99
print(x)
print(y)
[[1. 1. 1.]
[1. 1. 1.]]
[[1. 1. 1.]
[1. 1. 1.]]
[[ 1. 1. 1.]
[ 1. 99. 1.]]
[[1. 1. 1.]
[1. 1. 1.]]
## Numpy Broadcasting
X, Y = np.meshgrid(np.arange(2), np.arange(2))
print(X)
print(Y)
X + Y
[[0 1]
[0 1]]
[[0 0]
[1 1]]
array([[0, 1],
[1, 2]])
x = np.array([0,1])
y = np.array([0,1])
print(x+y)
print(x + y[:,np.newaxis]) # np.newaxis (None) adds a new axis, turning y into a (2,1) column that is broadcast against x
[0 2]
[[0 1]
[1 2]]
Adding/Removing Elements#
np.append(arr, values)
: Append values to the end of arr (returns a new array)
np.insert(arr, 2, values)
: Insert values into arr before index 2
np.delete(arr, 3, axis=0)
: Delete row (axis=0) on index 3 of arr
np.delete(arr, 3, axis=1)
: Delete column (axis=1) on index 3 of arr
np.repeat()
np.repeat(3, 4)
np.repeat([2,8],[2,5])
array([2, 2, 8, 8, 8, 8, 8])
Concatenating/Splitting Arrays#
np.concatenate((arr1, arr2), axis=0)
: Row-bind arrays
np.concatenate((arr1, arr2), axis=1)
: Column-bind arrays
np.split(arr, 3)
: Split arr into 3 sub-arrays based on rows
np.hsplit(arr, 3)
: Split arr into 3 equal-sized sub-arrays based on the columns
x = np.random.randint(0,100,size=(3,4))
print(x)
print(np.split(x,3))
print(np.hsplit(x,2))
[[90 72 35 23]
[13 26 63 79]
[89 85 73 89]]
[array([[90, 72, 35, 23]]), array([[13, 26, 63, 79]]), array([[89, 85, 73, 89]])]
[array([[90, 72],
[13, 26],
[89, 85]]), array([[35, 23],
[63, 79],
[73, 89]])]
Masked Array#
## Masked Array
from numpy import ma
x = np.arange(10)
y = ma.masked_array(x , x<5) # copy=False
print(y)
print(y.shape)
x[6]=99
print(x)
print(y)
## The above shows that masked_array does not force an implicit copy operation
[-- -- -- -- -- 5 6 7 8 9]
(10,)
[ 0 1 2 3 4 5 99 7 8 9]
[-- -- -- -- -- 5 99 7 8 9]
Linear Algebra#
np.add(arr, 2)
np.subtract(arr, 2)
np.multiply(arr, 2)
np.divide(arr, 2)
np.power(arr, 2)
np.array_equal(arr1, arr2)
np.sqrt()
np.sin()
np.log()
np.abs()
np.ceil()
: Round up to the nearest int
np.floor()
: Round down to the nearest int
np.round()
: Round to the given number of decimals (0 by default)
## Linear Algebra
## Matrix Multiplication
a = np.ones((2,3))
print(a)
b = np.full((3,2),2)
print(b)
print(np.matmul(a,b))
## Find the determinant
x = np.identity(3)
np.linalg.det(x)
[[1. 1. 1.]
[1. 1. 1.]]
[[2 2]
[2 2]
[2 2]]
[[6. 6.]
[6. 6.]]
1.0
Statistics#
np.mean(arr)
arr.sum()
arr.max()
arr.max(axis=0)
: Return max value of each column (reduces over the rows)
arr.max(axis=1)
: Return max value of each row (reduces over the columns)
arr.var()
: Return the variance of arr
arr.std()
: Return the standard deviation of arr
np.corrcoef(arr)
: Return the correlation coefficient matrix of arr
np.where(arr==2)
: Return the indices of the elements in arr that are equal to 2
np.argmin(arr)
: Return the index of the min value of arr
np.argmax(arr)
: Return the index of the max value of arr
## Statistics
x = np.random.randint(0,100, size=(2,3))
print(x)
print(np.min(x, axis=0)) # min of each column
print(np.min(x, axis=1)) # min of each row
## In a 2D array, axis=0 runs down the rows, so reducing over axis=0 gives one value per column
print(np.sum(x, axis=0)) # sum of columns
[[63 89 86]
[96 63 92]]
[63 63 86]
[63 63]
[159 152 178]
## Reorganizing Arrays
x = np.array([range(4),range(4)])
print(x)
y = x.reshape((4,2))
print(y)
# Stacking arrays
x = np.full((3,),3)
y = np.full((3,),6)
print(x)
print(y)
print(np.vstack([x,y]))
print(np.hstack([x,y]))
[[0 1 2 3]
[0 1 2 3]]
[[0 1]
[2 3]
[0 1]
[2 3]]
[3 3 3]
[6 6 6]
[[3 3 3]
[6 6 6]]
[3 3 3 6 6 6]
Find which element has a specific value
## Search Elements in array
x = [1,2,3,4,0,1,2,3,4,11]
x=np.array(x)
np.where(x == 2)
(array([1, 6]),)
Identify the first index of the element that is of the specific value
np.min(np.where(x==2))
1
Find the index of the MIN/MAX
np.argmin(x)
np.argmax(x)
9
Load from File#
filedata = np.genfromtxt('', delimiter=',') # put the path of the comma-separated file in the first argument
filedata = filedata.astype('int32')
print(filedata)
Requirements#
numpy==1.18.1