Descriptive Statistics
# Root directory of the demo data sets, given as a relative path; the
# input file paths used below are built by appending to it.
DEMO_DATA_ROOT = "../../../RepositoryData/data"
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
Univariate Statistics
# Load the filler data set into a DataFrame and display it.
# NOTE(review): the file is named .csv but is read with read_table, whose
# default separator is a tab -- confirm the file is tab-delimited.
UHM = pd.read_table(DEMO_DATA_ROOT+"/gries_sflwr/_inputfiles/03-1_uh(m).csv")
UHM
(index) | CASE | SEX | FILLER | GENRE | LENGTH |
---|---|---|---|---|---|
0 | 1 | male | uhm | monolog | 1014 |
1 | 2 | female | silence | monolog | 1188 |
2 | 3 | female | silence | dialog | 889 |
3 | 4 | female | uhm | dialog | 265 |
4 | 5 | male | uh | dialog | 465 |
... | ... | ... | ... | ... | ... |
995 | 996 | male | silence | dialog | 751 |
996 | 997 | female | uhm | dialog | 1005 |
997 | 998 | female | uhm | monolog | 568 |
998 | 999 | female | uh | dialog | 984 |
999 | 1000 | female | uh | dialog | 1521 |
1000 rows × 5 columns
# Absolute frequency of each FILLER level, most frequent first.
# `subset` takes column label(s), so pass the column name rather than the
# column's Series.
UHM.value_counts('FILLER')
FILLER
uh 394
silence 332
uhm 274
dtype: int64
# Relative frequency (proportion) of each FILLER level, most frequent first.
# `subset` takes column label(s), so pass the column name rather than the
# column's Series.
UHM.value_counts('FILLER', normalize=True)
FILLER
uh 0.394
silence 0.332
uhm 0.274
dtype: float64
def ecdf(data):
    """Compute the empirical cumulative distribution function of *data*.

    Args:
        data: Array-like of numeric observations. Multi-dimensional input
            is flattened so that every value counts as one observation.

    Returns:
        A tuple ``(x, y)`` of NumPy arrays of equal length ``n``: ``x``
        holds the observations in ascending order and ``y[i]`` is the
        cumulative proportion ``(i + 1) / n`` at the i-th sorted value.
        Both arrays are empty when *data* is empty.
    """
    # axis=None flattens before sorting, so x and y always have equal length.
    x = np.sort(data, axis=None)
    n = len(x)
    # Cumulative proportions 1/n, 2/n, ..., 1 (fractions, not percentages).
    y = np.arange(1, n + 1) / n
    return x, y
# ECDF over the three FILLER frequency counts (sorted counts and their
# cumulative proportions). `subset` takes column label(s), so pass the
# column name rather than the column's Series.
ecdf(UHM.value_counts('FILLER'))
(array([274, 332, 394]), array([0.33333333, 0.66666667, 1. ]))