# Praat TextGrid Data
Module: praatio
from praatio import tgio
# Load the TextGrid annotation file; the path is relative to the
# notebook's working directory.
tg = tgio.openTextgrid(
    '../../../../../Dropbox/Projects/MOST-Prosody/data/2014_di702_TextGrid_Alvin/di_001.TextGrid'
)
# Show the mapping from tier name to tier object.
tg.tierDict
{'DU': <praatio.tgio.IntervalTier at 0x7fa96e7475f8>,
'PU': <praatio.tgio.IntervalTier at 0x7fa96e7a0518>,
'Hanzi': <praatio.tgio.IntervalTier at 0x7fa96e7a0ac8>,
'Word': <praatio.tgio.IntervalTier at 0x7fa96e7a0630>,
'POS': <praatio.tgio.IntervalTier at 0x7fa96e7a0e10>,
'Syllable': <praatio.tgio.IntervalTier at 0x7fa96e747ef0>}
# List every interval (start, end, label) stored in the 'PU' tier.
tg.tierDict['PU'].entryList
[Interval(start=0.0, end=0.08910100003511151, label='SILENCE'),
Interval(start=0.08910100003511151, end=2.545392639802688, label='001-01'),
Interval(start=2.545392639802688, end=2.827885181500808, label='INHALE'),
Interval(start=2.827885181500808, end=3.796965181500808, label='001-02'),
Interval(start=3.796965181500808, end=4.0534851815008075, label='001-03'),
Interval(start=4.0534851815008075, end=5.667585181500808, label='001-04'),
Interval(start=5.667585181500808, end=6.857576537125045, label='001-05'),
Interval(start=6.857576537125045, end=7.267132404585203, label='INHALE'),
Interval(start=7.267132404585203, end=8.307932404585202, label='001-06'),
Interval(start=8.307932404585202, end=9.189732404585204, label='001-07'),
Interval(start=9.189732404585204, end=9.586900772653916, label='001-08'),
Interval(start=9.586900772653916, end=9.72222249424322, label='BREAK'),
Interval(start=9.72222249424322, end=10.896550071849948, label='001-09'),
Interval(start=10.896550071849948, end=11.253792659155748, label='INHALE'),
Interval(start=11.253792659155748, end=12.072532659155748, label='001-10'),
Interval(start=12.072532659155748, end=12.529592659155748, label='001-11'),
Interval(start=12.529592659155748, end=13.493012631305778, label='001-12'),
Interval(start=13.493012631305778, end=14.002673795348752, label='INHALE'),
Interval(start=14.002673795348752, end=15.546373795348751, label='001-13'),
Interval(start=15.546373795348751, end=16.905172445406897, label='001-14'),
Interval(start=16.905172445406897, end=17.333458754946722, label='BREAK'),
Interval(start=17.333458754946722, end=17.814973694535343, label='001-15'),
Interval(start=17.814973694535343, end=18.6811750044612, label='001-16'),
Interval(start=18.6811750044612, end=18.788004535147394, label='SILENCE')]
# List every interval (start, end, label) stored in the 'Word' tier.
tg.tierDict['Word'].entryList
[Interval(start=0.0, end=0.08910100003511151, label='SILENCE'),
Interval(start=0.08910100003511151, end=0.8030583475894366, label='南港'),
Interval(start=0.8030583475894366, end=1.4300964566345282, label='過去'),
Interval(start=1.4300964566345282, end=1.6983112476321507, label='的'),
Interval(start=1.6983112476321507, end=2.545392639802688, label='話'),
Interval(start=2.545392639802688, end=2.827885181500808, label='INHALE'),
Interval(start=2.827885181500808, end=2.894020472246423, label='我'),
Interval(start=2.894020472246423, end=3.073005181500808, label='就'),
Interval(start=3.073005181500808, end=3.38547756708703, label='比較'),
Interval(start=3.38547756708703, end=3.4684488581941855, label='不'),
Interval(start=3.4684488581941855, end=3.796965181500808, label='清楚'),
Interval(start=3.796965181500808, end=3.9740698058745503, label='像'),
Interval(start=3.9740698058745503, end=4.0534851815008075, label='UNCERTAIN'),
Interval(start=4.0534851815008075, end=4.260025476170655, label='我'),
Interval(start=4.260025476170655, end=4.455985181500808, label='每'),
Interval(start=4.455985181500808, end=4.6733324608007285, label='天'),
Interval(start=4.6733324608007285, end=5.1662851815008075, label='上班'),
Interval(start=5.1662851815008075, end=5.321504689041388, label='是'),
Interval(start=5.321504689041388, end=5.667585181500808, label='從'),
Interval(start=5.667585181500808, end=6.018412388059074, label='捷運'),
Interval(start=6.018412388059074, end=6.549597723575854, label='永春'),
Interval(start=6.549597723575854, end=6.857576537125045, label='站'),
Interval(start=6.857576537125045, end=7.267132404585203, label='INHALE'),
Interval(start=7.267132404585203, end=7.4823724045852025, label='搭'),
Interval(start=7.4823724045852025, end=8.307932404585202, label='捷運'),
Interval(start=8.307932404585202, end=8.546352955764087, label='到'),
Interval(start=8.546352955764087, end=9.189732404585204, label='NE GE'),
Interval(start=9.189732404585204, end=9.586900772653916, label='捷運'),
Interval(start=9.586900772653916, end=9.72222249424322, label='BREAK'),
Interval(start=9.72222249424322, end=10.066174868235244, label='忠孝'),
Interval(start=10.066174868235244, end=10.62507065477828, label='復興'),
Interval(start=10.62507065477828, end=10.896550071849948, label='站'),
Interval(start=10.896550071849948, end=11.253792659155748, label='INHALE'),
Interval(start=11.253792659155748, end=11.507412659155747, label='再'),
Interval(start=11.507412659155747, end=12.072532659155748, label='轉'),
Interval(start=12.072532659155748, end=12.529592659155748, label='NE GE'),
Interval(start=12.529592659155748, end=13.493012631305778, label='木柵線'),
Interval(start=13.493012631305778, end=14.002673795348752, label='INHALE'),
Interval(start=14.002673795348752, end=14.190468411637928, label='到'),
Interval(start=14.190468411637928, end=14.56504394461461, label='南京'),
Interval(start=14.56504394461461, end=14.761767837630165, label='站'),
Interval(start=14.761767837630165, end=15.546373795348751, label='下車'),
Interval(start=15.546373795348751, end=15.772873795348751, label='再'),
Interval(start=15.772873795348751, end=15.926809885128144, label='搭'),
Interval(start=15.926809885128144, end=16.025073795348753, label='一'),
Interval(start=16.025073795348753, end=16.33847379534875, label='程'),
Interval(start=16.33847379534875, end=16.905172445406897, label='公車'),
Interval(start=16.905172445406897, end=17.333458754946722, label='BREAK'),
Interval(start=17.333458754946722, end=17.498171747880015, label='到'),
Interval(start=17.498171747880015, end=17.814973694535343, label='NE GE'),
Interval(start=17.814973694535343, end=18.415958754946722, label='建國北路'),
Interval(start=18.415958754946722, end=18.6811750044612, label='口'),
Interval(start=18.6811750044612, end=18.788004535147394, label='SILENCE')]
# Look up the entries labelled '我' in the 'Word' tier; the result lists
# their positions in the tier's entryList.
tg.tierDict['Word'].find('我')
[6, 13]
import pandas as pd
# Tabulate the 'Word' tier: one DataFrame row per interval, with each
# entry converted to a plain (start, end, label) tuple.
word_tier = tg.tierDict['Word']
pd.DataFrame(
    list(map(tuple, word_tier.entryList)),
    columns=['start', 'end', 'label'],
)
| | start | end | label |
---|---|---|---|
0 | 0.000000 | 0.089101 | SILENCE |
1 | 0.089101 | 0.803058 | 南港 |
2 | 0.803058 | 1.430096 | 過去 |
3 | 1.430096 | 1.698311 | 的 |
4 | 1.698311 | 2.545393 | 話 |
5 | 2.545393 | 2.827885 | INHALE |
6 | 2.827885 | 2.894020 | 我 |
7 | 2.894020 | 3.073005 | 就 |
8 | 3.073005 | 3.385478 | 比較 |
9 | 3.385478 | 3.468449 | 不 |
10 | 3.468449 | 3.796965 | 清楚 |
11 | 3.796965 | 3.974070 | 像 |
12 | 3.974070 | 4.053485 | UNCERTAIN |
13 | 4.053485 | 4.260025 | 我 |
14 | 4.260025 | 4.455985 | 每 |
15 | 4.455985 | 4.673332 | 天 |
16 | 4.673332 | 5.166285 | 上班 |
17 | 5.166285 | 5.321505 | 是 |
18 | 5.321505 | 5.667585 | 從 |
19 | 5.667585 | 6.018412 | 捷運 |
20 | 6.018412 | 6.549598 | 永春 |
21 | 6.549598 | 6.857577 | 站 |
22 | 6.857577 | 7.267132 | INHALE |
23 | 7.267132 | 7.482372 | 搭 |
24 | 7.482372 | 8.307932 | 捷運 |
25 | 8.307932 | 8.546353 | 到 |
26 | 8.546353 | 9.189732 | NE GE |
27 | 9.189732 | 9.586901 | 捷運 |
28 | 9.586901 | 9.722222 | BREAK |
29 | 9.722222 | 10.066175 | 忠孝 |
30 | 10.066175 | 10.625071 | 復興 |
31 | 10.625071 | 10.896550 | 站 |
32 | 10.896550 | 11.253793 | INHALE |
33 | 11.253793 | 11.507413 | 再 |
34 | 11.507413 | 12.072533 | 轉 |
35 | 12.072533 | 12.529593 | NE GE |
36 | 12.529593 | 13.493013 | 木柵線 |
37 | 13.493013 | 14.002674 | INHALE |
38 | 14.002674 | 14.190468 | 到 |
39 | 14.190468 | 14.565044 | 南京 |
40 | 14.565044 | 14.761768 | 站 |
41 | 14.761768 | 15.546374 | 下車 |
42 | 15.546374 | 15.772874 | 再 |
43 | 15.772874 | 15.926810 | 搭 |
44 | 15.926810 | 16.025074 | 一 |
45 | 16.025074 | 16.338474 | 程 |
46 | 16.338474 | 16.905172 | 公車 |
47 | 16.905172 | 17.333459 | BREAK |
48 | 17.333459 | 17.498172 | 到 |
49 | 17.498172 | 17.814974 | NE GE |
50 | 17.814974 | 18.415959 | 建國北路 |
51 | 18.415959 | 18.681175 | 口 |
52 | 18.681175 | 18.788005 | SILENCE |
# Tabulate the 'PU' tier: one DataFrame row per interval, with each
# entry converted to a plain (start, end, label) tuple.
pu_tier = tg.tierDict['PU']
pd.DataFrame(
    list(map(tuple, pu_tier.entryList)),
    columns=['start', 'end', 'label'],
)
| | start | end | label |
---|---|---|---|
0 | 0.000000 | 0.089101 | SILENCE |
1 | 0.089101 | 2.545393 | 001-01 |
2 | 2.545393 | 2.827885 | INHALE |
3 | 2.827885 | 3.796965 | 001-02 |
4 | 3.796965 | 4.053485 | 001-03 |
5 | 4.053485 | 5.667585 | 001-04 |
6 | 5.667585 | 6.857577 | 001-05 |
7 | 6.857577 | 7.267132 | INHALE |
8 | 7.267132 | 8.307932 | 001-06 |
9 | 8.307932 | 9.189732 | 001-07 |
10 | 9.189732 | 9.586901 | 001-08 |
11 | 9.586901 | 9.722222 | BREAK |
12 | 9.722222 | 10.896550 | 001-09 |
13 | 10.896550 | 11.253793 | INHALE |
14 | 11.253793 | 12.072533 | 001-10 |
15 | 12.072533 | 12.529593 | 001-11 |
16 | 12.529593 | 13.493013 | 001-12 |
17 | 13.493013 | 14.002674 | INHALE |
18 | 14.002674 | 15.546374 | 001-13 |
19 | 15.546374 | 16.905172 | 001-14 |
20 | 16.905172 | 17.333459 | BREAK |
21 | 17.333459 | 17.814974 | 001-15 |
22 | 17.814974 | 18.681175 | 001-16 |
23 | 18.681175 | 18.788005 | SILENCE |