{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Vectorizing Texts\n", "\n", "- Feature engineering for text representation" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import nltk\n", "from nltk.corpus import brown" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['adventure',\n", " 'belles_lettres',\n", " 'editorial',\n", " 'fiction',\n", " 'government',\n", " 'hobbies',\n", " 'humor',\n", " 'learned',\n", " 'lore',\n", " 'mystery',\n", " 'news',\n", " 'religion',\n", " 'reviews',\n", " 'romance',\n", " 'science_fiction']" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "brown.categories()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "It is not news that Nathan Milstein is a wizard of the violin . Certainly not in Orchestra Hall wher\n", "55\n", "55\n", "55\n" ] }, { "data": { "text/plain": [ "list" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "corpus_id = brown.fileids(categories=['reviews','fiction','humor'])\n", "corpus_text = [' '.join(w) for w in [brown.words(fileids=cid) for cid in corpus_id]]\n", "print(corpus_text[0][:100])\n", "corpus_cat = [brown.categories(fileids=cid)[0] for cid in corpus_id]\n", "\n", "print(len(corpus_text))\n", "print(len(corpus_id))\n", "print(len(corpus_cat))\n", "type(corpus_id)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TextCategoryID
0It is not news that Nathan Milstein is a wizar...reviewscc01
1Television has yet to work out a living arrang...reviewscc02
2Francois D'Albert , Hungarian-born violinist w...reviewscc03
3The Theatre-by-the-Sea , Matunuck , presents `...reviewscc04
4The superb intellectual and spiritual vitality...reviewscc05
5George Kennan's account of relations between R...reviewscc06
6Some of the New York Philharmonic musicians wh...reviewscc07
7Had a funny experience at Newport yesterday af...reviewscc08
8Murray Louis and his dance company appeared at...reviewscc09
9Ring Of Bright Water , by Gavin Maxwell . 211 ...reviewscc10
10Mischa Elman shared last night's Lewisohn Stad...reviewscc11
11Radio is easily outdistancing television in it...reviewscc12
12A tribe in ancient India believed the earth wa...reviewscc13
13Elisabeth Schwarzkopf sang so magnificently Sa...reviewscc14
14As autumn starts its annual sweep , few Americ...reviewscc15
15A year ago it was bruited that the primary cha...reviewscc16
16The reading public , the theatergoing public ,...reviewscc17
17Thirty-three Scotty did not go back to school ...fictionck01
18Where their sharp edges seemed restless as sea...fictionck02
19Mickie sat over his second whisky-on-the-rocks...fictionck03
20The Bishop looked at him coldly and said , `` ...fictionck04
21Payne dismounted in Madison Place and handed t...fictionck05
22With a sneer , the man spread his legs and , a...fictionck06
23If the crummy bastard could write ! ! That's h...fictionck07
24Rousseau is so persuasive that Voltaire is alm...fictionck08
25It was the first time any of us had laughed si...fictionck09
26That summer the gambling houses were closed , ...fictionck10
27Standing in the shelter of the tent -- a rejec...fictionck11
28She was a child too much a part of her environ...fictionck12
29In the dim underwater light they dressed and s...fictionck13
30He brought with him a mixture of myrrh and alo...fictionck14
31Beth was very still and her breath came in sma...fictionck15
32The red glow from the cove had died out of the...fictionck16
33Burly leathered men and wrinkled women in drab...fictionck17
34She was getting real dramatic . I'd have been ...fictionck18
35There was one fact which Rector could not over...fictionck19
36She concluded by asking him to name another ho...fictionck20
37Beckworth handed the pass to the colonel . He ...fictionck21
38I would not want to be one of those writers wh...fictionck22
39It was not as though she noted clearly that he...fictionck23
40His eyes were old and they never saw well , bu...fictionck24
41He was in his mid-fifties at this time , long ...fictionck25
42But they all said , `` No , your time will com...fictionck26
43`` But tell me , doctor , where do you plan to...fictionck27
44Going downstairs with the tray , Winston wishe...fictionck28
45Was it love ? ? I had no doubt that it was . D...fictionck29
46It was among these that Hinkle identified a ph...humorcr01
47I realized that Hamlet was faced with an entir...humorcr02
48Needless to say , I was furious at this unpara...humorcr03
49Up to date , however , his garden was still mo...humorcr04
50Ambiguity Nothing in English has been ridicule...humorcr05
51I called the other afternoon on my old friend ...humorcr06
52One day , the children had wanted to get up on...humorcr07
53Pueri aquam de silvas ad agricolas portant , a...humorcr08
54Dear Sirs : Let me begin by clearing up any po...humorcr09
\n", "
" ], "text/plain": [ " Text Category ID\n", "0 It is not news that Nathan Milstein is a wizar... reviews cc01\n", "1 Television has yet to work out a living arrang... reviews cc02\n", "2 Francois D'Albert , Hungarian-born violinist w... reviews cc03\n", "3 The Theatre-by-the-Sea , Matunuck , presents `... reviews cc04\n", "4 The superb intellectual and spiritual vitality... reviews cc05\n", "5 George Kennan's account of relations between R... reviews cc06\n", "6 Some of the New York Philharmonic musicians wh... reviews cc07\n", "7 Had a funny experience at Newport yesterday af... reviews cc08\n", "8 Murray Louis and his dance company appeared at... reviews cc09\n", "9 Ring Of Bright Water , by Gavin Maxwell . 211 ... reviews cc10\n", "10 Mischa Elman shared last night's Lewisohn Stad... reviews cc11\n", "11 Radio is easily outdistancing television in it... reviews cc12\n", "12 A tribe in ancient India believed the earth wa... reviews cc13\n", "13 Elisabeth Schwarzkopf sang so magnificently Sa... reviews cc14\n", "14 As autumn starts its annual sweep , few Americ... reviews cc15\n", "15 A year ago it was bruited that the primary cha... reviews cc16\n", "16 The reading public , the theatergoing public ,... reviews cc17\n", "17 Thirty-three Scotty did not go back to school ... fiction ck01\n", "18 Where their sharp edges seemed restless as sea... fiction ck02\n", "19 Mickie sat over his second whisky-on-the-rocks... fiction ck03\n", "20 The Bishop looked at him coldly and said , `` ... fiction ck04\n", "21 Payne dismounted in Madison Place and handed t... fiction ck05\n", "22 With a sneer , the man spread his legs and , a... fiction ck06\n", "23 If the crummy bastard could write ! ! That's h... fiction ck07\n", "24 Rousseau is so persuasive that Voltaire is alm... fiction ck08\n", "25 It was the first time any of us had laughed si... fiction ck09\n", "26 That summer the gambling houses were closed , ... 
fiction ck10\n", "27 Standing in the shelter of the tent -- a rejec... fiction ck11\n", "28 She was a child too much a part of her environ... fiction ck12\n", "29 In the dim underwater light they dressed and s... fiction ck13\n", "30 He brought with him a mixture of myrrh and alo... fiction ck14\n", "31 Beth was very still and her breath came in sma... fiction ck15\n", "32 The red glow from the cove had died out of the... fiction ck16\n", "33 Burly leathered men and wrinkled women in drab... fiction ck17\n", "34 She was getting real dramatic . I'd have been ... fiction ck18\n", "35 There was one fact which Rector could not over... fiction ck19\n", "36 She concluded by asking him to name another ho... fiction ck20\n", "37 Beckworth handed the pass to the colonel . He ... fiction ck21\n", "38 I would not want to be one of those writers wh... fiction ck22\n", "39 It was not as though she noted clearly that he... fiction ck23\n", "40 His eyes were old and they never saw well , bu... fiction ck24\n", "41 He was in his mid-fifties at this time , long ... fiction ck25\n", "42 But they all said , `` No , your time will com... fiction ck26\n", "43 `` But tell me , doctor , where do you plan to... fiction ck27\n", "44 Going downstairs with the tray , Winston wishe... fiction ck28\n", "45 Was it love ? ? I had no doubt that it was . D... fiction ck29\n", "46 It was among these that Hinkle identified a ph... humor cr01\n", "47 I realized that Hamlet was faced with an entir... humor cr02\n", "48 Needless to say , I was furious at this unpara... humor cr03\n", "49 Up to date , however , his garden was still mo... humor cr04\n", "50 Ambiguity Nothing in English has been ridicule... humor cr05\n", "51 I called the other afternoon on my old friend ... humor cr06\n", "52 One day , the children had wanted to get up on... humor cr07\n", "53 Pueri aquam de silvas ad agricolas portant , a... humor cr08\n", "54 Dear Sirs : Let me begin by clearing up any po... 
humor cr09" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "import pandas as pd\n", "import re\n", "\n", "## corpus_text, corpus_cat and corpus_id are parallel lists (one entry per Brown file id),\n", "## so all three lengths must agree before they are zipped into columns\n", "assert len(corpus_text) == len(corpus_cat) == len(corpus_id), 'corpus lists are misaligned'\n", "\n", "## one row per document: joined text, first category label, Brown file id\n", "corpus_df = pd.DataFrame({'Text': corpus_text, 'Category': corpus_cat, 'ID': corpus_id})\n", "corpus_df" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', \"you're\", \"you've\", \"you'll\", \"you'd\", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', \"she's\", 'her', 'hers', 'herself', 'it', \"it's\", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', \"that'll\", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', \"don't\", 'should', \"should've\", 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', \"aren't\", 'couldn', \"couldn't\", 'didn', \"didn't\", 'doesn', \"doesn't\", 'hadn', \"hadn't\", 'hasn', \"hasn't\", 'haven', \"haven't\", 'isn', \"isn't\", 'ma', 'mightn', \"mightn't\", 'mustn', \"mustn't\", 'needn', \"needn't\", 'shan', \"shan't\", 'shouldn', \"shouldn't\", 'wasn', \"wasn't\", 'weren', \"weren't\", 'won', \"won't\", 'wouldn', \"wouldn't\"]\n" ] } 
], "source": [ "## Clean up texts\n", "wpt = nltk.WordPunctTokenizer()\n", "stop_words = nltk.corpus.stopwords.words('english')\n", "print(stop_words)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'television has yet to work out a living arrangement with jazz which comes to the medium more as an uneasy guest than as a relaxed member of the family there seems to be an unfortunate assumption that '" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## function\n", "def normalize_text(text):\n", "    \"\"\"Clean one document for vectorizing.\n", "\n", "    Removes every character that is not an ASCII letter or whitespace,\n", "    lowercases and strips the result, tokenizes it with the module-level\n", "    `wpt` tokenizer and joins the tokens back with single spaces.\n", "\n", "    text: the string to clean.\n", "    Returns the cleaned string.\n", "    \"\"\"\n", "    ## remove special characters\n", "    ## NOTE: the flags must be passed by keyword -- the 4th positional argument of\n", "    ## re.sub is `count`, so passing re.I|re.A there only capped the number of removals\n", "    text = re.sub(r'[^a-zA-Z\\s]', '', text, flags=re.I|re.A)\n", "    text = text.lower().strip()\n", "    tokens = wpt.tokenize(text)\n", "    ## filtering (currently disabled: every token is kept)\n", "    #tokens_filtered = [w for w in tokens if w not in stop_words and re.search(r'\\D+', w)]\n", "    tokens_filtered = tokens\n", "    text_output = ' '.join(tokens_filtered)\n", "\n", "    return text_output\n", "\n", "## vectorize function so it can be applied element-wise to the whole corpus\n", "normalize_corpus= np.vectorize(normalize_text)\n", "\n", "corpus_norm = normalize_corpus(corpus_text)\n", "corpus_norm[1][:200]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Bag of Words" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ableaboutaboveacrossactaddedafterafternoonagainagainst...writtenwroteyearyearsyesyetyorkyouyoungyour
01500001010...0000010301
10301201010...0004021814
21300103000...1005003421
30200402030...1001002021
41410001011...1000010602
501100004011...0011010600
61110116220...0110102020
711000004430...0131000312
81110010020...2101026120
90101003001...0024012410
101610011012...0022010010
110300001001...0034015020
122400302010...0006002111
130310103010...4001003010
140511101000...0040003210
151910002011...1012011000
160410101010...0052001000
1711102003220...00000401610
180920001122...0000030902
1901012000010...00010122232
2001003003020...00080101021
211621303020...0000021610
220103001150...00000211102
230510001060...00000101112
240400006022...11024001304
250911003001...0000020612
260120003022...10010001302
270324003171...0000000440
280502001030...0001320101
291502102111...0000000710
301400014110...0011050831
310601005234...14011001124
320002000020...1011113100
331401000120...00032002104
3401611112020...0101001600
3521200012201...10000101300
360700100032...03010201202
370002001134...1000400900
380100004201...00320011405
390211010052...0107100821
4001200003020...00011002104
410121004150...0006220010
4201200003122...0021100901
430202002120...0001000510
440901014000...01050003032
450401102130...0124021110
460200004030...0012000501
471600018121...00211021102
480300001014...0311020130
492610005121...00010002701
500100001010...01321102608
511700002122...13191002003
521500012030...00002002005
530402005011...0002012201
541221011002...000300045117
\n", "

55 rows × 739 columns

\n", "
" ], "text/plain": [ " able about above across act added after afternoon again against \\\n", "0 1 5 0 0 0 0 1 0 1 0 \n", "1 0 3 0 1 2 0 1 0 1 0 \n", "2 1 3 0 0 1 0 3 0 0 0 \n", "3 0 2 0 0 4 0 2 0 3 0 \n", "4 1 4 1 0 0 0 1 0 1 1 \n", "5 0 11 0 0 0 0 4 0 1 1 \n", "6 1 1 1 0 1 1 6 2 2 0 \n", "7 1 10 0 0 0 0 4 4 3 0 \n", "8 1 1 1 0 0 1 0 0 2 0 \n", "9 0 1 0 1 0 0 3 0 0 1 \n", "10 1 6 1 0 0 1 1 0 1 2 \n", "11 0 3 0 0 0 0 1 0 0 1 \n", "12 2 4 0 0 3 0 2 0 1 0 \n", "13 0 3 1 0 1 0 3 0 1 0 \n", "14 0 5 1 1 1 0 1 0 0 0 \n", "15 1 9 1 0 0 0 2 0 1 1 \n", "16 0 4 1 0 1 0 1 0 1 0 \n", "17 1 11 0 2 0 0 3 2 2 0 \n", "18 0 9 2 0 0 0 1 1 2 2 \n", "19 0 10 1 2 0 0 0 0 1 0 \n", "20 0 10 0 3 0 0 3 0 2 0 \n", "21 1 6 2 1 3 0 3 0 2 0 \n", "22 0 1 0 3 0 0 1 1 5 0 \n", "23 0 5 1 0 0 0 1 0 6 0 \n", "24 0 4 0 0 0 0 6 0 2 2 \n", "25 0 9 1 1 0 0 3 0 0 1 \n", "26 0 1 2 0 0 0 3 0 2 2 \n", "27 0 3 2 4 0 0 3 1 7 1 \n", "28 0 5 0 2 0 0 1 0 3 0 \n", "29 1 5 0 2 1 0 2 1 1 1 \n", "30 1 4 0 0 0 1 4 1 1 0 \n", "31 0 6 0 1 0 0 5 2 3 4 \n", "32 0 0 0 2 0 0 0 0 2 0 \n", "33 1 4 0 1 0 0 0 1 2 0 \n", "34 0 16 1 1 1 1 2 0 2 0 \n", "35 2 12 0 0 0 1 2 2 0 1 \n", "36 0 7 0 0 1 0 0 0 3 2 \n", "37 0 0 0 2 0 0 1 1 3 4 \n", "38 0 1 0 0 0 0 4 2 0 1 \n", "39 0 2 1 1 0 1 0 0 5 2 \n", "40 0 12 0 0 0 0 3 0 2 0 \n", "41 0 1 2 1 0 0 4 1 5 0 \n", "42 0 12 0 0 0 0 3 1 2 2 \n", "43 0 2 0 2 0 0 2 1 2 0 \n", "44 0 9 0 1 0 1 4 0 0 0 \n", "45 0 4 0 1 1 0 2 1 3 0 \n", "46 0 2 0 0 0 0 4 0 3 0 \n", "47 1 6 0 0 0 1 8 1 2 1 \n", "48 0 3 0 0 0 0 1 0 1 4 \n", "49 2 6 1 0 0 0 5 1 2 1 \n", "50 0 1 0 0 0 0 1 0 1 0 \n", "51 1 7 0 0 0 0 2 1 2 2 \n", "52 1 5 0 0 0 1 2 0 3 0 \n", "53 0 4 0 2 0 0 5 0 1 1 \n", "54 1 2 2 1 0 1 1 0 0 2 \n", "\n", " ... written wrote year years yes yet york you young your \n", "0 ... 0 0 0 0 0 1 0 3 0 1 \n", "1 ... 0 0 0 4 0 2 1 8 1 4 \n", "2 ... 1 0 0 5 0 0 3 4 2 1 \n", "3 ... 1 0 0 1 0 0 2 0 2 1 \n", "4 ... 1 0 0 0 0 1 0 6 0 2 \n", "5 ... 0 0 1 1 0 1 0 6 0 0 \n", "6 ... 0 1 1 0 1 0 2 0 2 0 \n", "7 ... 
0 1 3 1 0 0 0 3 1 2 \n", "8 ... 2 1 0 1 0 2 6 1 2 0 \n", "9 ... 0 0 2 4 0 1 2 4 1 0 \n", "10 ... 0 0 2 2 0 1 0 0 1 0 \n", "11 ... 0 0 3 4 0 1 5 0 2 0 \n", "12 ... 0 0 0 6 0 0 2 1 1 1 \n", "13 ... 4 0 0 1 0 0 3 0 1 0 \n", "14 ... 0 0 4 0 0 0 3 2 1 0 \n", "15 ... 1 0 1 2 0 1 1 0 0 0 \n", "16 ... 0 0 5 2 0 0 1 0 0 0 \n", "17 ... 0 0 0 0 0 4 0 16 1 0 \n", "18 ... 0 0 0 0 0 3 0 9 0 2 \n", "19 ... 0 0 0 1 0 1 2 22 3 2 \n", "20 ... 0 0 0 8 0 1 0 10 2 1 \n", "21 ... 0 0 0 0 0 2 1 6 1 0 \n", "22 ... 0 0 0 0 0 2 1 11 0 2 \n", "23 ... 0 0 0 0 0 1 0 11 1 2 \n", "24 ... 1 1 0 2 4 0 0 13 0 4 \n", "25 ... 0 0 0 0 0 2 0 6 1 2 \n", "26 ... 1 0 0 1 0 0 0 13 0 2 \n", "27 ... 0 0 0 0 0 0 0 4 4 0 \n", "28 ... 0 0 0 1 3 2 0 1 0 1 \n", "29 ... 0 0 0 0 0 0 0 7 1 0 \n", "30 ... 0 0 1 1 0 5 0 8 3 1 \n", "31 ... 1 4 0 1 1 0 0 11 2 4 \n", "32 ... 1 0 1 1 1 1 3 1 0 0 \n", "33 ... 0 0 0 3 2 0 0 21 0 4 \n", "34 ... 0 1 0 1 0 0 1 6 0 0 \n", "35 ... 1 0 0 0 0 1 0 13 0 0 \n", "36 ... 0 3 0 1 0 2 0 12 0 2 \n", "37 ... 1 0 0 0 4 0 0 9 0 0 \n", "38 ... 0 0 3 2 0 0 1 14 0 5 \n", "39 ... 0 1 0 7 1 0 0 8 2 1 \n", "40 ... 0 0 0 1 1 0 0 21 0 4 \n", "41 ... 0 0 0 6 2 2 0 0 1 0 \n", "42 ... 0 0 2 1 1 0 0 9 0 1 \n", "43 ... 0 0 0 1 0 0 0 5 1 0 \n", "44 ... 0 1 0 5 0 0 0 30 3 2 \n", "45 ... 0 1 2 4 0 2 1 1 1 0 \n", "46 ... 0 0 1 2 0 0 0 5 0 1 \n", "47 ... 0 0 2 1 1 0 2 11 0 2 \n", "48 ... 0 3 1 1 0 2 0 1 3 0 \n", "49 ... 0 0 0 1 0 0 0 27 0 1 \n", "50 ... 0 1 3 2 1 1 0 26 0 8 \n", "51 ... 1 3 1 9 1 0 0 20 0 3 \n", "52 ... 0 0 0 0 2 0 0 20 0 5 \n", "53 ... 0 0 0 2 0 1 2 2 0 1 \n", "54 ... 
0 0 0 3 0 0 0 45 1 17 \n", "\n", "[55 rows x 739 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.feature_extraction.text import CountVectorizer\n", "\n", "## min_df / max_df are given as floats, i.e. document-frequency proportions\n", "cv = CountVectorizer(min_df=0.2, max_df=1.)\n", "cv_matrix = cv.fit_transform(corpus_norm)\n", "\n", "## view the array\n", "cv_matrix = cv_matrix.toarray()\n", "\n", "## get_feature_names() was removed in scikit-learn 1.2; only fall back to it on old\n", "## releases that do not have get_feature_names_out() yet. list() keeps vocab a list.\n", "vocab = list(cv.get_feature_names_out() if hasattr(cv, 'get_feature_names_out') else cv.get_feature_names())\n", "boa_unigram = pd.DataFrame(cv_matrix, columns=vocab)\n", "boa_unigram" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## More Complex Bag-of-Words\n", "\n", "- Filter features based on word classes\n", "- Include n-gram features" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Bag of N-grams Model" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ableable toaboutabout itabout theaboveacrossacross theactadded...yetyorkyouyou areyou canyou couldyou haveyou knowyoungyour
01050300000...1030000001
10030001020...2181001014
21030000010...0341001021
30020100040...0200000021
41141110000...1060111002
\n", "

5 rows × 1193 columns

\n", "
" ], "text/plain": [ " able able to about about it about the above across across the act \\\n", "0 1 0 5 0 3 0 0 0 0 \n", "1 0 0 3 0 0 0 1 0 2 \n", "2 1 0 3 0 0 0 0 0 1 \n", "3 0 0 2 0 1 0 0 0 4 \n", "4 1 1 4 1 1 1 0 0 0 \n", "\n", " added ... yet york you you are you can you could you have \\\n", "0 0 ... 1 0 3 0 0 0 0 \n", "1 0 ... 2 1 8 1 0 0 1 \n", "2 0 ... 0 3 4 1 0 0 1 \n", "3 0 ... 0 2 0 0 0 0 0 \n", "4 0 ... 1 0 6 0 1 1 1 \n", "\n", " you know young your \n", "0 0 0 1 \n", "1 0 1 4 \n", "2 0 2 1 \n", "3 0 2 1 \n", "4 0 0 2 \n", "\n", "[5 rows x 1193 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## N-grams\n", "\n", "## ngram_range=(1,3): unigrams, bigrams and trigrams are all counted as features\n", "cv_ngram = CountVectorizer(ngram_range=(1,3), min_df = 0.2)\n", "cv_ngram_matrix = cv_ngram.fit_transform(corpus_norm)\n", "\n", "## get_feature_names() was removed in scikit-learn 1.2; only fall back to it on old\n", "## releases that do not have get_feature_names_out() yet\n", "ngram_vocab = list(cv_ngram.get_feature_names_out() if hasattr(cv_ngram, 'get_feature_names_out') else cv_ngram.get_feature_names())\n", "boa_ngram = pd.DataFrame(cv_ngram_matrix.toarray(), columns=ngram_vocab)\n", "boa_ngram.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## TF-IDF Model" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ableaboutaboveacrossactaddedafterafternoonagainagainst...writtenwroteyearyearsyesyetyorkyouyoungyour
00.0094620.0247540.0000000.0000000.0000000.0000000.0053180.0000000.0056140.000000...0.0000000.0000000.0000000.0000000.0000000.0080880.0000000.0165400.0000000.007161
10.0000000.0145330.0000000.0084440.0223090.0000000.0052040.0000000.0054930.000000...0.0000000.0000000.0000000.0245280.0000000.0158290.0090420.0431600.0075920.028028
20.0094050.0147630.0000000.0000000.0113310.0000000.0158590.0000000.0000000.000000...0.0113310.0000000.0000000.0311430.0000000.0000000.0275530.0219200.0154240.007117
30.0000000.0105550.0000000.0000000.0486090.0000000.0113390.0000000.0179540.000000...0.0121520.0000000.0000000.0066800.0000000.0000000.0197010.0000000.0165420.007634
40.0098870.0206900.0098870.0000000.0000000.0000000.0055570.0000000.0058660.008821...0.0119100.0000000.0000000.0000000.0000000.0084510.0000000.0345630.0000000.014963
50.0000000.0469360.0000000.0000000.0000000.0000000.0183350.0000000.0048390.007276...0.0000000.0000000.0083560.0054010.0000000.0069710.0000000.0285110.0000000.000000
60.0094120.0049240.0094120.0000000.0113380.0120710.0317400.0188240.0111680.000000...0.0000000.0113380.0096440.0000000.0107040.0000000.0183820.0000000.0154340.000000
70.0097250.0508830.0000000.0000000.0000000.0000000.0218650.0389020.0173100.000000...0.0000000.0117160.0298950.0064410.0000000.0000000.0000000.0170000.0079740.014719
80.0088830.0046480.0088830.0000000.0000000.0113930.0000000.0000000.0105400.000000...0.0214030.0107010.0000000.0058830.0000000.0151860.0520470.0051760.0145670.000000
90.0000000.0048980.0000000.0085380.0000000.0000000.0157860.0000000.0000000.008353...0.0000000.0000000.0191860.0248000.0000000.0080030.0182850.0218200.0076770.000000
100.0093360.0293070.0093360.0000000.0000000.0119730.0052470.0000000.0055390.016659...0.0000000.0000000.0191320.0123650.0000000.0079800.0000000.0000000.0076550.000000
110.0000000.0137600.0000000.0000000.0000000.0000000.0049270.0000000.0000000.007821...0.0000000.0000000.0269480.0232230.0000000.0074930.0428040.0000000.0143760.000000
120.0179450.0187780.0000000.0000000.0324280.0000000.0100860.0000000.0053230.000000...0.0000000.0000000.0000000.0356520.0000000.0000000.0175240.0052280.0073570.006790
130.0000000.0134230.0085520.0000000.0103030.0000000.0144200.0000000.0050740.000000...0.0412100.0000000.0000000.0056630.0000000.0000000.0250540.0000000.0070120.000000
140.0000000.0269450.0103000.0093940.0124090.0000000.0057890.0000000.0000000.000000...0.0000000.0000000.0422160.0000000.0000000.0000000.0301750.0120030.0084460.000000
150.0097800.0460520.0097800.0000000.0000000.0000000.0109940.0000000.0058020.008725...0.0117820.0000000.0100210.0129530.0000000.0083600.0095500.0000000.0000000.000000
160.0000000.0202790.0096900.0000000.0116740.0000000.0054460.0000000.0057490.000000...0.0000000.0000000.0496440.0128340.0000000.0000000.0094630.0000000.0000000.000000
170.0098860.0568980.0000000.0180320.0000000.0000000.0166700.0197730.0117310.000000...0.0000000.0000000.0000000.0000000.0000000.0338020.0000000.0921670.0081060.000000
180.0000000.0448690.0190580.0000000.0000000.0000000.0053560.0095290.0113070.017003...0.0000000.0000000.0000000.0000000.0000000.0244350.0000000.0499690.0000000.014422
190.0000000.0498570.0095290.0173810.0000000.0000000.0000000.0000000.0056540.000000...0.0000000.0000000.0000000.0063110.0000000.0081450.0186110.1221520.0234400.014423
200.0000000.0468280.0000000.0244880.0000000.0000000.0150920.0000000.0106200.000000...0.0000000.0000000.0000000.0474180.0000000.0076500.0000000.0521500.0146770.006773
210.0091710.0287910.0183430.0083640.0331460.0000000.0154640.0000000.0108830.000000...0.0000000.0000000.0000000.0000000.0000000.0156790.0089560.0320630.0075200.000000
220.0000000.0045910.0000000.0240060.0000000.0000000.0049320.0087740.0260280.000000...0.0000000.0000000.0000000.0000000.0000000.0150000.0085680.0562360.0000000.013280
230.0000000.0271810.0103900.0000000.0000000.0000000.0058400.0000000.0369870.000000...0.0000000.0000000.0000000.0000000.0000000.0088810.0000000.0665950.0085190.015726
240.0000000.0195410.0000000.0000000.0000000.0000000.0314880.0000000.0110790.016661...0.0112480.0112480.0000000.0123670.0424760.0000000.0000000.0707250.0000000.028264
250.0000000.0416020.0088350.0080570.0000000.0000000.0148970.0000000.0000000.007882...0.0000000.0000000.0000000.0000000.0000000.0151040.0000000.0308870.0072440.013372
260.0000000.0047290.0180770.0000000.0000000.0000000.0152400.0000000.0107250.016128...0.0108890.0000000.0000000.0059860.0000000.0000000.0000000.0684630.0000000.013680
270.0000000.0125720.0160190.0292190.0000000.0000000.0135060.0080100.0332640.007146...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0186680.0262700.000000
280.0000000.0262870.0000000.0183280.0000000.0000000.0056480.0000000.0178850.000000...0.0000000.0000000.0000000.0066550.0342840.0171780.0000000.0058550.0000000.007604
290.0085470.0223590.0000000.0155900.0102970.0000000.0096080.0085470.0050710.007626...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0348610.0070080.000000
300.0098660.0206470.0000000.0000000.0000000.0126530.0221800.0098660.0058530.000000...0.0000000.0000000.0101090.0065330.0000000.0421640.0000000.0459870.0242680.007466
310.0000000.0289490.0000000.0084100.0000000.0000000.0259160.0184440.0164140.032910...0.0111090.0444380.0000000.0061070.0104880.0000000.0000000.0591050.0151230.027914
320.0000000.0000000.0000000.0163630.0000000.0000000.0000000.0000000.0106450.000000...0.0108070.0000000.0091920.0059410.0102030.0076680.0262810.0052270.0000000.000000
330.0103170.0215920.0000000.0094090.0000000.0000000.0000000.0103170.0122420.000000...0.0000000.0000000.0000000.0204970.0234670.0000000.0000000.1262400.0000000.031230
340.0000000.0922120.0110150.0100460.0132700.0141270.0123820.0000000.0130710.000000...0.0000000.0132700.0000000.0072950.0000000.0000000.0107570.0385100.0000000.000000
350.0168570.0529180.0000000.0000000.0000000.0108100.0094750.0168570.0000000.007520...0.0101540.0000000.0000000.0000000.0000000.0072040.0000000.0638430.0000000.000000
360.0000000.0362440.0000000.0000000.0119220.0000000.0000000.0000000.0176140.017659...0.0000000.0357660.0000000.0065540.0000000.0169180.0000000.0691940.0000000.014978
370.0000000.0000000.0000000.0158380.0000000.0000000.0048800.0086830.0154550.030987...0.0104600.0000000.0000000.0000000.0395000.0000000.0000000.0455340.0000000.000000
380.0000000.0051140.0000000.0000000.0000000.0000000.0219750.0195480.0000000.008720...0.0000000.0000000.0300450.0129460.0000000.0000000.0095450.0797310.0000000.036983
390.0000000.0086470.0082640.0075360.0000000.0105980.0000000.0000000.0245140.014745...0.0000000.0099550.0000000.0383080.0093980.0000000.0000000.0385190.0135510.006254
400.0000000.0621350.0000000.0000000.0000000.0000000.0166870.0000000.0117430.000000...0.0000000.0000000.0000000.0065540.0112550.0000000.0000000.1210930.0000000.029957
410.0000000.0050350.0192450.0087760.0000000.0000000.0216340.0096230.0285450.000000...0.0000000.0000000.0000000.0382350.0218870.0164500.0000000.0000000.0078900.000000
420.0000000.0625560.0000000.0000000.0000000.0000000.0168000.0099640.0118230.017779...0.0000000.0000000.0204180.0065980.0113310.0000000.0000000.0522490.0000000.007540
430.0000000.0084080.0000000.0146570.0000000.0000000.0090330.0080360.0095350.000000...0.0000000.0000000.0000000.0053210.0000000.0000000.0000000.0234100.0065890.000000
440.0000000.0451690.0000000.0087480.0000000.0123030.0215660.0000000.0000000.000000...0.0000000.0115560.0000000.0317630.0000000.0000000.0000000.1676760.0235960.014518
450.0000000.0206850.0000000.0090140.0119070.0000000.0111110.0098840.0175920.000000...0.0000000.0119070.0202550.0261820.0000000.0168970.0096520.0057590.0081040.000000
460.0000000.0103880.0000000.0000000.0000000.0000000.0223180.0000000.0176690.000000...0.0000000.0000000.0101710.0131480.0000000.0000000.0000000.0289200.0000000.007512
470.0106980.0335830.0000000.0000000.0000000.0137200.0481020.0106980.0126940.009544...0.0000000.0000000.0219230.0070850.0121670.0000000.0208930.0685660.0000000.016191
480.0000000.0154270.0000000.0000000.0000000.0000000.0055240.0000000.0058310.035076...0.0000000.0355220.0100710.0065090.0000000.0168020.0000000.0057270.0241770.000000
490.0198500.0311570.0099250.0000000.0000000.0000000.0278920.0099250.0117770.008855...0.0000000.0000000.0000000.0065730.0000000.0000000.0000000.1561410.0000000.007511
500.0000000.0057300.0000000.0000000.0000000.0000000.0061550.0000000.0064970.000000...0.0000000.0131930.0336620.0145040.0124540.0093610.0000000.1658990.0000000.066297
510.0093810.0343560.0000000.0000000.0000000.0000000.0105450.0093810.0111310.016739...0.0113010.0339030.0096120.0559110.0106690.0000000.0000000.1093170.0000000.021297
520.0102020.0266880.0000000.0000000.0000000.0130840.0114680.0000000.0181580.000000...0.0000000.0000000.0000000.0000000.0232050.0000000.0000000.1188850.0000000.038601
530.0000000.0189510.0000000.0165170.0000000.0000000.0254480.0000000.0053730.008079...0.0000000.0000000.0000000.0119940.0000000.0077400.0176860.0105530.0000000.006853
540.0102980.0107760.0205960.0093920.0000000.0132080.0057880.0000000.0000000.018376...0.0000000.0000000.0000000.0204600.0000000.0000000.0000000.2700160.0084440.132484
\n", "

55 rows × 739 columns

\n", "
" ], "text/plain": [ " able about above across act added after \\\n", "0 0.009462 0.024754 0.000000 0.000000 0.000000 0.000000 0.005318 \n", "1 0.000000 0.014533 0.000000 0.008444 0.022309 0.000000 0.005204 \n", "2 0.009405 0.014763 0.000000 0.000000 0.011331 0.000000 0.015859 \n", "3 0.000000 0.010555 0.000000 0.000000 0.048609 0.000000 0.011339 \n", "4 0.009887 0.020690 0.009887 0.000000 0.000000 0.000000 0.005557 \n", "5 0.000000 0.046936 0.000000 0.000000 0.000000 0.000000 0.018335 \n", "6 0.009412 0.004924 0.009412 0.000000 0.011338 0.012071 0.031740 \n", "7 0.009725 0.050883 0.000000 0.000000 0.000000 0.000000 0.021865 \n", "8 0.008883 0.004648 0.008883 0.000000 0.000000 0.011393 0.000000 \n", "9 0.000000 0.004898 0.000000 0.008538 0.000000 0.000000 0.015786 \n", "10 0.009336 0.029307 0.009336 0.000000 0.000000 0.011973 0.005247 \n", "11 0.000000 0.013760 0.000000 0.000000 0.000000 0.000000 0.004927 \n", "12 0.017945 0.018778 0.000000 0.000000 0.032428 0.000000 0.010086 \n", "13 0.000000 0.013423 0.008552 0.000000 0.010303 0.000000 0.014420 \n", "14 0.000000 0.026945 0.010300 0.009394 0.012409 0.000000 0.005789 \n", "15 0.009780 0.046052 0.009780 0.000000 0.000000 0.000000 0.010994 \n", "16 0.000000 0.020279 0.009690 0.000000 0.011674 0.000000 0.005446 \n", "17 0.009886 0.056898 0.000000 0.018032 0.000000 0.000000 0.016670 \n", "18 0.000000 0.044869 0.019058 0.000000 0.000000 0.000000 0.005356 \n", "19 0.000000 0.049857 0.009529 0.017381 0.000000 0.000000 0.000000 \n", "20 0.000000 0.046828 0.000000 0.024488 0.000000 0.000000 0.015092 \n", "21 0.009171 0.028791 0.018343 0.008364 0.033146 0.000000 0.015464 \n", "22 0.000000 0.004591 0.000000 0.024006 0.000000 0.000000 0.004932 \n", "23 0.000000 0.027181 0.010390 0.000000 0.000000 0.000000 0.005840 \n", "24 0.000000 0.019541 0.000000 0.000000 0.000000 0.000000 0.031488 \n", "25 0.000000 0.041602 0.008835 0.008057 0.000000 0.000000 0.014897 \n", "26 0.000000 0.004729 0.018077 0.000000 0.000000 0.000000 0.015240 
\n", "27 0.000000 0.012572 0.016019 0.029219 0.000000 0.000000 0.013506 \n", "28 0.000000 0.026287 0.000000 0.018328 0.000000 0.000000 0.005648 \n", "29 0.008547 0.022359 0.000000 0.015590 0.010297 0.000000 0.009608 \n", "30 0.009866 0.020647 0.000000 0.000000 0.000000 0.012653 0.022180 \n", "31 0.000000 0.028949 0.000000 0.008410 0.000000 0.000000 0.025916 \n", "32 0.000000 0.000000 0.000000 0.016363 0.000000 0.000000 0.000000 \n", "33 0.010317 0.021592 0.000000 0.009409 0.000000 0.000000 0.000000 \n", "34 0.000000 0.092212 0.011015 0.010046 0.013270 0.014127 0.012382 \n", "35 0.016857 0.052918 0.000000 0.000000 0.000000 0.010810 0.009475 \n", "36 0.000000 0.036244 0.000000 0.000000 0.011922 0.000000 0.000000 \n", "37 0.000000 0.000000 0.000000 0.015838 0.000000 0.000000 0.004880 \n", "38 0.000000 0.005114 0.000000 0.000000 0.000000 0.000000 0.021975 \n", "39 0.000000 0.008647 0.008264 0.007536 0.000000 0.010598 0.000000 \n", "40 0.000000 0.062135 0.000000 0.000000 0.000000 0.000000 0.016687 \n", "41 0.000000 0.005035 0.019245 0.008776 0.000000 0.000000 0.021634 \n", "42 0.000000 0.062556 0.000000 0.000000 0.000000 0.000000 0.016800 \n", "43 0.000000 0.008408 0.000000 0.014657 0.000000 0.000000 0.009033 \n", "44 0.000000 0.045169 0.000000 0.008748 0.000000 0.012303 0.021566 \n", "45 0.000000 0.020685 0.000000 0.009014 0.011907 0.000000 0.011111 \n", "46 0.000000 0.010388 0.000000 0.000000 0.000000 0.000000 0.022318 \n", "47 0.010698 0.033583 0.000000 0.000000 0.000000 0.013720 0.048102 \n", "48 0.000000 0.015427 0.000000 0.000000 0.000000 0.000000 0.005524 \n", "49 0.019850 0.031157 0.009925 0.000000 0.000000 0.000000 0.027892 \n", "50 0.000000 0.005730 0.000000 0.000000 0.000000 0.000000 0.006155 \n", "51 0.009381 0.034356 0.000000 0.000000 0.000000 0.000000 0.010545 \n", "52 0.010202 0.026688 0.000000 0.000000 0.000000 0.013084 0.011468 \n", "53 0.000000 0.018951 0.000000 0.016517 0.000000 0.000000 0.025448 \n", "54 0.010298 0.010776 0.020596 0.009392 0.000000 
0.013208 0.005788 \n", "\n", " afternoon again against ... written wrote year \\\n", "0 0.000000 0.005614 0.000000 ... 0.000000 0.000000 0.000000 \n", "1 0.000000 0.005493 0.000000 ... 0.000000 0.000000 0.000000 \n", "2 0.000000 0.000000 0.000000 ... 0.011331 0.000000 0.000000 \n", "3 0.000000 0.017954 0.000000 ... 0.012152 0.000000 0.000000 \n", "4 0.000000 0.005866 0.008821 ... 0.011910 0.000000 0.000000 \n", "5 0.000000 0.004839 0.007276 ... 0.000000 0.000000 0.008356 \n", "6 0.018824 0.011168 0.000000 ... 0.000000 0.011338 0.009644 \n", "7 0.038902 0.017310 0.000000 ... 0.000000 0.011716 0.029895 \n", "8 0.000000 0.010540 0.000000 ... 0.021403 0.010701 0.000000 \n", "9 0.000000 0.000000 0.008353 ... 0.000000 0.000000 0.019186 \n", "10 0.000000 0.005539 0.016659 ... 0.000000 0.000000 0.019132 \n", "11 0.000000 0.000000 0.007821 ... 0.000000 0.000000 0.026948 \n", "12 0.000000 0.005323 0.000000 ... 0.000000 0.000000 0.000000 \n", "13 0.000000 0.005074 0.000000 ... 0.041210 0.000000 0.000000 \n", "14 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.042216 \n", "15 0.000000 0.005802 0.008725 ... 0.011782 0.000000 0.010021 \n", "16 0.000000 0.005749 0.000000 ... 0.000000 0.000000 0.049644 \n", "17 0.019773 0.011731 0.000000 ... 0.000000 0.000000 0.000000 \n", "18 0.009529 0.011307 0.017003 ... 0.000000 0.000000 0.000000 \n", "19 0.000000 0.005654 0.000000 ... 0.000000 0.000000 0.000000 \n", "20 0.000000 0.010620 0.000000 ... 0.000000 0.000000 0.000000 \n", "21 0.000000 0.010883 0.000000 ... 0.000000 0.000000 0.000000 \n", "22 0.008774 0.026028 0.000000 ... 0.000000 0.000000 0.000000 \n", "23 0.000000 0.036987 0.000000 ... 0.000000 0.000000 0.000000 \n", "24 0.000000 0.011079 0.016661 ... 0.011248 0.011248 0.000000 \n", "25 0.000000 0.000000 0.007882 ... 0.000000 0.000000 0.000000 \n", "26 0.000000 0.010725 0.016128 ... 0.010889 0.000000 0.000000 \n", "27 0.008010 0.033264 0.007146 ... 0.000000 0.000000 0.000000 \n", "28 0.000000 0.017885 0.000000 ... 
0.000000 0.000000 0.000000 \n", "29 0.008547 0.005071 0.007626 ... 0.000000 0.000000 0.000000 \n", "30 0.009866 0.005853 0.000000 ... 0.000000 0.000000 0.010109 \n", "31 0.018444 0.016414 0.032910 ... 0.011109 0.044438 0.000000 \n", "32 0.000000 0.010645 0.000000 ... 0.010807 0.000000 0.009192 \n", "33 0.010317 0.012242 0.000000 ... 0.000000 0.000000 0.000000 \n", "34 0.000000 0.013071 0.000000 ... 0.000000 0.013270 0.000000 \n", "35 0.016857 0.000000 0.007520 ... 0.010154 0.000000 0.000000 \n", "36 0.000000 0.017614 0.017659 ... 0.000000 0.035766 0.000000 \n", "37 0.008683 0.015455 0.030987 ... 0.010460 0.000000 0.000000 \n", "38 0.019548 0.000000 0.008720 ... 0.000000 0.000000 0.030045 \n", "39 0.000000 0.024514 0.014745 ... 0.000000 0.009955 0.000000 \n", "40 0.000000 0.011743 0.000000 ... 0.000000 0.000000 0.000000 \n", "41 0.009623 0.028545 0.000000 ... 0.000000 0.000000 0.000000 \n", "42 0.009964 0.011823 0.017779 ... 0.000000 0.000000 0.020418 \n", "43 0.008036 0.009535 0.000000 ... 0.000000 0.000000 0.000000 \n", "44 0.000000 0.000000 0.000000 ... 0.000000 0.011556 0.000000 \n", "45 0.009884 0.017592 0.000000 ... 0.000000 0.011907 0.020255 \n", "46 0.000000 0.017669 0.000000 ... 0.000000 0.000000 0.010171 \n", "47 0.010698 0.012694 0.009544 ... 0.000000 0.000000 0.021923 \n", "48 0.000000 0.005831 0.035076 ... 0.000000 0.035522 0.010071 \n", "49 0.009925 0.011777 0.008855 ... 0.000000 0.000000 0.000000 \n", "50 0.000000 0.006497 0.000000 ... 0.000000 0.013193 0.033662 \n", "51 0.009381 0.011131 0.016739 ... 0.011301 0.033903 0.009612 \n", "52 0.000000 0.018158 0.000000 ... 0.000000 0.000000 0.000000 \n", "53 0.000000 0.005373 0.008079 ... 0.000000 0.000000 0.000000 \n", "54 0.000000 0.000000 0.018376 ... 
0.000000 0.000000 0.000000 \n", "\n", " years yes yet york you young your \n", "0 0.000000 0.000000 0.008088 0.000000 0.016540 0.000000 0.007161 \n", "1 0.024528 0.000000 0.015829 0.009042 0.043160 0.007592 0.028028 \n", "2 0.031143 0.000000 0.000000 0.027553 0.021920 0.015424 0.007117 \n", "3 0.006680 0.000000 0.000000 0.019701 0.000000 0.016542 0.007634 \n", "4 0.000000 0.000000 0.008451 0.000000 0.034563 0.000000 0.014963 \n", "5 0.005401 0.000000 0.006971 0.000000 0.028511 0.000000 0.000000 \n", "6 0.000000 0.010704 0.000000 0.018382 0.000000 0.015434 0.000000 \n", "7 0.006441 0.000000 0.000000 0.000000 0.017000 0.007974 0.014719 \n", "8 0.005883 0.000000 0.015186 0.052047 0.005176 0.014567 0.000000 \n", "9 0.024800 0.000000 0.008003 0.018285 0.021820 0.007677 0.000000 \n", "10 0.012365 0.000000 0.007980 0.000000 0.000000 0.007655 0.000000 \n", "11 0.023223 0.000000 0.007493 0.042804 0.000000 0.014376 0.000000 \n", "12 0.035652 0.000000 0.000000 0.017524 0.005228 0.007357 0.006790 \n", "13 0.005663 0.000000 0.000000 0.025054 0.000000 0.007012 0.000000 \n", "14 0.000000 0.000000 0.000000 0.030175 0.012003 0.008446 0.000000 \n", "15 0.012953 0.000000 0.008360 0.009550 0.000000 0.000000 0.000000 \n", "16 0.012834 0.000000 0.000000 0.009463 0.000000 0.000000 0.000000 \n", "17 0.000000 0.000000 0.033802 0.000000 0.092167 0.008106 0.000000 \n", "18 0.000000 0.000000 0.024435 0.000000 0.049969 0.000000 0.014422 \n", "19 0.006311 0.000000 0.008145 0.018611 0.122152 0.023440 0.014423 \n", "20 0.047418 0.000000 0.007650 0.000000 0.052150 0.014677 0.006773 \n", "21 0.000000 0.000000 0.015679 0.008956 0.032063 0.007520 0.000000 \n", "22 0.000000 0.000000 0.015000 0.008568 0.056236 0.000000 0.013280 \n", "23 0.000000 0.000000 0.008881 0.000000 0.066595 0.008519 0.015726 \n", "24 0.012367 0.042476 0.000000 0.000000 0.070725 0.000000 0.028264 \n", "25 0.000000 0.000000 0.015104 0.000000 0.030887 0.007244 0.013372 \n", "26 0.005986 0.000000 0.000000 0.000000 0.068463 0.000000 
0.013680 \n", "27 0.000000 0.000000 0.000000 0.000000 0.018668 0.026270 0.000000 \n", "28 0.006655 0.034284 0.017178 0.000000 0.005855 0.000000 0.007604 \n", "29 0.000000 0.000000 0.000000 0.000000 0.034861 0.007008 0.000000 \n", "30 0.006533 0.000000 0.042164 0.000000 0.045987 0.024268 0.007466 \n", "31 0.006107 0.010488 0.000000 0.000000 0.059105 0.015123 0.027914 \n", "32 0.005941 0.010203 0.007668 0.026281 0.005227 0.000000 0.000000 \n", "33 0.020497 0.023467 0.000000 0.000000 0.126240 0.000000 0.031230 \n", "34 0.007295 0.000000 0.000000 0.010757 0.038510 0.000000 0.000000 \n", "35 0.000000 0.000000 0.007204 0.000000 0.063843 0.000000 0.000000 \n", "36 0.006554 0.000000 0.016918 0.000000 0.069194 0.000000 0.014978 \n", "37 0.000000 0.039500 0.000000 0.000000 0.045534 0.000000 0.000000 \n", "38 0.012946 0.000000 0.000000 0.009545 0.079731 0.000000 0.036983 \n", "39 0.038308 0.009398 0.000000 0.000000 0.038519 0.013551 0.006254 \n", "40 0.006554 0.011255 0.000000 0.000000 0.121093 0.000000 0.029957 \n", "41 0.038235 0.021887 0.016450 0.000000 0.000000 0.007890 0.000000 \n", "42 0.006598 0.011331 0.000000 0.000000 0.052249 0.000000 0.007540 \n", "43 0.005321 0.000000 0.000000 0.000000 0.023410 0.006589 0.000000 \n", "44 0.031763 0.000000 0.000000 0.000000 0.167676 0.023596 0.014518 \n", "45 0.026182 0.000000 0.016897 0.009652 0.005759 0.008104 0.000000 \n", "46 0.013148 0.000000 0.000000 0.000000 0.028920 0.000000 0.007512 \n", "47 0.007085 0.012167 0.000000 0.020893 0.068566 0.000000 0.016191 \n", "48 0.006509 0.000000 0.016802 0.000000 0.005727 0.024177 0.000000 \n", "49 0.006573 0.000000 0.000000 0.000000 0.156141 0.000000 0.007511 \n", "50 0.014504 0.012454 0.009361 0.000000 0.165899 0.000000 0.066297 \n", "51 0.055911 0.010669 0.000000 0.000000 0.109317 0.000000 0.021297 \n", "52 0.000000 0.023205 0.000000 0.000000 0.118885 0.000000 0.038601 \n", "53 0.011994 0.000000 0.007740 0.017686 0.010553 0.000000 0.006853 \n", "54 0.020460 0.000000 0.000000 0.000000 
0.270016 0.008444 0.132484 \n", "\n", "[55 rows x 739 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.feature_extraction.text import TfidfTransformer\n", "# Re-weight the raw counts in cv_matrix with TF-IDF; norm='l2' scales every document row to unit Euclidean length.\n", "# NOTE(review): assumes cv_matrix was produced by `cv` in an earlier cell, so its columns line up with cv's vocabulary.\n", "tf = TfidfTransformer(norm = 'l2', use_idf=True)\n", "tf_matrix = tf.fit_transform(cv_matrix)\n", "\n", "# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2, so prefer\n", "# get_feature_names_out() when it exists and fall back to the old accessor on older releases.\n", "tfidf_unigram = pd.DataFrame(\n", "    tf_matrix.toarray(),\n", "    columns = cv.get_feature_names_out() if hasattr(cv, 'get_feature_names_out') else cv.get_feature_names(),\n", ")\n", "tfidf_unigram" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ableable toaboutabout itabout theaboveacrossacross theactadded...yetyorkyouyou areyou canyou couldyou haveyou knowyoungyour
00.0092170.0000000.0241120.0000000.0231460.0000000.0000000.0000000.0000000.000000...0.0078780.0000000.0161110.0000000.0000000.0000000.0000000.0000000.0000000.006975
10.0000000.0000000.0141430.0000000.0000000.0000000.0082170.0000000.0217100.000000...0.0154040.0087990.0420010.0111920.0000000.0000000.0102480.0000000.0073880.027275
20.0091640.0000000.0143840.0000000.0000000.0000000.0000000.0000000.0110400.000000...0.0000000.0268470.0213580.0113830.0000000.0000000.0104220.0000000.0150280.006935
30.0000000.0000000.0103310.0000000.0082640.0000000.0000000.0000000.0475770.000000...0.0000000.0192830.0000000.0000000.0000000.0000000.0000000.0000000.0161910.007472
40.0096640.0113050.0202240.0116420.0080890.0096640.0000000.0000000.0000000.000000...0.0082600.0000000.0337840.0000000.0109900.0120030.0109900.0000000.0000000.014626
50.0000000.0000000.0458610.0096000.0200100.0000000.0000000.0000000.0000000.000000...0.0068110.0000000.0278580.0000000.0090630.0000000.0090630.0000000.0000000.000000
60.0092530.0108250.0048410.0000000.0000000.0092530.0000000.0000000.0111470.011867...0.0000000.0180720.0000000.0000000.0000000.0000000.0000000.0000000.0151740.000000
70.0094440.0110480.0494100.0000000.0158100.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0165080.0000000.0000000.0000000.0107400.0000000.0077430.014293
80.0086530.0000000.0045270.0000000.0000000.0086530.0000000.0000000.0000000.011097...0.0147920.0506960.0050410.0000000.0000000.0000000.0000000.0000000.0141890.000000
90.0000000.0000000.0047900.0000000.0076630.0000000.0083490.0089400.0000000.000000...0.0078260.0178810.0213380.0000000.0104120.0000000.0000000.0000000.0075070.000000
100.0091460.0000000.0287100.0000000.0229660.0091460.0000000.0000000.0000000.011729...0.0078170.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0074990.000000
110.0000000.0000000.0134550.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0073270.0418550.0000000.0000000.0000000.0000000.0000000.0000000.0140570.000000
120.0176070.0205980.0184240.0000000.0147380.0000000.0000000.0000000.0318170.000000...0.0000000.0171940.0051300.0000000.0000000.0000000.0000000.0000000.0072180.006662
130.0000000.0000000.0131750.0000000.0000000.0083940.0000000.0000000.0101120.000000...0.0000000.0245910.0000000.0000000.0000000.0000000.0000000.0000000.0068830.000000
140.0000000.0000000.0264810.0000000.0084730.0101230.0092320.0098850.0121950.000000...0.0000000.0296550.0117960.0000000.0115120.0000000.0000000.0000000.0083000.000000
150.0095610.0000000.0450230.0000000.0080030.0095610.0000000.0000000.0000000.000000...0.0081730.0093370.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
160.0000000.0000000.0199910.0000000.0000000.0095520.0000000.0000000.0115080.000000...0.0000000.0093280.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
170.0094870.0000000.0545990.0228580.0079410.0000000.0173040.0185280.0000000.000000...0.0324360.0000000.0884420.0000000.0000000.0117840.0000000.0000000.0077790.000000
180.0000000.0000000.0435510.0000000.0154840.0184980.0000000.0000000.0000000.000000...0.0237170.0000000.0485000.0114880.0105190.0000000.0105190.0000000.0000000.013998
190.0000000.0000000.0483360.0000000.0154660.0092390.0168510.0180430.0000000.000000...0.0078970.0180430.1184240.0000000.0000000.0229510.0210140.0105070.0227250.013983
200.0000000.0000000.0454880.0418960.0000000.0000000.0237870.0254700.0000000.000000...0.0074320.0000000.0506580.0000000.0000000.0000000.0000000.0098880.0142570.006579
210.0087790.0102700.0275580.0105760.0000000.0175570.0080060.0000000.0317270.000000...0.0150080.0085730.0306900.0000000.0000000.0000000.0000000.0000000.0071980.000000
220.0000000.0000000.0044980.0000000.0000000.0000000.0235190.0251830.0000000.000000...0.0146960.0083940.0550960.0000000.0000000.0000000.0000000.0000000.0000000.013011
230.0000000.0000000.0262970.0121100.0000000.0100520.0000000.0000000.0000000.000000...0.0085920.0000000.0644280.0000000.0228650.0000000.0000000.0114320.0082420.015214
240.0000000.0000000.0189210.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0684810.0112300.0102820.0112300.0000000.0000000.0000000.027367
250.0000000.0000000.0406050.0207770.0216540.0086230.0078640.0084210.0000000.000000...0.0147420.0000000.0301460.0000000.0000000.0214220.0098070.0000000.0070700.013051
260.0000000.0000000.0046200.0000000.0000000.0176600.0000000.0000000.0000000.000000...0.0000000.0000000.0668840.0109680.0000000.0000000.0000000.0100420.0000000.013364
270.0000000.0000000.0122310.0000000.0000000.0155850.0284270.0000000.0000000.000000...0.0000000.0000000.0181620.0000000.0000000.0000000.0000000.0088630.0255580.000000
280.0000000.0000000.0255420.0000000.0081730.0000000.0178090.0190690.0000000.000000...0.0166910.0000000.0056890.0000000.0000000.0000000.0000000.0000000.0000000.007389
290.0082380.0096370.0215490.0000000.0000000.0000000.0150250.0160880.0099240.000000...0.0000000.0000000.0335980.0000000.0000000.0000000.0000000.0000000.0067540.000000
300.0095820.0112100.0200540.0000000.0000000.0000000.0000000.0000000.0000000.012289...0.0409530.0000000.0446660.0000000.0108980.0000000.0000000.0000000.0235710.007251
310.0000000.0000000.0281170.0000000.0149950.0000000.0081680.0087460.0000000.000000...0.0000000.0000000.0574070.0000000.0000000.0000000.0000000.0101860.0146880.027112
320.0000000.0000000.0000000.0000000.0000000.0000000.0160310.0085830.0000000.000000...0.0075130.0257490.0051210.0000000.0000000.0000000.0000000.0000000.0000000.000000
330.0100920.0118070.0211220.0121580.0084480.0000000.0092040.0098550.0000000.000000...0.0000000.0000000.1234900.0125360.0229560.0000000.0000000.0000000.0000000.030550
340.0000000.0000000.0888050.0127800.0177600.0106080.0096750.0103590.0127800.013605...0.0000000.0103590.0370870.0000000.0000000.0131770.0000000.0120650.0000000.000000
350.0162260.0189820.0509350.0293200.0067910.0000000.0000000.0000000.0000000.010405...0.0069350.0000000.0614510.0100770.0184530.0201540.0092270.0000000.0000000.000000
360.0000000.0000000.0351720.0000000.0160770.0000000.0000000.0000000.0115690.000000...0.0164180.0000000.0671480.0000000.0000000.0000000.0109220.0218440.0000000.014535
370.0000000.0000000.0000000.0000000.0000000.0000000.0153800.0164680.0000000.000000...0.0000000.0000000.0442180.0000000.0000000.0000000.0000000.0000000.0000000.000000
380.0000000.0000000.0049550.0000000.0079270.0000000.0000000.0000000.0000000.000000...0.0000000.0092470.0772470.0000000.0000000.0000000.0107700.0000000.0000000.035831
390.0000000.0000000.0084190.0096930.0000000.0080460.0073380.0078570.0000000.010319...0.0000000.0000000.0375040.0000000.0000000.0099940.0000000.0091500.0131940.006089
400.0000000.0000000.0597230.0000000.0238870.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.1163930.0118150.0000000.0000000.0000000.0108180.0000000.028794
410.0000000.0000000.0048260.0000000.0000000.0184490.0084130.0090080.0000000.000000...0.0157700.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0075640.000000
420.0000000.0000000.0595960.0000000.0158910.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0497770.0000000.0107950.0117900.0000000.0107950.0000000.007183
430.0000000.0000000.0082400.0000000.0065910.0000000.0143630.0153790.0000000.000000...0.0000000.0000000.0229410.0000000.0000000.0000000.0000000.0000000.0064570.000000
440.0000000.0000000.0435500.0111420.0077420.0000000.0084350.0090310.0000000.011862...0.0000000.0000000.1616650.0000000.0000000.0114880.0000000.0105180.0227500.013998
450.0000000.0000000.0201630.0000000.0000000.0000000.0087860.0094080.0116060.000000...0.0164700.0094080.0056140.0000000.0000000.0000000.0000000.0109570.0079000.000000
460.0000000.0000000.0101440.0000000.0081150.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0282430.0000000.0000000.0000000.0000000.0000000.0000000.007336
470.0103140.0120660.0323780.0000000.0000000.0000000.0000000.0000000.0000000.013228...0.0000000.0201440.0661050.0128110.0117300.0000000.0117300.0000000.0000000.015610
480.0000000.0000000.0151330.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0164820.0000000.0056180.0000000.0000000.0000000.0000000.0000000.0237160.000000
490.0191750.0224320.0300970.0115500.0000000.0095870.0000000.0000000.0000000.000000...0.0000000.0000000.1508270.0000000.0109040.0119090.0000000.0109040.0000000.007255
500.0000000.0000000.0055820.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0091190.0000000.1616140.0132510.0121330.0000000.0000000.0000000.0000000.064585
510.0091370.0106890.0334630.0000000.0229450.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.1064760.0340480.0000000.0000000.0207830.0000000.0000000.020743
520.0097850.0114470.0255970.0000000.0245710.0000000.0000000.0000000.0000000.012549...0.0000000.0000000.1140220.0000000.0000000.0121540.0000000.0111280.0000000.037023
530.0000000.0000000.0185480.0000000.0000000.0000000.0161660.0173100.0000000.000000...0.0075760.0173100.0103280.0000000.0000000.0000000.0100800.0000000.0000000.006707
540.0101300.0118510.0106000.0000000.0000000.0202600.0092380.0098920.0000000.012992...0.0000000.0000000.2656030.0125820.0230410.0000000.0115210.0000000.0083060.130319
\n", "

55 rows × 1193 columns

\n", "
" ], "text/plain": [ " able able to about about it about the above across \\\n", "0 0.009217 0.000000 0.024112 0.000000 0.023146 0.000000 0.000000 \n", "1 0.000000 0.000000 0.014143 0.000000 0.000000 0.000000 0.008217 \n", "2 0.009164 0.000000 0.014384 0.000000 0.000000 0.000000 0.000000 \n", "3 0.000000 0.000000 0.010331 0.000000 0.008264 0.000000 0.000000 \n", "4 0.009664 0.011305 0.020224 0.011642 0.008089 0.009664 0.000000 \n", "5 0.000000 0.000000 0.045861 0.009600 0.020010 0.000000 0.000000 \n", "6 0.009253 0.010825 0.004841 0.000000 0.000000 0.009253 0.000000 \n", "7 0.009444 0.011048 0.049410 0.000000 0.015810 0.000000 0.000000 \n", "8 0.008653 0.000000 0.004527 0.000000 0.000000 0.008653 0.000000 \n", "9 0.000000 0.000000 0.004790 0.000000 0.007663 0.000000 0.008349 \n", "10 0.009146 0.000000 0.028710 0.000000 0.022966 0.009146 0.000000 \n", "11 0.000000 0.000000 0.013455 0.000000 0.000000 0.000000 0.000000 \n", "12 0.017607 0.020598 0.018424 0.000000 0.014738 0.000000 0.000000 \n", "13 0.000000 0.000000 0.013175 0.000000 0.000000 0.008394 0.000000 \n", "14 0.000000 0.000000 0.026481 0.000000 0.008473 0.010123 0.009232 \n", "15 0.009561 0.000000 0.045023 0.000000 0.008003 0.009561 0.000000 \n", "16 0.000000 0.000000 0.019991 0.000000 0.000000 0.009552 0.000000 \n", "17 0.009487 0.000000 0.054599 0.022858 0.007941 0.000000 0.017304 \n", "18 0.000000 0.000000 0.043551 0.000000 0.015484 0.018498 0.000000 \n", "19 0.000000 0.000000 0.048336 0.000000 0.015466 0.009239 0.016851 \n", "20 0.000000 0.000000 0.045488 0.041896 0.000000 0.000000 0.023787 \n", "21 0.008779 0.010270 0.027558 0.010576 0.000000 0.017557 0.008006 \n", "22 0.000000 0.000000 0.004498 0.000000 0.000000 0.000000 0.023519 \n", "23 0.000000 0.000000 0.026297 0.012110 0.000000 0.010052 0.000000 \n", "24 0.000000 0.000000 0.018921 0.000000 0.000000 0.000000 0.000000 \n", "25 0.000000 0.000000 0.040605 0.020777 0.021654 0.008623 0.007864 \n", "26 0.000000 0.000000 0.004620 0.000000 0.000000 
0.017660 0.000000 \n", "27 0.000000 0.000000 0.012231 0.000000 0.000000 0.015585 0.028427 \n", "28 0.000000 0.000000 0.025542 0.000000 0.008173 0.000000 0.017809 \n", "29 0.008238 0.009637 0.021549 0.000000 0.000000 0.000000 0.015025 \n", "30 0.009582 0.011210 0.020054 0.000000 0.000000 0.000000 0.000000 \n", "31 0.000000 0.000000 0.028117 0.000000 0.014995 0.000000 0.008168 \n", "32 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.016031 \n", "33 0.010092 0.011807 0.021122 0.012158 0.008448 0.000000 0.009204 \n", "34 0.000000 0.000000 0.088805 0.012780 0.017760 0.010608 0.009675 \n", "35 0.016226 0.018982 0.050935 0.029320 0.006791 0.000000 0.000000 \n", "36 0.000000 0.000000 0.035172 0.000000 0.016077 0.000000 0.000000 \n", "37 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.015380 \n", "38 0.000000 0.000000 0.004955 0.000000 0.007927 0.000000 0.000000 \n", "39 0.000000 0.000000 0.008419 0.009693 0.000000 0.008046 0.007338 \n", "40 0.000000 0.000000 0.059723 0.000000 0.023887 0.000000 0.000000 \n", "41 0.000000 0.000000 0.004826 0.000000 0.000000 0.018449 0.008413 \n", "42 0.000000 0.000000 0.059596 0.000000 0.015891 0.000000 0.000000 \n", "43 0.000000 0.000000 0.008240 0.000000 0.006591 0.000000 0.014363 \n", "44 0.000000 0.000000 0.043550 0.011142 0.007742 0.000000 0.008435 \n", "45 0.000000 0.000000 0.020163 0.000000 0.000000 0.000000 0.008786 \n", "46 0.000000 0.000000 0.010144 0.000000 0.008115 0.000000 0.000000 \n", "47 0.010314 0.012066 0.032378 0.000000 0.000000 0.000000 0.000000 \n", "48 0.000000 0.000000 0.015133 0.000000 0.000000 0.000000 0.000000 \n", "49 0.019175 0.022432 0.030097 0.011550 0.000000 0.009587 0.000000 \n", "50 0.000000 0.000000 0.005582 0.000000 0.000000 0.000000 0.000000 \n", "51 0.009137 0.010689 0.033463 0.000000 0.022945 0.000000 0.000000 \n", "52 0.009785 0.011447 0.025597 0.000000 0.024571 0.000000 0.000000 \n", "53 0.000000 0.000000 0.018548 0.000000 0.000000 0.000000 0.016166 \n", "54 0.010130 0.011851 0.010600 
0.000000 0.000000 0.020260 0.009238 \n", "\n", " across the act added ... yet york you \\\n", "0 0.000000 0.000000 0.000000 ... 0.007878 0.000000 0.016111 \n", "1 0.000000 0.021710 0.000000 ... 0.015404 0.008799 0.042001 \n", "2 0.000000 0.011040 0.000000 ... 0.000000 0.026847 0.021358 \n", "3 0.000000 0.047577 0.000000 ... 0.000000 0.019283 0.000000 \n", "4 0.000000 0.000000 0.000000 ... 0.008260 0.000000 0.033784 \n", "5 0.000000 0.000000 0.000000 ... 0.006811 0.000000 0.027858 \n", "6 0.000000 0.011147 0.011867 ... 0.000000 0.018072 0.000000 \n", "7 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.016508 \n", "8 0.000000 0.000000 0.011097 ... 0.014792 0.050696 0.005041 \n", "9 0.008940 0.000000 0.000000 ... 0.007826 0.017881 0.021338 \n", "10 0.000000 0.000000 0.011729 ... 0.007817 0.000000 0.000000 \n", "11 0.000000 0.000000 0.000000 ... 0.007327 0.041855 0.000000 \n", "12 0.000000 0.031817 0.000000 ... 0.000000 0.017194 0.005130 \n", "13 0.000000 0.010112 0.000000 ... 0.000000 0.024591 0.000000 \n", "14 0.009885 0.012195 0.000000 ... 0.000000 0.029655 0.011796 \n", "15 0.000000 0.000000 0.000000 ... 0.008173 0.009337 0.000000 \n", "16 0.000000 0.011508 0.000000 ... 0.000000 0.009328 0.000000 \n", "17 0.018528 0.000000 0.000000 ... 0.032436 0.000000 0.088442 \n", "18 0.000000 0.000000 0.000000 ... 0.023717 0.000000 0.048500 \n", "19 0.018043 0.000000 0.000000 ... 0.007897 0.018043 0.118424 \n", "20 0.025470 0.000000 0.000000 ... 0.007432 0.000000 0.050658 \n", "21 0.000000 0.031727 0.000000 ... 0.015008 0.008573 0.030690 \n", "22 0.025183 0.000000 0.000000 ... 0.014696 0.008394 0.055096 \n", "23 0.000000 0.000000 0.000000 ... 0.008592 0.000000 0.064428 \n", "24 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.068481 \n", "25 0.008421 0.000000 0.000000 ... 0.014742 0.000000 0.030146 \n", "26 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.066884 \n", "27 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.018162 \n", "28 0.019069 0.000000 0.000000 ... 
0.016691 0.000000 0.005689 \n", "29 0.016088 0.009924 0.000000 ... 0.000000 0.000000 0.033598 \n", "30 0.000000 0.000000 0.012289 ... 0.040953 0.000000 0.044666 \n", "31 0.008746 0.000000 0.000000 ... 0.000000 0.000000 0.057407 \n", "32 0.008583 0.000000 0.000000 ... 0.007513 0.025749 0.005121 \n", "33 0.009855 0.000000 0.000000 ... 0.000000 0.000000 0.123490 \n", "34 0.010359 0.012780 0.013605 ... 0.000000 0.010359 0.037087 \n", "35 0.000000 0.000000 0.010405 ... 0.006935 0.000000 0.061451 \n", "36 0.000000 0.011569 0.000000 ... 0.016418 0.000000 0.067148 \n", "37 0.016468 0.000000 0.000000 ... 0.000000 0.000000 0.044218 \n", "38 0.000000 0.000000 0.000000 ... 0.000000 0.009247 0.077247 \n", "39 0.007857 0.000000 0.010319 ... 0.000000 0.000000 0.037504 \n", "40 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.116393 \n", "41 0.009008 0.000000 0.000000 ... 0.015770 0.000000 0.000000 \n", "42 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.049777 \n", "43 0.015379 0.000000 0.000000 ... 0.000000 0.000000 0.022941 \n", "44 0.009031 0.000000 0.011862 ... 0.000000 0.000000 0.161665 \n", "45 0.009408 0.011606 0.000000 ... 0.016470 0.009408 0.005614 \n", "46 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.028243 \n", "47 0.000000 0.000000 0.013228 ... 0.000000 0.020144 0.066105 \n", "48 0.000000 0.000000 0.000000 ... 0.016482 0.000000 0.005618 \n", "49 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.150827 \n", "50 0.000000 0.000000 0.000000 ... 0.009119 0.000000 0.161614 \n", "51 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.106476 \n", "52 0.000000 0.000000 0.012549 ... 0.000000 0.000000 0.114022 \n", "53 0.017310 0.000000 0.000000 ... 0.007576 0.017310 0.010328 \n", "54 0.009892 0.000000 0.012992 ... 
0.000000 0.000000 0.265603 \n", "\n", " you are you can you could you have you know young your \n", "0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.006975 \n", "1 0.011192 0.000000 0.000000 0.010248 0.000000 0.007388 0.027275 \n", "2 0.011383 0.000000 0.000000 0.010422 0.000000 0.015028 0.006935 \n", "3 0.000000 0.000000 0.000000 0.000000 0.000000 0.016191 0.007472 \n", "4 0.000000 0.010990 0.012003 0.010990 0.000000 0.000000 0.014626 \n", "5 0.000000 0.009063 0.000000 0.009063 0.000000 0.000000 0.000000 \n", "6 0.000000 0.000000 0.000000 0.000000 0.000000 0.015174 0.000000 \n", "7 0.000000 0.000000 0.000000 0.010740 0.000000 0.007743 0.014293 \n", "8 0.000000 0.000000 0.000000 0.000000 0.000000 0.014189 0.000000 \n", "9 0.000000 0.010412 0.000000 0.000000 0.000000 0.007507 0.000000 \n", "10 0.000000 0.000000 0.000000 0.000000 0.000000 0.007499 0.000000 \n", "11 0.000000 0.000000 0.000000 0.000000 0.000000 0.014057 0.000000 \n", "12 0.000000 0.000000 0.000000 0.000000 0.000000 0.007218 0.006662 \n", "13 0.000000 0.000000 0.000000 0.000000 0.000000 0.006883 0.000000 \n", "14 0.000000 0.011512 0.000000 0.000000 0.000000 0.008300 0.000000 \n", "15 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "16 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "17 0.000000 0.000000 0.011784 0.000000 0.000000 0.007779 0.000000 \n", "18 0.011488 0.010519 0.000000 0.010519 0.000000 0.000000 0.013998 \n", "19 0.000000 0.000000 0.022951 0.021014 0.010507 0.022725 0.013983 \n", "20 0.000000 0.000000 0.000000 0.000000 0.009888 0.014257 0.006579 \n", "21 0.000000 0.000000 0.000000 0.000000 0.000000 0.007198 0.000000 \n", "22 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.013011 \n", "23 0.000000 0.022865 0.000000 0.000000 0.011432 0.008242 0.015214 \n", "24 0.011230 0.010282 0.011230 0.000000 0.000000 0.000000 0.027367 \n", "25 0.000000 0.000000 0.021422 0.009807 0.000000 0.007070 0.013051 \n", "26 0.010968 0.000000 0.000000 
0.000000 0.010042 0.000000 0.013364 \n", "27 0.000000 0.000000 0.000000 0.000000 0.008863 0.025558 0.000000 \n", "28 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.007389 \n", "29 0.000000 0.000000 0.000000 0.000000 0.000000 0.006754 0.000000 \n", "30 0.000000 0.010898 0.000000 0.000000 0.000000 0.023571 0.007251 \n", "31 0.000000 0.000000 0.000000 0.000000 0.010186 0.014688 0.027112 \n", "32 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "33 0.012536 0.022956 0.000000 0.000000 0.000000 0.000000 0.030550 \n", "34 0.000000 0.000000 0.013177 0.000000 0.012065 0.000000 0.000000 \n", "35 0.010077 0.018453 0.020154 0.009227 0.000000 0.000000 0.000000 \n", "36 0.000000 0.000000 0.000000 0.010922 0.021844 0.000000 0.014535 \n", "37 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "38 0.000000 0.000000 0.000000 0.010770 0.000000 0.000000 0.035831 \n", "39 0.000000 0.000000 0.009994 0.000000 0.009150 0.013194 0.006089 \n", "40 0.011815 0.000000 0.000000 0.000000 0.010818 0.000000 0.028794 \n", "41 0.000000 0.000000 0.000000 0.000000 0.000000 0.007564 0.000000 \n", "42 0.000000 0.010795 0.011790 0.000000 0.010795 0.000000 0.007183 \n", "43 0.000000 0.000000 0.000000 0.000000 0.000000 0.006457 0.000000 \n", "44 0.000000 0.000000 0.011488 0.000000 0.010518 0.022750 0.013998 \n", "45 0.000000 0.000000 0.000000 0.000000 0.010957 0.007900 0.000000 \n", "46 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.007336 \n", "47 0.012811 0.011730 0.000000 0.011730 0.000000 0.000000 0.015610 \n", "48 0.000000 0.000000 0.000000 0.000000 0.000000 0.023716 0.000000 \n", "49 0.000000 0.010904 0.011909 0.000000 0.010904 0.000000 0.007255 \n", "50 0.013251 0.012133 0.000000 0.000000 0.000000 0.000000 0.064585 \n", "51 0.034048 0.000000 0.000000 0.020783 0.000000 0.000000 0.020743 \n", "52 0.000000 0.000000 0.012154 0.000000 0.011128 0.000000 0.037023 \n", "53 0.000000 0.000000 0.000000 0.010080 0.000000 0.000000 0.006707 \n", "54 0.012582 
0.023041 0.000000 0.011521 0.000000 0.008306 0.130319 \n", "\n", "[55 rows x 1193 columns]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tf_matrix2 = tf.fit_transform(cv_ngram_matrix)\n", "tfidf_ngram = pd.DataFrame(tf_matrix2.toarray(), columns = cv_ngram.get_feature_names())\n", "tfidf_ngram" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- We can also create TF-IDF model directly from corpus" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ableable toaboutabout itabout theaboveacrossacross theactadded...yetyorkyouyou areyou canyou couldyou haveyou knowyoungyour
00.0092170.0000000.0241120.0000000.0231460.0000000.0000000.0000000.0000000.000000...0.0078780.0000000.0161110.0000000.0000000.0000000.0000000.0000000.0000000.006975
10.0000000.0000000.0141430.0000000.0000000.0000000.0082170.0000000.0217100.000000...0.0154040.0087990.0420010.0111920.0000000.0000000.0102480.0000000.0073880.027275
20.0091640.0000000.0143840.0000000.0000000.0000000.0000000.0000000.0110400.000000...0.0000000.0268470.0213580.0113830.0000000.0000000.0104220.0000000.0150280.006935
30.0000000.0000000.0103310.0000000.0082640.0000000.0000000.0000000.0475770.000000...0.0000000.0192830.0000000.0000000.0000000.0000000.0000000.0000000.0161910.007472
40.0096640.0113050.0202240.0116420.0080890.0096640.0000000.0000000.0000000.000000...0.0082600.0000000.0337840.0000000.0109900.0120030.0109900.0000000.0000000.014626
50.0000000.0000000.0458610.0096000.0200100.0000000.0000000.0000000.0000000.000000...0.0068110.0000000.0278580.0000000.0090630.0000000.0090630.0000000.0000000.000000
60.0092530.0108250.0048410.0000000.0000000.0092530.0000000.0000000.0111470.011867...0.0000000.0180720.0000000.0000000.0000000.0000000.0000000.0000000.0151740.000000
70.0094440.0110480.0494100.0000000.0158100.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0165080.0000000.0000000.0000000.0107400.0000000.0077430.014293
80.0086530.0000000.0045270.0000000.0000000.0086530.0000000.0000000.0000000.011097...0.0147920.0506960.0050410.0000000.0000000.0000000.0000000.0000000.0141890.000000
90.0000000.0000000.0047900.0000000.0076630.0000000.0083490.0089400.0000000.000000...0.0078260.0178810.0213380.0000000.0104120.0000000.0000000.0000000.0075070.000000
100.0091460.0000000.0287100.0000000.0229660.0091460.0000000.0000000.0000000.011729...0.0078170.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0074990.000000
110.0000000.0000000.0134550.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0073270.0418550.0000000.0000000.0000000.0000000.0000000.0000000.0140570.000000
120.0176070.0205980.0184240.0000000.0147380.0000000.0000000.0000000.0318170.000000...0.0000000.0171940.0051300.0000000.0000000.0000000.0000000.0000000.0072180.006662
130.0000000.0000000.0131750.0000000.0000000.0083940.0000000.0000000.0101120.000000...0.0000000.0245910.0000000.0000000.0000000.0000000.0000000.0000000.0068830.000000
140.0000000.0000000.0264810.0000000.0084730.0101230.0092320.0098850.0121950.000000...0.0000000.0296550.0117960.0000000.0115120.0000000.0000000.0000000.0083000.000000
150.0095610.0000000.0450230.0000000.0080030.0095610.0000000.0000000.0000000.000000...0.0081730.0093370.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
160.0000000.0000000.0199910.0000000.0000000.0095520.0000000.0000000.0115080.000000...0.0000000.0093280.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
170.0094870.0000000.0545990.0228580.0079410.0000000.0173040.0185280.0000000.000000...0.0324360.0000000.0884420.0000000.0000000.0117840.0000000.0000000.0077790.000000
180.0000000.0000000.0435510.0000000.0154840.0184980.0000000.0000000.0000000.000000...0.0237170.0000000.0485000.0114880.0105190.0000000.0105190.0000000.0000000.013998
190.0000000.0000000.0483360.0000000.0154660.0092390.0168510.0180430.0000000.000000...0.0078970.0180430.1184240.0000000.0000000.0229510.0210140.0105070.0227250.013983
200.0000000.0000000.0454880.0418960.0000000.0000000.0237870.0254700.0000000.000000...0.0074320.0000000.0506580.0000000.0000000.0000000.0000000.0098880.0142570.006579
210.0087790.0102700.0275580.0105760.0000000.0175570.0080060.0000000.0317270.000000...0.0150080.0085730.0306900.0000000.0000000.0000000.0000000.0000000.0071980.000000
220.0000000.0000000.0044980.0000000.0000000.0000000.0235190.0251830.0000000.000000...0.0146960.0083940.0550960.0000000.0000000.0000000.0000000.0000000.0000000.013011
230.0000000.0000000.0262970.0121100.0000000.0100520.0000000.0000000.0000000.000000...0.0085920.0000000.0644280.0000000.0228650.0000000.0000000.0114320.0082420.015214
240.0000000.0000000.0189210.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0684810.0112300.0102820.0112300.0000000.0000000.0000000.027367
250.0000000.0000000.0406050.0207770.0216540.0086230.0078640.0084210.0000000.000000...0.0147420.0000000.0301460.0000000.0000000.0214220.0098070.0000000.0070700.013051
260.0000000.0000000.0046200.0000000.0000000.0176600.0000000.0000000.0000000.000000...0.0000000.0000000.0668840.0109680.0000000.0000000.0000000.0100420.0000000.013364
270.0000000.0000000.0122310.0000000.0000000.0155850.0284270.0000000.0000000.000000...0.0000000.0000000.0181620.0000000.0000000.0000000.0000000.0088630.0255580.000000
280.0000000.0000000.0255420.0000000.0081730.0000000.0178090.0190690.0000000.000000...0.0166910.0000000.0056890.0000000.0000000.0000000.0000000.0000000.0000000.007389
290.0082380.0096370.0215490.0000000.0000000.0000000.0150250.0160880.0099240.000000...0.0000000.0000000.0335980.0000000.0000000.0000000.0000000.0000000.0067540.000000
300.0095820.0112100.0200540.0000000.0000000.0000000.0000000.0000000.0000000.012289...0.0409530.0000000.0446660.0000000.0108980.0000000.0000000.0000000.0235710.007251
310.0000000.0000000.0281170.0000000.0149950.0000000.0081680.0087460.0000000.000000...0.0000000.0000000.0574070.0000000.0000000.0000000.0000000.0101860.0146880.027112
320.0000000.0000000.0000000.0000000.0000000.0000000.0160310.0085830.0000000.000000...0.0075130.0257490.0051210.0000000.0000000.0000000.0000000.0000000.0000000.000000
330.0100920.0118070.0211220.0121580.0084480.0000000.0092040.0098550.0000000.000000...0.0000000.0000000.1234900.0125360.0229560.0000000.0000000.0000000.0000000.030550
340.0000000.0000000.0888050.0127800.0177600.0106080.0096750.0103590.0127800.013605...0.0000000.0103590.0370870.0000000.0000000.0131770.0000000.0120650.0000000.000000
350.0162260.0189820.0509350.0293200.0067910.0000000.0000000.0000000.0000000.010405...0.0069350.0000000.0614510.0100770.0184530.0201540.0092270.0000000.0000000.000000
360.0000000.0000000.0351720.0000000.0160770.0000000.0000000.0000000.0115690.000000...0.0164180.0000000.0671480.0000000.0000000.0000000.0109220.0218440.0000000.014535
370.0000000.0000000.0000000.0000000.0000000.0000000.0153800.0164680.0000000.000000...0.0000000.0000000.0442180.0000000.0000000.0000000.0000000.0000000.0000000.000000
380.0000000.0000000.0049550.0000000.0079270.0000000.0000000.0000000.0000000.000000...0.0000000.0092470.0772470.0000000.0000000.0000000.0107700.0000000.0000000.035831
390.0000000.0000000.0084190.0096930.0000000.0080460.0073380.0078570.0000000.010319...0.0000000.0000000.0375040.0000000.0000000.0099940.0000000.0091500.0131940.006089
400.0000000.0000000.0597230.0000000.0238870.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.1163930.0118150.0000000.0000000.0000000.0108180.0000000.028794
410.0000000.0000000.0048260.0000000.0000000.0184490.0084130.0090080.0000000.000000...0.0157700.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0075640.000000
420.0000000.0000000.0595960.0000000.0158910.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0497770.0000000.0107950.0117900.0000000.0107950.0000000.007183
430.0000000.0000000.0082400.0000000.0065910.0000000.0143630.0153790.0000000.000000...0.0000000.0000000.0229410.0000000.0000000.0000000.0000000.0000000.0064570.000000
440.0000000.0000000.0435500.0111420.0077420.0000000.0084350.0090310.0000000.011862...0.0000000.0000000.1616650.0000000.0000000.0114880.0000000.0105180.0227500.013998
450.0000000.0000000.0201630.0000000.0000000.0000000.0087860.0094080.0116060.000000...0.0164700.0094080.0056140.0000000.0000000.0000000.0000000.0109570.0079000.000000
460.0000000.0000000.0101440.0000000.0081150.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0282430.0000000.0000000.0000000.0000000.0000000.0000000.007336
470.0103140.0120660.0323780.0000000.0000000.0000000.0000000.0000000.0000000.013228...0.0000000.0201440.0661050.0128110.0117300.0000000.0117300.0000000.0000000.015610
480.0000000.0000000.0151330.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0164820.0000000.0056180.0000000.0000000.0000000.0000000.0000000.0237160.000000
490.0191750.0224320.0300970.0115500.0000000.0095870.0000000.0000000.0000000.000000...0.0000000.0000000.1508270.0000000.0109040.0119090.0000000.0109040.0000000.007255
500.0000000.0000000.0055820.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0091190.0000000.1616140.0132510.0121330.0000000.0000000.0000000.0000000.064585
510.0091370.0106890.0334630.0000000.0229450.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.1064760.0340480.0000000.0000000.0207830.0000000.0000000.020743
520.0097850.0114470.0255970.0000000.0245710.0000000.0000000.0000000.0000000.012549...0.0000000.0000000.1140220.0000000.0000000.0121540.0000000.0111280.0000000.037023
530.0000000.0000000.0185480.0000000.0000000.0000000.0161660.0173100.0000000.000000...0.0075760.0173100.0103280.0000000.0000000.0000000.0100800.0000000.0000000.006707
540.0101300.0118510.0106000.0000000.0000000.0202600.0092380.0098920.0000000.012992...0.0000000.0000000.2656030.0125820.0230410.0000000.0115210.0000000.0083060.130319
\n", "

55 rows × 1193 columns

\n", "
" ], "text/plain": [ " able able to about about it about the above across \\\n", "0 0.009217 0.000000 0.024112 0.000000 0.023146 0.000000 0.000000 \n", "1 0.000000 0.000000 0.014143 0.000000 0.000000 0.000000 0.008217 \n", "2 0.009164 0.000000 0.014384 0.000000 0.000000 0.000000 0.000000 \n", "3 0.000000 0.000000 0.010331 0.000000 0.008264 0.000000 0.000000 \n", "4 0.009664 0.011305 0.020224 0.011642 0.008089 0.009664 0.000000 \n", "5 0.000000 0.000000 0.045861 0.009600 0.020010 0.000000 0.000000 \n", "6 0.009253 0.010825 0.004841 0.000000 0.000000 0.009253 0.000000 \n", "7 0.009444 0.011048 0.049410 0.000000 0.015810 0.000000 0.000000 \n", "8 0.008653 0.000000 0.004527 0.000000 0.000000 0.008653 0.000000 \n", "9 0.000000 0.000000 0.004790 0.000000 0.007663 0.000000 0.008349 \n", "10 0.009146 0.000000 0.028710 0.000000 0.022966 0.009146 0.000000 \n", "11 0.000000 0.000000 0.013455 0.000000 0.000000 0.000000 0.000000 \n", "12 0.017607 0.020598 0.018424 0.000000 0.014738 0.000000 0.000000 \n", "13 0.000000 0.000000 0.013175 0.000000 0.000000 0.008394 0.000000 \n", "14 0.000000 0.000000 0.026481 0.000000 0.008473 0.010123 0.009232 \n", "15 0.009561 0.000000 0.045023 0.000000 0.008003 0.009561 0.000000 \n", "16 0.000000 0.000000 0.019991 0.000000 0.000000 0.009552 0.000000 \n", "17 0.009487 0.000000 0.054599 0.022858 0.007941 0.000000 0.017304 \n", "18 0.000000 0.000000 0.043551 0.000000 0.015484 0.018498 0.000000 \n", "19 0.000000 0.000000 0.048336 0.000000 0.015466 0.009239 0.016851 \n", "20 0.000000 0.000000 0.045488 0.041896 0.000000 0.000000 0.023787 \n", "21 0.008779 0.010270 0.027558 0.010576 0.000000 0.017557 0.008006 \n", "22 0.000000 0.000000 0.004498 0.000000 0.000000 0.000000 0.023519 \n", "23 0.000000 0.000000 0.026297 0.012110 0.000000 0.010052 0.000000 \n", "24 0.000000 0.000000 0.018921 0.000000 0.000000 0.000000 0.000000 \n", "25 0.000000 0.000000 0.040605 0.020777 0.021654 0.008623 0.007864 \n", "26 0.000000 0.000000 0.004620 0.000000 0.000000 
0.017660 0.000000 \n", "27 0.000000 0.000000 0.012231 0.000000 0.000000 0.015585 0.028427 \n", "28 0.000000 0.000000 0.025542 0.000000 0.008173 0.000000 0.017809 \n", "29 0.008238 0.009637 0.021549 0.000000 0.000000 0.000000 0.015025 \n", "30 0.009582 0.011210 0.020054 0.000000 0.000000 0.000000 0.000000 \n", "31 0.000000 0.000000 0.028117 0.000000 0.014995 0.000000 0.008168 \n", "32 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.016031 \n", "33 0.010092 0.011807 0.021122 0.012158 0.008448 0.000000 0.009204 \n", "34 0.000000 0.000000 0.088805 0.012780 0.017760 0.010608 0.009675 \n", "35 0.016226 0.018982 0.050935 0.029320 0.006791 0.000000 0.000000 \n", "36 0.000000 0.000000 0.035172 0.000000 0.016077 0.000000 0.000000 \n", "37 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.015380 \n", "38 0.000000 0.000000 0.004955 0.000000 0.007927 0.000000 0.000000 \n", "39 0.000000 0.000000 0.008419 0.009693 0.000000 0.008046 0.007338 \n", "40 0.000000 0.000000 0.059723 0.000000 0.023887 0.000000 0.000000 \n", "41 0.000000 0.000000 0.004826 0.000000 0.000000 0.018449 0.008413 \n", "42 0.000000 0.000000 0.059596 0.000000 0.015891 0.000000 0.000000 \n", "43 0.000000 0.000000 0.008240 0.000000 0.006591 0.000000 0.014363 \n", "44 0.000000 0.000000 0.043550 0.011142 0.007742 0.000000 0.008435 \n", "45 0.000000 0.000000 0.020163 0.000000 0.000000 0.000000 0.008786 \n", "46 0.000000 0.000000 0.010144 0.000000 0.008115 0.000000 0.000000 \n", "47 0.010314 0.012066 0.032378 0.000000 0.000000 0.000000 0.000000 \n", "48 0.000000 0.000000 0.015133 0.000000 0.000000 0.000000 0.000000 \n", "49 0.019175 0.022432 0.030097 0.011550 0.000000 0.009587 0.000000 \n", "50 0.000000 0.000000 0.005582 0.000000 0.000000 0.000000 0.000000 \n", "51 0.009137 0.010689 0.033463 0.000000 0.022945 0.000000 0.000000 \n", "52 0.009785 0.011447 0.025597 0.000000 0.024571 0.000000 0.000000 \n", "53 0.000000 0.000000 0.018548 0.000000 0.000000 0.000000 0.016166 \n", "54 0.010130 0.011851 0.010600 
0.000000 0.000000 0.020260 0.009238 \n", "\n", " across the act added ... yet york you \\\n", "0 0.000000 0.000000 0.000000 ... 0.007878 0.000000 0.016111 \n", "1 0.000000 0.021710 0.000000 ... 0.015404 0.008799 0.042001 \n", "2 0.000000 0.011040 0.000000 ... 0.000000 0.026847 0.021358 \n", "3 0.000000 0.047577 0.000000 ... 0.000000 0.019283 0.000000 \n", "4 0.000000 0.000000 0.000000 ... 0.008260 0.000000 0.033784 \n", "5 0.000000 0.000000 0.000000 ... 0.006811 0.000000 0.027858 \n", "6 0.000000 0.011147 0.011867 ... 0.000000 0.018072 0.000000 \n", "7 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.016508 \n", "8 0.000000 0.000000 0.011097 ... 0.014792 0.050696 0.005041 \n", "9 0.008940 0.000000 0.000000 ... 0.007826 0.017881 0.021338 \n", "10 0.000000 0.000000 0.011729 ... 0.007817 0.000000 0.000000 \n", "11 0.000000 0.000000 0.000000 ... 0.007327 0.041855 0.000000 \n", "12 0.000000 0.031817 0.000000 ... 0.000000 0.017194 0.005130 \n", "13 0.000000 0.010112 0.000000 ... 0.000000 0.024591 0.000000 \n", "14 0.009885 0.012195 0.000000 ... 0.000000 0.029655 0.011796 \n", "15 0.000000 0.000000 0.000000 ... 0.008173 0.009337 0.000000 \n", "16 0.000000 0.011508 0.000000 ... 0.000000 0.009328 0.000000 \n", "17 0.018528 0.000000 0.000000 ... 0.032436 0.000000 0.088442 \n", "18 0.000000 0.000000 0.000000 ... 0.023717 0.000000 0.048500 \n", "19 0.018043 0.000000 0.000000 ... 0.007897 0.018043 0.118424 \n", "20 0.025470 0.000000 0.000000 ... 0.007432 0.000000 0.050658 \n", "21 0.000000 0.031727 0.000000 ... 0.015008 0.008573 0.030690 \n", "22 0.025183 0.000000 0.000000 ... 0.014696 0.008394 0.055096 \n", "23 0.000000 0.000000 0.000000 ... 0.008592 0.000000 0.064428 \n", "24 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.068481 \n", "25 0.008421 0.000000 0.000000 ... 0.014742 0.000000 0.030146 \n", "26 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.066884 \n", "27 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.018162 \n", "28 0.019069 0.000000 0.000000 ... 
0.016691 0.000000 0.005689 \n", "29 0.016088 0.009924 0.000000 ... 0.000000 0.000000 0.033598 \n", "30 0.000000 0.000000 0.012289 ... 0.040953 0.000000 0.044666 \n", "31 0.008746 0.000000 0.000000 ... 0.000000 0.000000 0.057407 \n", "32 0.008583 0.000000 0.000000 ... 0.007513 0.025749 0.005121 \n", "33 0.009855 0.000000 0.000000 ... 0.000000 0.000000 0.123490 \n", "34 0.010359 0.012780 0.013605 ... 0.000000 0.010359 0.037087 \n", "35 0.000000 0.000000 0.010405 ... 0.006935 0.000000 0.061451 \n", "36 0.000000 0.011569 0.000000 ... 0.016418 0.000000 0.067148 \n", "37 0.016468 0.000000 0.000000 ... 0.000000 0.000000 0.044218 \n", "38 0.000000 0.000000 0.000000 ... 0.000000 0.009247 0.077247 \n", "39 0.007857 0.000000 0.010319 ... 0.000000 0.000000 0.037504 \n", "40 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.116393 \n", "41 0.009008 0.000000 0.000000 ... 0.015770 0.000000 0.000000 \n", "42 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.049777 \n", "43 0.015379 0.000000 0.000000 ... 0.000000 0.000000 0.022941 \n", "44 0.009031 0.000000 0.011862 ... 0.000000 0.000000 0.161665 \n", "45 0.009408 0.011606 0.000000 ... 0.016470 0.009408 0.005614 \n", "46 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.028243 \n", "47 0.000000 0.000000 0.013228 ... 0.000000 0.020144 0.066105 \n", "48 0.000000 0.000000 0.000000 ... 0.016482 0.000000 0.005618 \n", "49 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.150827 \n", "50 0.000000 0.000000 0.000000 ... 0.009119 0.000000 0.161614 \n", "51 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.106476 \n", "52 0.000000 0.000000 0.012549 ... 0.000000 0.000000 0.114022 \n", "53 0.017310 0.000000 0.000000 ... 0.007576 0.017310 0.010328 \n", "54 0.009892 0.000000 0.012992 ... 
0.000000 0.000000 0.265603 \n", "\n", " you are you can you could you have you know young your \n", "0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.006975 \n", "1 0.011192 0.000000 0.000000 0.010248 0.000000 0.007388 0.027275 \n", "2 0.011383 0.000000 0.000000 0.010422 0.000000 0.015028 0.006935 \n", "3 0.000000 0.000000 0.000000 0.000000 0.000000 0.016191 0.007472 \n", "4 0.000000 0.010990 0.012003 0.010990 0.000000 0.000000 0.014626 \n", "5 0.000000 0.009063 0.000000 0.009063 0.000000 0.000000 0.000000 \n", "6 0.000000 0.000000 0.000000 0.000000 0.000000 0.015174 0.000000 \n", "7 0.000000 0.000000 0.000000 0.010740 0.000000 0.007743 0.014293 \n", "8 0.000000 0.000000 0.000000 0.000000 0.000000 0.014189 0.000000 \n", "9 0.000000 0.010412 0.000000 0.000000 0.000000 0.007507 0.000000 \n", "10 0.000000 0.000000 0.000000 0.000000 0.000000 0.007499 0.000000 \n", "11 0.000000 0.000000 0.000000 0.000000 0.000000 0.014057 0.000000 \n", "12 0.000000 0.000000 0.000000 0.000000 0.000000 0.007218 0.006662 \n", "13 0.000000 0.000000 0.000000 0.000000 0.000000 0.006883 0.000000 \n", "14 0.000000 0.011512 0.000000 0.000000 0.000000 0.008300 0.000000 \n", "15 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "16 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "17 0.000000 0.000000 0.011784 0.000000 0.000000 0.007779 0.000000 \n", "18 0.011488 0.010519 0.000000 0.010519 0.000000 0.000000 0.013998 \n", "19 0.000000 0.000000 0.022951 0.021014 0.010507 0.022725 0.013983 \n", "20 0.000000 0.000000 0.000000 0.000000 0.009888 0.014257 0.006579 \n", "21 0.000000 0.000000 0.000000 0.000000 0.000000 0.007198 0.000000 \n", "22 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.013011 \n", "23 0.000000 0.022865 0.000000 0.000000 0.011432 0.008242 0.015214 \n", "24 0.011230 0.010282 0.011230 0.000000 0.000000 0.000000 0.027367 \n", "25 0.000000 0.000000 0.021422 0.009807 0.000000 0.007070 0.013051 \n", "26 0.010968 0.000000 0.000000 
0.000000 0.010042 0.000000 0.013364 \n", "27 0.000000 0.000000 0.000000 0.000000 0.008863 0.025558 0.000000 \n", "28 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.007389 \n", "29 0.000000 0.000000 0.000000 0.000000 0.000000 0.006754 0.000000 \n", "30 0.000000 0.010898 0.000000 0.000000 0.000000 0.023571 0.007251 \n", "31 0.000000 0.000000 0.000000 0.000000 0.010186 0.014688 0.027112 \n", "32 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "33 0.012536 0.022956 0.000000 0.000000 0.000000 0.000000 0.030550 \n", "34 0.000000 0.000000 0.013177 0.000000 0.012065 0.000000 0.000000 \n", "35 0.010077 0.018453 0.020154 0.009227 0.000000 0.000000 0.000000 \n", "36 0.000000 0.000000 0.000000 0.010922 0.021844 0.000000 0.014535 \n", "37 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "38 0.000000 0.000000 0.000000 0.010770 0.000000 0.000000 0.035831 \n", "39 0.000000 0.000000 0.009994 0.000000 0.009150 0.013194 0.006089 \n", "40 0.011815 0.000000 0.000000 0.000000 0.010818 0.000000 0.028794 \n", "41 0.000000 0.000000 0.000000 0.000000 0.000000 0.007564 0.000000 \n", "42 0.000000 0.010795 0.011790 0.000000 0.010795 0.000000 0.007183 \n", "43 0.000000 0.000000 0.000000 0.000000 0.000000 0.006457 0.000000 \n", "44 0.000000 0.000000 0.011488 0.000000 0.010518 0.022750 0.013998 \n", "45 0.000000 0.000000 0.000000 0.000000 0.010957 0.007900 0.000000 \n", "46 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.007336 \n", "47 0.012811 0.011730 0.000000 0.011730 0.000000 0.000000 0.015610 \n", "48 0.000000 0.000000 0.000000 0.000000 0.000000 0.023716 0.000000 \n", "49 0.000000 0.010904 0.011909 0.000000 0.010904 0.000000 0.007255 \n", "50 0.013251 0.012133 0.000000 0.000000 0.000000 0.000000 0.064585 \n", "51 0.034048 0.000000 0.000000 0.020783 0.000000 0.000000 0.020743 \n", "52 0.000000 0.000000 0.012154 0.000000 0.011128 0.000000 0.037023 \n", "53 0.000000 0.000000 0.000000 0.010080 0.000000 0.000000 0.006707 \n", "54 0.012582 
0.023041 0.000000 0.011521 0.000000 0.008306 0.130319 \n", "\n", "[55 rows x 1193 columns]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.feature_extraction.text import TfidfVectorizer\n", "\n", "# Keep uni- to tri-grams that occur in at least 20% of the documents\n", "tv = TfidfVectorizer(min_df = 0.2,\n", " ngram_range=(1,3), \n", " use_idf=True\n", " )\n", "tv_matrix = tv.fit_transform(corpus_norm)\n", "# get_feature_names() was removed in scikit-learn 1.2; get_feature_names_out() replaces it\n", "tfidf_ngram2 = pd.DataFrame(tv_matrix.toarray(), columns=tv.get_feature_names_out())\n", "tfidf_ngram2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Document Similarity\n", "\n", "- Cluster analysis with R seems more intuitive to me" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789...45464748495051525354
01.0000000.9030990.9017490.9011100.9075690.9301930.8747750.8944020.9191950.918026...0.7740570.8611760.7315670.8443090.7577810.8430690.8793840.7122730.9386020.866187
10.9030991.0000000.8719650.8671510.8807320.9135320.8619100.9027700.8878220.877728...0.7843840.8411740.7296020.8284790.7175050.8303690.8368960.6945440.9079400.852431
20.9017490.8719651.0000000.9051190.8739950.9019620.8836930.8821810.8968130.889755...0.7866360.8214840.7701260.8422090.7279680.8154290.8541840.7103970.8885560.842170
30.9011100.8671510.9051191.0000000.8872740.8987750.8725730.8823920.8957580.890735...0.7804140.8254790.7503770.8484130.7203070.8138780.8526320.6947050.8991650.838796
40.9075690.8807320.8739950.8872741.0000000.9076420.8404820.8658560.8843960.913948...0.7828340.8624650.7390620.8569360.7839670.8546420.8767500.7179060.9105630.878084
50.9301930.9135320.9019620.8987750.9076421.0000000.8748880.8969520.9156620.923222...0.7837590.8493920.7444310.8421410.7460430.8406810.8710480.7028830.9318140.865160
60.8747750.8619100.8836930.8725730.8404820.8748881.0000000.8601510.8825020.852995...0.7525240.7963560.7422640.8282740.7041680.7708700.8183640.6704900.8647910.816389
70.8944020.9027700.8821810.8823920.8658560.8969520.8601511.0000000.8787230.877895...0.7993980.8591840.7672730.8478240.7461110.8267010.8435240.7408930.8963330.845723
80.9191950.8878220.8968130.8957580.8843960.9156620.8825020.8787231.0000000.883385...0.7612830.8274170.7316100.8258900.7458110.8226580.8547920.6820480.9103040.840261
90.9180260.8777280.8897550.8907350.9139480.9232220.8529950.8778950.8833851.000000...0.7656220.8592780.7291470.8385110.7558030.8399390.8598530.6955090.9202410.863758
100.8936280.8677870.8812950.8774630.8893640.9024710.8607470.8732440.9073780.879919...0.7707570.8473290.7251590.8389960.7750470.8253310.8327680.7064810.8955850.835030
110.9180320.9225430.8774760.8731760.8961750.9234940.8659960.8970440.8977520.905827...0.7644350.8515560.7097330.8351110.7201470.8227380.8406740.6633720.9242520.848980
120.9273200.9150160.8825870.8796870.8947110.9226410.8564780.8986650.9182770.897774...0.7694050.8365320.7292730.8257340.7199750.8362360.8633770.6850540.9297220.853080
130.9169930.9009510.9133240.8922500.8975560.9238030.8887280.8881000.9200300.894838...0.7738780.8317130.7382610.8326800.7365120.8221010.8523000.6945460.9131040.843727
140.8798630.8654310.8785110.8637720.8631420.8789330.8446430.8468180.8718420.868329...0.7598970.7793360.7194400.8049860.6702690.7869460.8332960.6411610.8780970.832097
150.9039290.8531940.8739320.8836680.9041540.8968670.8326190.8547800.8790570.894117...0.7759720.8524440.7434080.8472930.7671510.8293950.8660520.7010450.8953060.857095
160.9076920.8743300.8667600.8716480.8923640.9051700.8362400.8588480.8925300.886955...0.7479560.8195380.7112030.8318130.7007310.8184880.8758290.6589740.9078220.840660
170.7227480.6766300.7206980.7046570.7504160.7089520.6906650.7233500.6826000.749956...0.7043890.8019740.7024930.7636390.8121510.7249430.7141010.7501280.7128100.747868
180.8522680.8206480.8002140.7896830.8545660.8400200.7795620.8399350.8165500.854762...0.7552260.8802510.7460720.8262340.8227050.8017680.8093470.7411130.8592470.840234
190.8401750.7887940.8226180.8042910.8370340.8242280.7920470.8309450.8030000.838556...0.7922800.8705590.7782270.8514100.8521380.8128320.8311840.7903610.8320350.858552
200.8333320.7983740.8341680.8170300.8210970.8252800.8082500.8433590.8130960.817110...0.8057190.8611500.8092620.8524240.8085330.7898400.8203790.7727710.8262650.832442
210.7456820.6916910.7247270.7087370.7323070.7100490.6950020.7559240.7123250.740827...0.7114840.8294700.7125950.7729760.8176390.7314150.7222790.7521130.7315880.744636
220.8162640.7690340.7990910.7823080.8122920.7940710.7871100.8101810.7954960.818210...0.7634230.8399150.7356780.8263650.7962300.7620800.7916480.7108640.8129840.831387
230.7418410.6698610.6897740.6859330.7356250.6925630.6694240.7196840.6912920.729020...0.6655080.7833790.6507700.7257220.8144850.7403120.7314990.7077770.7251590.747473
240.8410910.8044080.8050120.8088540.8622050.8279070.7919080.8081680.7981100.839622...0.7698510.8717650.7554330.8383000.8420240.7926070.8076540.7674300.8444230.839348
250.8207050.7851390.8150590.8032490.8100440.8228460.7938440.8168980.7989800.809980...0.8147410.8361250.7928510.8475210.7650310.7663370.8127540.7582840.8211260.822159
260.8465850.8083440.7896340.8001360.8530790.8329640.7750500.8178730.8085680.846014...0.7558620.8725660.7156100.8198970.7982170.7961020.8018810.7125680.8541320.846940
270.8513040.7977140.7975720.7845580.8262980.8269860.7783190.8245670.8174840.844344...0.7422220.8534060.6954240.8096650.7778290.7871530.8102960.7046940.8513610.817153
280.8249710.8167170.7868190.7776900.8168040.8188240.7710250.8315280.7995080.818772...0.8128830.8818820.7919360.8270540.7875980.8030410.7786360.7858320.8294460.819105
290.8523270.8147510.8398250.8171540.8407440.8397300.8191810.8427040.8331080.843954...0.7847250.8570580.8061870.8375270.8175670.8067010.8379800.7716340.8470440.846029
300.7940100.7760420.7559430.7544760.8273700.7907950.7331490.7882100.7500260.821269...0.7561610.8764420.7170540.8052610.8373790.7802250.7539930.7443220.8002860.814796
310.8239720.8134170.7824320.7751160.8111530.8102510.7632650.8337560.7916670.811959...0.7961110.8611170.7521170.8112590.7598710.8075960.7909190.7623480.8277330.827300
320.8783250.8525490.8662490.8416100.8623100.8754730.8557690.8727260.8641580.875575...0.8035740.8784320.7697460.8649090.7878050.7998030.8331200.7466610.8838480.849817
330.8835840.8524110.8452080.8210510.8750780.8866670.8077340.8575160.8379040.877588...0.7949450.8660580.7592730.8486220.7983700.8515700.8535940.7546320.8798340.888921
340.6757860.6619410.6726360.6716710.7000250.6638050.6547600.7200690.6561320.667695...0.8230570.7853720.7583520.7927150.7568190.7327080.7098000.7861380.6741750.726947
350.8109330.7735320.7637580.7566110.8221370.7891800.7392940.7913400.7642880.814760...0.7523780.8856620.7223310.8122750.8638830.8048540.7878810.7579750.8041070.825393
360.8447460.8318470.8300060.8227550.8575870.8444720.7928330.8534400.8131770.853282...0.8223970.9045330.7842270.8522880.8214850.8479950.8184710.8052940.8453150.849890
370.8241120.7806430.7651670.7579070.8141860.8051770.7502040.7983050.7845830.826856...0.7323870.8645410.6799610.7996030.7992580.7662780.7652760.6918830.8258640.812077
380.7654440.7596910.7823220.7489150.7648290.7661510.7410870.7927700.7488630.761727...0.7912900.8074290.7764080.7974590.7743300.7878690.7592140.8147360.7573320.777322
390.8411800.8427350.7951050.7894070.8127440.8349370.7813180.8579530.8217310.821977...0.7939830.8537370.7489580.8119710.7247160.8137350.7958750.7470160.8479190.821548
400.6611170.6016950.6454610.6263930.6745730.6238910.6091370.6654430.6032410.663814...0.6619010.7453580.6570600.7136430.7858330.6967020.6738760.7380640.6409750.710919
410.7536430.7459340.7777220.7513710.7765170.7518140.7332400.7875690.7246890.771514...0.7805590.8300940.7545320.7937430.7704560.7556060.7358750.8008070.7526460.760544
420.7284880.7105390.7323290.7188850.7239470.7123340.7037110.7572660.6985720.712019...0.7683370.8066580.8189250.7715880.7635660.7465990.7338420.8280150.7085620.748552
430.8808140.8606350.8357840.8354290.8600040.8694940.8424920.8760140.8628380.867116...0.8077590.8907100.7748150.8647970.7812650.8077960.8281130.7292360.8913530.874514
440.7053130.6790960.7051160.6792630.7146110.6817840.6602690.7283310.6815560.704017...0.7270930.7830060.7069360.7444370.7971240.7444910.7110700.7723610.6921650.748060
450.7740570.7843840.7866360.7804140.7828340.7837590.7525240.7993980.7612830.765622...1.0000000.8407970.8094860.8820530.7235020.7687550.7832020.7515290.7834540.811948
460.8611760.8411740.8214840.8254790.8624650.8493920.7963560.8591840.8274170.859278...0.8407971.0000000.7889500.8802540.8330020.8332170.8142470.7824750.8616000.861988
470.7315670.7296020.7701260.7503770.7390620.7444310.7422640.7672730.7316100.729147...0.8094860.7889501.0000000.7975820.7485040.7383630.7517530.7897400.7243110.776450
480.8443090.8284790.8422090.8484130.8569360.8421410.8282740.8478240.8258900.838511...0.8820530.8802540.7975821.0000000.7673750.8069440.8380580.7580500.8590210.857034
490.7577810.7175050.7279680.7203070.7839670.7460430.7041680.7461110.7458110.755803...0.7235020.8330020.7485040.7673751.0000000.7637420.7517730.7607350.7411230.794523
500.8430690.8303690.8154290.8138780.8546420.8406810.7708700.8267010.8226580.839939...0.7687550.8332170.7383630.8069440.7637421.0000000.8662170.7577380.8406020.871413
510.8793840.8368960.8541840.8526320.8767500.8710480.8183640.8435240.8547920.859853...0.7832020.8142470.7517530.8380580.7517730.8662171.0000000.7293740.8698480.869194
520.7122730.6945440.7103970.6947050.7179060.7028830.6704900.7408930.6820480.695509...0.7515290.7824750.7897400.7580500.7607350.7577380.7293741.0000000.6968120.737664
530.9386020.9079400.8885560.8991650.9105630.9318140.8647910.8963330.9103040.920241...0.7834540.8616000.7243110.8590210.7411230.8406020.8698480.6968121.0000000.871567
540.8661870.8524310.8421700.8387960.8780840.8651600.8163890.8457230.8402610.863758...0.8119480.8619880.7764500.8570340.7945230.8714130.8691940.7376640.8715671.000000
\n", "

55 rows × 55 columns

\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 \\\n", "0 1.000000 0.903099 0.901749 0.901110 0.907569 0.930193 0.874775 \n", "1 0.903099 1.000000 0.871965 0.867151 0.880732 0.913532 0.861910 \n", "2 0.901749 0.871965 1.000000 0.905119 0.873995 0.901962 0.883693 \n", "3 0.901110 0.867151 0.905119 1.000000 0.887274 0.898775 0.872573 \n", "4 0.907569 0.880732 0.873995 0.887274 1.000000 0.907642 0.840482 \n", "5 0.930193 0.913532 0.901962 0.898775 0.907642 1.000000 0.874888 \n", "6 0.874775 0.861910 0.883693 0.872573 0.840482 0.874888 1.000000 \n", "7 0.894402 0.902770 0.882181 0.882392 0.865856 0.896952 0.860151 \n", "8 0.919195 0.887822 0.896813 0.895758 0.884396 0.915662 0.882502 \n", "9 0.918026 0.877728 0.889755 0.890735 0.913948 0.923222 0.852995 \n", "10 0.893628 0.867787 0.881295 0.877463 0.889364 0.902471 0.860747 \n", "11 0.918032 0.922543 0.877476 0.873176 0.896175 0.923494 0.865996 \n", "12 0.927320 0.915016 0.882587 0.879687 0.894711 0.922641 0.856478 \n", "13 0.916993 0.900951 0.913324 0.892250 0.897556 0.923803 0.888728 \n", "14 0.879863 0.865431 0.878511 0.863772 0.863142 0.878933 0.844643 \n", "15 0.903929 0.853194 0.873932 0.883668 0.904154 0.896867 0.832619 \n", "16 0.907692 0.874330 0.866760 0.871648 0.892364 0.905170 0.836240 \n", "17 0.722748 0.676630 0.720698 0.704657 0.750416 0.708952 0.690665 \n", "18 0.852268 0.820648 0.800214 0.789683 0.854566 0.840020 0.779562 \n", "19 0.840175 0.788794 0.822618 0.804291 0.837034 0.824228 0.792047 \n", "20 0.833332 0.798374 0.834168 0.817030 0.821097 0.825280 0.808250 \n", "21 0.745682 0.691691 0.724727 0.708737 0.732307 0.710049 0.695002 \n", "22 0.816264 0.769034 0.799091 0.782308 0.812292 0.794071 0.787110 \n", "23 0.741841 0.669861 0.689774 0.685933 0.735625 0.692563 0.669424 \n", "24 0.841091 0.804408 0.805012 0.808854 0.862205 0.827907 0.791908 \n", "25 0.820705 0.785139 0.815059 0.803249 0.810044 0.822846 0.793844 \n", "26 0.846585 0.808344 0.789634 0.800136 0.853079 0.832964 0.775050 \n", "27 0.851304 0.797714 
0.797572 0.784558 0.826298 0.826986 0.778319 \n", "28 0.824971 0.816717 0.786819 0.777690 0.816804 0.818824 0.771025 \n", "29 0.852327 0.814751 0.839825 0.817154 0.840744 0.839730 0.819181 \n", "30 0.794010 0.776042 0.755943 0.754476 0.827370 0.790795 0.733149 \n", "31 0.823972 0.813417 0.782432 0.775116 0.811153 0.810251 0.763265 \n", "32 0.878325 0.852549 0.866249 0.841610 0.862310 0.875473 0.855769 \n", "33 0.883584 0.852411 0.845208 0.821051 0.875078 0.886667 0.807734 \n", "34 0.675786 0.661941 0.672636 0.671671 0.700025 0.663805 0.654760 \n", "35 0.810933 0.773532 0.763758 0.756611 0.822137 0.789180 0.739294 \n", "36 0.844746 0.831847 0.830006 0.822755 0.857587 0.844472 0.792833 \n", "37 0.824112 0.780643 0.765167 0.757907 0.814186 0.805177 0.750204 \n", "38 0.765444 0.759691 0.782322 0.748915 0.764829 0.766151 0.741087 \n", "39 0.841180 0.842735 0.795105 0.789407 0.812744 0.834937 0.781318 \n", "40 0.661117 0.601695 0.645461 0.626393 0.674573 0.623891 0.609137 \n", "41 0.753643 0.745934 0.777722 0.751371 0.776517 0.751814 0.733240 \n", "42 0.728488 0.710539 0.732329 0.718885 0.723947 0.712334 0.703711 \n", "43 0.880814 0.860635 0.835784 0.835429 0.860004 0.869494 0.842492 \n", "44 0.705313 0.679096 0.705116 0.679263 0.714611 0.681784 0.660269 \n", "45 0.774057 0.784384 0.786636 0.780414 0.782834 0.783759 0.752524 \n", "46 0.861176 0.841174 0.821484 0.825479 0.862465 0.849392 0.796356 \n", "47 0.731567 0.729602 0.770126 0.750377 0.739062 0.744431 0.742264 \n", "48 0.844309 0.828479 0.842209 0.848413 0.856936 0.842141 0.828274 \n", "49 0.757781 0.717505 0.727968 0.720307 0.783967 0.746043 0.704168 \n", "50 0.843069 0.830369 0.815429 0.813878 0.854642 0.840681 0.770870 \n", "51 0.879384 0.836896 0.854184 0.852632 0.876750 0.871048 0.818364 \n", "52 0.712273 0.694544 0.710397 0.694705 0.717906 0.702883 0.670490 \n", "53 0.938602 0.907940 0.888556 0.899165 0.910563 0.931814 0.864791 \n", "54 0.866187 0.852431 0.842170 0.838796 0.878084 0.865160 0.816389 \n", "\n", 
" 7 8 9 ... 45 46 47 48 \\\n", "0 0.894402 0.919195 0.918026 ... 0.774057 0.861176 0.731567 0.844309 \n", "1 0.902770 0.887822 0.877728 ... 0.784384 0.841174 0.729602 0.828479 \n", "2 0.882181 0.896813 0.889755 ... 0.786636 0.821484 0.770126 0.842209 \n", "3 0.882392 0.895758 0.890735 ... 0.780414 0.825479 0.750377 0.848413 \n", "4 0.865856 0.884396 0.913948 ... 0.782834 0.862465 0.739062 0.856936 \n", "5 0.896952 0.915662 0.923222 ... 0.783759 0.849392 0.744431 0.842141 \n", "6 0.860151 0.882502 0.852995 ... 0.752524 0.796356 0.742264 0.828274 \n", "7 1.000000 0.878723 0.877895 ... 0.799398 0.859184 0.767273 0.847824 \n", "8 0.878723 1.000000 0.883385 ... 0.761283 0.827417 0.731610 0.825890 \n", "9 0.877895 0.883385 1.000000 ... 0.765622 0.859278 0.729147 0.838511 \n", "10 0.873244 0.907378 0.879919 ... 0.770757 0.847329 0.725159 0.838996 \n", "11 0.897044 0.897752 0.905827 ... 0.764435 0.851556 0.709733 0.835111 \n", "12 0.898665 0.918277 0.897774 ... 0.769405 0.836532 0.729273 0.825734 \n", "13 0.888100 0.920030 0.894838 ... 0.773878 0.831713 0.738261 0.832680 \n", "14 0.846818 0.871842 0.868329 ... 0.759897 0.779336 0.719440 0.804986 \n", "15 0.854780 0.879057 0.894117 ... 0.775972 0.852444 0.743408 0.847293 \n", "16 0.858848 0.892530 0.886955 ... 0.747956 0.819538 0.711203 0.831813 \n", "17 0.723350 0.682600 0.749956 ... 0.704389 0.801974 0.702493 0.763639 \n", "18 0.839935 0.816550 0.854762 ... 0.755226 0.880251 0.746072 0.826234 \n", "19 0.830945 0.803000 0.838556 ... 0.792280 0.870559 0.778227 0.851410 \n", "20 0.843359 0.813096 0.817110 ... 0.805719 0.861150 0.809262 0.852424 \n", "21 0.755924 0.712325 0.740827 ... 0.711484 0.829470 0.712595 0.772976 \n", "22 0.810181 0.795496 0.818210 ... 0.763423 0.839915 0.735678 0.826365 \n", "23 0.719684 0.691292 0.729020 ... 0.665508 0.783379 0.650770 0.725722 \n", "24 0.808168 0.798110 0.839622 ... 0.769851 0.871765 0.755433 0.838300 \n", "25 0.816898 0.798980 0.809980 ... 
0.814741 0.836125 0.792851 0.847521 \n", "26 0.817873 0.808568 0.846014 ... 0.755862 0.872566 0.715610 0.819897 \n", "27 0.824567 0.817484 0.844344 ... 0.742222 0.853406 0.695424 0.809665 \n", "28 0.831528 0.799508 0.818772 ... 0.812883 0.881882 0.791936 0.827054 \n", "29 0.842704 0.833108 0.843954 ... 0.784725 0.857058 0.806187 0.837527 \n", "30 0.788210 0.750026 0.821269 ... 0.756161 0.876442 0.717054 0.805261 \n", "31 0.833756 0.791667 0.811959 ... 0.796111 0.861117 0.752117 0.811259 \n", "32 0.872726 0.864158 0.875575 ... 0.803574 0.878432 0.769746 0.864909 \n", "33 0.857516 0.837904 0.877588 ... 0.794945 0.866058 0.759273 0.848622 \n", "34 0.720069 0.656132 0.667695 ... 0.823057 0.785372 0.758352 0.792715 \n", "35 0.791340 0.764288 0.814760 ... 0.752378 0.885662 0.722331 0.812275 \n", "36 0.853440 0.813177 0.853282 ... 0.822397 0.904533 0.784227 0.852288 \n", "37 0.798305 0.784583 0.826856 ... 0.732387 0.864541 0.679961 0.799603 \n", "38 0.792770 0.748863 0.761727 ... 0.791290 0.807429 0.776408 0.797459 \n", "39 0.857953 0.821731 0.821977 ... 0.793983 0.853737 0.748958 0.811971 \n", "40 0.665443 0.603241 0.663814 ... 0.661901 0.745358 0.657060 0.713643 \n", "41 0.787569 0.724689 0.771514 ... 0.780559 0.830094 0.754532 0.793743 \n", "42 0.757266 0.698572 0.712019 ... 0.768337 0.806658 0.818925 0.771588 \n", "43 0.876014 0.862838 0.867116 ... 0.807759 0.890710 0.774815 0.864797 \n", "44 0.728331 0.681556 0.704017 ... 0.727093 0.783006 0.706936 0.744437 \n", "45 0.799398 0.761283 0.765622 ... 1.000000 0.840797 0.809486 0.882053 \n", "46 0.859184 0.827417 0.859278 ... 0.840797 1.000000 0.788950 0.880254 \n", "47 0.767273 0.731610 0.729147 ... 0.809486 0.788950 1.000000 0.797582 \n", "48 0.847824 0.825890 0.838511 ... 0.882053 0.880254 0.797582 1.000000 \n", "49 0.746111 0.745811 0.755803 ... 0.723502 0.833002 0.748504 0.767375 \n", "50 0.826701 0.822658 0.839939 ... 0.768755 0.833217 0.738363 0.806944 \n", "51 0.843524 0.854792 0.859853 ... 
0.783202 0.814247 0.751753 0.838058 \n", "52 0.740893 0.682048 0.695509 ... 0.751529 0.782475 0.789740 0.758050 \n", "53 0.896333 0.910304 0.920241 ... 0.783454 0.861600 0.724311 0.859021 \n", "54 0.845723 0.840261 0.863758 ... 0.811948 0.861988 0.776450 0.857034 \n", "\n", " 49 50 51 52 53 54 \n", "0 0.757781 0.843069 0.879384 0.712273 0.938602 0.866187 \n", "1 0.717505 0.830369 0.836896 0.694544 0.907940 0.852431 \n", "2 0.727968 0.815429 0.854184 0.710397 0.888556 0.842170 \n", "3 0.720307 0.813878 0.852632 0.694705 0.899165 0.838796 \n", "4 0.783967 0.854642 0.876750 0.717906 0.910563 0.878084 \n", "5 0.746043 0.840681 0.871048 0.702883 0.931814 0.865160 \n", "6 0.704168 0.770870 0.818364 0.670490 0.864791 0.816389 \n", "7 0.746111 0.826701 0.843524 0.740893 0.896333 0.845723 \n", "8 0.745811 0.822658 0.854792 0.682048 0.910304 0.840261 \n", "9 0.755803 0.839939 0.859853 0.695509 0.920241 0.863758 \n", "10 0.775047 0.825331 0.832768 0.706481 0.895585 0.835030 \n", "11 0.720147 0.822738 0.840674 0.663372 0.924252 0.848980 \n", "12 0.719975 0.836236 0.863377 0.685054 0.929722 0.853080 \n", "13 0.736512 0.822101 0.852300 0.694546 0.913104 0.843727 \n", "14 0.670269 0.786946 0.833296 0.641161 0.878097 0.832097 \n", "15 0.767151 0.829395 0.866052 0.701045 0.895306 0.857095 \n", "16 0.700731 0.818488 0.875829 0.658974 0.907822 0.840660 \n", "17 0.812151 0.724943 0.714101 0.750128 0.712810 0.747868 \n", "18 0.822705 0.801768 0.809347 0.741113 0.859247 0.840234 \n", "19 0.852138 0.812832 0.831184 0.790361 0.832035 0.858552 \n", "20 0.808533 0.789840 0.820379 0.772771 0.826265 0.832442 \n", "21 0.817639 0.731415 0.722279 0.752113 0.731588 0.744636 \n", "22 0.796230 0.762080 0.791648 0.710864 0.812984 0.831387 \n", "23 0.814485 0.740312 0.731499 0.707777 0.725159 0.747473 \n", "24 0.842024 0.792607 0.807654 0.767430 0.844423 0.839348 \n", "25 0.765031 0.766337 0.812754 0.758284 0.821126 0.822159 \n", "26 0.798217 0.796102 0.801881 0.712568 0.854132 0.846940 \n", "27 
0.777829 0.787153 0.810296 0.704694 0.851361 0.817153 \n", "28 0.787598 0.803041 0.778636 0.785832 0.829446 0.819105 \n", "29 0.817567 0.806701 0.837980 0.771634 0.847044 0.846029 \n", "30 0.837379 0.780225 0.753993 0.744322 0.800286 0.814796 \n", "31 0.759871 0.807596 0.790919 0.762348 0.827733 0.827300 \n", "32 0.787805 0.799803 0.833120 0.746661 0.883848 0.849817 \n", "33 0.798370 0.851570 0.853594 0.754632 0.879834 0.888921 \n", "34 0.756819 0.732708 0.709800 0.786138 0.674175 0.726947 \n", "35 0.863883 0.804854 0.787881 0.757975 0.804107 0.825393 \n", "36 0.821485 0.847995 0.818471 0.805294 0.845315 0.849890 \n", "37 0.799258 0.766278 0.765276 0.691883 0.825864 0.812077 \n", "38 0.774330 0.787869 0.759214 0.814736 0.757332 0.777322 \n", "39 0.724716 0.813735 0.795875 0.747016 0.847919 0.821548 \n", "40 0.785833 0.696702 0.673876 0.738064 0.640975 0.710919 \n", "41 0.770456 0.755606 0.735875 0.800807 0.752646 0.760544 \n", "42 0.763566 0.746599 0.733842 0.828015 0.708562 0.748552 \n", "43 0.781265 0.807796 0.828113 0.729236 0.891353 0.874514 \n", "44 0.797124 0.744491 0.711070 0.772361 0.692165 0.748060 \n", "45 0.723502 0.768755 0.783202 0.751529 0.783454 0.811948 \n", "46 0.833002 0.833217 0.814247 0.782475 0.861600 0.861988 \n", "47 0.748504 0.738363 0.751753 0.789740 0.724311 0.776450 \n", "48 0.767375 0.806944 0.838058 0.758050 0.859021 0.857034 \n", "49 1.000000 0.763742 0.751773 0.760735 0.741123 0.794523 \n", "50 0.763742 1.000000 0.866217 0.757738 0.840602 0.871413 \n", "51 0.751773 0.866217 1.000000 0.729374 0.869848 0.869194 \n", "52 0.760735 0.757738 0.729374 1.000000 0.696812 0.737664 \n", "53 0.741123 0.840602 0.869848 0.696812 1.000000 0.871567 \n", "54 0.794523 0.871413 0.869194 0.737664 0.871567 1.000000 \n", "\n", "[55 rows x 55 columns]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.metrics.pairwise import cosine_similarity\n", "similarity_matrix = cosine_similarity(tv_matrix)\n", 
"similarity_df = pd.DataFrame(similarity_matrix)\n", "similarity_df" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "%load_ext rpy2.ipython" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "R[write to console]: \n", "Attaching package: ‘dplyr’\n", "\n", "\n", "R[write to console]: The following objects are masked from ‘package:stats’:\n", "\n", " filter, lag\n", "\n", "\n", "R[write to console]: The following objects are masked from ‘package:base’:\n", "\n", " intersect, setdiff, setequal, union\n", "\n", "\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAeAAAAHgCAYAAAB91L6VAAAEGWlDQ1BrQ0dDb2xvclNwYWNlR2VuZXJpY1JHQgAAOI2NVV1oHFUUPrtzZyMkzlNsNIV0qD8NJQ2TVjShtLp/3d02bpZJNtoi6GT27s6Yyc44M7v9oU9FUHwx6psUxL+3gCAo9Q/bPrQvlQol2tQgKD60+INQ6Ium65k7M5lpurHeZe58853vnnvuuWfvBei5qliWkRQBFpquLRcy4nOHj4g9K5CEh6AXBqFXUR0rXalMAjZPC3e1W99Dwntf2dXd/p+tt0YdFSBxH2Kz5qgLiI8B8KdVy3YBevqRHz/qWh72Yui3MUDEL3q44WPXw3M+fo1pZuQs4tOIBVVTaoiXEI/MxfhGDPsxsNZfoE1q66ro5aJim3XdoLFw72H+n23BaIXzbcOnz5mfPoTvYVz7KzUl5+FRxEuqkp9G/Ajia219thzg25abkRE/BpDc3pqvphHvRFys2weqvp+krbWKIX7nhDbzLOItiM8358pTwdirqpPFnMF2xLc1WvLyOwTAibpbmvHHcvttU57y5+XqNZrLe3lE/Pq8eUj2fXKfOe3pfOjzhJYtB/yll5SDFcSDiH+hRkH25+L+sdxKEAMZahrlSX8ukqMOWy/jXW2m6M9LDBc31B9LFuv6gVKg/0Szi3KAr1kGq1GMjU/aLbnq6/lRxc4XfJ98hTargX++DbMJBSiYMIe9Ck1YAxFkKEAG3xbYaKmDDgYyFK0UGYpfoWYXG+fAPPI6tJnNwb7ClP7IyF+D+bjOtCpkhz6CFrIa/I6sFtNl8auFXGMTP34sNwI/JhkgEtmDz14ySfaRcTIBInmKPE32kxyyE2Tv+thKbEVePDfW/byMM1Kmm0XdObS7oGD/MypMXFPXrCwOtoYjyyn7BV29/MZfsVzpLDdRtuIZnbpXzvlf+ev8MvYr/Gqk4H/kV/G3csdazLuyTMPsbFhzd1UabQbjFvDRmcWJxR3zcfHkVw9GfpbJmeev9F08WW8uDkaslwX6avlWGU6NRKz0g/SHtCy9J30o/ca9zX3Kfc19zn3BXQKRO8ud477hLnAfc1/G9mrzGlrfexZ5GLdn6ZZrrEohI2wVHhZywjbhUWEy8icMCGNCUdiBlq3r+xafL549HQ5jH+an+1y+LlYBifuxAvRN/lVVVOlwlCkdVm9NOL5BE4wkQ2SMlDZU97hX86EilU/lUmkQUztTE6mx1EEPh7OmdqBtAvv8HdWpbrJS6tJj3n0CWdM6busNzRV3S9KTYhqvNiqWmuroiKgYhshMjmh
Th9ptWhsF7970j/SbMrsPE1suR5z7DMC+P/Hs+y7ijrQAlhyAgccjbhjPygfeBTjzhNqy28EdkUh8C+DU9+z2v/oyeH791OncxHOs5y2AtTc7nb/f73TWPkD/qwBnjX8BoJ98VQNcC+8AAAA4ZVhJZk1NACoAAAAIAAGHaQAEAAAAAQAAABoAAAAAAAKgAgAEAAAAAQAAAeCgAwAEAAAAAQAAAeAAAAAApZ9jSgAAQABJREFUeAHsnQncVOP7/2+hsktEdkWlUglpkyJbksr21R6ivkiRpaSN/qRIaJPKllLSplWprIW0C8nSHi1Isvef9+135jvPPGemeZ5mOefM53q95pmZc86cc9/vM89c933d17LfnpAYiQiIgAiIgAiIQFoJFEjr1XQxERABERABERABS0AKWF8EERABERABEcgAASngDEDXJUVABERABERACljfAREQAREQARHIAAEp4AxA1yVFQAREQAREQApY3wEREAEREAERyAABKeAMQNclRUAEREAEREAKWN8BERABERABEcgAASngDEDXJUVABERABERACljfAREQAREQARHIAAEp4AxA1yVFQAREQAREQApY3wEREAEREAERyAABKeAMQNclRUAEREAEREAKWN8BERABERABEcgAASngDEDXJUVABERABERACljfAREQAREQARHIAAEp4AxA1yVFQAREQAREQApY3wEREAEREAERyAABKeAMQNclRUAEREAEREAKWN8BERABERABEcgAASngDEDXJUVABERABERACljfAREQAREQARHIAAEp4AxA1yVFQAREQARE4AAhEIFsIvDZZ5+ZTz/91Pz444/mzDPPNLVr1zb7779/GMGbb75p/vnnH9OgQYPwtmS84JwFCiR3vLt48WKzbt26cPMKFy5sjj/+eFOqVClTsGDB8PZUvXj//ffNtm3bTK1atcyRRx6ZqsvovCIQWAL77QlJYHunjonA/xH4+eefzZ133mleeumlHEwqVapkxo4da8444wy7/aCDDjK//fab+fvvv5OiMPn3GjVqlJk0aZIZN25cjmvv65tWrVqZF198MddpTj/9dDN48GBTt27dXPuSuQHF++6775oFCxaY888/P5mn1rlEICsIaAacFbdZnWzbtq0ZPXq0nR127NjRKtnnn3/eLFmyxNxxxx1m5syZKYG0aNEi07x5c1OtWrWUnJ+TMluvU6eO2b17t5kzZ459sG3VqlXmlFNOSdl1dWIREIF9IyAFvG/89GkfEPj444+t8j3ssMPMBx98YIoWLWpb3bhxY1OlShX7esuWLebYY4/N0Rtmr48//rg54IADzD333GP3ff/992bkyJHW1ItiRTj/hAkTrFmbGfWFF15oSpcubXbu3GleeOEFe8yGDRtMnz59TLt27czhhx9uleXw4cMNJvHTTjvNNG3a1J6Tg5cvX26mTZtmlSoKfO3ataZNmzamRIkS9lzRfy644ALToUMHu7lz587mxhtvNGPGjDE9e/Y0I0aMsNtRzrGut3r1avPGG2/YQcKhhx5qxo8fby0ATZo0MRUqVAhfbsWKFZbjn3/+adsb3vF/L5jhf/311+a6664zQ4cONcWLFze33367OfDAA+12Bjmcgxl6s2bNzDHHHBM+xa5du+xnvvnmG0N/zj77bNsmZtYsE8Q6N22ZN2+enYnzmmUF2o0lA/6DBg0yJUuWtP145ZVXzF9//WUHRFg8YANrrkF7JCKQdgKYoCUiEGQCTz75JMsse0JKbq/dDK2j2mNDJug9PPgc2xwJrR/bbaEfbbtp6dKl9v3BBx+8p0yZMntC68l7ChUqtCf0w74npDjtPs7hPL799ts9IXP4nrJly9ptIUVhn0ODgj2cCwkpSrsttJYb/pyzzx7wf39atmxp9/ft2zdy856pU6fa7RUrVrTb93a90ODBHn/uuefatoeUsH1PP7788kt7jpCSsxwcHuyjzbwPmaDtMZdffrl977SbZ2T
+/Pl74OMw4Pnoo4/es3DhQrv/119/DfMIrV3v2W+//faEBjL2+HvvvdceE+vc3FPOR5thz+uQNcB+xuF/6qmn7gkNeuzDuXZIqe9x2LPt0UcftZ/RHxFIJ4HkeoWEvskSEfAaAWZ4yEknnZT0pj3zzDP2nK+++qo1+c6ePdvcdttt5ocffrAzWpy6kHPOOcesX7/enHjiiXZWzcyXGfT27dvtrBJnpgcffNAe6/xhNsna8ZQpU3LMRJ39sZ6dfq5Zs8Yewiw+ketxDOu5P/30k7nooovM77//bmbNmmXPwcyatfFOnTqZHTt2mCFDhlgHLLc2HHLIIebDDz+0s38+c9VVV5mQkjXPPvus2bx5s7n//vvN1q1bTYsWLazDG7Nlrn3eeeeZ0ADFcoSHm0Sem3ZgncA6gFMdzNk/d+5c88svv4Q/zjlZfqBfzHa5Nu3hGo5PAIwlIpBuAlLA6Sau66WdgOMRzI9ussUxCzds2NCanV9//XXTqFEjaz7Gu7pYsWL2krThhBNOsB7X77zzjt2GCfa1116z5ujQjNIqjsj2XXzxxXZ9t379+pGb9/oapYegnJBEr8cgARM63to1a9a0n0XJhWYE1nOcDe3btzd4WzN4OOKII+wx0X8wO1etWtWatFHEOMBhGmY7Zn5M45i6v/jiC/PVV1+ZTz75xJ7ihhtusGZrzPcobTeJPHeRIkWskqdNTz/9tLnlllsMZmgk8l5zrXr16tntnBvhHmGmpp0IAwOJCKSbgNaA001c10s7AcJyEJySIoXQoJtvvtmuA7NuGEuh4BHtyB9//OG8tM/M5kImU7u+GjLXGh4DBw6067333XdfjmOdN87sbOLEiWb69Ol2s6PIWQt1hDXU/AgzPsTx7E70epFrss6gBUbMhHkwoHCO4TUDCmaV0RLZbtbMEdZ9HWGwwWdRwMxGHb6RoUwhE7VzeI7nyHPTL5zNmPGyzhsyU9t7yEyYe+KI02beM3hAnIFRyDRu3zPIkIhAugloBpxu4rpe2glceeWVdraDeTi0Hhm+PubHF0JOUnhFRytWDmImyCyJWRVOTMh3331nn50/xOJi8uWZfU888YTdRRgQgqJCHCXDayc8qGvXrtb0SlxyaB3XOl5hQnUERZVXYeY3YMAA+zFmeUii13NmzPZDEX9QWsQX0wfaiuC0Fj2gcT4S2W5MvghK0pmZ42iF8oVNaC3cMNNHZsyYYZ+5TiyTcOS5GcBwXpy+mEmzHOAoXueZE7rFX7ttsxfXHxFIIwEp4DTC1qUyQ4BQnG7dullT6iWXXGJQyIQFtW7d2jbo7rvvDs/soluI6RTBvPnUU0+FvaGd4/CsxYMWkyze0Js2bbK7nPAfPK8RFA4zYvY75tDu3bub/v3723ZgZn7sscfssc6fSCXibHN7Zg0VJVu9enU7GCBBBqZWx3M70eu5ndvZhqUAueKKK+warqM0nf2Rz5HtDjlAGZgzW8WjGtY1atSwh+O5zayXmSszbsz3KGxm7itXrow8Zfh15LkZFCAhBzXrhc49cmbcmL0lIuB5Aun0+NK1RCCTBEIJMfaEFKP1lA39Y+4J/fjvefjhh/eEzKzhZuHxzL7QLMxue++996zHLttC65d7QuE9dr/jBR2aGe8JKfI9Ieeq8HlDYUh7Qg5U9vOcOxRWY/eFZnxhj+HQDHlPaA3TbsdD9/rrr98Tmr3azzhe0KHY5XC73F44XtC0jUdodrgnZOrdE0o4sidk2s3xkXjXc7ygaYMjjzzyiD1naL3WbqJtIacp66Ecmj3uCYU62QfXjfaCDsUiO6exzzCirSETvz0n3sddunTZEzJrh48LWSf2VK5c2TLh3NwXzv3QQw/ZYxwv6Ohzt2rVak/IamCPvfTSS/eEQsvs65DTVdgLPWSeDl/n1ltvtftDoWR2W8gxzr4PhYKFj9ELEUgXAWXCCv2XS7KLAN6vmGpZh0zEFBn6ZzQbN260x8cjRVpIZnTOrDfyWDygiT/
GpB0pxAfjmBTL/Bt5bDJe7+v1mFmyLhy5Xptou/gc12fm6pjm+WwoHMnGB4fCuMKz45DiNaFBgI0NDinNuJfgXmLePuqoo+Iep50i4DUCUsBeuyNqjwhkGQHCrRxHOUK4WAMmbAjFSqIMFLNEBIJIQAo4iHdVfRIBnxEgrjiUMMU6sjE7RiEzC77mmmt81hM1VwQSJyAFnDgrHSkCIpBiApj7Ix2tUnw5nV4EMkpAXtAZxa+Li4AIRBKQ8o2koddBJyAFHPQ7rP6JgAiIgAh4koAUsCdvixolAiIgAiIQdAJSwEG/w+qfCIiACIiAJwlIAXvytqhRIiACIiACQScgBRz0O6z+iYAIiIAIeJKAFLAnb4saJQIiIAIiEHQCUsBBv8PqnwiIgAiIgCcJSAF78raoUSIgAiIgAkEnIAUc9Dus/omACIiACHiSgBSwJ2+LGiUCIiACIhB0AlLAQb/D6p8IiIAIiIAnCUgBe/K2qFEiIAIiIAJBJyAFHPQ7rP6JgAiIgAh4koAUsCdvixolAiIgAiIQdAJSwEG/w+qfCIiACIiAJwlIAXvytqhRIiACIiACQScgBRz0O6z+iYAIiIAIeJKAFLAnb4saJQIiIAIiEHQCWaOA169fb9asWRP0+6n+iYAIiIAI+IRA1ijgDz/80MycOdMnt0XNFAEREAERCDqBrFHAQb+R6p8IiIAIiIC/CEgB++t+qbUiIAIiIAIBISAFHJAbqW6IgAiIgAj4i4AUsL/ul1orAiIgAiIQEAJSwAG5keqGCIiACIiAvwh4TgH/9ddfZseOHf6iqNaKgAiIgAiIQB4JeEIB//HHH6ZLly7mpJNOMgULFjRHHXWUOeSQQ0z58uXNyJEj89glHS4CIiACIiAC3idwgBeaeOedd5rNmzebqVOnmhIlSljl+/PPP5vPPvvMdOjQwfz222+mXbt2Xmiq2iACIiACIiACSSHgiRnwrFmzzNChQ02FChXMoYceavbbbz9zxBFHmGrVqpkBAwaYiRMnJqWzOokIiIAIiIAIeIWAJxQwpua5c+e6MnnzzTfNMccc47pPG0VABERABETArwQ8YYLu1auXadKkienfv78pWbKkOfzww81PP/1kVq1aZXDKmjZtml/5qt0iIAIiIAIi4ErAEwr47LPPNosXLzbka/7222/tejCzXtZ9a9WqZU3Srq3XxlwEPv30UzNv3jwxy0VGG0QgMQK///67wS8FR1CJCKSSgCcUMB0sXLiwqVOnjtm1a5fBAeu4446TEsnHnceb/KabbrI88/FxfUQEsp4Ak4BRo0aZW2+9NetZCEBqCXhCAf/999+mc+fO5tVXXzUbNmwwhQoVsgrk5JNPNr179zZXXXVVaikE6OzFixc3VapUMaeeemqAeqWuiED6CPA79Msvv6TvgrpS1hLwhAJG+f7www/mrbfeskk4evToYR5++GFToEABc9ddd9kwpOuuuy5rb5I6LgIiIAIiEDwCnlDAeDqz/kvoEdK+fXszcOBA89JLL5nhw4ebrl27Ging4H351KPsJrB7924zduxYz0Hgt4h1YCxxXpNGjRpZJ1WvtUvtyR8BTyjgcuXK2TCkhg0b2l4sWLDAZsXizXfffRdWzHvrIp/DmctNcE469thj3XZpmwiIQAYIPPnkk3apqWjRohm4euxLVq1a1e7cs2dP7IMysAfnyuXLl5t+/fpl4Oq6ZCoIeEIBk+2KdV6SbjDy/OqrrwwKk8xYbdu2Na+//npCfcdrMdY/Mwk+DjzwwITOk9+DlixZYj766KP8fjwpn4PdmDFjbDrPpJwwHyfBg52RukQE4hHA8bJ06dKmfv368Q7Tvv8jcMopp5h3331XPAJEwBMKuEaNGmbjxo1m8uTJ5uCDDzaXXXaZVZYozC+//NIcdNBBCSE/66yzDA83IbsW68ypFMzk3bp1M/vvv38qLxP33F5I2cnAibze5557bty2aqcI+InAa6+9ZvCQzpRgDVy3bp3
p06dPpppgDjjgAHP77bcryiJJd8ATCpi+MBq+/vrrc3QLk/EXX3xhfv31V0OssNeF0TwzP2bb2SyxlgGymYn67n8ChPjhm5IpqVixYqYuHb4uZvD58+fbSVJ4o17km4BnFHCsHowbN86uAw8bNizWIdouAiIgAiknwAD78ssvT/l1vHwBltkkySPgeQWMB7REBERABERABIJGwHMKmNzPO3fuNEWKFAka66T05/PPPzeXXnqpKVu2bFLOl4qTbN++3XzyySeeDOOgv9SfJt+4qmyl4u7rnCIgAokS8IQC5geR5Bsvv/yyzYSF+z/OWKeddpq55557TOvWrRPtT+CPW7NmjY2T7tSpU+D7mqoOku70hhtuSNXpdV4REAERSIiAJxQwic83b95sw45KlChhk6CTD/qzzz4zhCj99ttvtjBDQj3SQSIgAiIgAiLgAwKeUMCzZs2ymbAowOAIWbGqVatmY4O7d+8uBeyA8eHz2rVrrXf4iSee6InW//PPPzac4+qrr/ZEe2jEjh07zPjx41X72jN3RA0RgdQT8IQCLl++vM2EdeONN+bqMWkqSewg8S+BlStXmiuvvNLIbB77HhI/zkBF3/XYjLRHBIJGwBMKuFevXqZJkyamf//+pmTJktZB5qeffjKrVq0yOGVNmzYtaNyzrj/ERuP4JHEnkMnkLe4t0lYREIFUE/CEAibJBskbSIJOphnWg5kJkNWpVq1aqguc6m+Bzi8CIiACIpB2Ap5QwPSaTFh16tRJOwBdUAREQAREQAQyQcAzCjgTndc1RUAERCAbCCxbtsz6YCSaVz8Wk++//97gNDto0KBYhyS0nSVGshxmu8+DFHBCXxcdJAIiIAL+JbBw4ULTrFkz4xXP/549e9oUw1LA/v1OqeUiIAIiIAIJEqCSEeGdXhA5Hf57FzQD9sK3UW0QAZ8SGDVqlKFQSqFChfLcA2KfCxYsaJ555pk8f5aUrDNmzDBnnnlmnj+rD4iAVwhIAXvlTvxfOxYtWmS9wGM16+OPP7ZJJKi5G0tY57noooti7dZ2EUgaAep1k0a2evXqSTtnIicaOXKkjZyQAk6Elo7xKgEpYA/dGQpuk4ykTZs2MVvFjIEc2aTpjCWjR482ffv2NRdffHGsQ7RdBJJGoECBAnYmm7QTJnAirikRAb8TkAL20B38888/bdzzvffeu0+t4sdp9+7d+3QOfVgEREAERCC1BDSMTC1fnV0EREAEREAEXAlIAbti0UYREAEREAERSC0BKeDU8tXZRUAEREAERMCVgBSwKxZtFAEREAEREIHUEpACTi1fnV0EREAEREAEXAnIC9oVizaKQP4IUMlr3bp1ef4wn6Nu8j///JPnz5533nl5/ow+IAIikHkCUsB5vAcTJ040Tz75pK3eFP3RjRs3moYNG5roGMUffvjBNG/e3Nx9993RH9H7gBG44oorTN26dfNcQpN6yZ9++qlZsWJFnohMnz7d9OnTx9SrVy9Pn9PBIiACmScgBZzHe0ACjAcffDBPpRNXrVplhg8fnscr6XA/EiC5fO/evdOWmKJcuXKGCjUSERAB/xGQAs7HPctr5p/oGXE+LumZj1x11VXm999/z1N7du3aZf744w8ze/bsPH2OXMHk+y1atGiePqeDRUAEvEGAXOFr1qzJ1ZilS5eaX3/91UybNi3Hvl9++cWQiChbqiRJAee4/XqzNwI7d+609UD3dlwy9nfs2NHmxZYCTgZNnUME0k+ga9eurta/mjVrujbmq6++MmPHjjW333676/6gbfSsAmbdtEiRIoYSWhLvEGA2f+CBB+Z5jTM/Pdhvv/3y8zF9xscEmBmxpr236kr4W1DS7q233orb27Vr15rnnnvOlCxZMu5x2pkaAuStz0thmG3bthl++7NFPKHdWrRoYbp06WLKlCljvvjiC+usNGfOHINjyn/+8x/Tv39/+6OfLTdF/RQBvKGXLFmyVxDffvut2bNnj3Xg2tvBlSpVyuU
guLfPpHv/3LlzDcsc1apVS8ql+R3hnH5UwAwyeCRD+J5QJe2TTz5JxunMySefbIoVK5aUc2XzSTyhgPH8ZJ0QefTRR60ifvnll83WrVutMmZbt27dsvk+qe9ZRmD8+PF2/fvII4+M23PW0RDW2uIJSqhz587muuuui3dYxvdhYSlcuLA59dRTk9IWzuXXwiTMHBmMJEP4ff3xxx/Na6+9ts+n++233wxlUz/44IN9Ple2n8ATCjjyJsycOdNQY/Swww4zRx11lHnkkUesEpYCjqSk10EnwODz0ksvNTfccENSusq6GueU+IdA8eLFbVlRr7UYZZ6s76XX+pbu9ngmExajqU2bNpmqVasa1gEcWb58uTn77LOdt3oWAREQAREQgUAQ8MQMuGnTpmbKlCnm4YcfNj/99JM1QVFUvkePHmbgwIGGdRzJ/wjAqHbt2jHNdOyfNWuWq/ch4UCY+eCdLmHdiVrHeRWyQ7EO+vPPP+f1o+b000/PmlCGPMPRB0RABDxBwBMK+J577jE8kA0bNtgfXJxQatWqZTp16mSdsTxByyONIFaubNmye133c2sun73xxhvddqVkG2uP7du3N/Xr18/z+XHyQAHnNTsU8cPffPONYTlDIgIiIAJeJeAJBcwMh1CB+fPn2/VeYk1RviR8wAua1I94REv8RwBleOutt5o777wzbY0nM1Q6r5e2julCIiACgSLgiTXgxx57zHz00Uc2n+1dd91lHnjgATN58mSD6zwm03HjxgUKujojAiIgAiIgAp6YAU+aNMkq4EMOOcRs2bLFems6cYCETlDEoHXr1rpbIiACcQgQZoJvAOvf0YLn6t9//+3qT0Gucsz1J554YvTH9F4ERCCFBDyhgM8880yb0aZOnTrmnXfeyRG3t2zZMlO5cuWEEHz33Xdm/fr1rsd+/vnnyqrlSkYbg0KA9f2zzjrLEEOfF+nQoYMd9EoB54WajhWBfSfgCQWMA9ZNN91kvv76a+uwwxowSrlixYrmvffeM/PmzUuop2TRQoG7CaN8P2bDceuLtomACIiACPifgCcUMOZmyvxt377dVr7B+QqTGCa1kSNH2hRqiaAmcQEPN2EdOZtyjLox0DYREAEREAHvEPCEAgYHifedqjckYm/QoIF3KKklIiACIpDFBHCIbdiwoTnllFMMIaLr1q0zV199tSVChrU33njDHHvssVlMKH9d94wCjtV8zMrku1U2rFiEtF0EREAEUksAX5zGjRsb/AWihXTBxN1LAUeT2ft7zytgTMc4Vw0bNmzvvfH5ESSeaNmypc97oeaLQDAI4Lh5ySWXWH8UekSEhrPExW8SpRCpCpQtQi6Gww8/PFd3KQspyR8Bzyngv/76y+CERS1ghILO2SJ8uS+44IJs6a76KQKeJrB69WrTsWNHGwYZ3VDCI8nal00KOJrBvrxnAFO3bt1cIXN48mPijk6VSzY8/ILIABgk8YQCJtkGeZ8Jn+BLTX3Tgw8+2FDMGQ9pxQAH6SunvoiACGQ7AcJFqaiE+ToRGTBggK2SJwWcCK08HkPaQBLvT5061ZQoUcKQkIP0lHhGs+ZA/cl27drl8aw6XAREQAREQAS8S8ATqSip3DN06FBToUIFm/MZj+gjjjjCEJ7EyGfixIneJaiWiYAIiIAIiEA+CHhCAZcvX95QNcdN3nzzTZWVcwOjbSIgAiIgAr4m4Ik14F69epkmTZqY/v3722xVOCNR05bsVThlTZs2zdeQ1XgREIHsJsAS28aNG/MEAWfUTz/9NE+fOeGEExQOlCdimT3YEwqYGN/FixebDz/80BDwzZf1mGOOseu+lCXEJC0RAREQAb8SuOyyy8zFF1+cp9+yvNb8xnv47bffNkuXLvUrpqxrtycUMNQLFy5sKMYgEQEREIGgEWBC0bdvX5PKmFmqXaHoJf4h4Ik1YP/gUktFQAREQAREIDkEpICTwzFrzkKYmJYEsuZ2q6MiIAIpJOAZE3QK+5iRUxN
o/txzz1mT07Zt28zKlStNz549bVvImdq2bdtc7cKEROF0t3RvuQ7O0Ibnn38+Q1fWZUVABPxG4NVXX/Vbk9PaXingFOEePXq09eimeghCInNH+vXrZ26++WbDM+nu7rjjDpvlhWdKMlJ1hM9TFUoiAiIgAn4lcNxxx/m16WlptxRwijAfeOCB5sgjjzS1a9fOdYVBgwaZe++914ZZVa5c2aZkO+CAA8z48ePNiSeeaPPPknyEVG0SERABERCBYBKQAs7QfSW2+ZNPPrHm5oMOOsh8//335sILL7StIT8qRSikgDN0c3RZERABEUgDASngNEB2uwTOTJQ7q1KlirnlllsMa8aOLF++PFeVEGdfXp8pbsEatCPUVibPNvU9I6V48eLKOBYJRK9FwIMEKFSzbt0615bhQ0Le/LVr17ruZ6OqN8VEk5EdUsAZwW5sibOrr77aDBkyxPB8/PHH25Z06dLFjBgxwsyePTspLaOeKQ9HCNbHzD18+HBnk/nzzz/NokWLzMKFC8Pb9EIERMB7BEjNy/9uLEdNqsjFKuH63nvvGZwoL7roIu91LEtbJAWcoRtPYe8vvvjCej07TWAEyz8H/0D8IyVDypQpYwtaxDvX77//bgcB8Y7RPhEQgcwTYPbbtGlTc9111+W5Ma+88or55ptv8vw5fSB1BBQHnDq2cc/MrHPgwIHmwQcftPlex4wZY3O4opibNWtmUIoSERABERCB4BLQDDhD91Ze0BkCr8sGlgAx9E899VSOdI/UFGewu3Xr1nC/8YO4++67bZRCeKNeiEAGCEgBZwA6l5QXdIbA67KeIcB6Zu/evXMst+AgSL7kZ599NtxOktjMmTPHlCtXLrzN7cW4ceMMEQUUMXCkUqVKzsvwM2ZYEkT897//DW/TCxHIBAEp4ExQD10zXV7Q+e0e5SBJORnL2SO/503G55jN3Hfffeboo492PZ3jCcoxsYS462Sutce6TpC2owhff/11U6DA/1auCKXbsmWLmT9/frir7GeGiTKMJ/hAsARTv379eIeZwYMH20xye1PAnOSoo44yl19+edzzvfbaazbhTdyDtDMpBAivLFasWFLOFcSTSAFn6K7yA5UOL+j8do/1aeozk06zUaNG+T1NSj6H9YAf93iOKM2bN497bTxJUQCUwpQkRmDo0KHWSTByUFajRo1cH54xY4b54IMPbPm9XDu1IWsIYPo/44wzbG33rOl0HjsqBZwAsLp164aP+vHHH83UqVNNnz59bOzunXfeaW6//fbw/kRfuHlB89mrrrrKMzOz8847zxAWRVauzp07GzyqvSLnnnuuoX35FVJ9SvJG4NBDD7WOgtWqVYv7QZSvJLsIPPPMM+aKK67I1emdO3eaIkWK2O07duzItT/bN0gBJ/ANIPj9rbfeymF642PEzU6YMCGBM7gfwkwicjbBUXv7cXM/U2q2nnXWWeaNN94w3bp1M+ecc46pXr26ue2220zVqlVtLHFqrqqzioAI+I0AkRu//PKLjd544oknDIM1ZsBYmJYsWeK37qStvf9bzEnbJXUhPxEoXLiwefzxx83GjRutKZqi4qeffnp4VOunvqitIiAC+SOAQiW3vZuw7k4mvUmTJtmiM02aNDFk4KMQDf4APDtFadw+n83bPDcD/uuvv0yk2SKbb46X+n7EEUdYr1E8R0kYsmbNGi81T20RARFIIYF42bMIqXTk1ltvNXXq1DEtW7Y0NWvWdDbrOQYBTyjgP/74w/To0cO8/PLLduSEyZdMUKeddpq55557TOvWrWM0X5tTRYA16ljJQAgTKVWqVI5LU0YR8/QhhxySY/sPP/xguL+kuoyUzZs325R6J5xwQuRmvRYBzxAoWrSo2b17t2t7+G1iUCrJTQDHq3feecfWP+e1JDYBTyhgHJn4Qca5ifAcfsSJBySIvkOHDjbBeLt27WL3QnuSTgAnJzeJFVbw5Zdf2nUfBlKJCOEsCxYsMNdcc00ih+sYEUg
7AXweYgkzPUlsAoT5Pfzww4bfdjfBnM0gJtvFEwp41qxZ5sMPPzSRxZsZXeKQNGDAANO9e3cjBZz5r+rewgq4f4mu9RQqVCjzHVILREAEUkYg3u8FMd2JxHWnrHEeObEnnLDKly9v5s6d64qEbDnHHHOM6z5tTB0B1nUIH4h8EFDvrM87oQWpa4HOLAIi4BcC+r3I353yxAy4V69eBs85Ej+ULFnShuaQiWnVqlUGpywSL0jSS4AsUpiVWQdWWEF62etqIuA3Avq9yN8d84QCJlZs8eLF1gz97bff2vVgHCDwpCOtHCkRJeklgNWBsAIyYTE4Ih0giS+csIL0tkZXEwER8DIB/V7k7+54QgHjcMUPPflkSdGI8r355pvt7Os///mPefLJJ62DT/66qE/tCwGFFewLPX02GwmQkIKlmkjBkrRp06YclZqIrT3ssMMiD/P9a/1e5O0WekIBP/bYY+arr74y9erVM3fddZc1O0+ePNmULl3aKmSqnCgUKW83NplHK6wgmTR1rqATIA42OvwGr+Do4iAs8VDMImii34vE76gnFDCmzo8++siGH1FZhWo3TkpGchAzK5YCTvympuJIJ6yA0AKJCEQTIIph2bJldvOnn35qqw0xqCamn2Iexx57bPRHAvuemS3lDvcmbrmT9/YZv+zX70Vid8oTXtBnnnmmzbWM4xUB3JFJG/inrly5ckK9IeED5h+3ByXqyOAkEQERSD6BBg0a2MQUhA9ef/31pkqVKvY9PgM48Un8SQAn2L0VUaBADb+9krwT8MQMmGxXN910k/n6669N+/bt7foJSrlixYrmvffeM/PmzUuoZ1S4IcGDm5DL+Pzzz3fbpW0iIAL7SIBQQvw1ooWZMY6VEv8QyGtmwlatWtk0tWTPk+SNgCcUMOZmsl6RzhAHLBwWZs6caRhZjRw5cq+FvZ0u4zXNw01YRyYtokQEREAEEiXADJDfDYoNZIvEykzIBKdNmza2/GpkIp1du3aZOXPmGMzOL7zwgq1zni2s9rWfnlDAdIJQI5Qvws3FpOVnwYubL6SbUOxeIgIi4H0C33zzTdgZtHfv3va3yfut3rcWxspMiIfzihUrzIgRI8yoUaPCmaxQys2bNze1atVKOGkSS4LMtKPLse5by/33aXcN4aF+fPHFF+bXX3+1dSU91Ky9NiVeeIEye+0Vnw4QgbQSYJ2a1InRgkMouelxEiUOnogNchOwth1UcTIT3njjjTm6SJ/J4Vy7dm3z0EMPmaZNm5qOHTvawjk42SWahpaTjh8/3mY/fP7553NcI9veeF4BYzr+7rvvzLBhw7Lt3qi/IiACaSLAOvWzzz5rl7AiK3qxDIZ1jqUtlA856bFs8UzYJCE3Bx54YJpamZ7LJJKZEIVLWFXdunUNJuh4AiMGMpHC7BfzPoq4YcOGdqkxcn+2vPa8Au7atWu23Av1M8kE8HrHXObm/Y7PAf/8CxcuzHVVKkHFqgaV62BtCASBZ555xvzzzz/2EblEtHr1apuhjyUlhJAq/FOYuf2///f/bMTFhAkTzJVXXhkIDnTCLTMhVjsGHZiZncyETz/9tJkxY4Z54IEH7Cw4FgD8eHCybdasWdhHZ+LEiZZrnz59cpUwjXWeIG73nAJmVOQk/A8i8HT3idFltsobb7xhXnvtNes4Es0gVlw5P8J45ZOVTZJdBFAG1LQmjJFYXjfBDEv8rhPDSxnVSIckt8/4cVvhwoUNCUUcYRDLTNdRvs52zPEVKlQwxx9/vLMp13PNmjVtwpE77rjD5nQYOnSoOfrooy3jvJitc504ABs8oYDz6vYeAO5p60KLFi3Sdi0vXqhx48bmhhtuSLhpOIe89NJLCR+vA4NDAKWLc1GkMPOjHGosiSyhGusYv21nLbxfv36G2T9Kk4xdPG/bts16g+OYxqBjzJgxBnM1x5G1kJkws1w3wdmK/6uxY8faWTQhofvvv7/boVm1zROeBLi9r1y50kydOtWQF5p
ZCHG7rPsOGTLEFgLIqruizoqACHiCAOu+bqGNzJLdnLY80eh9bASlBcm9wDovg9eePXva5RpCjVgTx3zMQBUHrL59+1qLwVNPPWXuv/9+s3z58rhXJ0kLXtasCQdx8BK38y47PTEDjuX2TnzwgAED7AiU9QeJCIiACKSbwJo1a2yp1IMPPth6/5IsiJkyzlh4Twftt4nyr+SoZtZ60EEHme+//95ceOGFBg5HHXWU7Tsho8xinbVvnLFIyEHipLPOOivuLTrxxBOtEt+b81bckwRkpydmwI7buxvTN998M+HYMrfPa5sIiIA3CZxwwgn2Bz1ZrcNBikcyhVku672sieIpXbVqVUO+esIjp0yZYiu1RVc+Sub1M3GuEiVKmM8//9xe+pZbbjGRy1j0FaVcrlw5m6KSXN8Iz8QI48AVLTB89NFHrSMWecIxXTO7xrrAEhGJl7JVPDEDTsTtPVtvkPotAkEl4Ja6cl/6Sh7qZMuSJUvs+iZrogiTBUytp512mn1gosUkSyKKoAjFb66++mq7/Mez42BFmBbKlOVCQoswQ+MBjhJl1gt/BijRgkl71apVNqc/vLAcEIHATBgzNvzy4qcRfX4/v/eEAk7U7d1PoFkjwmQVGbBPUnMSdMTKkOWn/mWyrSQIcEbemWxHtl27U6dOWRcywmyQmS/KhlkwfilsQ/gOoozq168fqK8COZ2Z4UebiFm/JRsYv2t4RTNLLlasmO07a8CYod0klkmbYx955BFDqKkUsBu5NG6LdntP46X3eilGvdHu93wIZYobPV/E/v37239S1oTwmiT+FCeDSy65xGB2wTkBb288/xj5EcRPVp1sT8W2V/guBwQt8YFLFz25ifW/bBPWOjFBM+OlvKJTJpWZIBMHwmncZn1+58TvUvRvk9N3+sbvGGZoR2IpX/Y7Jm0qZGHSXr9+vfMx+7t4+umnh99n2wtPzIC9Dh1HMDcpW7as4YH5pUmTJoZyisS8YZ7BYYH169tvv93WBr344ovtmgfrHXh4E/hPjO7bb7/tdmptEwER8AiBxx9/3JCIIzJDFoPAeLM+jzTdE82IZdLu0qWLnajMnj07RzuZXW/YsCHHNqyHzLzXrl0b3o51kcmMn0UKeB/v3tKlS03JkiVN586d7ZkmT55sMNWxboIZh0B1TDnO7AEHBo5nplyqVCnryFCkSJF9bIU+LgIikEoCxLlGS7xZX/Sx2fw+lkn7qquusuZnFGukvPrqq3ZdODIZCuGpmPvxsnbkgw8+MK+88oqvy8xKATt3M5/PKFMU7aZNm2w5RfJW48KP236lSpVsJqaHH34419mXLVtmy5zFK9qQ60Pa4CkCmNLWrVuXo03ESS5YsCBHfmAGY9me8ScHJL3JOgJ7M2lHAiG7GKkrnRCnyH2Rr0kZyv+fn+u8SwFH3tF8vGaUxpcFM/Tu3bttphcSi2BWwWWftSJKK1JJhVmwY4JGAZOSLZUOWdQxZXadyms4yFgTCmJKPqd/bs8XXXSRufbaa3Psqly5ss2PG7kRj08GaRIREAERiCQgBRxJI5+vcbP/73//az2eMTHzI0yMYJkyZWzaNczUOGoxO2Y/8Y84ZyVz9kusHmsqXJMfe9ZdyFzDAIFwD5zEUum8xACDRzYJ95GE/HsT7r9EBERABKIJSAFHE8nn+0gHjeLFixuKVztSsWJFwyNaqHMcvf4RfUyi7wmCd8IGCHpHEb/88ss25RvKmG3dunVL9HQ6TgREQAREIMUEPJEJK8V9zOjpWf9lpsRa8euvvx5uC55+kUo7vCMJLyiX1qNHD2vyxtGLWDtyu0pEIIgESJVIBIJEBPxGQAo4hXfsww8/tOuB06dPt0qQMnfPPfdcyq6IVyDOYMQlUrnEEWKQ3VLEOfv1LALpJJAMhUmYCt61ZF8irSHhKCSFwHGHpR6JCPiBgBRwCu8SIUmU8aJeJtmbcKEnk0zkTDhZl2/atKldd8bUTeYZJyyKmTAmaLeKLsm
6drrPE7Tcu+nml9frsVRCZrd9kWQqTDJQ1a5d25CZidAUaj6fc8451s+CcngUjdeMeF/ulj6bLgJSwCkkjTJkFuzISSedZJNz4CWd7AQczK6pKsXs4uuvv7brvXhBE6tI/U4GAX4U+kSuWWT+/Pm2P3h2E9qjur2pu6M48l133XU2GxJLJTgMUhuX71NencqSrTC5PksrZFVi5osiPvnkkw0WIAa6RCVgdZKIgNcJSAHv5Q6RA3b79u05jsLZCVPv3vIRYw5DaUSmcCNciQTm/FAkU/CCdiqYMFshGQgKn2xbFMomHMpvQupOMhCRTQzWlH0j6T2xtqNHj7aDjLwqA78xyER7CacjxIr4ys8++8yGzjlJ+Nu0aWNnmHz/ExUGhclUmPhTMPMlOQNC/fCPP/44nBWJbEmpKMyQaH/JeicRgUQISAFHUHrxxRcNFT8Q/qmp+8lMCycqMlqhDBhxE9pDblhmYswsCTlyE2YNKAhS1kUKa7Ssy953332Rm/fptZsXNCkvmRWQTB4vaL/JokWL7Ho2P2jMyMgXjikd/iQ6Yabz/vvv+61bnm+vM8NkEMf3vWDBgjaWnDVWkuYz85w6dWrC/aDsXDIVJv9XmJpJbnLNNdfYZ9K/kviGtjEQzWSWKooLSEQgEQJSwBGUGNU7mY0wYS1cuNC8++67drbLrOCNN94wb731lv3RZ50Jb+Phw4db5TZu3LiIM+V86ZaphVClPn365DwwSe+C4gXNj+pHH31klS8J21nXc3LBYpkgzjmbE7kn6euS6zTkN6d8HEsY0cJMmO+XUwXH2U+cuZuQsB8/iGQrTDz7GXQST4/PA/4WCMqPQVkqY97d+qltIpAfAooDjkFt0qRJdgbsVPzgx4hMVqyDnXnmmXYfqdBYh0QJ84/PmlkmhdkuM3bHC9pJ9OFXL2gqzRDGVadOHTu7QdmilGvUqGG+/PJL++OLJ2xQBHOvM6PnXrL26phZmdFhdUmHkDaQmHEU8bnnnmu9jMmmxlIM7aI6EHl8I4XZp5s4KQhRmCwhYC3CMZClGIT/m1NPPdW+zusfLFNt27YNf4ylCb7/EhHwCwEp4Kg7tXr1arvuRVUjTLiO8OO/ePFiQ3kyhHy/rLEihD1kcs2JNjhe0CgsZoqYa1kn5ceOgQKzxVQKM1LSUbrNPP76669cjjvMZPGuxczsCJ+P/iFnbZui4MxysE6QbQvrAYMMPF+DJNzDDh06WIYoOUewvnAfWSJJl5DZDf8BSmmyhEEbUG5YbRgE5UeiFSbnyK/ydbt+q1atbEa6IA3K3PqpbcEhIAUccS9xluIHhx8efuxx/OEHESVBIgsUCc4p5HOmNijHshbGKDwVoUURTdvrS7ygeSCEfDgzJ9auWctj3TRZgvJk5sIaOetwOHmR7xjzJGuEQ4YMseuGzvWwEjBzihyk4MiGsxiVTxzBtPn0009bxs42nvlcsp3WIs/vldcM8nDcY+YfKQwKWQ5Jt6BwvTijJJwvegYOG75TDDSZrb/wwgt24JZuZrqeCOSFgGcVcDoLCTjAcOzhgaAccBxhRsePEBWOCHVgrYn0kZdddpndx37MoeR49oow0+CBMFNMtmCehAXmeZy7GJiwHsfABGXMLJyHI8zIKUjBI54ws3Jbd4z3Ge3zPgFqY8fzkaAHDHpJn5qIVK9e3eY9Z6BHXW1nmQgPbbzk8c9gqUgiAl4n4AkFnOlCAm43iRmj4zyFsnGEmMNIIQsPaSVRPtlSDeidd96xnqZ4xxJSNXHixHAICIo3cl0ukpVee4cAJm4KmiNbtmyxXvksWxAzThKX6ILo0S3Pi8JkLXvMmDE2Rr1Ro0bRp7LvMXEnKrSbAig4YBGZgOm+Y8eOdmDM/6NKPyZKUsdlmoAnFLBbCI3XCglgkn7wwQetJzSm6sGDB4c9cBnd4yE9duzYTN/PtFyfmE7MyoR8kJGI9VmSiyBvvvlmvtcI09J4XcQSwExLBqlowcufpRWcEIn
fTYbCPO6442wWOBLTkFQlWXGy+AvgtEY4H05qTjGS6D7pvQh4lYAnFHAkHNYBMeniwYvXMd6TpFLMdCUfwixw/sEUzbolZi7WhVFG2SY4ddWvX996f7NuyRrziBEj7IyKtWdmyBJvE8B64VaJC0c4PJdZZ02mwsSiRLQA341kKWAIM2vHb2DGjBl2+cOtT96+E2pdNhPwjAL2eggNszy8oFnr7dWrlw3RYB2YH6psEzIRETLDbIkEGZjoSUrCGi5ORMyuJP4mkAqFibWERyoEZ0MeEhHwEwFP/FJmOoQmkRtGTCSz3wsuuMAeTpF7wpRYS7vtttsSOUWgjtlvv/1sNRqFfATqtuboTCoVZo4L+fQNzoX8H2AxkIhAfggUyM+Hkv0Zt0ICXIMRrVcKCeBYRKKNyOxVmMZJhYcDSKqFsCfiXmkDuZGdZPm8x7uYHwOJCHiVgOOo6NX25addLMVgDcIRUSIC+SHgiRkwM0m8F1l/SnUITX4g8RlmeoRTUGkoUrp3725Dl9iXSqGqEjG2rVu3znWZLl262HSZkXG2uQ5K4oYnnngibnEHwknwkpVkF4FsdFQkMQz/f0QC4D2eaChVdn0z1NtYBDyhgFk3xHkCz+cSJUrEamvGtxNO4aTQi2xMukx1JGhwi29M95ormZEoWkFhBJhEi1sbo4/Re38SIPEMqU3dBA94BtHZ5KjI7wEREDiJYqEiRpklKeLvKWQhEYF4BDyhgGkgX2S+vIQUENuXrtlcPDja506A5AdUi+KBGU6SPQRY8+zdu7dNGRpdY5rENcTJMwvMJkdFPLEff/xxG6Y4atQo07dvX5t6FYfNHTt2ZM+XQz3NMwFPrAHTarLY4FFMFSJGjqy58t5JqZjnnukDKSXAWjj3hoxhkuwhUK9ePZsNbv369VbpMABzHhTNoOqRIzgqEh+Oo+K2bduczYF9ZtJADm3ShhKTnIn0oYGFG9COeWYGDF9iSgltoZ7n888/b9PKkXeYuq/Dhg3b6y147rnncuQWjvwAKQ4psCBJDgHCVBjtS7KPAIqWJCwUJImM6XUcFXFKvP/++y0YHBV37txpHRWD5heAXwgZ8NwEU7xM0G5ktC2SgKcUsNMwTFj9+vWzD0aSiY6eb731VsPDTchWpTzDbmS0TQTyToCscNGSaUfF6Pak+j2lGkm9itkdMzRWoWuvvdZeFq9v/CMo6CLJGwGWI2MVASGTGkI6UixwDHRIc7p582a7hMlSgJ/EEwqYdV+ntF80PL7Ebo4+0cfpvQiIQOYJsEYc6aiIImIQnS5HxXQSIDKBDFwsm7H+TTglNZNjTQLS2TY/X2tvCVXIqEbin0jBajpgwIDITb547Yk1YErNFStWzBUY8a5koJKIgNcI4CyYjcKsj3BBYmAjy3BSE5rBMktG5JNmZkJ44ZFHHmlzQMcy1/qVIZXR7rjjDoMzGr9h+KzgoBbJxK99U7vTQ8ATCjheVzEdDxo0KN4h2icCGSEQ6XCUkQZk4KKRsz7ytDPrw/cCwZKFoHSJWe/Zs6etE009Y0pWEisbJCFXNjwcwYpHKBaOZ2+//bazWc8iEJOAJ0zQMVsX2kHhd4kIiIA3CETO+pj54djIg8IpmGIRakQTgkM1Jae+Nsqa/2UUc1CE/AX0lepojiLG/E5mLIqVSERgbwQ8p4AZKeM1GW3j31tHtF8ERCD1BJj1UTgFkyvizPpwwMIs7Qhe0oQqOULyDqIcgiRUbFu6dGmucCOScNDfp556KkjdVV9SQMATJug//vjDpnPjn5kyaYymWUsqX768GTlyZAq6rVOKgAjkhwCzvvnz59tZn/N5Z9ZHGVGEWsJ4sVapUsW+J1UjpmrWhoMo559/fq5uUbo0Mm98rgO0QQRCBDwxA2bNBDdy0tyRihLli4s5Je86dOhgcO5o166dbpgIiECGCcSb9eEwidJxFK/T1Kuuusqan1Wr1yGiZxH4l4AnZsCzZs0yQ4cOtd6EJHgglIGsMqy
t4FoeNOcNfflEwO8EYs36MLsy+4sU/o+lfCOJ6LUI/EvAEwoYU/PcuXNd7wlehUru74pGG0VABERABHxMwBMmaBK3N2nSxPTv39/GFlLgmvq2q1atsuEL06ZN8zFiNV0ERCCvBDZs2GCXoPgcvwP8JjiOmWeffbahMphEBPxOwBMKmH8okm3gyk+pO9aDmfWy7lurVi1rkvY7aLVfBEQgcQIUcrjkkksMpTZJ5EHqwY8//tj+PpAJiSQfkmASIAoGX4NsEE8oYECTS5Uk7xIREAERIJnH7bffbooWLZoDBp7WTzzxRI5teuNfAvj/DBkyxNZUxrueLGvz5s2zmREfe+wx06JFC/92LoGWe0YBJ9BWHSICKSMwfvx48+uvvxpC4jB/vvzyy/ZahMQReuMmlGLEaTBZ0r17d9c62FTVIYxHIgJBIsD/GulcX3jhBVu0AosnVbRwul20aJFp2bKlIe6cR1AlpgLGDFCoUCEbl+t0nnCgTZs2mdNOO83ZpGcR2CsBwlKiZzJ7/VAaD8DHgCoqt912m13uuOuuu8yff/5pW0AMK+uP77//vn3/wAMP2BkYo/ZvvvnGFqZ/9dVXTalSpfa5xTgjuglZpUqXLu22S9tEICYBwjojZcmSJfZ7Hb2des2Y+NMtKFmSllDSkmIKWEFRuggZ1Igb5/8uqxQwo5J//vnHxu1RlSLSLMzIhJlB9A1M943T9fxFoEaNGp5uMCXjrrvuOtdEEWvWrLGZjlDSfPfJcETI3CuvvGIom0lNZOJciYGViIBXCJBDnzXzyAgSrDl818mv4MiUKVNsmdZWrVo5m9L2fMYZZ5iPPvrI/u+QJQ3HW+q/n3zyyTb3w5w5c4xb2cu0NTANF8o1Ax4xYkQ46cXTTz+dowksjGOXl4hANhHgh4yqN6eccorBY79169bGiYP973//awYPHmx/xCJ/7LzOJ1axeEzqXrZWeJ2rV9pHCchy5cqFZ5Sx2oWy27FjR6zdKd2OJztrvkzyKlWqZFOVopQZsLPWjxMeKU6DLLkUcNu2bQ15XFG+1atXNxSdRkiOQfFjiQhkGwGys2FmZqaLVz6FBe69917roUuIDD9gflNasWqnUrdXIgLpIoCT1dVXX22wMK1bt86cd955NhUxStgZ5KarLZm4Ti4FTCNw/b/77rsz0R5dUwQ8RwAnLH4cmFHwA4GZjMIDrNkuW7bMzoAzsYbmOVBqkAjkkQBLPOR/IFPaQw89ZNq3b2/9MdBBeLsHPQWxqwKGIU5YwKCwNuYMR+rVq2dnx857PYtA0AmgXFnzZbZLJaCyZctaixDFQ1DOyfSEDjpL9U8EHAI4OuIA1qBBAxvfjUMWyzz4U3z99dcGayw1t4McExxTAeMVyqi/b9++ObLOONloHIh6FoFsIMBaFZ7PeGpScODaa6+13f7777+txQjnFokIiEDiBPDKxru/X79+9kNYlAhDIsqGB7Wjcfxt3rx54if12ZExFfB3331nOnbsGPhFcL/cL0w01Ep2EzIFYbKRpIYAns8zZsywBedXrlxpY3K3b99ubr311tRcUGcVgSwggG/Ft6HMh4S3MrAdNmyYrYZH1xnQ8r9Wv379QJOI+avduHFj89prr1kTgda3Mv8duOaaa2I24tFHH425Tzv2ncB7771nvaArVKhgK3bVrFnT8CCso1GjRvt+AZ1BBLKQAI6LmKCZ7X711VfhGtOYpp1835ilgyy5FDClwxy3dKCQIYiQBbygEWKDKTkmEYFsIUBoBHnKb7zxRttl1n6p0kWIBGUzkyHMAsjEhVJ3BIsHvhha9nGI5P+ZGFN+zxDWF6k3TiII5NRTT/WdF7tteAD+sNRJNixqwDty4IEHWh1Tt25dZ1Ngn3Mp4EGDBsU0dUIh8gcisFTUMRGIIMCgtFOnTnaEjiJGzjrrLDNhwoR8mchefPFFq1iJLSbpTYcOHayT1+7du22YE44o06dPtykxMcWx/MAsgXSUxCBL8k4Axvz
Ik90PznAntAxFzHIbOYklmSHgluUtG5QvtHMpYKb+EhFIJgFyJqNE/LqUwQ/30qVLbUasSC6Yx1gfzqtFiHSujqv9xC4AAEAASURBVJUJRbtw4ULz7rvv2tCmyy67zM62MXuzRsa1URJkL0KJMFMOemhGJONkvS5YsKCN6ohOGbpt2zabgjRZ19F5RCAvBGImAO3du3fYG83xSuMHAU81PEBJWC8RgWgC5HQlpzI5Xbdu3Wor2pAhqmTJkjaFY/TxfnrvlhigePHi1is6v/2YNGmSVQzEGONM9/3339uog+OPP96GN7H0g5mbWTjJM/AKlYiACASDQEwFTEYcPNM6d+5s/+l5xjZPgg5myUH3TgvG7U1/L8ipzJoaiglHJZJWEGxPzlmyq+E9LDFm9erVdlYLo40bN4aRMEP7/fffXdckWXdORrpLQqeYVUtEwG8EGARjzYgWYoVJZ+k3yWWCdjrAOlW3bt3CjidUpEAhf/LJJ/aHlJH4li1bDHU7JSIAAcy0zHQZrCGTJ0+2a6fM5nhQZJ1tmUj8bhvkkT/MZklw07BhQxtrzzov5mWcglhj5v+MHxpYUokJByKSgOCURcq+vAgepcRZovBZcybHLs8MhLj+6NGj7bpoXs6pY0UgUwSIE3YTMtNhefObxFTAuIg7XoNOp/jndUYf/GNTJi3ZIs/PZBNN3/lQGGSxYY2TH3icWxiwUVoMYb2UvK/ZLvBwmLA+Trwj1iUGKfAiQT6KmBjJzZs321kv677koXaiERJlSM5qlHflypVtYgPixZ3IBjIQMRiKVe6Nak8oaYkIiEBqCMRUwPxjEnI0d+5cW5BhwYIFdhSNswhmaMyMjM6TIZRA7NGjhy33xtqyPD+TQTX95yAlIzU88RDG03Ts2LHmzjvvNLNnz7YhNvz4E0sr+R8BmDlryyhehHCkyDKgOF7xP5JX5cu5mDGj1PlfZcDMGrOj/LkXY8aMsTPgyDAQPockw9z975n0VwREwI1ATAWMazhK94033rBreIykKQ+FeYy4LZxGkiX8SDPSp86wPD+TRTUz5+F7Qok+PJ75wWfmxfovsykUDeuPqqplbBm2WKktUYYMQh1hxspA+Pnnn3c2JfzM/xOOcVWqVLFVztavXx/+LH4ceFdznwYOHBjerhciIALpIZDLCYsfyfnz5xuyK11wwQW2UgXrdsQgMmK+6667kqp86SYxeBQ55/zMCOT5mfvmY6rE7O91IWUjMXw4YfGdIYkEKRsxn5LIggFdtgvmZSe1JaUN4fTcc89ZLE5MJNycB/xefvll+z6vccBYqzD7422NiRtFjHTp0sVel/R/OGTx/ZKIgAikl0CuGTA/BJiXWc9zK4aciqw8hDYxwncyDUUiSJbnZ+Q5vf7arUTXqFGjbL5nL5focpyJaCMJWyhgQApNHPZY45T8S4ABLY5QDDh5RKa2HD58uCECgTzshHIh8ENpUwTCzVT871nd//I/zLp8ZEUzjqS2cdeuXW18Nt8tiQiIQPoJ5FLAjkMGsYekoEQwDx999NEpS/jfq1cv06RJEzvbTobnZ/oxJu+KzHJjlegicw81M1nHI6OPmzzwwANum9OybfHixebMM8+0Jfq4IKZn7iuKRD/y/7sF/I9R1tAZcEamtnzppZfsgThAMnvFMsT/HpYhMmTlR1j/jfbXwBOb5QBCniK/Sz/88IOdaau4R35I6zMikDcCuUzQzsdJ1YZ5jBE6a79z5syxHpH8gyZbiCvmx5sRPiN2ZuCYMYkbXbFiRb5/eJLdznScL7JEF05MmOOdEl1wwlyIIqZUpNsjHW2MdQ1iWFlTdEzlrPWiUPCKxswaua4Z6xzZsJ0awizzoAQdcVJbOkoZbpRjw3SPE1t+he9IixYtrALn/zgysoGZtuOVziyZdjEYOO644+wM3bmP+b22PicCIhCfQK4ZsHM4pui3337brtlR8eWiiy6yMZxsf/DBB53DkvJMIgLiiSM9P5NyYh+eBKc
Zwk/cSnQxM2YNDxM1Sczzao5MNQ5maZg2+REnxSIDBmZXmFwZWOGNy4w424WkAYmktrz++utN9erVbepJxzKVV3b9+/e36/GwZ+CGQp83b55hsIRD1umnn25Pic8HjnKsNZPBjNk328gFIBEBEUgNgZgzYMKNSECP4wbCGh7JAlDKyRZG3vwwUKUk2yWyRBfrdsySGJwwG2GWhGmQ9UM81L0oJH3AvOosX9BGFA7fJ1IpMsCQ/EvACT+K5BGd2hKOmPKxRuVHCEMixA/lylLPk08+acg3HZ1KdubMmfY41u5RzlwPRS0RARFIHYGYM2BmMfxoYqZyBE9KfiBSISgXRvv33XefDXNKVpm3VLQ11ef0e4kuNyXLwIHwJEl6CZQtW9ZaHohoQMhGhsUJawpJb5jtskRAYQkKEzBYQkiaggUjU3LLLbe4WngYjNIHSXYSwP+lWLFi4c4zSWFZjmWT/MTJh0+UoRcxZ8B4YeI4w8yUf1BmYni3OmkGk93eNm3aGCrAYLpk1N+2bVv7Pltz1jrhKJGcY5XocirrRB6r1yIAAf6PyM+Nf4UjmJfxTkcR87+NeZuZsvO/zYyZYxwvbOdz6XwmCRA5B6KFgbmWqqKpZMd7rIDU5sZ5kIkaeoJlL4oFEZ2DvxLWIj9JzBkwI00cal577TWbUN9Jn5fKJAqsR7311ls2cQBJB5o3b26vTXYl4hX3JqxfkbTATTC5OTGQbvv9so0v2Ouvv24tE5gSWVsllSHe48TY8iWUiIBDgO8HPgPRyzvdu3e3GbHYR2Id/j+cwS7Kj+UnftwkIpAJAiT0cUs8s3PnTmsZwaETh1UmHwwYCXkk+Q95KvCfYdDpB4mpgGk8/4D8c6ZbWK9iLZEHJgZMY4kI0Mlv6ybEUpLU3s/yzjvvhONDsUb07dvX5vJlpvLKK6/YNT6UcyJCqkjOgdkGUyTe7ZSgRHCcIjtZZHhKIuf06zEMzAifchM8iCNNXm7HeH0bznos8UQLy0s8EJLZ80AwR0tEIJMEmOFSe4AwOX6n0EXMgFkWwdzMZMP5n23fvr3N5EbkAHH0xLf7VgE3bdo0PBJ2uwEkDYhVkcLt+ES2AZs1ZzfhxyNRb19MVm5mK85LQfjoZARu1/PyNpKSMHPBMkBNZjzTmakw8nMylTGLiY75dOsTznQff/yxdehiP+vvjjDLxmu2Ro0azqZAP5PzOFbeY7elgEDDUOcCQYD/Zz+uiTrw+X/E54ioGyInBg8ebM477zz7W0fZQSw3zqASh1RHf1AAxk/+Q7lmwKz5MoVHnIosTmwi27DBJ1sizx99buITSU6fSYeQ6DZl6j3KYNmyZfbyjP4GDRpkv5BsoPoQayOOE00ibWQwRWxotBCjKhEBEfAvATzZgyCkYWXNHysfv1cI0TiEOxJVwQyZ2HbKe1JLAJ+HRK2AXuCTSwFH1lRk1ogSxq6eKRk3bpwta5fIGnCm2piu6zZu3NgqTCpVvfDCC4b4bASFyYyY+Gw/j3rTxVHXEQER8A8BJn0sv/Xs2dNOALHM4UBIfgF0FL4whMnywGydijK5qaKVSwGn6kL5PS/2fMm/BPD0w2xMcfVIoYwdZhgcsSQiIAIiEDQChDHiaMUDYamRRDWOYP1D8fpJ+dL2mGFITsfS/YxDkMJqYlNnhhttXsINX8o3NjPtEQERCA4BnLHI0kZ0DKZnaloTtXPkkUcarISYpf0iuRTwli1bbGwg8YF4DeMty2vn8eOPPya9bxQbpzwaC+kFCxa0lXRwvKJK0siRI5N+PZ1QBERABETAnwQIUSJLG0qXfOmYpgk/xTLIBI6IF79ILhN0uXLlcoX9MMJwBPduigQkUwh5oeISi+hkUUL54s1LHDIL7jiFtWvXLpmX1LlEQAREQAR8SICkMURpEO2ByZnsWOSpQEihyrIlitkPkksBUz0lnqQiEcesWbOssxfxXY7gSk72LTzdCL2RAnbI6FkEchJgQExiAuLlKeRB8hy
EuF7HczTnJ/zzjggIHG2ihTU/ZjvZEqse3f9sfs8k7fPPP7eJlUhZSlERR0ih6hQYcbZ5+TmXCdpZyI71jIk42YKpee7cua6nJfY1Voym6we0UQSyiAAVjp555hkbhoZ/ABYqQtJ48OMUWX7Qq1hwrGGwgB9DZAgJgwmsYSRhiC6lSHQE8fCS7CNAmlTKaBInTLEgJ8Mhy5jkQ2Bt2C+SawaciYZTpYVga0qn8U+IaYH151WrVtlRLiYHiQikigC1r5lpuaVeJPVdXmKrU9XGWOfFIYVsdYSmRQup+pgpelkIc5wxY4bNAU9KVX5AGTwQ//nUU0/ZpruVUsxrn+rVq2eoNBYtzK7Jiy3xDwHSq5IfIjqxErHBmJ/dLCZe7Z0nFDBJNhYvXhxO/MF6MLNezM4kBlFsq1e/Pv5rF74FZNchdpqRNAoWBYbnJOZawh2IL2Q/MzOcPUhF+dhjj9nC9v7rsbdbDGvKa5LDnAf3gAdlESnMgjilFKnsRMwnRVvyKsyY3ARLX7xEQG6f0bbME2CSFp3xjyVLv4knFDDQiOtSlRO/fX38114UKWZZZkQkmGEdESXghHJRMpH1VAZ/pFzFo3LRokU2Ew9Vg3hIkkcAntSPdpQgkRAsOzHLwRpGYgVHIkspRiYMcvbrWQT8RiDXGrDfOqD2ikBeCLBuRGgbCpaYQfJpM3L+5ptvDDVz8bzHvMWAkPR3mKXxsGRd6f3338/LpXRsAgSuvPJKa22InL2Q43fChAnhtKtupRQplyoRAb8T8MwM2O8g1X5/ECCHNiUvsbaQ3o44d4R0d8x0cWLCixIfhLVr1xqyjBEGN2fOHJvq0x+99E8rWV9funSpWbhwYY5G4+nKQOjxxx+31gpnJ1W7SLnKoIiE/NkuMMAhLZ40bNjQmvHjHaN9mSEgBZwZ7rpqhgg4XpLUx6WMGWvAKGVMoTgs4YWL9y1KGKVM3lnMoHjhYhaVpIbA+eefHz6xU3iddVs8W1kDRhmzZs9ACKsE5micNrNd8BInTwOxsE5u+GgmziAzerveZ56AFHDm74FakEYCmDoxM+Npi1cszlczZ840ZHjDNE1WNrzuiYen/Fnx4sVtfdxzzjknja3MnkvFK7xOiBWmaAZJpB5EEb/88stm69atVhmzrVu3btkDy6Wn5E7AKY0BJEsqF198sctR2uRVAlLAXr0zalfKCOBV74SkkMiBdUhCGpzYd8chiAagmP0U1pAyaCk6cbzC66eccor1PufSDJKwRGCyxkOajEfMiLNdAcMGiwCF6FlSkQKGiH9ECtg/90otTQIBzJv9+vWzeWMJf+FHnWdmxKyVjR49Okd2JeJr8YyW+TkJ8F1OEa/wOkl/cI4jD33VqlVtpi8nJpuMR9lSI5wKaM4aOR7jKFxy9hO/zvcSy0Dt2rXtwwWxNnmYgBSwh2+OmpZ8Apg8SfBSuXJlmy+WuF8SuRN73qxZMzu7isz2xsyYdUeOowZzrHjS5Lc0u87oVni9adOmZsqUKTYeG6c4PNMZIPXo0cMMHDjQ3pdsoAQbQuKodxuZNITkMXiIq2CNf78FUsD+vXdqeT4IxErkzmyCWcbQoUPNqFGjDEVJkDZt2tiUhySEUUrUfADPw0eiC6/jMMcD2bBhgy3QwuvLL7/cdOrUyc4EeR90IUadmS6m90jBMY3vM7Ht8URe0PHoZHafFHBm+evqaSYQK5F7gQIFDI5WeJI+9NBDhtkXsaas/1L2jPVISeoJRBded65IrmgeCObovAo1xosUKRL+GAlYcO6K3Bbe6aMX8JIXtI9uWFRTpYCjgOhtsAk4idyHDBlizckkc0cIdxkxYoSZPXu2DUHCOahu3bq58s0Gm04weofZmnAy1kVJXcnskTzTDL6I/8ahixk1Gc8YYDHDZKbdunVr3wGg/fKC9t1tCzdYmbDCKPQiEwQoos0aFg8S85N72Xm/t9KY+WkvP8aY7pwKKs45SORObDCVuVhrfPrppw3rxST
hkBe0Q8n7z3gCY7kgjvuJJ56wg6gWLVrYNX5yemOype44OcFZdti4caMZNmyYYUA2ePBg73fQpYWOF7QytbnA8fgmKWCP36CgN+/666+3P4TUmSaEgjhdXhOPS8ajVAhJ3InvjRSuG61oWWukopDfa+pG9jPor8kjTf1wlhHIEIXpmfVilhFwtKPO+KmnnmrXjwlHi6w7Tt5vvwqzfYVk+e/uyQTtv3sWqBYzU2nQoEEuBycKJlCGTiICeSFQunTpcA5pMpwNGjTIsL6PlCpVypqinVCmyPOq7ngkDb1OFwEp4HSR1nVEQARSToBsUKQNJX6bsDEnPSPlJT/66CNDiBlpL1V3POW3QhdIgIAUcAKQdIgIiIA/CODVTOIKfAsihaIaWFTwpMbXgHzfmKRZ7yfUjKpYqjseSUyv00FACjgdlD14DbxBq1ev7toynFhI8i4RAT8SQJFibo4UPJ1xxsLbne9+rOIOJLuQiEC6CHhWAVN2jNEscW6S5BPA4Sja6ci5ipMn2XmvZxHwCwE8nElT6SaEmOF09cwzz6i4gxsgbUs7AU94QTMy/fzzz23nGZmSHP+kk04yVPogTy/5eyUiIAIisDcCzH579+5tQ40IY4t88DuC2RkhFpiUlmSXYrZMcQdC4CQikE4CnpherlixIpzwQGXH0nn7dS0RCBaBevXqmcmTJ5ubbrrJzJgxI8dSCmu/pBTN9uIOwbrj/u6NJ2bAkQg1Mo2kodciIAJ5JcAa7y233GIWLFiQ46NOcQdq55KQo3PnznY/M2EypLVs2TLH8XojAqkm4IkZMJ2kzBZpAbO57Fiqb7bOLwLZQsAtiYuKO2TL3fdPPz2hgJ2R6cMPP2yyueyYf742aqkI+JvAvhZ38Hfv1XqvEPCEAtbI1CtfB7VDBERABEQgXQQ8oYAjO+uMTEmCT3k4xeVF0tFrEchJgBzAVPVxEwq5OyX83PZrmwiIQGYJeMIJa+3atTZInkw1xP/efPPNNgTpyCOPtN6MJOaXiIAI5CZAneJTQ8UF3IQBLJVyJCIgAt4k4AkFTBUPUsWVK1fOBslTLJvQpGXLltmi2awNS5JDYNeuXTYMI9asiav88ssvir3eB9zbt283N9xwg60vS41ZYk955kHhCScWdR8uoY+KgAgEgIAnTNDU8CQRB4nSJ0yYYCgLduKJJ1q8KN+2bdsGAHXmuvD333/bkItXX33VFiIvVKiQzYHLoIdEJwx0yIpFCbf27dubUaNG2Qxk1FNt165d5hru0yuTTIYcw27l4caMGWNzFVN/WCICmSCAoyvVxhBqYFMbedGiRfY91hRlwrMo0vLHEwqYTDQvvfSSjd1jTYsYvTvvvNMCoEzYGWeckRAMUs29++67rsd+9tlnpkSJEq77gr6ReEdM+2+99Zatj0rcIwMbCpKThL5u3brmoIMOsiFgmDRRIPxjMvBp1qyZcSvfFnRm+9o/QurIPxwtDH4kIpBJAh06dLBLE3wXd+/ebX8HRo8ebSNQWA4kF4MkPQQ8oYAHDhxo6tevb4YPH26oD0sB7REjRtg6nozOmCEnIpQYYxbtJtQEjbXP7fggbWMQQxYg8uAizHJhziCH0mxwGTt2rClfvry5//77reJAeWBGxRrRvHnzIOFQX0Qgqwng2MpvQPTEZuvWrbJ4pfmb4QkFjOJkhsoMjdkXplEKMfAFIS90ogUZUBpusw6Ybtmyxc4C08zXE5djbX3u3LmmYcOGtj1kCCLXNhaBb0Nl2eCNDBs2LGwlYI145cqVdmBkd+qPCIiACIhAUgl4QgHTI5KoX3rppfYR2UMU8q+//mrOPvvsyM16nQcCmJxYcxwwYID5/fff7frPp59+alP1bdu2zSauxzmrWrVq9qwkrYf30Ucfbc3SebiUDhUBERABEUiQgGcUcKz2jhs3znz33Xd2dhbrGG2PT6BGjRpm48aNNkk9zlaXXXaZja/GFPX9998b1n0i6/+y/amnnrJrw/HPrL0
iIAIiIAL5JeB5Bdy1a9f89k2fiyCAV+71118fscWYY4891r4vXbp0ju0//vijrRqTY6PeiIAIiIAIJJWAJ+KAI3tEDPCOHTsiN+l1mgm0atXKzFNt1DRT1+VEwJ1Anz59wg6UkUcQsfDAAw9EbtJrnxHwhAIm01WXLl2sYxAeuRTJxiSKV+7IkSN9htQ/zX3vvfessxsOb5EPwsCuueYau23SpEn+6VAWthRrxd4yxeFDIfEvAZxR999//1wdwKoVy+k018Ha4EkCnlDAhMPgcTt16lQbFE58KmuWeOUOGTLEDB482JPw/N6o6tWr24EPAx6SbyxZssQ+iMUeNGiQfX3JJZf4vZuBbr9jrSCum7zPRBS8/vrr4T6ThCVyfT+8Qy9EQAQyTsATa8CzZs2ycarHHXdcGAgxq3jl4rnbvXt3xaeFySTvBbHR9957r0HJkn+bspAdO3a0WbFYH8bEJfEGAawVbtmz8F7n/4dZMJnLihUrZqguRjpMijFIREAEvEvAEzNgTM3EqboJSSSOOeYYt13aliQClSpVMu+//76NCSYrlnIVJwlsEk8Tz1px8cUXmyeffNK0adPG3HjjjQZl3bt37xwz4SQ2RacSARFIEgFPzIB79eplmjRpYvr3729NaIcffrhNi7Zq1SqDUxZrkpLUEmA96emnnzYzZsywjh2EK6VD8HKn6lW0YE5lNi75l0A8a0XZsmVtbLdzz0iywsCVuHon+5k4ioAIeI+AJ2bAJH1YvHixwduPHw0SgjMTQyFQFUmm0PR9cS6//HK79luzZs20XLRixYquNZ/JTY1ikeQk4GatqFKlipk/f344kQqfOOuss2xhE2bEEhEQAW8S8MQMGDTMwOrUqeNNSmqVCHiIQLS1goxlS5cuNQsXLszRyqpVq5rly5ebvn375tiuN5kjgKMp2eeQDRs22CI0vMZ6ce211/JSkkUEPDEDziLe6qoIJI1ApLUCL+jGjRvn8oLGKYulHUnmCVAQhXBLPNN5kCLWed2vXz8ttWX+FqW9BZ6ZAae957qgCPiIwJo1awxpWd2EVK34SRA6RsyvvKDdKGV+GwVOqPrWunXrXI1hVsx+SXYRkALOrvut3vqUALG8Y8aMsbm7GzVqlKMXmJ5JJ0pVK/wpWL/nQXx39LE5Pqg3viLA4MpxtKPh5Etg26GHHuqrfqix/yMgBfw/FnolAp4lQIw84UU4rWFqJvTIERTzBx98EK4YJi9oh4z/nllKIPkQ6/w4pTrrwqTnZUDFDPruu+82O3futLH7VDf7z3/+Y8PQpIj9d7+1Buy/e6YWZykBfmCHDx9uY7YjEVAzW17QkUT8+ZqwS8IAp0+fbh555BG7lPDcc8/ZzqCMkXr16pm77rrLhgpOnjzZxu6ThCXW8oT9kP54loBmwJ69NWqYCOQmQJpQHpFy2GGHxfWCprSkxPsEWEq44447TIUKFewjcikBZYu0a9fObNmyxWzdujUcdta5c2c7K3ZbW/Z+r7O7hZoBZ/f9V+8DROD888/P1ZvixYtbU2auHdrgOQIUVsBT2hFnKYFc+UWLFrWbf/rpJ/POO++YRYsWOYeZZcuWmcqVK4ff64V/CEgB++de+bql/HD8/PPPCfeB8Iy8HJ/wiXWgCHiUwHnnnRdzKQElixBWds4551hfgDPPPNOu/5IxrmXLlh7tlZoVj4AUcDw62pc0AgMHDrQxqhMmTMh1zj///NM8+uij5qabbjKffvqp9falGAQpKnE4wtFEIgJBJ4CHMwlVopcMSKjy+eef28IpVIkjsQpFalgXZk149erV5vTTTw86nkD2Two4kLfVm51ihE8iAkbr/KA4QkWmefPmGZTuDTfcYHr27GnGjx9vf1jIBT5x4kTnUD2LQOAJxFpKePzxx8Om6EKFCpkGDRqYFi1aGNK2SvxJQE5Y/rxvvmw1+YnfeOMN061bN2tGo8LPbbfdZiZNmmRH/hTh4Mfk+++/NxdeeKHtI96gFGx
AMUtEQAREIEgEpICDdDd90BfiGxnJP/jgg2bUqFHWnEYmJ6ofEdt4yy23mPXr14d7Qi5jv5nX9t9/f1OwYMFwHyJfHHDAAYaHRATyQoBazyzVxJIyZcqYhg0bxtqt7R4loF8Cj96YoDeLMnn//e9/7YO4R0xpzISvvvpqc/zxx9vuY64eMWKEmT17tq9wUJ2Ih5u0b9/ebbO2iUBcAt9++6159tln7fINWdGiRTXTo4n4470UsD/uk+9bSZnJWM5UV1xxhSHX8a5du3L086qrrrLm58j0ezkO0BsRyBICzzzzjE09SfpJHBr3RXDccsKaIs/D/9k111wTuUmvU0xATlgpBqzT/0vg3HPPNTVq1IiJg/VfYlYd+eGHHwxOW1K+DhE9ZzsBvJ4Jzfvll1/2CQWmatJaRgv/a6S1lKSPgBRw+ljrSjEIYH52vKK/+OILQ2pFkhCQ/5jMQPHWvmKcUptFIHAESEWK34RyPgfn1koBB+de+rYnK1asCJufiQfGoYR4RwoMsPbFNokIiIAIBI2A5xQwcZ9U/pBkJ4GZM2eaHj16WBNZqVKlbFJ6YoQlIiACIhA0Ap5QwFTzwOMVsyPhG6xP4OlXvnx5M3LkyKAxV39cCDDb3bRpkyHrD8XJHSEMiRq3EhEQAREIGgFPeEGTbHzz5s1m6tSppkSJElb54mzw2WefmQ4dOpjffvvNVgEJGnz1518CTZs2NVOmTDHUQiVnNLHCo0ePtjNhPD7nzJkjVCIgAiIQOAKemAHPmjXLDB061JbgwsFgv/32M8SJVqtWzeY8VSrCwH3vcnTonnvuMXwHyID19ddf20xZHHD55Zebb775xn4vcnxAb0RABEQgAAQ8oYAxNc+dO9cV55tvvmkUZO6KJpAbyYhFlRcEc7Q8PgN5m9UpERCBEAFPmKB79eplmjRpYvr3728r5hATiily1apVBqesadOm6WaJgO8IUFKR7y+J8x3Bu9vJ9OVs07MIiEB2EvDEDBgnm8WLF9vyWmRMOvXUU20y/ieffNIQonLKKadk591Rrz1PYN26dTaNJjP1Sy65xHz11Ve2zeSzZgbfvHlz+37MmDGmbNmy9ruMxeeVV17xfN/UQBEQgdQS8IQCXrt2rbn11lvNYYcdZurXr29/xDp37mzLbd18880GL2mJCHiRAFYbMnh98skn1mehVq1a5ssvv7RpNykygeBESNF06riSxYh6r/fff7/Bw1siAukmgK+Fm5Dmcl+zbLmdV9tiE/CEAqY83cknn2zKlStnyHmK2Y6Z77Jly2yFHLxjJSLgRQIsjxC3TPIQllKw2lx22WXWq99p73vvvWeo8UqGL8zRdevWNa1atTJsl4hAOgmQVe6MM86wKS379etnyLeO/83kyZNtPW6WR5gMSRGn5654Yg34nXfesakIiQGeMGGCLcB+4oknWgIo37Zt26aHhq4iAnkkgFmZ2e8FF1xgP0kuXdZ5b7rpJrN7926zdetWO7AkucyePXushz/PDDD58ZOIQKoI3Hvvveb555/PdXrKfhYrVswWdxgwYIC566677KQHJVy6dGlz9913m3HjxpnWrVvn+qw2JJeAJxQwGY9eeuklWwu2du3a1umK2GAEL2hGbIkIP2rkEnaTBQsW2NAmt33aJgL5JcDg8LrrrrMmZszKCD9gmPmoe7xkyRL7/cUMzeCycePG5qyzzrLfRdaIJdlDAO9+typEEOB7wRJcMuW+++4LL4dQTxg/BWbA+NyQZx2FS5u2bNliB4qEfSIs//EdlgJO5t1wP5cnFDDJFlj7HT58uC2+3qlTJ1sHtkCBAtZUwgw5EWFkRzYlN/nxxx/NQQcd5LZL20Qg3wRwGqSUIvHLkfLYY4/ZOGb2YW6m2ASzDoQ1YMzQkuwiUKRIEcPDTUhAlGwhfJMa288995yNMhk8eLCtMMbvasWKFe1kBZMzv69Yaxxh6a9y5crOWz2nkMB+IXPYnhSeP+FT04y33nrLfinIisU
XlZkv62YHHLDv4wRMKpS4owi8JDsIePGeMxCk7BvLLRIRSBeB1atXm5YtW5qaNWvapEczZsywyyQMHNu3b299bebPn28VM74J80L5108//fR0NS9rr7Pvmi1J6Mh+xWyCh0QEgkqA2TCDQH3Pg3qHvdkvJjPMdHv27GknNpib8V1gaQSz+O+//27TwZKHnfz7sham5z56RgHH6i5rur/++qsS8scCpO0ZJcDamlu9YsoovvjiiwZzX+Rsd9euXTa3NVadF154wVx99dUZbb8uHmwCOLEOGTLE5lfv06ePzbfONpLEsCbsGEDxzkcJf/zxx+a2224LNhQP9c7zChgzIvGUw4YN8xA2NUUE/iWAon322WeteY8KXo6geHF2wfSHIibEDmnTpo1NzkG8sFKsOrT0nAoCH374ocHUPH36dLNy5UpDzvXt27fbMCO8nZHINWnyLRACOn78eNOwYUNVokvFTYk6p+cUMF8AnKmcL0bXrl2jmqy3IuAdAsStk8CAB86E0YIXNMlkqPhEMg7Wf4899lhld4sGpfdJJ4CX8x133GGLmVSoUMGu/7IGTLlXHF5rhyJO+E6yNoxQ9AalzUw5cjCZ9IbphGECBcKvMvhC9YAzCF+X3mcC/GBRPtMteUGlSpXM+++/b5gp4/mMg6FEBNJBAE9nFKoj1FsnrJMQTye7IKlTCTlC4R599NHWLE3qX15LUk/AEwqYLwQmEuoB80PGbIJkBpidWb/AfV4iAl4lwFraqFGjYlZuor7x008/bUiMgNMLs2CJCKSaABEkeDY78b1cjxh04tFvvPFGe3nyL9xwww2GJZHZs2enukk6fxQBTyhg1QOOuit6G0gC1DfGJI0ZUCICqSZAYo+lS5fauPPIa5EAhjzkJOpArr/+eluPm6xtJOiQpI+AJxSw6gGn74brSiIgAqkngAUPT+NEheNTJeQhjxYKiLB04gipf6dMmWIeeeQRZ5Oe00DAEwqYJPY8qlSpYk0juMGTU5c1DMwlvXv3TgMKXUIEREAEkkMA8y9m3egMaZSpbNSoUfgiKlMZRpGVLzzhBe3UA8ZhAGcVHFUI0WjXrp39EpOkQyICIiACfiLAemv16tWtqRdP+COOOMLG2mIWRpwylRRMwEHv3XfftR7JTDz4rCT4BDyhgMGMo0qdOnWCT1w9FAERyAoCxHyT156JRPfu3W0oGr9xOJkikWUqeR9ZplIKGCLBF0+YoIOPWT0UARHIRgLkUybHPRmm8JYnGQaJhShFSXIWp0wlbMhKRUU3LIKS7CAgBZwd91m9FAERyCCBMmXKmH79+lnvY/KBr1271uZkJkYcPxeEWS+eyCpTmcEbleZLe8YEneZ+63IiIAIikDIChPiQ+CJayPBHsQMEL2mVqYwmlF3vpYCz636rtyIgAmkg4CS6iHep/fffP5wjnCxqF154YbzDtS+ABGSCDuBNVZdEQAS8S2DNmjU2RzOzZPLet27d2uZnxktaWf+8e99S0TLNgFNBVecUARHIagKxylRidu7fv7+pUaOGoQQg673kXqbsKjHDbdu2Nc2aNTNksZIEn4BmwMG/x+qhCIhAmgmQz6Bz5852jXfdunXGeSxevNh6Qzdo0MCMHTvWkOPg/vvvN6eddpq5+OKLbV5mqhJJsoOAZsDZcZ+zppfkFSfsg3q8hHuQ7IBnZh6VK1c2TZo0yRoW6mjmCMQqU7lt2zZbBpDylAgFZ0qUKGFfE4ZEUZr69evb9/oTfAJSwMG/x1nVQzxMSWUaXXHozz//NI899pgUcFZ9GzLbWXIt813EwYoYYKRo0aLmiiuusDNeSgE6lYr4fhL/SxlAhSFl9r6l8+pSwOmkrWulnACOLCSaL126dI5rbd++3Rx44IE5tumNCKSSgFOmMvoajz/+uCE1ZWTRe76bTz31lM2GFX283geXgNaAg3tv1TMREAGPEogeINJMUlFKsouAFHB23W/1VgREQAREwCMEpIA9ciPUDBEQARE
QgewiIAWcXfdbvRUBERABEfAIASlgj9wINUMEREAERCC7CEgBZ9f9Vm9FQAREQAQ8QkAK2CM3Qs0QAREQARHILgKeVcA//PCD+euvv7Lrbqi3IiACIiACWUPAEwq4RYsWNmcq1ElKfuWVV9pamscdd5ytGkKWGIkIiIAIiIAIBImAJxTwihUrzK5duyzXRx991JQpU8Zs3LjRfPDBB4ak5myTiIAIiIAIiECQCHhCAUcCnTlzpunRo4etj1mqVCnzyCOPmHnz5kUeotciIAIiIAIi4HsCnlHAzHY3bdpkE5FTMcSR5cuX2yTlzns9i4AIiIAIiEAQCHiiGAOluaZMmWIefvhh89NPP5nChQub0aNH25nwwIEDzZw5c4LAWn0QAREQAREQgTABTyjge+65x/BANmzYYH7++WeDFzTJyTt16hQu5RVutV6IgAiIgAiIgM8JeMIEHekFTe1MlO5JJ51kGjZsaB544AEjL2iff8vUfBEQAREQgVwEPKGA5QWd675ogwiIgAiIQMAJeEIBRzKWF3QkDb0WAREQAREIKgFPrAEDFy/o448/PuwFfdhhh1nmefGCxnt6x44drvdq8+bN5o8//nDdp40iIAIiIAIikG4CnlDAyfKCnj9/vpk+fborw61bt5oLL7zQdZ82ioAIiIAIiEC6CXhCAbt5QQPi8ssvz5MXdOPGjQ0PiQiIgAiIgAh4nYAnFHAkpBNOOMHwQKpWrRq5S69FQAREQAREIDAEPOeEFU2W4gyLFy+O3qz3IiACIiACIuBrAp5XwOPGjTODBg3yNWQ1XgREQAREQASiCXjOBE0N4J07d5oiRYrYtnbt2jW6zXovAiIgAiIgAr4n4IkZMOFBXbp0sdmvChYsaCshHXLIIaZ8+fJm5MiRvoesDoiACIiACIhANIH99oQkemO63992222GOF2KMZQoUcKgfMkH/dlnn5kOHTqYVq1amXbt2qW7WbqeDwlQV/qggw4yBQrkHluS5vTQQw/1Ya/UZBEQgSAS8IQCPu2008yHH35ojjvuuFyMFyxYYLp3727IkCURAREQAREQgaAQyD1NyEDPMDXPnTvX9cpvvvmmOeaYY1z3aaMIiIAIiIAI+JWAJ2bAhBk1adLEkH6yZMmS5vDDD7d1gVetWmVwypo2bZo55ZRT/MpY7RYBERABERCBXAQ8oYBp1W+//WbN0N9++61dD2bWe8YZZ5hatWqZ/fbbL1fDtUEEREAEREAE/EzAMwrYzxDVdhEQAREQARHIKwFPrAHntdE6XgREQAREQAT8TkAK2O93UO0XAREQARHwJQEpYF/eNjVaBERABETA7wSkgP1+B9V+ERABERABXxKQAvblbVOj3QjgSb99+3a3XdomAiIgAp4jIAXsuVuiBiVC4MUXXzTPPvusPfSff/4x7du3N8cff7ytJX3FFVcYYsglIiACIuBlAp6rhuRlWGqbdwhs2rTJ7NixwzZo+vTpZuHChebdd9+1CrhZs2amdu3apmPHjq4NLlOmjGnYsKHrPm0UAREQgXQRkAJOF2ldJ2UEJk2aZGfA5cqVs9cgt/jUqVPNkiVLTNGiRXNdV6lNcyHRBhEQgQwQkALOAHRdMjkEVq9ebStm1axZ02zcuDF80h49ephRo0aZI4880gwcODC8XS9EQAREwEsEtAbspbuhtiRMoFq1auaAAw6wpmTKWT700EPmzz//NFTPIoXpPffcY3bu3GkoQSgRAREQAS8SUCpKL94VtSlPBFCyK1euNOeff75Zu3atzSteqlSpPJ1DB4uACIhAuglIAaebuK6XEQKEKP3xxx+20lZGGqCLioAIiEAUAZmgo4DobTAJjB8/3tx9993B7Jx6JQIi4EsCmgH78rap0Xg5L1++3BVEv379zK5du0zhwoXD+5n9Ulv64IMPtuvGI0eODO/TCxEQARHIBAF5QWeCuq65zwSoEd27d29TqVIlU6FChRzno4b022+/beOAW7ZsafdNnDj
R1pvu06ePOeSQQ3IcrzciIAIikAkCmgFngrqumRQCc+fONTfddJNZsWJFLqX6888/mzvuuMPOhIcOHWpmzpxpOP75559PyrV1EhEQARHYVwL7h2Ime+zrSfR5EcgEARJuYGpGSpQokaMJhQoVMo0aNTLMlJkFY4IuUKCAadCgQY7j9EYEREAEMkVAM+BMkdd100Zg/fr1pl27dqZixYrmkUceSdt1dSEREAERiEdACjgeHe0TAREQAREQgRQRUBhSisDqtCIgAiIgAiIQj4AUcDw62icCIiACIiACKSIgBZwisDqtCIiACIiACMQjIAUcj472iYAIiIAIiECKCEgBpwisTisCIiACIiAC8QhIAcejo30iIAIiIAIikCICUsApAqvTioAIiIAIiEA8AlLA8ehonwiIgAiIgAikiIAUcIrA6rQiIAIiIAIiEI+AFHA8OtonAiIgAiIgAikiIAWcIrA6rQiIgAiIgAjEIyAFHI+O9omACIiACIhAighIAacIrE4rAiIgAiIgAvEISAHHo6N9IiACIiACIpAiAlLAKQKr04qACIiACIhAPAJSwPHoaJ8IiIAIiIAIpIiAFHCKwOq0IiACIiACIhCPgBRwPDraJwIiIAIiIAIpIiAFnCKwOq0IiIAIiIAIxCMgBRyPjvaJgAiIgAiIQIoISAGnCKxOKwIiIAIiIALxCEgBx6OjfSIgAiIgAiKQIgJSwCkCq9OKgAiIgAiIQDwCUsDx6GifCIiACIiACKSIgBRwisDqtCIgAiIgAiIQj4AUcDw62icCIiACIiACKSIgBZwisDqtCIiACIiACMQjIAUcj472iYAIiIAIiMD/b+8sgOUogjDcwb2A4JoECQR3h+AaNBROAhTu7u4FBYWFYEGrcAhQBIfg7u4UHlwKCzLM11Vztdnc7bv38vbdXd7fVfd2b3d2Zvbb5Hq7Z6a7JAJSwCWBVbUiIAIiIAIiUERACriIjs6JgAiIgAiIQEkEpIBLAqtqRUAEREAERKCIgBRwER2dEwEREAEREIGSCEgBlwRW1YqACIiACIhAEQEp4CI6OicCIiACIiACJRGQAi4JrKoVAREQAREQgSICUsBFdHROBERABERABEoiIAVcElhVKwIiIAIiIAJFBKSAi+jonAiIgAiIgAiUREAKuCSwqlYEREAEREAEighIARfR0TkREAEREAERKImAFHBJYFWtCIiACIiACBQRkAIuoqNzIiACIiACIlASASngksCqWhEQAREQAREoIiAFXERH50RABERABESgJAJSwCWBVbUiIAIiIAIiUERACriIjs6JgAiIgAiIQEkEpIBLAqtqRUAEREAERKCIgBRwER2dEwEREAEREIGSCEgBlwRW1YqACIiACIhAEQEp4CI6OicCIiACIiACJRGQAi4JrKoVAREQAREQgSICUsBFdHROBERABERABEoiIAVcElhVKwIiIAIiIAJFBKSAi+jonAiIgAiIgAiUREAKuCSwqlYEREAEREAEighIARfR0TkR6EICP/zwg40ePboLW+xeTYlv93rerXC3UsCt8JTUx6Yn8Pjjj9uSSy7p/Tz88MPtmGOOKezzfffdZw8//HClzF9//WWDBg2yHj16VI7ld+qpN39Nnz597NVXX7Unn3zSFl100fzpNr+n6yl4/PHHN/QF4fPPP7cLLrigap9ffPFF69u3b9Vz6eBbb71lJ510UvqqrQg0nIAUcMMfgTowvhE48sgj7dBDDy28rUsuucS++OKLSpmzzz7bdtttN5t44okrx/I79dSbv+aJJ56wfv365Q/X/T1d/++//7ry+u+//+q+trML8pJz//33V612kUUWsZEjR1Y9lw6uvPLK9tFHH/knHdNWBBpJQAq4kfTVdkMJXH311bbgggvaVFNN5dbr888/7/157733bPnll7epp57ajz/99NNV+3nbbbcZP/y9evWy22+/vVLmiiuusCuvvNK/X3vttTbXXHNZz549bcs
tt7Qff/zRhg0bZg888IBh0V533XX27bff2tChQ2399dcvbD9b75prrmmXXXaZ9e7d2xZYYAF76qmn7KCDDrKZZprJttlmG/v111+9rh133HEshfP777/bHnvsYbPPPrtNP/303q9UfvXVV7czzjjDZp55Zrv33nstXb/11lt7fYsttpjdfPPNNnDgQP/On3/++ceWWWYZ+/777yvH8juvv/66W/h77bWXs1hvvfW8X/3797c555zTzj333Mol1Z7Ll19+aQcffLAr2e23396t+sGDB9sGG2zg94+Vv9NOO1kIwbbYYgsbMmSI1/fpp5/aEkssYR9//LF/33zzze2II46otKUdEWgogfgPViIC3Y5AVLJhyimnDC+99FKIY4Nh1113Deuss45ziD/g4dRTTw1RUYXzzz8/xB/wsfh8+OGHISrVEBVveO2110J071bKHXbYYeHoo48Of/zxR4jKPbz88sshKt4QFWw47bTTwp9//hk22mijcPHFF/v+iBEjwoABAypt1Go/1UvBueee2/v72Wefhah4w+STTx6OPfbYEK3qEC29cOutt3p9UUGHV155JURLNsSXBT9GH9Zaa60watSo8M4774T5558/RGXu5+aYY46w9tprh7vuuit8/fXXIV1P/+MPVfjqq6/CL7/84uyiS9iviVZpWHHFFX2/1p/nnnsuRPd6OPPMM8Mnn3wSFl988TDjjDOGBx980PnEFwFnUeu5RAvc+7juuuuG+LIQ4ktRmHDCCUN0i3tfX3jhBb8P2o/u9jDttNN6/6OCDtFzUOnWd999F+LLReW7dkSgkQRkATf09UeNN4oAFt6zzz7r1tEEE0zgbtqoXLw7E000kTGm+O6779ree+/t5fL9xIJdaKGFbNNNN3UrGOsrL4zn4rJlrJcx3uHDhxtu5EknndRdzfEFwPdpB2s0ST3tU3a//fazqDBtq622cisUt/dss81mq622mj3zzDOpurG22267rV1zzTVuLU822WQ233zzWVS2lXIHHHCAxRcEt4LTQbwBSFRs7hnA8kxWPxZxspBT+Wrb+JJg8SXC4suDYcHjZWAblbHNMMMMFl9kvM1qz4VnBC9c9HgsEPp+wgkneF+z7cWXAfcCxBcJi8rex67TeTwRP//8s3/SMW1FoFEEpIAbRV7tNpQACuXGG2/0iTu4cKPF6MqSTp1zzjn2999/27LLLusu6ptuummsvn7wwQe21FJLVY4vt9xylf20g6LlWlyqKNgNN9zQlXo6n7YoCVzHSeppn7IoWwTFxn5SkpNMMokrZD9Z5U+0HG3//fd3ZUef3n//fWOMNwku4bYEhQszrrvzzjvHcEnXujb1l/NTTDGFu45TWfpMXUXPJZVNW14+askhhxxiuL133313f8nJlouWtyvm7DHti0AjCEgBN4K62mw4ASzAW265xZUIli/WY3RFeb+wQFEuWIWMlTIOmh/fZFyXWbVJ0hhj+s4W65eZ0YxP8plmmmncos6WYR+LkDHOJPW0T1nKdUS4J8Z+UVBvvPGGv2ike6c+FHRbggXMPUVXtb+kzDrrrG1dUle9Rc8l30BRP7GMsa4Zz2bcPQnP5JtvvvFx+XRMWxFoFAEp4EaRV7sNJcCaUFyvCy+8sCveq666yq1eOsXknssvv9yV1HbbbecWFAqKH/I4Zun9Tm5eJmzFMV23dPM3FMcbvX6Wz+CuTpOsKIc79aeffvJLWB6EFZqkVvvp/LhueZlgRjBWN33DnY7FXyQoOyx63LcI7l/c1LjUcYF3lhQ9F5il9ovaY/LYQw895C7yONZtWMNJmJTFfU833XTpkLYi0DACUsANQ6+GG0mAcVCUD7N6UY5LL720YQkzQ/jkk082lgkxQ5rPiSee6GOUWIybbLKJdxulecoppxjjjfPOO68rpPz98EPPemCUHW1Q7+mnn+7FVlllFZ/VyzHqipOhKhZ4rfbz9Xf0O9Y+/WIMlhnDm2222RgvALXqZYY0bt8
333zTizDbGlc8dXSWFD0XOOF1YMy4lvz222/utWBWNe5slncxVp3WXHM99UhEoBkI9GAGWDN0RH0QgUYQwBrEGmKST16wePkRL3L1YjliAafx13wd6TtLjRh7zArXMakI6xIlyESujTfeuFKknvYrhdu5w3977p3JT+0RFByWKIL7GU/BHXfc0Z4q6ipb67ngQmZCG+PeHZFqnDtSj64Rgc4gIAXcGRRVhwiMI4G4rMknDCUX9zhWV+rlKG/WMLMOGtc9Fj6CpVnLRYyV3NZLSqmdjpXj5t9nn32MKGQSEWgGAmO/9jdDr9QHEehmBOaZZx7beeedLa6xbfo7Z3kVk65w0yflS6exTmt9muGmGHI477zzmqEr6oMIOAFZwPqHIAIiIAIiIAINICALuAHQ1aQIjCuBbOAMJo9116kcjAdnlxmNK1ddLwJdSUAKuCtpq62WJED0phjmsV19J+gEEa46IrhKa2X9oT4yG1144YW+dIiEDriuieXM+tYiYdbzRRdd5FG/iFFdtswyyyzG2HZeWJ/LJCrWRTMuzJpqljPF0J2Votdff72tscYaPkud2M9vv/22n8vfAy8erNMmHrVEBFqNgBRwqz0x9Xe8J1CU9YcxVoJMoLBYZxxjVluMs2y77LLLGIFBqkEihCMzmNmmcI7VynXFsRhj28e7GfMm4xKJMMgGhWDdH3jggYYSJtgHCpvwmEj+HliPzNIlXkgkItBqBKSAW+2Jqb8NIUC4yFVXXdWXLO2www6VwBXM+iXLEWt+CUwREx+M0b+Y7KGydpgTMWmALzliv1rWpXzWH8plhUhRWIwoUiZCEdOamNAEsMhOiMpek/axHgk8ssIKK/iWwCAxUYSfZp0s62tTSMqYsMEzOrUncxJBLqiTpU2EuuRloS1hQhf3ExNTeGYo3OlcRwhP7g1hrTbZnpD8PXCMeNxnnXVWzRnYlJGIQDMSkAJuxqeiPjUdAeIdE3iDJAdYbISxRAYNGuTuVBIJoHxI3pAV3KrZMJV8R5kjRx11lK/7xXXMGmCuxW1L0viVVlrJUxR6wcyfxx57zPr37185Qkxm3OO4rIvWK3MB/SfgCDOBiQLWK6ZRTMueyLNLWEruA0EBksaPsriRUdSkZeQ8MbQRgnCguEmTSFleTKgXq5U1zqx9rlcIZoI7mpcS3Pe87CS59NJLPY423/P3wDHc2X379q30nWMSEWgFAlLArfCU1MeGE2AdK0qBH3py2bKmdPTo0Xb33Xe7IkVxkuv2uOOOq1iRbXUahZnPulQt60+2nnzmJPqTsjlly9WzH1P7eX5dyj766KNGrlxeLlCcfLA8682cREARXg5iGkZPPAGH9giWMAwJwJEVAn0Q8IOIVkVCsouOjrkX1atzIlAmASngMumq7vGGQDaTDyn5sPCwbLG+yKaEoERQakVJArKzlevNepSFiPWMu7szhLFVUv8xDovCJbQkE7yIoxxzI/v9cC/1ZE7CGiZMZXIboxCzKRbr6W/MbWzZTEysMyZkJlZ6UeYj6oZJ1tNQT3sqIwKNJiAF3OgnoPZbggDKNS+EsGT2MeOWSYYNG+bH0ncUGEtlkmTdsljAbWVdStelLeOl2fbS8Y5scfkSo5rxV8aFiU+NAiY5Q0ocUW/mJPrF+HVKMEHIyrZmZWf7TJukJOzXr58fJoUjk81QvsTjbktgQlYpiQi0EgEp4FZ6WuprUxHA6iKwP0t6sGyZvYxVm51hTBksO5YLUYbx02QFD66Rdako6w/tZTMnVQPCuGzelVutHMew2ElcgPJl8hRLg4YPH24ks0eoh8ld3EdR5iTcx3gCUJzIDTfcUJmoxveRI0f6rG32ERQ0E8fgcs8999iee+5p++67r08uw5JlPJw68DxQjk+RwASXuUQEWomAFHArPS31tekIMAFp6NCh1qdPH1caKOCstcxxJkqhnHpr0NMdAAABJ0lEQVT37m2EnExSK+tRUdYflEw2D3GqK7sdOHBgZXZ
z9ni1fRTwqFGjXAFznnFuLM6Urq89mZOYuMVkMKxhYkSzTQIDlkslYZlRz549nQnKlrSPTD5DhgwZ4gqayWaUSR9mZFcT1gDjAmeGt0QEWomAQlG20tNSX5uWAGtyizILsVyJ8WLcrHmplvWIpTjVsv7gwmYpDrORU1aifH0syRkwYEBlbDp/vr3fsdixhIvuL1snZVGaXSWsFx4xYoR7IrqqTbUjAp1BQAq4MyiqDhHoQgJY2cw6xmVbTbDKCczRXQT3OUq4rYla3YWH7rN1CEgBt86zUk9FwAmQg5iAHN1JydZ69HgeHnnkEQ+GUquMjotAsxKQAm7WJ6N+iYAIiIAIjNcE/gfHJWj1qG0+fAAAAABJRU5ErkJggg==\n" }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%%R -i similarity_matrix\n", "library(dplyr)\n", "#library(ggplot2)\n", "\n", "head(similarity_matrix)\n", "hclust(as.dist(similarity_matrix),method=\"ward.D2\") %>%\n", "plot" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'icoord': [[5.0, 5.0, 15.0, 15.0],\n", " [25.0, 25.0, 35.0, 35.0],\n", " [10.0, 10.0, 30.0, 30.0],\n", " [65.0, 65.0, 75.0, 75.0],\n", " [55.0, 55.0, 70.0, 70.0],\n", " [45.0, 45.0, 62.5, 62.5],\n", " [115.0, 115.0, 125.0, 125.0],\n", " [105.0, 105.0, 120.0, 120.0],\n", " [95.0, 95.0, 112.5, 112.5],\n", " [85.0, 85.0, 103.75, 103.75],\n", " [53.75, 53.75, 94.375, 94.375],\n", " [20.0, 20.0, 74.0625, 74.0625],\n", " [155.0, 155.0, 165.0, 165.0],\n", " [145.0, 145.0, 160.0, 160.0],\n", " [185.0, 185.0, 195.0, 195.0],\n", " [175.0, 175.0, 190.0, 190.0],\n", " [152.5, 152.5, 182.5, 182.5],\n", " [135.0, 135.0, 167.5, 167.5],\n", " [205.0, 205.0, 215.0, 215.0],\n", " [245.0, 245.0, 255.0, 255.0],\n", " [235.0, 235.0, 250.0, 250.0],\n", " [225.0, 225.0, 242.5, 242.5],\n", " [265.0, 265.0, 275.0, 275.0],\n", " [295.0, 295.0, 305.0, 305.0],\n", " [285.0, 285.0, 300.0, 300.0],\n", " [270.0, 270.0, 292.5, 292.5],\n", " [233.75, 233.75, 281.25, 281.25],\n", " [210.0, 210.0, 257.5, 257.5],\n", " [151.25, 151.25, 233.75, 233.75],\n", " [315.0, 315.0, 325.0, 325.0],\n", " [345.0, 345.0, 355.0, 355.0],\n", " [335.0, 335.0, 350.0, 350.0],\n", " [320.0, 320.0, 342.5, 342.5],\n", " [375.0, 375.0, 385.0, 385.0],\n", " [365.0, 365.0, 380.0, 380.0],\n", " 
[331.25, 331.25, 372.5, 372.5],\n", " [395.0, 395.0, 405.0, 405.0],\n", " [425.0, 425.0, 435.0, 435.0],\n", " [415.0, 415.0, 430.0, 430.0],\n", " [400.0, 400.0, 422.5, 422.5],\n", " [455.0, 455.0, 465.0, 465.0],\n", " [445.0, 445.0, 460.0, 460.0],\n", " [475.0, 475.0, 485.0, 485.0],\n", " [495.0, 495.0, 505.0, 505.0],\n", " [480.0, 480.0, 500.0, 500.0],\n", " [515.0, 515.0, 525.0, 525.0],\n", " [535.0, 535.0, 545.0, 545.0],\n", " [520.0, 520.0, 540.0, 540.0],\n", " [490.0, 490.0, 530.0, 530.0],\n", " [452.5, 452.5, 510.0, 510.0],\n", " [411.25, 411.25, 481.25, 481.25],\n", " [351.875, 351.875, 446.25, 446.25],\n", " [192.5, 192.5, 399.0625, 399.0625],\n", " [47.03125, 47.03125, 295.78125, 295.78125]],\n", " 'dcoord': [[0.0, 0.21333867488882308, 0.21333867488882308, 0.0],\n", " [0.0, 0.3805535089335521, 0.3805535089335521, 0.0],\n", " [0.21333867488882308,\n", " 0.5855848260443224,\n", " 0.5855848260443224,\n", " 0.3805535089335521],\n", " [0.0, 0.2872644901251715, 0.2872644901251715, 0.0],\n", " [0.0, 0.34472959074707116, 0.34472959074707116, 0.2872644901251715],\n", " [0.0, 0.38938388145098235, 0.38938388145098235, 0.34472959074707116],\n", " [0.0, 0.33352399998340854, 0.33352399998340854, 0.0],\n", " [0.0, 0.3974263535175602, 0.3974263535175602, 0.33352399998340854],\n", " [0.0, 0.4140604517840557, 0.4140604517840557, 0.3974263535175602],\n", " [0.0, 0.6053583701171363, 0.6053583701171363, 0.4140604517840557],\n", " [0.38938388145098235,\n", " 0.7629594367621403,\n", " 0.7629594367621403,\n", " 0.6053583701171363],\n", " [0.5855848260443224,\n", " 0.9584521635877752,\n", " 0.9584521635877752,\n", " 0.7629594367621403],\n", " [0.0, 0.10629097243894707, 0.10629097243894707, 0.0],\n", " [0.0, 0.14589106217232223, 0.14589106217232223, 0.10629097243894707],\n", " [0.0, 0.14845298440347174, 0.14845298440347174, 0.0],\n", " [0.0, 0.20323545927584558, 0.20323545927584558, 0.14845298440347174],\n", " [0.14589106217232223,\n", " 0.27053775393863067,\n", " 
0.27053775393863067,\n", " 0.20323545927584558],\n", " [0.0, 0.2834693852894986, 0.2834693852894986, 0.27053775393863067],\n", " [0.0, 0.27579034064360586, 0.27579034064360586, 0.0],\n", " [0.0, 0.14055830099509545, 0.14055830099509545, 0.0],\n", " [0.0, 0.17317330451288437, 0.17317330451288437, 0.14055830099509545],\n", " [0.0, 0.22889162471627206, 0.22889162471627206, 0.17317330451288437],\n", " [0.0, 0.1646798411677444, 0.1646798411677444, 0.0],\n", " [0.0, 0.14195089035900987, 0.14195089035900987, 0.0],\n", " [0.0, 0.20457278941880547, 0.20457278941880547, 0.14195089035900987],\n", " [0.1646798411677444,\n", " 0.23724513617131898,\n", " 0.23724513617131898,\n", " 0.20457278941880547],\n", " [0.22889162471627206,\n", " 0.35990544890675036,\n", " 0.35990544890675036,\n", " 0.23724513617131898],\n", " [0.27579034064360586,\n", " 0.43126319905416755,\n", " 0.43126319905416755,\n", " 0.35990544890675036],\n", " [0.2834693852894986,\n", " 0.5929383229764894,\n", " 0.5929383229764894,\n", " 0.43126319905416755],\n", " [0.0, 0.19468395945144223, 0.19468395945144223, 0.0],\n", " [0.0, 0.21074083639138996, 0.21074083639138996, 0.0],\n", " [0.0, 0.23596337662440936, 0.23596337662440936, 0.21074083639138996],\n", " [0.19468395945144223,\n", " 0.2733141711848642,\n", " 0.2733141711848642,\n", " 0.23596337662440936],\n", " [0.0, 0.17997077302830203, 0.17997077302830203, 0.0],\n", " [0.0, 0.3285798092301384, 0.3285798092301384, 0.17997077302830203],\n", " [0.2733141711848642,\n", " 0.45274108992749557,\n", " 0.45274108992749557,\n", " 0.3285798092301384],\n", " [0.0, 0.26903843779564635, 0.26903843779564635, 0.0],\n", " [0.0, 0.2778464314878544, 0.2778464314878544, 0.0],\n", " [0.0, 0.30565801021330624, 0.30565801021330624, 0.2778464314878544],\n", " [0.26903843779564635,\n", " 0.5132012196645103,\n", " 0.5132012196645103,\n", " 0.30565801021330624],\n", " [0.0, 0.17361359077926955, 0.17361359077926955, 0.0],\n", " [0.0, 0.26920063399006655, 0.26920063399006655, 
0.17361359077926955],\n", " [0.0, 0.19234199258680965, 0.19234199258680965, 0.0],\n", " [0.0, 0.23259832292710847, 0.23259832292710847, 0.0],\n", " [0.19234199258680965,\n", " 0.3207803415976658,\n", " 0.3207803415976658,\n", " 0.23259832292710847],\n", " [0.0, 0.1772785970481172, 0.1772785970481172, 0.0],\n", " [0.0, 0.2078965544789089, 0.2078965544789089, 0.0],\n", " [0.1772785970481172,\n", " 0.34725719635513774,\n", " 0.34725719635513774,\n", " 0.2078965544789089],\n", " [0.3207803415976658,\n", " 0.4830351672375104,\n", " 0.4830351672375104,\n", " 0.34725719635513774],\n", " [0.26920063399006655,\n", " 0.5837152751551123,\n", " 0.5837152751551123,\n", " 0.4830351672375104],\n", " [0.5132012196645103,\n", " 0.6895392014624909,\n", " 0.6895392014624909,\n", " 0.5837152751551123],\n", " [0.45274108992749557,\n", " 0.8150132507151894,\n", " 0.8150132507151894,\n", " 0.6895392014624909],\n", " [0.5929383229764894,\n", " 2.081013463489638,\n", " 2.081013463489638,\n", " 0.8150132507151894],\n", " [0.9584521635877752,\n", " 3.1556008507593947,\n", " 3.1556008507593947,\n", " 2.081013463489638]],\n", " 'ivl': ['38',\n", " '41',\n", " '45',\n", " '47',\n", " '23',\n", " '49',\n", " '17',\n", " '21',\n", " '40',\n", " '44',\n", " '34',\n", " '42',\n", " '52',\n", " '7',\n", " '5',\n", " '0',\n", " '53',\n", " '15',\n", " '4',\n", " '9',\n", " '6',\n", " '14',\n", " '16',\n", " '1',\n", " '11',\n", " '12',\n", " '2',\n", " '3',\n", " '10',\n", " '8',\n", " '13',\n", " '18',\n", " '26',\n", " '22',\n", " '27',\n", " '37',\n", " '24',\n", " '30',\n", " '35',\n", " '50',\n", " '51',\n", " '25',\n", " '20',\n", " '48',\n", " '39',\n", " '28',\n", " '31',\n", " '36',\n", " '46',\n", " '19',\n", " '29',\n", " '32',\n", " '43',\n", " '33',\n", " '54'],\n", " 'leaves': [38,\n", " 41,\n", " 45,\n", " 47,\n", " 23,\n", " 49,\n", " 17,\n", " 21,\n", " 40,\n", " 44,\n", " 34,\n", " 42,\n", " 52,\n", " 7,\n", " 5,\n", " 0,\n", " 53,\n", " 15,\n", " 4,\n", " 9,\n", " 6,\n", " 14,\n", 
" 16,\n", " 1,\n", " 11,\n", " 12,\n", " 2,\n", " 3,\n", " 10,\n", " 8,\n", " 13,\n", " 18,\n", " 26,\n", " 22,\n", " 27,\n", " 37,\n", " 24,\n", " 30,\n", " 35,\n", " 50,\n", " 51,\n", " 25,\n", " 20,\n", " 48,\n", " 39,\n", " 28,\n", " 31,\n", " 36,\n", " 46,\n", " 19,\n", " 29,\n", " 32,\n", " 43,\n", " 33,\n", " 54],\n", " 'color_list': ['g',\n", " 'g',\n", " 'g',\n", " 'g',\n", " 'g',\n", " 'g',\n", " 'g',\n", " 'g',\n", " 'g',\n", " 'g',\n", " 'g',\n", " 'g',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'r',\n", " 'b']}" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXQAAAD2CAYAAADGbHw0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAYx0lEQVR4nO3de5QkZX3G8e/DsoIggrpjIHthyXFNglEHs4KJUSdekRhXj5gDe4JRYyYxEsWYi5gjXqIkJioEScRJIFx00SiIq1klJGGDREUXaLnGuMoaliywsLJcvJ0Nv/xR1VDTVHVXz1Rf5p3nc86c6X77naq3q6ueeuvtqhpFBGZmtvDtNeoGmJlZMxzoZmaJcKCbmSXCgW5mlggHuplZIhzoZmaJ2HtUM162bFmsXr16VLM3M1uQrr766rsiYqLstZEF+urVq9myZcuoZm9mtiBJ+l7Vax5yMTNLhAPdzCwRDnQzs0Q40M3MEuFANzNLhAPdzCwRDnQzs0SM7Dz0YZuZgQ0bRt0KS9X69TA9PepW2GK3aHroGzZAqzXqVliKWi13Fmw8LJoeOsDkJGzePOpWWGqmpkbdArPMoumhm5mlzoFuZpYIB7qZWSIc6GZmiXCgm5klomegS9pX0tclfVPSjZLeU1JnH0mfkrRV0lWSVg+ktWZmVqlOD/3HwPMj4unAJHC0pGd11Plt4PsR8STgNOADjbbSzMx66hnokbk/f7o0/4mOauuA8/LHnwFeIEmNtdLMzHqqNYYuaYmkFnAncFlEXNVRZTlwK0BE7AF2A08omc60pC2StuzcuXNeDTczs9lqBXpE/F9ETAIrgCMl/cJcZhYRMxGxNiLWTkyU/o9TMzObo77OcomIe4DLgaM7XroNWAkgaW/gQODuBtpnZmY11TnLZULSQfnjRwMvAv6ro9pG4Lfyx8cC/x4RnePsZmY2QHVuznUIcJ6kJWQ7gH+KiC9Iei+wJSI2AmcDF0jaCuwCjhtYi83MrFTPQI+I64AjSspPKTz+EfDqZptmZmb98JWiZmaJcKCbmSXCgW5mlggHuplZIhzoZmaJcKCbmSXCgW5mlggHuplZIhzoZmaJcKCbmSXCgW5mlggHuplZIhzoZmaJcKCbmSXCgW5mlggHuplZIhzoZmaJcKCbmSXCgW5mlggHuplZIhzoZmaJcKCbmSXCgW5mlggHuplZInoGuqSVki6XdJOkGyW9paTOlKTdklr5zymDaa6ZmVXZu0adPcDbIuIaSQcAV0u6LCJu6qj35Yh4WfNNNDOzOnr20CNiR0Rckz++D7gZWD7ohpmZWX/6GkOXtBo4Ariq5OVfkvRNSV+U9JSKv5+WtEXSlp07d/bfWjMzq1Q70CU9BrgIOCki7u14+Rrg0Ih4OvAR4JKyaUTETESsjYi1ExMTc2yymZmVqRXokpaShfknIuLiztcj4t6IuD9/vAlYKmlZoy01M7Ou6pzlIuBs4OaI+HBFnYPzekg6Mp/u3U021MzMuqtzlsuzgROA6yW18rJ3AKsAIuIs4FjgjZL2AD8EjouIaL65ZmZWpWegR8SVgHrUORM4s6lGmZlZ/3ylqJlZIhzoZmaJcKCbmSXCgW5mlggHuplZIhzoZmaJcKCbmSXCgW5mlggHuplZIhzoZmaJcKCbmSXCgW5mlggHuplZIhzoZmaJcKCbmSXCgW5mlggHuplZIhzoZmaJcKCbmSXCgW5mloie/yTarJaZGdiwYdStGI3W6dnvqZNG2YrRWb8epqdH3QrDgW5N2bABWi2YnBx1S4Zu8+RJo27C6LRa2W8H+lhwoFtzJidh8+ZRt8KGaWpq1C2wgp5j6JJWSrpc0k2SbpT0lpI6knSGpK2SrpP0jME018zMqtTpoe8B3hYR10g6ALha0mURcVOhzkuBNfnPUcBH899mZjYkPXvoEbEjIq7JH98H3Aws76i2Djg/Ml8DDpJ0SOOtNTOzSn2dtihpNXAEcFXHS8uBWwvPt/PI0Dc
zswGqHeiSHgNcBJwUEffOZWaSpiVtkbRl586dc5mEmZlVqBXokpaShfknIuLikiq3ASsLz1fkZbNExExErI2ItRMTE3Npr5mZVahzlouAs4GbI+LDFdU2Aq/Jz3Z5FrA7InY02E4zM+uhzlkuzwZOAK6X1MrL3gGsAoiIs4BNwDHAVuAHwOsab6mZmXXVM9Aj4kpAPeoE8KamGmVmZv3zzbnMzBLhQDczS4QD3cwsEQ50M7NEONDNzBLhQDczS4QD3cwsEQ50M7NEONDNzBLhQDczS4QD3cwsEQ50M7NEONDNzBLhQDczS4QD3cwsEQ50M7NEONDNzBLhQDczS4QD3cwsEQ50M7NEONDNzBLhQDczS4QD3cwsEQ50M7NE9Ax0SedIulPSDRWvT0naLamV/5zSfDPNzKyXvWvUORc4Ezi/S50vR8TLGmmRmZnNSc8eekRcAewaQlvMzGwemhpD/yVJ35T0RUlPaWiaZmbWhzpDLr1cAxwaEfdLOga4BFhTVlHSNDANsGrVqgZmbWZmbfPuoUfEvRFxf/54E7BU0rKKujMRsTYi1k5MTMx31mZmVjDvQJd0sCTlj4/Mp3n3fKdrZmb96TnkIulCYApYJmk78C5gKUBEnAUcC7xR0h7gh8BxEREDa7GZmZXqGegRcXyP188kO63RzMxGyFeKmpklwoFuZpYIB7qZWSIc6GZmiXCgm5klwoFuZpYIB7qZWSIc6GZmiXCgm5klwoFuZpYIB7qZWSIc6GZmiXCgm5klwoFuZpYIB7qZWSIc6GZmiXCgm5klwoFuZpYIB7qZWSIc6GZmiXCgm5klwoFuZpYIB7qZWSIc6GZmiegZ6JLOkXSnpBsqXpekMyRtlXSdpGc030wzM+ulTg/9XODoLq+/FFiT/0wDH51/s8zMrF89Az0irgB2damyDjg/Ml8DDpJ0SFMNNDOzepoYQ18O3Fp4vj0vewRJ05K2SNqyc+fOBmZtZmZtQ/1SNCJmImJtRKydmJgY5qzNzJK3dwPTuA1YWXi+Ii9rzMzVM2y4fsO8ptG6/XQAps49ac7TWP/U9Uz/4vS82mFmNihNBPpG4ERJnwSOAnZHxI4GpvuQDddvoHV7i8mDJ+c8jcm3nzSvNrRubwE40M1sbPUMdEkXAlPAMknbgXcBSwEi4ixgE3AMsBX4AfC6QTR08uBJNr928yAmXcvUuVMjm7eZWR09Az0iju/xegBvaqxFZmY2J75S1MwsEQ50M7NEONDNzBLhQDczS4QD3cwsEQ50M7NENHFhkZmNg5kZ2DC/K6r71mplv6emhjvf9eth2hf5dXIP3SwVGzY8HLDDMjmZ/QxTqzX8HdcC4R66WUomJ2Hz5lG3YrCGfTSwgCzaQO/3hl/te7n0ewsA39DLzIZl0Q65tG/4VdfkwZN93xysdXtr3neJNDOra9H20GHwN/zyDb3MbJgWbQ/dzCw1DnQzs0Qs6iEXMxuwQZwbP6hz3xM4t909dDMbnEGcGz+Ic98TObfdPXQzG6yFcG58Iue2u4duZpYIB7qZWSI85NKAqqtOq64u9dWjZjYIDvQGtK867byStOzK0nbIDzXQh3EXvmHddS+BMxHMBmWsAr3fni6MT2+37lWnI7l6tH2mwSDvijeMO+61dxoOdCtqosPSZIdkhJ2OsQr0fnq6MKLe7kK1EM406CWRMxGsYU10WJrqkIy40zFWgQ793V/F90oxM2B8Oiwj7nTUOstF0tGSviVpq6S3l7z+Wkk7JbXynzc031QzM+umZw9d0hLgb4EXAduBb0jaGBE3dVT9VEScOIA2mtk46zaG3W1s2l9wN65OD/1IYGtEfDcifgJ8Elg32GaZ2YLR7fL+qsv0E7nUftzUGUNfDtxaeL4dOKqk3qskPRf4b+CtEXFrZwVJ08A0wKpVq/pvrZmNp37HsBfCF9xzOXtmLmfLNHik0tSVop8HVkfE04DLgPPKKkXETESsjYi1ExMTDc3azGwA5nJjsX5
vHNbwkUqdHvptwMrC8xV52UMi4u7C038A/mr+TTNb5PrtIc71XGqPZVcb9NkzDR+p1OmhfwNYI+kwSY8CjgM2FitIOqTw9OXAzc010WyR6reHOJfbynosOyk9e+gRsUfSicClwBLgnIi4UdJ7gS0RsRF4s6SXA3uAXcBrB9hmW+jmemXffK/mW4g90QXWQ7TRqnVhUURsAjZ1lJ1SeHwycHKzTVv4ym5lsBBuYzBwc72ybz5X8/m2AbYIjN2VooPQT7A2GapltzKYPHiSHffteGj+bbt/vJvW7a1Z7Uw64Id9ZZ97orYILLhAL4ZzWSiXhWBVsHYaxL1hym5lMHXuFHc8cEflPWoG1RYzS9uCC/RiOHcGYrcQrHOPmGHeG6ZXe3yfmhLzuavefMbfF+LYuy1KCy7QoToM+w3BzqEY/0OKMTefu+rNdfzdY+/WhKrOSMO3RliQgd6UzqGYUf9Dim7DSd6p5Dz2PndloVIVKD4qaVZVZ6SqozHHjsSiDnQYr6GPquEkj6dbI8pCpeo+K7B4An0uvWfof6fXT2dkjh2JRR/o46bqS1SzRtQJlUEclXSG5jgdGfTbe4ax3ek50EssmKGPul8S9vOFoA+1bRA6Q3PcjgwSubmYA73Eghn6qPsl4eQk7NjR+zLy3bvrXQo+l9Av7nw6dzCLeScy13uJw2iX21w+z7LQLOu5d5tO3e8BFuk65UCvsGCGPur2LKam4I475v+/E+faiyrufIptGNND16HptlMe50P+pj7PfqdT53uAUS+bEXKgLyZNnCEyn0PNsvmPy6HrsL4YKzOXz2UclltTn2e/0+m1vMZh2YyIA93GUz+n2MH8g3WcvxjzMMN4WADDhiMP9AXzBaQNV91T7KC5YB3XL8aqlkXxe5HO7z/GJGCSMuhhwwZ2GCMP9AXzBeRiNcrTzfr5fmAYmloWTX2hWPW9yCIeQ65truE5yGHDBnYYIw90WEBfQC5G43662TA1tSya7Ok1ETC9dlQp9vaH8SV9kzvumsYi0G3MjdOXUE2OJze1wRXVXRbj9AVxtx3VQtlZjyA8exrBmV0O9BQN454do/qCqMnT1nwq5cOqdlQL5YyRcf0sh7zjdqCnaBj37BjlBtTEBSrdpjWfIYvFMFwxrsbpqGdEHOipGsY9O8ZpAxrlDmZce4e26DjQF5uUe5Oj3MGM087NFq29Rt0AG7J2bxJm9yjr3MPFzMaae+iLkXuTZklyD93MLBEOdDOzRNQKdElHS/qWpK2S3l7y+j6SPpW/fpWk1Y231MzMuuoZ6JKWAH8LvBQ4HDhe0uEd1X4b+H5EPAk4DfhA0w01M7Pu6vTQjwS2RsR3I+InwCeBdR111gHn5Y8/A7xAkpprppmZ9aKI6F5BOhY4OiLekD8/ATgqIk4s1Lkhr7M9f/6dvM5dHdOaBtonOv8s8K2m3oiZ2SJxaERMlL0w1NMWI2IGmBnmPM3MFos6Qy63ASsLz1fkZaV1JO0NHAjc3UQDzcysnjqB/g1gjaTDJD0KOA7Y2FFnI/Bb+eNjgX+PXmM5ZmbWqJ5DLhGxR9KJwKXAEuCciLhR0nuBLRGxETgbuEDSVmAXWeibmdkQ9fxS1MzMFgZfKWpmlggHuplZIhzoi4CkR0taJ+nlkvZraJpP73i+r6Rfz28TsVehfH9Jj5L0SkmHNDHvQZH0eEkHSHqKpCfWqH9Y/v6OkbRiGG0062bkgS7pMV1ee8TVpvmtCOpOe6/89+Ml7VMof6yk/UvqP7rGNA/seP5YSY+tqPuEfsrz11R4vKTdpuJ8Ja3q9veSpiQ9rlD852SnkX4feE9H/dJlVHh9dX4qavv55yR9GDg9/932l2Snrh4DnFwo/13gROAB4G0l0z+s4n0cUbXzkbSyrDx/rbicqpbfQe37DXVM693AucChPHI5PU3S0zpm90fAR4D7gbcW6latdwfkvw+vWP9mvecu7e+5npYpW9bd1t92G0rKypZF8fUDS8pmrUe
9plOWC2Xbba9lmr9WliOPq6hbVf5UST9Xt7zHvEuvou9SXrqNlBnZ/dAl/QNwD/B9SY+PiLfl5cWV643k94WR9CrgUcAzJf04Ik7Oy68ELgL+MSLu6ZjN6ZImgH8Fngn8nqR3k92TZj9Jn42Iswv1/03SFfm0HrqKtbDSCVgP/Gle/s68PCRFRLw/L28H3RGSro2IP+xR/iGyc/c/AjwPOCOv93fAjyR9Pm9zu/w8STcBGyPi0kI73wwsBb4LvB/4/fylHwJfyR+/qNcyyqf1OmDf/Ocw4M15/Y/lZdsi4ozCdHYB5wBvAorftB9AtjO5FdhTaOs5ZJ9/57I4Ka/3E+AEoF3+8vafAi8DfqcwrdLPp8vy+2vgFkm3ABOF8luAr0XEJklPKUz/L4Ebsoc6vr3u5e0XcCXwq72WKXCapAC+THbF9End3nOX9letp6XbQpdlXbr+5q9VbW+ly6LLNlK6HuXTuTF/XJxOVS68m/LttmqZVuXI64EHgSdLWhoRf9ytPH/tQ8D/AT8t6ZqI+HCN8kdsz/l7PoBs3VsZEW/pUV76uXUzyh76zcAD+Up0a6H8c2QfylvJFkbbCuCJ+Zu6r1D+aeCfgbfnC7JoG/CV/MPfmpftiojfADYBnT2TT5H1NI+R9NFC+XuAyfyneGj9QET8eUS8jyw0274L/D1wSceH0C7/bEf5XfmtFV4JrCnWzz/cw4GjCuWXAH8A7C/prEL5zwAHRsRn8/fednH+vv4C+GzHe97GI5cRZL3tx0bEacD/tgsjYlP+Pjp7Q1/Pp38J8KVC+XnAcrIALv5LpMvzup3LaD+yDevjZCtz2yvz5/cAP+qYd9XnU7X8vh0Rp+aPjy6UXxoR7TZeVyi/NyI+HhEXAPcWyr9KFqC/R7Y+t22jfJluBXZHxPnMvjiv6j1Xtb9qPa3aFqqWddX6C9XbW9WyqPoMStejfDoXlEynKheqttuqZVqVI8uA/SLiHcCOGuUA/xsRf0J2TU7UKK/anu8C7oiIDwLba5S3P7fOvKg0yv9YtD+wR9KfMXsFODUiLpM0CXy+UL4SuF3Z2O3/FMrvjIj/lnQh8O2OeXyVhz+cO/Pfe0v6GNmKcF1H/e/kPZvTNHtY41RgZ0Rsy3t1bXflG84k2aE6ABFxpqTjgRcDf1OofyHwa8A6SRdGxM68fFv+eyPw2kL9G/LDsOuB4nDItRHxIFlQX1wov5xsaAPgqkJ7rgWupdx/Al+XNAV8r1C+m6zXDR1XBkdEC2h1lH2J2UHeLt/G7CGYdvkFkl4I/FTeK2lvvJcCrwfOJ9so206NiG8DSPqvjsmdGhHfyF8rfj435PM6Q9IphfKv5OUXSnroqCEibio8vrRQ/9pCQLYKdb6QP/y3jvZ8JSK+lj++o1C+C9ieH0ntWyi/jGy45/kd7/k2SatL2r8LOCgiOtfTXWTr+8d5+LMD+ALwah65rNvr72qy4Ch6KvC9/EiluL218r+ZAj5YKP8LYAtZeJ5eKL8PaA+fFAP9FkkXka37rUL5PsADkt5C1stt21fS2Xnd4nQeBHaXLNO/I1sfDyVbvg/Nl+yOsa8iW9fargVeI2kd8AlmuzvvOP092c63LSR9CfizjvLtyoaCLiYbbmxrkR1dPZ1s+LNtG3CppF+gsJ3n28iLgfWSzi8ZgXiEUfbQl5EFYWfQnJgPTZwG/GahfA1ZT+904BmF8uPy+qcD7+uY1uvJFsbxwEF52fOAH5AF60s66u+l7IvDdcA7C+XThelMFspfRbYMlwBHtAslXUJ2l8r9NXuc+V1kd6b8EPDekvfwIQrDEsCTyA6/DyI7XAUgIq6gRER8LiL+NX/8H2V1Sjwrn8fjmD1ssI1sQ1kHPLfmtGrLh1D2I9vxFMNqOfAvwAXF+bbDPH9cDEmAQwqf2wmF8iWF8pWFv7+y8PjTNZq7FLgi/6mzLJ5YmG+x/u1kvdHvAcUgnibbyVxM9jm0PY+H17t7CuXPLZS/olD+TLK
OxSpmr78fIBtauoGs19i2lizMzwZ+ueM9/IBsXT8UeHahfA1ZoL4PeE6h/FlkveGDgNcVyg8HHuzYBiHbeV0DXN3xno8CDsnbWQz0Ffnff4dsZ1Jsz4NkvdzdhfLn8PCyOKGjPMi2teLY9In5NPZh9jYO2c7xi3kbXlMo/3myoZ4nU9j+efhzWEE2JNP2B/l8TydbLm3PLbR1ebsw30k9lezzO5UaRhnotwAXFQ7h2z5GNibZeXj4MbKF13n4UVUfyg+t29O5uKR+1WF91SF61bRmKsr7fc+HUT6E0qSqebyC6iGOJlQt61eS9V76mW+3aZWV96vfNvXbnm/nQwzB7PWrar2rKq9av7ZWTP8W4DMl9btNq2p9qRrua7+3svdQ1qaqbaqqrVXTqWp/1XR65ULZ51/13npt550ZVlW/aplWi4iR/ACHFx6/pOO1SeDkkr/pt/xXCo9fXaP+msLjn+o1nX7b1O97JuvNvzB//LwBfQ6l86haFg3Ot2pZ9z3fJqfVz/Sbak+X9bTf8tL1q9/6PaZVtb5Ulff1HvLntbedOSyLueROv59bv9t5X8u6248v/TczS8TIz0M3M7NmONDNzBLhQDczS4QD3cwsEQ50M7NE/D9QHDrSf1vc+AAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "from scipy.cluster.hierarchy import dendrogram, linkage\n", "Z = linkage(similarity_matrix, 'ward')\n", "pd.DataFrame(Z)\n", "\n", "# but to draw dendrogram?\n", "dendrogram(Z)\n", "# Don't like the graph in python" ] } ], "metadata": { "kernelspec": { "display_name": "python-notes", "language": "python", "name": "python-notes" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.0" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 4 }