Skip to content

Commit 50bfd19

Browse files
committed
Added code for chapter 5
1 parent 4c8e34c commit 50bfd19

File tree

7 files changed

+941
-0
lines changed

7 files changed

+941
-0
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:da1d1d0a3525d88d6a1a9f3c3e952828c69f420cc6272f4ed1cd45e348d28b22
3+
size 385918

Chapter-5/contractions.py

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Created on Mon Aug 01 01:11:02 2016
4+
5+
@author: DIP
6+
"""
7+
8+
# Lookup table from English contractions to their expanded forms.
# Keys are case-sensitive: first-person forms appear both as "I'..." and
# "i'...". Each contraction maps to exactly one expansion, so ambiguous
# forms such as "'s" and "'d" are always expanded the same way
# (e.g. "he's" -> "he is", "he'd" -> "he would").
CONTRACTION_MAP = {
    "ain't": "is not",
    "aren't": "are not",
    "can't": "cannot",
    "can't've": "cannot have",
    "'cause": "because",
    "could've": "could have",
    "couldn't": "could not",
    "couldn't've": "could not have",
    "didn't": "did not",
    "doesn't": "does not",
    "don't": "do not",
    "hadn't": "had not",
    "hadn't've": "had not have",
    "hasn't": "has not",
    "haven't": "have not",
    "he'd": "he would",
    "he'd've": "he would have",
    "he'll": "he will",
    # Fixed: the expansion previously read "he he will have".
    "he'll've": "he will have",
    "he's": "he is",
    "how'd": "how did",
    "how'd'y": "how do you",
    "how'll": "how will",
    "how's": "how is",
    "I'd": "I would",
    "I'd've": "I would have",
    "I'll": "I will",
    "I'll've": "I will have",
    "I'm": "I am",
    "I've": "I have",
    "i'd": "i would",
    "i'd've": "i would have",
    "i'll": "i will",
    "i'll've": "i will have",
    "i'm": "i am",
    "i've": "i have",
    "isn't": "is not",
    "it'd": "it would",
    "it'd've": "it would have",
    "it'll": "it will",
    "it'll've": "it will have",
    "it's": "it is",
    "let's": "let us",
    "ma'am": "madam",
    "mayn't": "may not",
    "might've": "might have",
    "mightn't": "might not",
    "mightn't've": "might not have",
    "must've": "must have",
    "mustn't": "must not",
    "mustn't've": "must not have",
    "needn't": "need not",
    "needn't've": "need not have",
    "o'clock": "of the clock",
    "oughtn't": "ought not",
    "oughtn't've": "ought not have",
    "shan't": "shall not",
    "sha'n't": "shall not",
    "shan't've": "shall not have",
    "she'd": "she would",
    "she'd've": "she would have",
    "she'll": "she will",
    "she'll've": "she will have",
    "she's": "she is",
    "should've": "should have",
    "shouldn't": "should not",
    "shouldn't've": "should not have",
    "so've": "so have",
    "so's": "so as",
    "that'd": "that would",
    "that'd've": "that would have",
    "that's": "that is",
    "there'd": "there would",
    "there'd've": "there would have",
    "there's": "there is",
    "they'd": "they would",
    "they'd've": "they would have",
    "they'll": "they will",
    "they'll've": "they will have",
    "they're": "they are",
    "they've": "they have",
    "to've": "to have",
    "wasn't": "was not",
    "we'd": "we would",
    "we'd've": "we would have",
    "we'll": "we will",
    "we'll've": "we will have",
    "we're": "we are",
    "we've": "we have",
    "weren't": "were not",
    "what'll": "what will",
    "what'll've": "what will have",
    "what're": "what are",
    "what's": "what is",
    "what've": "what have",
    "when's": "when is",
    "when've": "when have",
    "where'd": "where did",
    "where's": "where is",
    "where've": "where have",
    "who'll": "who will",
    "who'll've": "who will have",
    "who's": "who is",
    "who've": "who have",
    "why's": "why is",
    "why've": "why have",
    "will've": "will have",
    "won't": "will not",
    "won't've": "will not have",
    "would've": "would have",
    "wouldn't": "would not",
    "wouldn't've": "would not have",
    "y'all": "you all",
    "y'all'd": "you all would",
    "y'all'd've": "you all would have",
    "y'all're": "you all are",
    "y'all've": "you all have",
    "you'd": "you would",
    "you'd've": "you would have",
    "you'll": "you will",
    "you'll've": "you will have",
    "you're": "you are",
    "you've": "you have",
}
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Created on Sun Sep 04 15:24:26 2016
4+
5+
@author: DIP
6+
"""
7+
8+
from normalization import normalize_corpus, parse_document
9+
from utils import build_feature_matrix, low_rank_svd
10+
import numpy as np
11+
12+
13+
# Short multi-sentence passage about elephants. It is the sample document
# fed to parse_document() for the step-by-step summarization walkthroughs
# that follow. The text starts and ends with a newline.
toy_text = """
Elephants are large mammals of the family Elephantidae
and the order Proboscidea. Two species are traditionally recognised,
the African elephant and the Asian elephant. Elephants are scattered
throughout sub-Saharan Africa, South Asia, and Southeast Asia. Male
African elephants are the largest extant terrestrial animals. All
elephants have a long trunk used for many purposes,
particularly breathing, lifting water and grasping objects. Their
incisors grow into tusks, which can serve as weapons and as tools
for moving objects and digging. Elephants' large ear flaps help
to control their body temperature. Their pillar-like legs can
carry their great weight. African elephants have larger ears
and concave backs while Asian elephants have smaller ears
and convex or level backs.
"""
28+
29+
30+
from gensim.summarization import summarize, keywords
31+
32+
def text_summarization_gensim(text, summary_ratio=0.5):
    """Print a gensim-generated summary of ``text``, one item per line.

    ``summarize`` is called with ``split=True`` and the result is iterated
    item by item rather than treated as a single string. Nothing is
    returned; the summary is only written to stdout.

    Args:
        text: The document to summarize, as a single string.
        summary_ratio: Forwarded to ``summarize`` as ``ratio``
            (presumably the fraction of the input to keep -- see the
            gensim documentation).
    """
    picked = summarize(text, ratio=summary_ratio, split=True)
    for line in picked:
        # Single-argument parenthesised print behaves the same on
        # Python 2 and Python 3.
        print(line)
37+
38+
# --- gensim summary of the toy document ---------------------------------
docs = parse_document(toy_text)
text = ' '.join(docs)
text_summarization_gensim(text, summary_ratio=0.4)


# --- step-by-step LSA summarization of the toy document -----------------
# Settings for the walkthrough: how many sentences to keep, how many
# singular values/vectors to request, and the relative cut-off applied to
# the singular values.
num_sentences = 3
num_topics = 2
sv_threshold = 0.5

sentences = parse_document(toy_text)
norm_sentences = normalize_corpus(sentences, lemmatize=False)

total_sentences = len(norm_sentences)
print('{} {}'.format('Total Sentences in Document:', total_sentences))

# NOTE(review): the features are built from the raw `sentences`, not from
# `norm_sentences` computed just above -- confirm this is intended.
vec, dt_matrix = build_feature_matrix(sentences,
                                      feature_type='frequency')

# Transpose the matrix returned by build_feature_matrix, then multiply
# element-wise by a positivity mask so only entries > 0 are retained.
td_matrix = dt_matrix.transpose()
td_matrix = td_matrix.multiply(td_matrix > 0)

u, s, vt = low_rank_svd(td_matrix, singular_count=num_topics)

# Zero out every singular value below `sv_threshold` times the largest one.
min_sigma_value = sv_threshold * max(s)
s[s < min_sigma_value] = 0

# Per-column score: sqrt(sum_k s[k]^2 * vt[k, j]^2).
salience_scores = np.sqrt(np.square(s).dot(np.square(vt)))
print(np.round(salience_scores, 2))

# Take the `num_sentences` highest-scoring positions, then put them back
# into ascending (document) order.
top_sentence_indices = np.sort(salience_scores.argsort()[-num_sentences:])
print(top_sentence_indices)

for index in top_sentence_indices:
    print(sentences[index])
76+
77+
78+
def lsa_text_summarizer(documents, num_sentences=2,
                        num_topics=2, feature_type='frequency',
                        sv_threshold=0.5):
    """Print the top-scoring sentences found via a low-rank SVD.

    A feature matrix is built from ``documents``, transposed, and
    decomposed with ``low_rank_svd``. Singular values below
    ``sv_threshold * max(s)`` are zeroed, each column of ``vt`` is scored as
    ``sqrt(sum_k s[k]^2 * vt[k, j]^2)``, and the ``num_sentences`` best
    positions are printed in ascending index order.

    NOTE(review): the text that gets printed is read from the module-level
    ``sentences`` list, not from ``documents``. Callers must therefore keep
    ``sentences`` aligned index-for-index with ``documents``.

    Args:
        documents: Sequence of sentence strings passed to
            ``build_feature_matrix``.
        num_sentences: Number of sentences to print.
        num_topics: Passed to ``low_rank_svd`` as ``singular_count``.
        feature_type: Passed through to ``build_feature_matrix``.
        sv_threshold: Fraction of the largest singular value below which
            singular values are set to zero.

    Returns:
        None. Output goes to stdout.
    """
    _, doc_term = build_feature_matrix(documents,
                                       feature_type=feature_type)

    # Transpose, then keep only strictly positive entries via an
    # element-wise multiply with a boolean mask.
    term_doc = doc_term.transpose()
    term_doc = term_doc.multiply(term_doc > 0)

    _, sigma, v_t = low_rank_svd(term_doc, singular_count=num_topics)
    cutoff = max(sigma) * sv_threshold
    sigma[sigma < cutoff] = 0

    salience = np.sqrt(np.square(sigma).dot(np.square(v_t)))

    # Highest-scoring positions, restored to ascending order.
    chosen = np.sort(salience.argsort()[-num_sentences:])

    for position in chosen:
        print(sentences[position])
98+
99+
100+
101+
102+
import networkx
103+
104+
# --- step-by-step TextRank summarization of the toy document ------------
num_sentences = 3

vec, dt_matrix = build_feature_matrix(norm_sentences,
                                      feature_type='tfidf')

# Matrix product of the feature matrix with its own transpose: one row and
# one column per input sentence.
similarity_matrix = dt_matrix * dt_matrix.T
print(np.round(similarity_matrix.todense(), 2))

# NOTE(review): from_scipy_sparse_matrix was removed in NetworkX 3.0;
# this script targets the older API.
similarity_graph = networkx.from_scipy_sparse_matrix(similarity_matrix)
networkx.draw_networkx(similarity_graph)

scores = networkx.pagerank(similarity_graph)

# (score, node) pairs, best score first.
ranked_sentences = sorted(zip(scores.values(), scores.keys()),
                          reverse=True)

# Node ids of the `num_sentences` best-ranked entries, in ascending order.
top_sentence_indices = sorted(ranked_sentences[rank][1]
                              for rank in range(num_sentences))
print(top_sentence_indices)

for index in top_sentence_indices:
    print(sentences[index])
129+
130+
131+
def textrank_text_summarizer(documents, num_sentences=2,
                             feature_type='frequency'):
    """Print the top-ranked sentences using PageRank over sentence similarity.

    A feature matrix is built from ``documents``, multiplied by its own
    transpose to obtain a sentence-by-sentence similarity matrix, turned
    into a graph, and scored with ``networkx.pagerank``. The
    ``num_sentences`` highest-scoring sentence indices are printed in
    ascending index order.

    Fixes over the previous version:
      * ``documents`` and ``feature_type`` are now actually used. Before,
        the function ignored both and always read the module-level
        ``norm_sentences`` with ``feature_type='tfidf'``.
      * Asking for more sentences than are available no longer raises
        ``IndexError``; every available sentence is printed instead.

    NOTE(review): the text that gets printed is still read from the
    module-level ``sentences`` list, matching ``lsa_text_summarizer``.
    Callers must keep ``sentences`` aligned index-for-index with
    ``documents``.

    Args:
        documents: Sequence of sentence strings passed to
            ``build_feature_matrix``.
        num_sentences: Maximum number of sentences to print.
        feature_type: Passed through to ``build_feature_matrix``.

    Returns:
        None. Output goes to stdout.
    """
    _, dt_matrix = build_feature_matrix(documents,
                                        feature_type=feature_type)
    similarity_matrix = dt_matrix * dt_matrix.T

    similarity_graph = networkx.from_scipy_sparse_matrix(similarity_matrix)
    scores = networkx.pagerank(similarity_graph)

    # (score, node) pairs, best score first.
    ranked_sentences = sorted(((score, index)
                               for index, score in scores.items()),
                              reverse=True)

    # Slicing (rather than indexing 0..num_sentences-1) tolerates
    # num_sentences being larger than the number of ranked sentences.
    top_sentence_indices = sorted(index for _score, index
                                  in ranked_sentences[:num_sentences])

    for index in top_sentence_indices:
        print(sentences[index])
152+
153+
154+
# Sample multi-paragraph document (a description of the video game Skyrim)
# used to exercise the summarizer functions.
# Fixed: the sentence ending "...to warp to previously discovered
# locations." had been split, with its tail fused onto "...character's
# appearance."; the two sentences are restored to their proper order.
DOCUMENT = """
The Elder Scrolls V: Skyrim is an open world action role-playing video game
developed by Bethesda Game Studios and published by Bethesda Softworks.
It is the fifth installment in The Elder Scrolls series, following
The Elder Scrolls IV: Oblivion. Skyrim's main story revolves around
the player character and their effort to defeat Alduin the World-Eater,
a dragon who is prophesied to destroy the world.
The game is set two hundred years after the events of Oblivion
and takes place in the fictional province of Skyrim. The player completes quests
and develops the character by improving skills.
Skyrim continues the open world tradition of its predecessors by allowing the
player to travel anywhere in the game world at any time, and to
ignore or postpone the main storyline indefinitely. The player may freely roam
over the land of Skyrim, which is an open world environment consisting
of wilderness expanses, dungeons, cities, towns, fortresses and villages.
Players may navigate the game world more quickly by riding horses,
or by utilizing a fast-travel system which allows them to warp to previously
discovered locations.
Players have the option to develop their character. At the beginning of the game,
players create their character by selecting one of several races,
including humans, orcs, elves and anthropomorphic cat or lizard-like creatures,
and then customizing their character's appearance. Over the
course of the game, players improve their character's skills, which are numerical
representations of their ability in certain areas. There are eighteen skills
divided evenly among the three schools of combat, magic, and stealth.
Skyrim is the first entry in The Elder Scrolls to include Dragons in the game's
wilderness. Like other creatures, Dragons are generated randomly in the world
and will engage in combat.
"""
182+
183+
184+
# --- run both summarizers on DOCUMENT ------------------------------------
# `sentences` and `norm_sentences` are rebound here on purpose: both
# summarizer functions print from the module-level `sentences`, so it must
# refer to the same document that is passed in below.
sentences = parse_document(DOCUMENT)
norm_sentences = normalize_corpus(sentences, lemmatize=True)
print('{} {}'.format("Total Sentences:", len(norm_sentences)))

lsa_text_summarizer(
    norm_sentences,
    num_sentences=3,
    num_topics=5,
    feature_type='frequency',
    sv_threshold=0.5,
)

textrank_text_summarizer(
    norm_sentences,
    num_sentences=3,
    feature_type='tfidf',
)

0 commit comments

Comments
 (0)