carlson9
diff --git a/‎Day7/DataStructure.py‎
Lines changed: 1 addition & 0 deletions b/‎Day7/DataStructure.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎Day7/DataStructure.py~‎
Lines changed: 1 addition & 1 deletion b/‎Day7/DataStructure.py~‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎Day7/lab7_dc.py‎
Lines changed: 169 additions & 0 deletions b/‎Day7/lab7_dc.py‎
Lines changed: 169 additions & 0 deletions
diff --git a/‎Day7/map-reduce.py‎
Lines changed: 1 addition & 1 deletion b/‎Day7/map-reduce.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎Day7/map-reduce.py~‎
Lines changed: 5 additions & 0 deletions b/‎Day7/map-reduce.py~‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎Day8/lab.py‎
Lines changed: 29 additions & 0 deletions b/‎Day8/lab.py‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎Day8/lab.py~‎
Lines changed: 29 additions & 0 deletions b/‎Day8/lab.py~‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎Day8/naivebayes.py‎
Lines changed: 128 additions & 0 deletions b/‎Day8/naivebayes.py‎
Lines changed: 128 additions & 0 deletions
@@ -155,6 +155,7 @@ def add_branch(self,node,children):
 mytree.branches
 mytree.add_branch(node1,[node2,node3])
 mytree.add_branch(node2,[node4,node5])
+mytree.add_branch(node3,[node4,node5])
 
 ###				Graph				###
 
 
@@ -4,7 +4,7 @@
 
 my_tuple=(1,'b',3,'d',5,'b')
 
-mytuple[0] #Gives the element with index number 0
+my_tuple[0] #Gives the element with index number 0
 my_tuple.index('b') #Gives the index of 'b' - only the first occurence!
 my_tuple.count('b') #Gives the number of times 'b' occurs
 
 
@@ -0,0 +1,169 @@
+"""Data Structures
+Working with Graphs/Networks"""
+
+def makeLink(G, node1, node2):
+  """Record an undirected link between node1 and node2 in graph G.
+
+  G is a dict mapping each node to a dict of its neighbours (value 1).
+  Either endpoint is added to G if it is not there yet. G is modified in
+  place and also returned.
+  """
+  # Store the link once per direction so it can be looked up from either end.
+  for here, there in ((node1, node2), (node2, node1)):
+    G.setdefault(here, {})[there] = 1
+  return G
+
+# Ring Network
+n = 5  # number of nodes
+ring = {}  # empty graph
+
+# Join every node to its successor; the modulo wraps node n-1 back to node 0.
+for i in range(n):
+  successor = (i + 1) % n
+  ring = makeLink(ring, i, successor)
+
+# How many nodes?
+node_count = len(ring)
+print node_count
+
+# How many edges? Each link is stored once per endpoint, so halve the total.
+degree_total = sum(len(neighbours) for neighbours in ring.values())
+print degree_total/2
+
+
+# Grid Network
+# TODO: create a square graph with 256 nodes and count the edges
+# First attempt: nodes are numbered 0..255 row by row, so the cell in row i,
+# column k has id 16*i+k. The inner loop links each cell in rows/columns
+# 0..14 to its right and lower neighbour; the two statements after it add the
+# links along the last column and the last row.
+square = {}
+n=15
+for i in range(n):
+    for k in range(n):
+        square = makeLink(square, 16*i+k, 16*i+k+1)
+        square = makeLink(square, 16*i+k, 16*(i+1)+k)
+    square = makeLink(square, 16*i+15, 16*(i+1)+15)
+    square = makeLink(square, 16*15+i, 16*15+i+1)
+    
+# The graph built above is discarded here and rebuilt with nodes 1..256.
+square={}
+	
+# Second attempt: i%16!=0 means node i is not the last in its row, so it has a
+# right-hand neighbour; (i-1)/16<15 means it is not in the last row, so it has
+# a neighbour 16 positions later. Node 256 is only ever reached as a neighbour.
+for i in range(1,256):
+	if i%16!=0:
+		makeLink(square,i,i+1)
+	if (i-1)/16<15:
+		makeLink(square,i,i+16)
+		
+def make_square_graph(n):
+    """Build an n-by-n grid graph with nodes numbered 1..n**2, row by row.
+
+    Each node is linked to the node on its right (unless it ends a row) and
+    to the node n positions later (unless it sits in the last row). Returns
+    the graph as a dict of neighbour dicts. For n <= 1 the loop never runs
+    and an empty dict is returned.
+    """
+    square = {}
+    last_node = n ** 2
+    # The final node has no right or lower neighbour, so it is not visited.
+    for i in range(1, last_node):
+        row = (i - 1) // n
+        ends_row = (i % n == 0)
+        if not ends_row:
+            makeLink(square, i, i + 1)
+        if row < n - 1:
+            makeLink(square, i, i + n)
+    return square
+
+
+# TODO: define a function countEdges
+def count_edges(graph):
+    """Return the number of undirected links in a dict-of-dicts graph.
+
+    Every link between two distinct nodes is stored once per endpoint, so
+    the neighbour counts are summed and halved. Floor division keeps the
+    result an integer. An empty graph yields 0 instead of raising.
+    """
+    return sum(len(neighbours) for neighbours in graph.values()) // 2
+
+# count_edges halves the summed neighbour counts of the grid held in `square`.
+print "There are %d edges in the square"%count_edges(square)
+
+# Social Network
+class Actor(object):
+  """A performer, used as a node in the co-star graph."""
+
+  def __init__(self, name):
+    # The name is the only state; __repr__ reads it back.
+    self.name = name
+
+  def __repr__(self):
+    """Show the bare name so printed paths and tours stay readable."""
+    label = self.name
+    return label
+
+# The cast: one Actor node per performer.
+ss = Actor("Susan Sarandon")
+jr = Actor("Julia Roberts")
+kb = Actor("Kevin Bacon")
+ah = Actor("Anne Hathaway")
+rd = Actor("Robert De Niro")
+ms = Actor("Meryl Streep")
+dh = Actor("Dustin Hoffman")
+
+# Co-star graph: a link joins two actors who appeared in the film named in
+# the trailing comment. Actor defines no __eq__/__hash__, so the dict keys
+# are these exact objects.
+movies = {}
+
+makeLink(movies, dh, rd) # Wag the Dog
+makeLink(movies, rd, ms) # Marvin's Room
+makeLink(movies, dh, ss) # Moonlight Mile
+makeLink(movies, dh, jr) # Hook
+makeLink(movies, dh, kb) # Sleepers
+makeLink(movies, ss, jr) # Stepmom
+makeLink(movies, kb, jr) # Flatliners
+makeLink(movies, kb, ms) # The River Wild
+makeLink(movies, ah, ms) # Devil Wears Prada
+makeLink(movies, ah, jr) # Valentine's Day
+
+# How many nodes in movies?
+print "There are %d nodes in movies"%len(movies)
+# How many edges in movies?
+print "There are %d edges in movies"%count_edges(movies)
+
+def tour(graph, nodes):
+  """Walk `nodes` in order, printing each one while the walk stays valid.
+
+  Prints "Node not found!" and stops at the first node that is not a key of
+  `graph`. Prints "Can't get there from here!" and stops when the following
+  node is in the graph but is not a neighbour of the current node. It does
+  not check whether a link is used more than once. Returns None.
+  """
+  final_position = len(nodes) - 1
+  for position, node in enumerate(nodes):
+    if node not in graph:
+      print "Node not found!"
+      return
+    print node
+    if position == final_position:
+      continue
+    following = nodes[position + 1]
+    # A following node that is missing from the graph is reported by the
+    # next iteration, not here.
+    if following in graph and following not in graph[node]:
+      print "Can't get there from here!"
+      return
+
+# TODO: find an Eulerian tour of the movie network and check it 
+# This walk uses each of the ten links added to `movies` exactly once,
+# starting at Kevin Bacon and ending at Meryl Streep. tour() only confirms
+# that consecutive actors are linked; it does not check for repeated links.
+movie_tour = [kb,ms,rd,dh,kb,jr,dh,ss,jr,ah,ms] 
+tour(movies, movie_tour)
+
+
+def findPath(graph, start, end, path=None):
+    """Return some path from `start` to `end` as a list of nodes, or None.
+
+    Depth-first search over a dict-of-dicts graph. The first path found is
+    returned, which is not necessarily the shortest. `path` holds the nodes
+    already visited on the way here; callers normally leave it out.
+    """
+    # Build a new list each call so the caller's list is never mutated.
+    path = ([] if path is None else path) + [start]
+    if start == end:
+        return path
+    if start not in graph:
+        return None
+    for node in graph[start]:
+        # Skip nodes already on this path so the search cannot loop.
+        if node not in path:
+            found = findPath(graph, node, end, path)
+            if found:
+                return found
+    return None
+
+# Depth-first search from Julia Roberts to Meryl Streep; prints the first path found.
+print findPath(movies, jr, ms)
+
+
+# TODO: implement findShortestPath()
+def findShortestPath(graph, start, end, path=None):
+    """Return a shortest path from `start` to `end` as a list of nodes, or None.
+
+    Explores every simple path recursively and keeps the one with the fewest
+    nodes. When several paths tie, the first one met in the graph's
+    iteration order wins. `path` holds the nodes already visited on the way
+    here; callers normally leave it out.
+    """
+    # Build a new list each call so the caller's list is never mutated.
+    path = ([] if path is None else path) + [start]
+    if start == end:
+        return path
+    if start not in graph:
+        return None
+    shortest = None
+    for node in graph[start]:
+        if node in path:
+            continue  # already on this path; revisiting would loop
+        candidate = findShortestPath(graph, node, end, path)
+        # Dead ends come back as None; strict "<" keeps the earliest tie.
+        if candidate and (shortest is None or len(candidate) < len(shortest)):
+            shortest = candidate
+    return shortest
+
+# Shortest co-star chains: Meryl Streep to Susan Sarandon, then Robert De Niro to Anne Hathaway.
+print findShortestPath(movies, ms, ss)
+print findShortestPath(movies, rd, ah)
+
+# TODO: implement findAllPaths() to find all paths between two nodes
+def findAllPaths(graph, start, end, path=None):
+    """Return every simple path from `start` to `end` as a flat list of paths.
+
+    Each path is a list of nodes. The result is an empty list when no path
+    exists, so it can always be iterated. `path` holds the nodes already
+    visited on the way here; callers normally leave it out.
+    """
+    # Build a new list each call so the caller's list is never mutated.
+    path = ([] if path is None else path) + [start]
+    if start == end:
+        # Wrap the finished path so every level returns a list of paths.
+        return [path]
+    if start not in graph:
+        return []
+    paths = []
+    for node in graph[start]:
+        if node not in path:
+            # extend, not append: keeps the result one level deep however
+            # many hops the path has.
+            paths.extend(findAllPaths(graph, node, end, path))
+    return paths
+# Collect the search result for Julia Roberts -> Meryl Streep and print each
+# element of it on its own line.
+allPaths = findAllPaths(movies, jr, ms)
+for path in allPaths:
+   print path
@@ -11,7 +11,7 @@ def cub(x): return x**3
 	mylist.append(sqr(x))
 
 mylist=map(sqr, items)
-mylist=map((lambda x: x **2), items)
+mylist=map(lambda x: x **2, items)
 
 funcs = [sqr, cub]
 for i in items:
 
@@ -39,4 +39,9 @@ g = make_incrementor(6)
 
 print f(42), g(42)
 
+# Trial division: for each i in 2..7 keep i itself and every value that
+# leaves a remainder when divided by i.
+nums = range(2, 50)
+for i in range(2, 8):
+    nums = [x for x in nums if x == i or x % i]
+
+print nums
 
@@ -0,0 +1,29 @@
+import re
+
+# open text file of 2008 NH primary Obama speech
+# The with-block closes the file even if reading fails, and the handle gets
+# its own name so the Python 2 builtin `file` is not shadowed.
+with open("obama-nh.txt", "r") as speech_file:
+  text = speech_file.readlines()
+
+# compile the regular expression
+keyword = re.compile(r"the ")
+
+# search file for keyword, line by line
+# (readlines() keeps each trailing newline and print adds another, so the
+# matches come out double-spaced)
+for line in text:
+  if keyword.search(line):
+    print line
+
+# TODO: print all lines that DO NOT contain "the "
+# TODO: print lines that contain a word of any length starting with s and ending with e
+  
+# date = raw_input("Please enter a date in the format MM.DD.YY: ")
+# Print the date input in the following format:
+# Month: MM
+# Day: DD
+# Year: YY
+
+# TODO: Write a regular expression that finds html tags in example.html and print them.
+
+# TODO: Scrape a website and search for some things...
+
+
@@ -0,0 +1,29 @@
+import re
+
+# open text file of 2008 NH primary Obama speech
+# The with-block closes the file even if reading fails, and the handle gets
+# its own name so the Python 2 builtin `file` is not shadowed.
+with open("obama-nh.txt", "r") as speech_file:
+  text = speech_file.readlines()
+
+# compile the regular expression
+keyword = re.compile(r"the ")
+
+# search file for keyword, line by line
+# (readlines() keeps each trailing newline and print adds another, so the
+# matches come out double-spaced)
+for line in text:
+  if keyword.search(line):
+    print line
+
+# TODO: print all lines that DO NOT contain "the "
+# TODO: print lines that contain a word of any length starting with s and ending with e
+  
+# date = raw_input("Please enter a date in the format MM.DD.YY: ")
+# Print the date input in the following format:
+# Month: MM
+# Day: DD
+# Year: YY
+
+# TODO: Write a regular expression that finds html tags in example.html and print them.
+
+# TODO: Scrape a website and search for some things...
+
+
@@ -0,0 +1,128 @@
+# Some docs for this library: http://nltk.org/api/nltk.classify.html#module-nltk.classify.naivebayes
+# pip install nltk
+
+import nltk
+nltk.download('names')
+from nltk.corpus import names
+import random
+
+# Build (name, label) pairs from the two corpus word lists. The right-hand
+# side is evaluated first, so `names` still refers to the corpus reader there;
+# afterwards `names` is rebound to the combined list of pairs.
+names = ([(name, 'male') for name in names.words('male.txt')] +
+  [(name, 'female') for name in names.words('female.txt')])
+
+# Shuffle in place so the slices taken later mix both labels. No seed is set
+# in the visible code, so the order can differ from run to run.
+random.shuffle(names)
+
+# Our simple feature
+def gender_features(word):
+  """Return a one-entry feature dict holding the final character of `word`."""
+  features = {}
+  features['last_letter'] = word[-1]
+  return features
+
+# Label every name by its last letter, hold out the first 500 pairs for
+# testing and train on the rest.
+featuresets = [(gender_features(n), g) for (n,g) in names]
+train_set, test_set = featuresets[500:], featuresets[:500]
+classifier = nltk.NaiveBayesClassifier.train(train_set)
+
+# Print the predictions: as bare expressions in a script the results would
+# be computed and then thrown away.
+for sample in ('Neo', 'Trinity', 'Max', 'Lucy'):
+  print sample, classifier.classify(gender_features(sample))
+
+# Check the overall accuracy
+print nltk.classify.accuracy(classifier, test_set)
+
+# Lets see what is driving this
+classifier.show_most_informative_features(5)
+
+
+# Lets be smarter
+def gender_features2(name):
+  """Return a richer feature dict for `name`.
+
+  Holds the lower-cased first and last characters plus, for each letter
+  a-z, how often it occurs in the lower-cased name ("count(x)") and whether
+  it occurs at all ("has(x)").
+  """
+  features = {
+    "firstletter": name[0].lower(),
+    "lastletter": name[-1].lower(),
+  }
+  # Lower-case once instead of once per letter test.
+  lowered = name.lower()
+  for letter in 'abcdefghijklmnopqrstuvwxyz':
+    occurrences = lowered.count(letter)
+    features["count(%s)" % letter] = occurrences
+    features["has(%s)" % letter] = (letter in lowered)
+  return features
+
+# Same 500-pair hold-out as before, now with the gender_features2 features.
+# This rebinds featuresets, train_set, test_set and classifier.
+featuresets = [(gender_features2(n), g) for (n,g) in names]
+train_set, test_set = featuresets[500:], featuresets[:500]
+classifier = nltk.NaiveBayesClassifier.train(train_set)
+print nltk.classify.accuracy(classifier, test_set)
+
+classifier.show_most_informative_features(100)
+
+
+# Still not great.... How can we refine?
+# Three-way split of the shuffled names: the first 500 are held out as the
+# test set, the next 1000 form a dev-test set, and everything from index
+# 1500 onward is training data. All three use gender_features2.
+train_names = names[1500:]
+devtest_names = names[500:1500]
+test_names = names[:500]
+train_set = [(gender_features2(n), g) for (n,g) in train_names]
+devtest_set = [(gender_features2(n), g) for (n,g) in devtest_names]
+test_set = [(gender_features2(n), g) for (n,g) in test_names]
+classifier = nltk.NaiveBayesClassifier.train(train_set)
+# Accuracy is measured on the dev-test set here, not on test_set.
+print nltk.classify.accuracy(classifier, devtest_set)
+
+# Lets look at the errors and see if we can do better
+# Collect every dev-test name the current classifier labels wrongly.
+errors = []
+for (name, tag) in devtest_names:
+  # The classifier in scope was trained on gender_features2 features, so the
+  # same extractor must be used when classifying.
+  guess = classifier.classify(gender_features2(name))
+  if guess != tag:
+    errors.append( (tag, guess, name) )
+
+# Report once, after all names are checked, instead of reprinting the whole
+# list every time a new error is found.
+for (tag, guess, name) in sorted(errors):
+  print 'correct=%-8s guess=%-8s name=%-30s' % (tag, guess, name)
+
+# yn seems to be female even though n seems to be male.  ch tends to be male even though h is female
+def gender_features(word):
+  """Return the last one and last two characters of `word` as features.
+
+  Slices are used, so a word shorter than the suffix yields whatever
+  characters it has (an empty string for an empty word).
+  """
+  features = {}
+  features['suffix1'] = word[-1:]
+  features['suffix2'] = word[-2:]
+  return features
+# Rebuild the training and dev-test sets with the suffix features and retrain.
+# test_set is not rebuilt here, so it still holds the earlier feature dicts.
+train_set = [(gender_features(n), g) for (n,g) in train_names]
+devtest_set = [(gender_features(n), g) for (n,g) in devtest_names]
+classifier = nltk.NaiveBayesClassifier.train(train_set)
+print nltk.classify.accuracy(classifier, devtest_set)
+
+
+# Now lets look at some bigger documents
+from nltk.corpus import movie_reviews
+nltk.download('movie_reviews')
+# One (word list, category) pair per review file, for every category the
+# corpus reports; shuffled in place so the later slices mix categories.
+documents = [(list(movie_reviews.words(fileid)), category)
+              for category in movie_reviews.categories()
+              for fileid in movie_reviews.fileids(category)]
+random.shuffle(documents)
+
+# Frequency table over the lower-cased words of the whole corpus.
+all_words = nltk.FreqDist(w.lower() for w in movie_reviews.words())
+# NOTE(review): this keeps the first 2000 keys in whatever order keys()
+# yields. Presumably the 2000 most frequent words are intended; confirm that
+# the installed NLTK orders FreqDist keys by frequency.
+word_features = all_words.keys()[:2000]
+
+def document_features(document):
+  """Return a 'contains(word)' -> bool feature dict for one document.
+
+  There is one entry per word in the module-level `word_features` list,
+  True when that word occurs in `document`.
+  """
+  # A set makes each membership test cheap.
+  present = set(document)
+  return dict(('contains(%s)' % word, (word in present))
+              for word in word_features)
+
+# Show the feature dict produced for a single review file.
+print document_features(movie_reviews.words('pos/cv957_8737.txt'))
+
+# Featurise every document, hold out the first 100 for testing and train on
+# the rest. This rebinds featuresets, train_set, test_set and classifier.
+featuresets = [(document_features(d), c) for (d,c) in documents]
+train_set, test_set = featuresets[100:], featuresets[:100]
+classifier = nltk.NaiveBayesClassifier.train(train_set)
+
+print nltk.classify.accuracy(classifier, test_set)
+
+classifier.show_most_informative_features(5)
+
+# Copyright (c) 2014 Matt Dickenson
+# 
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+# 
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+# 
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.