names mvp

jrloom · jrloom · Mar 27, 2020 · Mar 27, 2020 · Mar 27, 2020 · Mar 27, 2020
commit 4ae767d6df618c07bc48a02b5c8db1dc924ea7f8
diff --git a/names/names.py b/names/names.py
@@ -1,26 +1,43 @@
 import time
+from binary_search_tree import BinarySearchTree
 
 start_time = time.time()
 
-f = open('names_1.txt', 'r')
+f = open("names_1.txt", "r")
 names_1 = f.read().split("\n")  # List containing 10000 names
 f.close()
 
-f = open('names_2.txt', 'r')
+f = open("names_2.txt", "r")
 names_2 = f.read().split("\n")  # List containing 10000 names
 f.close()
 
 duplicates = []  # Return the list of duplicates in this data structure
 
 # Replace the nested for loops below with your improvements
-for name_1 in names_1:
-    for name_2 in names_2:
-        if name_1 == name_2:
-            duplicates.append(name_1)
+# for name_1 in names_1:
+#     for name_2 in names_2:
+#         if name_1 == name_2:
+#             duplicates.append(name_1)
+
+bst = BinarySearchTree(names_1[0])
+for name in names_1[1:]:
+    bst.insert(name)
+
+duplicates = []
+for name in names_2:
+    if bst.contains(name):
+        duplicates.append(name)
 
 end_time = time.time()
-print (f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
-print (f"runtime: {end_time - start_time} seconds")
+print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
+print(f"runtime: {end_time - start_time} seconds")
+
+# * Big O
+
+# * Original: 6.5s, O(n * m) -- effectively O(n^2), since both lists hold about the same number of names.
+# * The outer loop visits every name in the first list.
+# * For each of those names, the inner loop scans the entire second list, so every pair of names is compared.
+
 
 # ---------- Stretch Goal -----------
 # Python has built-in tools that allow for a very efficient approach to this problem