Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
name with sprint
  • Loading branch information
nomadic4life committed Jul 26, 2019
commit 4404d3667be1338e44a29915b3d96845426cf1e6
163 changes: 147 additions & 16 deletions names/names.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,75 @@
import time
from collections import Counter


class BinarySearchTree:
    """Binary search tree in which every node is the root of its own subtree.

    Values smaller than a node's value are stored in its left subtree; all
    other values (including equal ones) are stored in its right subtree.
    A root whose ``value`` is ``None`` represents an empty tree.

    All operations walk the tree iteratively, so a degenerate (list-shaped)
    tree built from already-sorted input does not exhaust the recursion limit.
    """

    def __init__(self, value):
        """Create a single-node tree holding ``value`` (``None`` = empty)."""
        self.value = value
        self.left = None
        self.right = None

    def insert(self, value):
        """Add ``value`` to the tree; duplicates go to the right subtree."""
        if self.value is None:
            # Empty tree: adopt the value in this node (an assignment, not a
            # comparison) instead of creating a child.
            self.value = value
            return

        node = self
        while True:
            if value < node.value:
                if node.left is None:
                    node.left = BinarySearchTree(value)
                    return
                node = node.left
            else:
                if node.right is None:
                    node.right = BinarySearchTree(value)
                    return
                node = node.right

    def contains(self, target):
        """Return True if ``target`` is stored in the tree, else False."""
        node = self
        while node is not None:
            if node.value == target:
                return True
            if node.value is None:
                # Empty tree: there is nothing to order ``target`` against.
                return False
            node = node.left if target < node.value else node.right
        return False

    def get_max(self):
        """Return the largest stored value (the right-most node's value)."""
        node = self
        while node.right is not None:
            node = node.right
        return node.value

    def for_each(self, cb):
        """Call ``cb(value)`` for every node: node first, then left, then right."""
        pending = [self]
        while pending:
            node = pending.pop()
            cb(node.value)
            # Push right before left so the left subtree is popped first.
            if node.right is not None:
                pending.append(node.right)
            if node.left is not None:
                pending.append(node.left)


# original solution
# Baseline: compare every name from the first file with every name from the
# second file (nested loops), timing the whole run including the file reads.
start_time = time.time()

# `with` closes each file even when reading raises.
with open('names_1.txt', 'r') as f:
    names_1 = f.read().split("\n")  # List containing 10000 names

with open('names_2.txt', 'r') as f:
    names_2 = f.read().split("\n")  # List containing 10000 names

duplicates = []
for name_1 in names_1:
    for name_2 in names_2:
        if name_1 == name_2:
            duplicates.append(name_1)

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# first pass solution
start_time = time.time()
Expand All @@ -43,7 +93,88 @@
counts[elem] = 1
continue
counts[elem] += 1
duplicates = [k for k, v in counts.items() if v > 1]
duplicates = [key for key, value in counts.items() if value > 1]

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")


# second pass solution with BST
# Load the second file into a binary search tree, then look up every name
# from the first file in it.
start_time = time.time()

# `with` closes each file even when reading raises.
with open('names_1.txt', 'r') as f:
    names_1 = f.read().split("\n")  # List containing 10000 names

with open('names_2.txt', 'r') as f:
    names_2 = f.read().split("\n")  # List containing 10000 names

# The first name seeds the root; the remaining names are inserted below it.
bts_names_2 = BinarySearchTree(names_2[0])

for name_2 in names_2[1:]:
    bts_names_2.insert(name_2)

duplicates = []

for name_1 in names_1:
    if bts_names_2.contains(name_1):
        duplicates.append(name_1)

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# stretch
# Sort the second list once so each name from the first list can be looked up
# with a binary search.
start_time = time.time()

# `with` closes each file even when reading raises.
with open('names_1.txt', 'r') as f:
    names_1 = f.read().split("\n")  # List containing 10000 names

with open('names_2.txt', 'r') as f:
    names_2 = f.read().split("\n")  # List containing 10000 names

names_2.sort()


def sub(arr, x):
    """Return True if ``x`` occurs in ``arr``, using a binary search.

    ``arr`` must already be sorted in ascending order. The search narrows
    ``[lo, hi)`` down to the left-most position whose element is not smaller
    than ``x`` and then checks whether that element equals ``x``.

    Returns False for an empty ``arr`` and when ``x`` is greater than every
    element, where the final position falls one past the end of ``arr``.
    """
    lo = 0
    hi = len(arr)
    while lo < hi:
        mid = (lo + hi) // 2
        if x <= arr[mid]:
            hi = mid
        else:
            lo = mid + 1
    # `lo` equals len(arr) when nothing in arr is >= x; guard the index.
    return lo < len(arr) and arr[lo] == x


# Keep, in first-file order, every name that the binary search finds in the
# sorted second list.
duplicates = [elem for elem in names_1 if sub(names_2, elem)]

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print(f"runtime: {end_time - start_time} seconds")

# stretch 2
# Find the common names with a multiset intersection of two Counters.
start_time = time.time()

# `with` closes each file even when reading raises.
with open('names_1.txt', 'r') as f:
    names_1 = f.read().split("\n")  # List containing 10000 names

with open('names_2.txt', 'r') as f:
    names_2 = f.read().split("\n")  # List containing 10000 names

# `&` keeps only names present in both Counters; list() yields each such
# name once.
duplicates = list(Counter(names_1) & Counter(names_2))

end_time = time.time()
print(f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
Expand Down