Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions robin/caesar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/usr/bin/python

import string
from quadgram import Scorer


# The fixed 26-letter A-Z cipher alphabet. ascii_uppercase is used instead of
# string.uppercase because the latter is locale-dependent (and does not exist
# in Python 3), which could silently change the size of the alphabet.
ALPHABET = list(string.ascii_uppercase)


def decrypt(key, text):
    """Undo a Caesar shift of ``key`` positions on ``text``.

    The text is upper-cased first; every character found in ALPHABET is
    replaced by the letter ``key`` positions earlier (wrapping around), and
    every other character is passed through unchanged.

    ``key`` may be any integer: it is reduced modulo the alphabet size so
    that, for example, 29 behaves like 3 instead of silently producing an
    unrotated alphabet.
    """
    shift = key % len(ALPHABET)
    # Map each cipher letter (the rotated alphabet) back to its plain letter.
    rotated = ALPHABET[shift:] + ALPHABET[:shift]
    table = dict(zip(rotated, ALPHABET))
    return ''.join(table.get(ch, ch) for ch in text.upper())


def solve(text, scorer=None):
    """Return the Caesar key whose decryption of ``text`` scores highest.

    Every key in ``range(len(ALPHABET))`` is tried; each candidate plaintext
    is rated with ``scorer.score`` and the first key reaching the highest
    score is returned. When ``scorer`` is None a pretrained ``Scorer`` is
    built with ``Scorer.make_pretrained()``.
    """
    if scorer is None:
        scorer = Scorer.make_pretrained()

    winner = None
    top = None
    for candidate in range(len(ALPHABET)):
        fitness = scorer.score(decrypt(candidate, text))
        # Keep the incumbent unless this candidate is strictly better.
        if winner is not None and not fitness > top:
            continue
        winner, top = candidate, fitness

    return winner


if __name__ == '__main__':
    # Command-line entry point: read the file named by the single argument,
    # find the best-scoring Caesar key, then print the key and the plaintext.
    import sys

    if len(sys.argv) != 2:
        sys.stderr.write(("Usage %s filename" % sys.argv[0]) + "\n")
    else:
        with open(sys.argv[1]) as f:
            text = f.read()
        key = solve(text)
        print(key)
        print(decrypt(key, text))
44 changes: 44 additions & 0 deletions robin/polysubstitution.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/bin/python

import string

# The fixed 26-letter A-Z alphabet used for letter filtering and counting.
# ascii_uppercase is used instead of string.uppercase because the latter is
# locale-dependent (and does not exist in Python 3).
ALPHABET = list(string.ascii_uppercase)


def get_letters(chars):
    """Yield the upper-cased form of each item of ``chars`` found in ALPHABET.

    Items whose upper-cased form is not in ALPHABET are dropped.
    """
    for raw in chars:
        candidate = raw.upper()
        if candidate not in ALPHABET:
            continue
        yield candidate


def get_freq_dicts(chars, period):
    """Count letter frequencies separately for each of ``period`` columns.

    The letters produced by ``get_letters(chars)`` are dealt round-robin into
    ``period`` columns (the i-th letter goes to column ``i % period``).
    Returns a list of ``period`` dicts, each mapping every letter of ALPHABET
    to the number of times it occurred in that column.
    """
    tallies = [dict.fromkeys(ALPHABET, 0) for _ in range(period)]

    position = 0
    for letter in get_letters(chars):
        tallies[position % period][letter] += 1
        position += 1

    return tallies


def get_index_of_coincidence(chars, period):
    """Return the average index of coincidence over ``period`` columns.

    The letters of ``chars`` are split into ``period`` columns by
    ``get_freq_dicts``; for each column the index of coincidence is
    ``sum(n * (n - 1)) / (N * (N - 1))`` where ``n`` runs over the letter
    counts and ``N`` is the number of letters in the column.

    Columns with fewer than two letters have no defined index (the
    denominator would be zero) and are left out of the average. If no column
    has a defined index, 0.0 is returned instead of raising
    ZeroDivisionError.
    """
    ics = []
    for freq_dict in get_freq_dicts(chars, period):
        counts = list(freq_dict.values())
        total = float(sum(counts))
        if total < 2:
            # N * (N - 1) would be zero: skip this column.
            continue
        ics.append(sum(count * (count - 1) for count in counts) / (total * (total - 1)))

    if not ics:
        return 0.0
    return sum(ics) / len(ics)


if __name__ == '__main__':
    # Command-line entry point: print the average index of coincidence of
    # the named file's text for every period from 1 to 19.
    import sys

    if len(sys.argv) != 2:
        sys.stderr.write(("Usage %s filename" % sys.argv[0]) + "\n")
    else:
        with open(sys.argv[1]) as f:
            text = f.read()
        for period in range(1, 20):
            print("%3d\t%04f" % (period, get_index_of_coincidence(text, period)))
86 changes: 86 additions & 0 deletions robin/quadgram.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
from collections import defaultdict
from math import log

class Scorer(object):
    """Rate text by the summed log-probability of its quadgrams.

    ``freqs`` maps a quadgram string to its count and ``total`` accumulates
    every count that has been added; ``count / total`` is used as the
    probability of a quadgram.
    """

    def __init__(self):
        self.total = 0
        self.freqs = defaultdict(int)

    @classmethod
    def make_pretrained(cls, filename=None):
        """Build a scorer from a frequency file.

        Each data line holds a quadgram and its integer count separated by
        whitespace. Lines starting with '#' and blank lines are ignored.
        When ``filename`` is None, "quadgram_freqs.txt" next to this module
        is used.
        """
        if filename is None:
            from os.path import dirname, join
            filename = join(dirname(__file__), "quadgram_freqs.txt")

        scorer = cls()
        with open(filename) as f:
            for line in f:
                if line.startswith('#'):
                    continue
                parts = line.split()
                if not parts:
                    # Blank line (e.g. trailing newline): nothing to load.
                    continue
                seq = parts[0]
                count = int(parts[1])
                scorer.freqs[seq] = count
                scorer.total += count
        return scorer

    def populate(self, chars):
        """Add one count for every quadgram found in ``chars``."""
        for seq in get_quadgrams(chars):
            self.total += 1
            self.freqs[seq] += 1

    def populate_from_file(self, filename):
        """Add counts for every quadgram in the file ``filename``."""
        self.populate(read_filechars(filename))

    def score(self, text):
        """Return the sum of log(count / total) over the quadgrams of ``text``.

        Quadgrams that were never seen, or whose stored count is not
        positive, are given a floor count of 0.1 so the logarithm stays
        defined. A zero entry can appear because merely reading
        ``freqs[seq]`` on the defaultdict inserts 0. Higher (less negative)
        results mean the text looks more like the training data.

        Raises ZeroDivisionError if ``total`` is 0 and ``text`` contains at
        least one quadgram.
        """
        p = 0.0
        divisor = float(self.total)
        for seq in get_quadgrams(text):
            # .get() avoids inserting missing keys into the defaultdict.
            count = self.freqs.get(seq, 0)
            if count <= 0:
                count = 0.1

            p += log(count / divisor)
        return p


def read_filechars(filename):
    """Yield the contents of the file ``filename`` one character at a time.

    The file is read in fixed-size chunks rather than with one ``read(1)``
    call per character; the sequence of yielded characters is unchanged.
    """
    chunk_size = 65536
    with open(filename) as f:
        while True:
            chunk = f.read(chunk_size)
            if not chunk:
                break
            for ch in chunk:
                yield ch


def get_quadgrams(chars):
    """Yield every 4-letter window of the letters in ``chars``.

    Alphabetic characters are upper-cased, digits are replaced by '.', and
    all other characters are dropped. A sliding window of the last four kept
    characters is maintained, and the window is yielded whenever it is a
    full four characters long and contains no '.', so quadgrams never span
    a digit.

    Partial windows of one to three characters at the start of the input are
    not yielded.
    """
    window = ""
    for ch in chars:
        if ch.isdigit():
            ch = '.'
        elif ch.isalpha():
            ch = ch.upper()
        else:
            continue

        # Keep only the most recent four characters; slicing from the end
        # keeps the window bounded even if upper() returned several chars.
        window = (window + ch)[-4:]

        if len(window) == 4 and '.' not in window:
            yield window


if __name__ == "__main__":
    # Command-line entry point: count the quadgrams of every file named on
    # the command line and write "QUADGRAM<TAB>COUNT" lines, sorted by
    # quadgram, to standard output.
    import sys

    scorer = Scorer()
    for filename in sys.argv[1:]:
        scorer.populate_from_file(filename)

    for quadgram, count in sorted(scorer.freqs.items()):
        sys.stdout.write("%s\t%d\n" % (quadgram, count))
Loading