Skip to content

Commit d9ee620

Browse files
authored
Merge pull request JoshData#18 from maciejsszmigiero/add-bottom-margin
Add bottom margin parameter like an existing top margin one
2 parents 9e06731 + f690244 commit d9ee620

File tree

1 file changed

+11
-7
lines changed

1 file changed

+11
-7
lines changed

pdf_diff/command_line.py

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -9,18 +9,18 @@
99
from lxml import etree
1010
from PIL import Image, ImageDraw, ImageOps
1111

12-
def compute_changes(pdf_fn_1, pdf_fn_2, top_margin=0):
12+
def compute_changes(pdf_fn_1, pdf_fn_2, top_margin=0, bottom_margin=100):
1313
# Serialize the text in the two PDFs.
14-
docs = [serialize_pdf(0, pdf_fn_1, top_margin), serialize_pdf(1, pdf_fn_2, top_margin)]
14+
docs = [serialize_pdf(0, pdf_fn_1, top_margin, bottom_margin), serialize_pdf(1, pdf_fn_2, top_margin, bottom_margin)]
1515

1616
# Compute differences between the serialized text.
1717
diff = perform_diff(docs[0][1], docs[1][1])
1818
changes = process_hunks(diff, [docs[0][0], docs[1][0]])
1919

2020
return changes
2121

22-
def serialize_pdf(i, fn, top_margin):
23-
box_generator = pdf_to_bboxes(i, fn, top_margin)
22+
def serialize_pdf(i, fn, top_margin, bottom_margin):
23+
box_generator = pdf_to_bboxes(i, fn, top_margin, bottom_margin)
2424
box_generator = mark_eol_hyphens(box_generator)
2525

2626
boxes = []
@@ -50,7 +50,7 @@ def serialize_pdf(i, fn, top_margin):
5050
text = "".join(text)
5151
return boxes, text
5252

53-
def pdf_to_bboxes(pdf_index, fn, top_margin=0):
53+
def pdf_to_bboxes(pdf_index, fn, top_margin=0, bottom_margin=100):
5454
# Get the bounding boxes of text runs in the PDF.
5555
# Each text run is returned as a dict.
5656
box_index = 0
@@ -77,6 +77,8 @@ def pdf_to_bboxes(pdf_index, fn, top_margin=0):
7777
for word in page.findall("{http://www.w3.org/1999/xhtml}word"):
7878
if float(word.get("yMax")) < (top_margin/100.0)*float(page.get("height")):
7979
continue
80+
if float(word.get("yMin")) > (bottom_margin/100.0)*float(page.get("height")):
81+
continue
8082

8183
yield {
8284
"index": box_index,
@@ -454,7 +456,9 @@ def main():
454456
parser.add_argument('-f', '--format', choices=['png','gif','jpeg','ppm','tiff'], default='png',
455457
help='output format in which to render (default: png)')
456458
parser.add_argument('-t', '--top-margin', metavar='margin', default=0., type=float,
457-
help='TODO (default 0.0)')
459+
help='top margin (ignored area) end in percent of page height (default 0.0)')
460+
parser.add_argument('-b', '--bottom-margin', metavar='margin', default=100., type=float,
461+
help='bottom margin (ignored area) begin in percent of page height (default 100.0)')
458462
args = parser.parse_args()
459463

460464
def invalid_usage(msg):
@@ -484,7 +488,7 @@ def invalid_usage(msg):
484488
if len(args.files) != 2:
485489
invalid_usage('Insufficient number of files to compare; please supply exactly 2.')
486490

487-
changes = compute_changes(args.files[0], args.files[1], top_margin=float(args.top_margin))
491+
changes = compute_changes(args.files[0], args.files[1], top_margin=float(args.top_margin), bottom_margin=float(args.bottom_margin))
488492
img = render_changes(changes, style)
489493
img.save(sys.stdout.buffer, args.format.upper())
490494

0 commit comments

Comments (0)