Skip to content

Commit ac67ab6

Browse files
committed
resolved merge conflict
2 parents 894b8d1 + 02de326 commit ac67ab6

File tree

8 files changed

+125
-5
lines changed

8 files changed

+125
-5
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
*.pyc
22
*.swp
33
.DS_Store
4+
build

PyPDF2/generic.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -345,13 +345,18 @@ def readStringFromStream(stream):
345345
tok = b_("\b")
346346
elif tok == b_("f"):
347347
tok = b_("\f")
348+
elif tok == b_("c"):
349+
tok = b_("\c")
348350
elif tok == b_("("):
349351
tok = b_("(")
350352
elif tok == b_(")"):
351353
tok = b_(")")
354+
elif tok == b_("/"):
355+
tok = b_("/")
352356
elif tok == b_("\\"):
353357
tok = b_("\\")
354-
elif tok in (b_(" "), b_("/"), b_("%"), b_("<"), b_(">"), b_("["), b_("]"), b_("#")):
358+
elif tok in (b_(" "), b_("/"), b_("%"), b_("<"), b_(">"), b_("["),
359+
b_("]"), b_("#"), b_("_"), b_("&"), b_('$')):
355360
# odd/unnecessary escape sequences we have encountered
356361
tok = b_(tok)
357362
elif tok.isdigit():
@@ -378,7 +383,7 @@ def readStringFromStream(stream):
378383
# line break was escaped:
379384
tok = b_('')
380385
else:
381-
raise utils.PdfReadError("Unexpected escaped string")
386+
raise utils.PdfReadError(r"Unexpected escaped string: %s" % tok)
382387
txt += tok
383388
return createStringObject(txt)
384389

PyPDF2/pdf.py

Lines changed: 72 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -308,9 +308,78 @@ def addAttachment(self, fname, fdata):
308308
# Update the root
309309
self._root_object.update({
310310
NameObject("/Names"): embeddedFilesDictionary
311-
})
312-
313-
311+
})
312+
313+
def appendPagesFromReader(self, reader, after_page_append=None):
    """
    Append every page of ``reader`` to this writer, in order.

    :param reader: object exposing ``getNumPages()`` and ``getPage(n)``
        (a PdfFileReader) whose pages are added to this writer.
    :param after_page_append: optional callable invoked once after each
        page has been added. It receives a single argument: the page as
        returned by this writer's ``getPage`` for the position the page
        was appended at. Non-callable values (including ``None``) are
        ignored.
    """
    # Both counts are taken once, before any page is added, so the index
    # of each appended page is simply "pages already present + offset".
    pages_to_copy = reader.getNumPages()
    first_new_index = self.getNumPages()

    for offset in range(pages_to_copy):
        self.addPage(reader.getPage(offset))
        appended_page = self.getPage(first_new_index + offset)
        # Hand the writer-side page to the callback, if one was supplied.
        if callable(after_page_append):
            after_page_append(appended_page)
338+
339+
def updatePageFormFieldValues(self, page, fields):
    """
    Update the form field values for a given page from a fields dictionary.

    For every annotation on ``page`` whose ``/T`` entry equals a key of
    ``fields``, the annotation's ``/V`` entry is set to the corresponding
    value wrapped in a ``TextStringObject``.

    :param page: page (dictionary-like) from the PDF writer whose
        annotations should be updated. A page without an ``/Annots``
        entry is left untouched.
    :param fields: a Python dictionary of field names (/T) and text
        values (/V).
    """
    # A page with no annotations has nothing to update; indexing
    # '/Annots' unconditionally would raise KeyError for such pages.
    if '/Annots' not in page:
        return

    # Look the annotation array up once and walk it directly instead of
    # re-indexing the page on every iteration.
    for annot_ref in page['/Annots']:
        writer_annot = annot_ref.getObject()
        annot_name = writer_annot.get('/T')
        for field in fields:
            if annot_name == field:
                writer_annot.update({
                    NameObject("/V"): TextStringObject(fields[field])
                })
357+
358+
def cloneReaderDocumentRoot(self, reader):
    """
    Use the reader's document root as this writer's root object.

    :param reader: PdfFileReader whose ``trailer['/Root']`` entry is
        installed as the writer's ``_root_object``.
    """
    # The value looked up in the trailer is assigned as-is; this method
    # does not copy it.
    source_root = reader.trailer['/Root']
    self._root_object = source_root
366+
367+
def cloneDocumentFromReader(self, reader, after_page_append=None):
    """
    Create a copy (clone) of a document from a PDF file reader.

    Delegates to ``cloneReaderDocumentRoot`` and then to
    ``appendPagesFromReader``.

    :param reader: PDF file reader instance from which the clone
        should be created.
    :param after_page_append: optional callback forwarded unchanged to
        ``appendPagesFromReader``, which invokes it after each page is
        appended with a reference to that page.
    """
    # Step 1: take over the reader's document root.
    self.cloneReaderDocumentRoot(reader)
    # Step 2: append the reader's pages, forwarding the callback.
    self.appendPagesFromReader(reader, after_page_append)
382+
314383
def encrypt(self, user_pwd, owner_pwd = None, use_128bit = True):
315384
"""
316385
Encrypt this PDF file with the PDF Standard encryption handler.

README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,12 @@ https://pythonhosted.org/PyPDF2/
2323
## FAQ
2424
Please see
2525
http://mstamy2.github.io/PyPDF2/FAQ.html
26+
27+
28+
## Tests
29+
PyPDF2 includes a test suite built on the unittest framework. All tests are located in the "Tests" folder.
30+
Tests can be run from the command line by:
31+
32+
```bash
33+
python -m unittest Tests.tests
34+
```

Resources/crazyones.pdf

11.2 KB
Binary file not shown.

Resources/crazyones.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
TheCrazyOnesOctober14,1998Herestothecrazyones.Themis˝ts.Therebels.Thetroublemakers.Theroundpegsinthesquareholes.Theoneswhoseethingsdi˙erently.Theyrenotfondofrules.Andtheyhavenorespectforthestatusquo.Youcanquotethem,disagreewiththem,glorifyorvilifythem.Abouttheonlythingyoucantdoisignorethem.Becausetheychangethings.Theyinvent.Theyimagine.Theyheal.Theyexplore.Theycreate.Theyinspire.Theypushthehumanraceforward.Maybetheyhavetobecrazy.Howelsecanyoustareatanemptycanvasandseeaworkofart?Orsitinsilenceandhearasongthatsneverbeenwritten?Orgazeataredplanetandseealaboratoryonwheels?Wemaketoolsforthesekindsofpeople.Whilesomeseethemasthecrazyones,weseegenius.Becausethepeoplewhoarecrazyenoughtothinktheycanchangetheworld,aretheoneswhodo.

Tests/__init__.py

Whitespace-only changes.

Tests/tests.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import os, sys, unittest
2+
3+
# Locate the test, project and resource directories relative to this file
# rather than the current working directory.
TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, 'Resources')

# Put the project directory on the import path so the PyPDF2 import below
# can be resolved from the source tree.
sys.path.append(PROJECT_ROOT)
9+
10+
# Test imports
11+
import unittest
12+
from PyPDF2 import PdfFileReader
13+
14+
15+
class PdfReaderTestCases(unittest.TestCase):
    """Tests for loading and parsing PDF files with ``PdfFileReader``."""

    def test_PdfReaderFileLoad(self):
        """
        Test loading and parsing of a file. Extract the text of the first
        page and compare it to the expected textual output stored in
        ``crazyones.txt``.

        Expected outcome: file loads, text matches expected.
        """
        pdf_path = os.path.join(RESOURCE_ROOT, 'crazyones.pdf')
        expected_path = os.path.join(RESOURCE_ROOT, 'crazyones.txt')

        with open(pdf_path, 'rb') as inputfile:
            # Load PDF file from file
            ipdf = PdfFileReader(inputfile)
            ipdf_p1 = ipdf.getPage(0)

            # Retrieve the expected text. It is read as bytes so that it
            # has the same type as the encoded extraction result below on
            # both Python 2 and Python 3, and through a context manager so
            # the file handle is always closed.
            with open(expected_path, 'rb') as pdftext_file:
                pdftext = pdftext_file.read()

            # Extract while the PDF file is still open.
            ipdf_p1_text = ipdf_p1.extractText().encode('utf-8', errors='ignore')

            # Compare the text of the PDF to a known source
            self.assertEqual(
                ipdf_p1_text, pdftext,
                msg='PDF extracted text differs from expected value.\n\nExpected:\n\n%r\n\nExtracted:\n\n%r\n\n'
                    % (pdftext, ipdf_p1_text))

0 commit comments

Comments
 (0)