resolved merge conflict

mstamy2 · mstamy2 · commit ac67ab625164 · 2015-06-16T15:54:26.000-05:00
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,4 @@
 *.pyc
 *.swp
 .DS_Store
+build
diff --git a/PyPDF2/generic.py b/PyPDF2/generic.py
@@ -345,13 +345,18 @@ def readStringFromStream(stream):
                 tok = b_("\b")
             elif tok == b_("f"):
                 tok = b_("\f")
+            elif tok == b_("c"):
+                tok = b_("\c")
             elif tok == b_("("):
                 tok = b_("(")
             elif tok == b_(")"):
                 tok = b_(")")
+            elif tok == b_("/"):
+                tok = b_("/")
             elif tok == b_("\\"):
                 tok = b_("\\")
-            elif tok in (b_(" "), b_("/"), b_("%"), b_("<"), b_(">"), b_("["), b_("]"), b_("#")):
+            elif tok in (b_(" "), b_("/"), b_("%"), b_("<"), b_(">"), b_("["), 
+                    b_("]"), b_("#"),  b_("_"), b_("&"), b_('$')):
                 # odd/unnessecary escape sequences we have encountered
                 tok = b_(tok)
             elif tok.isdigit():
@@ -378,7 +383,7 @@ def readStringFromStream(stream):
                 # line break was escaped:
                 tok = b_('')
             else:
-                raise utils.PdfReadError("Unexpected escaped string")
+                raise utils.PdfReadError(r"Unexpected escaped string: %s" % tok)
         txt += tok
     return createStringObject(txt)
 
diff --git a/PyPDF2/pdf.py b/PyPDF2/pdf.py
@@ -308,9 +308,78 @@ def addAttachment(self, fname, fdata):
         # Update the root
         self._root_object.update({
                 NameObject("/Names"): embeddedFilesDictionary
-                })            
-                
-                
+                })
+
+    def appendPagesFromReader(self, reader, after_page_append=None):
+        """
+        Copy pages from reader to writer. Includes an optional callback parameter
+        which is invoked after pages are appended to the writer.
+        
+        :param reader: a PdfFileReader object from which to copy pages
+            (including their annotations) to this writer object. Each
+            copied page is appended after the writer's existing pages.
+        :callback after_page_append (function): Callback function that is invoked after
+            each page is appended to the writer. Callback signature:
+
+            :param writer_pageref (PDF page reference): Reference to the page
+                appended to the writer.
+        """
+        # Get page count from writer and reader
+        reader_num_pages = reader.getNumPages()
+        writer_num_pages = self.getNumPages()
+
+        # Copy pages from reader to writer
+        for rpagenum in range(0, reader_num_pages):
+            reader_page = reader.getPage(rpagenum)
+            self.addPage(reader_page)
+            writer_page = self.getPage(writer_num_pages+rpagenum)
+            # Trigger callback, pass writer page as parameter
+            if callable(after_page_append): after_page_append(writer_page)
+
+    def updatePageFormFieldValues(self, page, fields):
+        '''
+        Update the form field values for a given page from a fields dictionary.
+        Copy field texts and values from fields to page.
+
+        :param page: Page reference from PDF writer where the annotations
+            and field data will be updated.
+        :param fields: a Python dictionary of field names (/T) and text
+            values (/V)
+        '''
+        # Iterate through the page's annotations, update matching field values
+        for j in range(0, len(page['/Annots'])):
+            writer_annot = page['/Annots'][j].getObject()
+            for field in fields:
+                if writer_annot.get('/T') == field:
+                    writer_annot.update({
+                        NameObject("/V"): TextStringObject(fields[field])
+                    })
+
+    def cloneReaderDocumentRoot(self, reader):
+        '''
+        Copy the reader document root to the writer.
+        
+        :param reader:  PdfFileReader from which the document root should be copied.
+        :callback after_page_append
+        '''
+        self._root_object = reader.trailer['/Root']
+
+    def cloneDocumentFromReader(self, reader, after_page_append=None):
+        '''
+        Create a copy (clone) of a document from a PDF file reader
+
+        :param reader: PDF file reader instance from which the clone
+            should be created.
+        :callback after_page_append (function): Callback function that is invoked after
+            each page is appended to the writer. Signature includes a reference to the
+            appended page (delegates to appendPagesFromReader). Callback signature:
+
+            :param writer_pageref (PDF page reference): Reference to the page just
+                appended to the document.
+        '''
+        self.cloneReaderDocumentRoot(reader)
+        self.appendPagesFromReader(reader, after_page_append)
+
     def encrypt(self, user_pwd, owner_pwd = None, use_128bit = True):
         """
         Encrypt this PDF file with the PDF Standard encryption handler.
diff --git a/README.md b/README.md
@@ -23,3 +23,12 @@ https://pythonhosted.org/PyPDF2/
 ##FAQ
 Please see  
 http://mstamy2.github.io/PyPDF2/FAQ.html
+
+
+##Tests
+PyPDF2 includes a test suite built on the unittest framework. All tests are located in the "Tests" folder.
+Tests can be run from the command line by:
+
+```bash
+python -m unittest Tests.tests
+```
diff --git a/Resources/crazyones.pdf b/Resources/crazyones.pdf
diff --git a/Resources/crazyones.txt b/Resources/crazyones.txt
@@ -0,0 +1 @@
+TheCrazyOnesOctober14,1998Herestothecrazyones.Themis˝ts.Therebels.Thetroublemakers.Theroundpegsinthesquareholes.Theoneswhoseethingsdi˙erently.Theyrenotfondofrules.Andtheyhavenorespectforthestatusquo.Youcanquotethem,disagreewiththem,glorifyorvilifythem.Abouttheonlythingyoucantdoisignorethem.Becausetheychangethings.Theyinvent.Theyimagine.Theyheal.Theyexplore.Theycreate.Theyinspire.Theypushthehumanraceforward.Maybetheyhavetobecrazy.Howelsecanyoustareatanemptycanvasandseeaworkofart?Orsitinsilenceandhearasongthatsneverbeenwritten?Orgazeataredplanetandseealaboratoryonwheels?Wemaketoolsforthesekindsofpeople.Whilesomeseethemasthecrazyones,weseegenius.Becausethepeoplewhoarecrazyenoughtothinktheycanchangetheworld,aretheoneswhodo.
diff --git a/Tests/__init__.py b/Tests/__init__.py
diff --git a/Tests/tests.py b/Tests/tests.py
@@ -0,0 +1,35 @@
+import os, sys, unittest
+
+# Configure path environment
+TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
+PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
+RESOURCE_ROOT = os.path.join(PROJECT_ROOT, 'Resources')
+
+sys.path.append(PROJECT_ROOT)
+
+# Test imports
+import unittest
+from PyPDF2 import PdfFileReader
+
+
+class PdfReaderTestCases(unittest.TestCase):
+
+	def test_PdfReaderFileLoad(self):
+		'''	Test loading and parsing of a file. Extract text of the file and compare to expected
+			textual output. Expected outcome: file loads, text matches expected.
+		'''
+		with open(os.path.join(RESOURCE_ROOT, 'crazyones.pdf'), 'rb') as inputfile:
+			
+			# Load PDF file from file
+			ipdf = PdfFileReader(inputfile)
+			ipdf_p1 = ipdf.getPage(0)
+			
+			# Retrieve the text of the PDF
+			pdftext_file = open(os.path.join(RESOURCE_ROOT, 'crazyones.txt'), 'r')
+			pdftext = pdftext_file.read()
+			ipdf_p1_text = ipdf_p1.extractText()
+			
+			# Compare the text of the PDF to a known source
+			self.assertEqual(ipdf_p1_text.encode('utf-8', errors='ignore'), pdftext,
+				msg='PDF extracted text differs from expected value.\n\nExpected:\n\n%r\n\nExtracted:\n\n%r\n\n'
+					% (pdftext, ipdf_p1_text.encode('utf-8', errors='ignore')))

-Original file line number
+Diff line change
@@ @@ -1,3 +1,4 @@ @@
 *.pyc
 *.swp
 .DS_Store
 +build
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	+TheCrazyOnesOctober14,1998Herestothecrazyones.Themis˝ts.Therebels.Thetroublemakers.Theroundpegsinthesquareholes.Theoneswhoseethingsdi˙erently.Theyrenotfondofrules.Andtheyhavenorespectforthestatusquo.Youcanquotethem,disagreewiththem,glorifyorvilifythem.Abouttheonlythingyoucantdoisignorethem.Becausetheychangethings.Theyinvent.Theyimagine.Theyheal.Theyexplore.Theycreate.Theyinspire.Theypushthehumanraceforward.Maybetheyhavetobecrazy.Howelsecanyoustareatanemptycanvasandseeaworkofart?Orsitinsilenceandhearasongthatsneverbeenwritten?Orgazeataredplanetandseealaboratoryonwheels?Wemaketoolsforthesekindsofpeople.Whilesomeseethemasthecrazyones,weseegenius.Becausethepeoplewhoarecrazyenoughtothinktheycanchangetheworld,aretheoneswhodo.