1+ import os , sys , unittest
2+
# Configure path environment.
# TESTS_ROOT is the directory containing this test module, PROJECT_ROOT is its
# parent directory, and RESOURCE_ROOT is the 'Resources' directory under
# PROJECT_ROOT from which the tests read their sample files.
TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, 'Resources')

# Put the project checkout at the FRONT of the import path so the package in
# PROJECT_ROOT is found before any same-named package installed elsewhere
# (appending would let an installed copy shadow the code under test).
sys.path.insert(0, PROJECT_ROOT)
9+
10+ # Test imports
11+ import unittest
12+ from PyPDF2 import PdfFileReader
13+
14+
class PdfReaderTestCases(unittest.TestCase):
    """Test cases exercising PdfFileReader against files in RESOURCE_ROOT."""

    def test_PdfReaderFileLoad(self):
        """Test loading and parsing of a file.

        Opens 'crazyones.pdf', extracts the text of its first page and
        compares it to the contents of 'crazyones.txt'.
        Expected outcome: file loads, text matches expected.
        """
        with open(os.path.join(RESOURCE_ROOT, 'crazyones.pdf'), 'rb') as inputfile:

            # Load PDF file from file
            ipdf = PdfFileReader(inputfile)
            ipdf_p1 = ipdf.getPage(0)

            # Retrieve the expected text. It is read as bytes because the
            # extracted text is UTF-8 encoded before comparison; comparing
            # bytes with str can never succeed on Python 3. The context
            # manager closes the reference file even if a later step raises.
            with open(os.path.join(RESOURCE_ROOT, 'crazyones.txt'), 'rb') as pdftext_file:
                pdftext = pdftext_file.read()

            # Extract while the PDF stream is still open, then encode once
            # and reuse the result for both the comparison and the message.
            ipdf_p1_text = ipdf_p1.extractText().encode('utf-8', errors='ignore')

        # Compare the text of the PDF to a known source
        self.assertEqual(
            ipdf_p1_text,
            pdftext,
            msg='PDF extracted text differs from expected value.\n \n Expected:\n \n %r\n \n Extracted:\n \n %r\n \n '
                % (pdftext, ipdf_p1_text),
        )
0 commit comments