@@ -2130,6 +2130,79 @@ def rename_form_topname(self, name: str) -> Optional[DictionaryObject]:
21302130 interim [NameObject ("/T" )] = TextStringObject (name )
21312131 return interim
21322132
def _list_attachments(self) -> List[str]:
    """
    Retrieves the list of filenames of file attachments.

    The names are read from the ``/Names`` array found under the
    catalog's ``/Names`` -> ``/EmbeddedFiles`` entry. That array
    alternates keys and values (``[name1, filespec1, name2, filespec2,
    ...]``), so only the entries at even positions are considered; a
    string stored at a value position is not mistaken for a filename.

    Note:
        Only a ``/Names`` array stored directly in the ``/EmbeddedFiles``
        dictionary is read. If any of the keys on that path is missing,
        an empty list is returned.

    Returns:
        list of filenames in array order; a name that occurs several
        times in the array is listed several times
    """
    # The cast() targets are quoted: they only matter to the type checker.
    catalog = cast("DictionaryObject", self.trailer["/Root"])
    try:
        names_dict = cast("DictionaryObject", catalog["/Names"])
        embedded_files = cast("DictionaryObject", names_dict["/EmbeddedFiles"])
        names = cast("ArrayObject", embedded_files["/Names"])
    except KeyError:
        # The document does not declare any embedded files.
        return []
    # Keys live at the even indices; keep only the ones that are text.
    return [name for name in names[::2] if isinstance(name, str)]
2158+
def _get_attachments(
    self, filename: Optional[str] = None
) -> Dict[str, Union[bytes, List[bytes]]]:
    """
    Retrieves all or selected file attachments of the PDF as a dictionary
    of file names and the file data as a bytestring.

    The attachments are read from the ``/Names`` array found under the
    catalog's ``/Names`` -> ``/EmbeddedFiles`` entry. That array
    alternates names and file specifications, so it is walked pair by
    pair; a trailing name without a file specification is ignored.

    Args:
        filename: If filename is None, then a dictionary of all attachments
            will be returned, where the key is the filename and the value
            is the content. Otherwise, a dictionary with just a single key
            - the filename - and its content will be returned.

    Returns:
        dictionary of filename -> Union[bytestring or List[ByteString]]
        if the filename exists multiple times a List of the different
        versions will be provided. The dictionary is empty when the
        document declares no embedded files or no name matches.

    Raises:
        KeyError: if a selected file specification has no ``/EF`` entry or
            its ``/EF`` dictionary has no ``/F`` entry.
    """
    # The cast() targets are quoted: they only matter to the type checker.
    catalog = cast("DictionaryObject", self.trailer["/Root"])
    try:
        names_dict = cast("DictionaryObject", catalog["/Names"])
        embedded_files = cast("DictionaryObject", names_dict["/EmbeddedFiles"])
        names = cast("ArrayObject", embedded_files["/Names"])
    except KeyError:
        # The document does not declare any embedded files.
        return {}

    attachments: Dict[str, Union[bytes, List[bytes]]] = {}
    # Pulling two items per step from one iterator yields (name, filespec)
    # pairs and silently drops an unpaired trailing element.
    entries = iter(names)
    for name, file_spec in zip(entries, entries):
        if not isinstance(name, str):
            continue
        if filename is not None and name != filename:
            continue
        # The file specification may be stored as an indirect reference.
        spec_dict = file_spec.get_object()
        data = spec_dict["/EF"]["/F"].get_data()
        if name not in attachments:
            attachments[name] = data
            continue
        # Same name seen again: collect every version in a list.
        previous = attachments[name]
        if isinstance(previous, list):
            previous.append(data)
        else:
            attachments[name] = [previous, data]
    return attachments
2205+
21332206
21342207class PdfFileReader (PdfReader ): # deprecated
21352208 def __init__ (self , * args : Any , ** kwargs : Any ) -> None :
0 commit comments