Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 0 additions & 47 deletions cognee/infrastructure/files/utils/guess_file_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,53 +58,6 @@ def match(self, buf):
filetype.add_type(txt_file_type)


class CustomPdfMatcher(filetype.Type):
    """
    Match PDF files by looking for the ``%PDF-`` header marker near the start of the data.

    Public methods:
    - match

    Instance variables:
    - MIME: The MIME type of the PDF.
    - EXTENSION: The file extension of the PDF.
    """

    MIME = "application/pdf"
    EXTENSION = "pdf"

    # Full header marker, including the leading "%". Searching for the bare
    # text "PDF-" would also match ordinary documents that mention it.
    _SIGNATURE = b"%PDF-"
    # PDF readers conventionally accept leading junk (BOM, transport headers)
    # only if the header still appears within the first 1024 bytes.
    _HEADER_WINDOW = 1024

    def __init__(self):
        super().__init__(
            mime=CustomPdfMatcher.MIME, extension=CustomPdfMatcher.EXTENSION
        )

    def match(self, buf):
        """
        Determine if the provided buffer is a PDF file.

        This method checks for the ``%PDF-`` header marker within the first
        1024 bytes of the buffer, so a short prefix before the header is
        tolerated while unrelated occurrences deeper in the data are ignored.

        Raises:
        - TypeError: If the buffer is a ``str`` or does not support slicing
          (for example ``None``).

        Parameters:
        -----------

            - buf: The bytes-like buffer containing the data to be checked.

        Returns:
        --------

            Returns True if the PDF header marker is found in the leading window of
            the buffer, otherwise returns False.
        """
        # Slice first so the search is bounded regardless of the buffer size.
        return self._SIGNATURE in buf[: self._HEADER_WINDOW]


# Build a single module-level matcher instance and pass it to filetype.add_type
# at import time (presumably so filetype consults it when guessing types —
# confirm against the filetype documentation).
custom_pdf_matcher = CustomPdfMatcher()

filetype.add_type(custom_pdf_matcher)


def guess_file_type(file: BinaryIO) -> filetype.Type:
"""
Guess the file type from the given binary file stream.
Expand Down
Loading