File tree (expand / collapse): 1 file changed, +3 -0 lines changed
File tree (expand / collapse): 1 file changed, +3 -0 lines changed
Columns: original file line number | diff line number | diff line change
@@ -63,6 +63,7 @@ def from_bytes(cls, file: BytesIO) -> "DocxFile":
6363 text = docx2txt .process (file )
6464 text = strip_consecutive_newlines (text )
6565 doc = Document (page_content = text .strip ())
66+ doc .metadata ["source" ] = "p-1"
6667 return cls (name = file .name , id = md5 (file .read ()).hexdigest (), docs = [doc ])
6768
6869
@@ -76,6 +77,7 @@ def from_bytes(cls, file: BytesIO) -> "PdfFile":
7677 text = strip_consecutive_newlines (text )
7778 doc = Document (page_content = text .strip ())
7879 doc .metadata ["page" ] = i + 1
80+ doc .metadata ["source" ] = f"p-{ i + 1 } "
7981 docs .append (doc )
8082 # file.read() mutates the file object, which can affect caching
8183 # so we need to reset the file pointer to the beginning
@@ -90,6 +92,7 @@ def from_bytes(cls, file: BytesIO) -> "TxtFile":
9092 text = strip_consecutive_newlines (text )
9193 file .seek (0 )
9294 doc = Document (page_content = text .strip ())
95+ doc .metadata ["source" ] = "p-1"
9396 return cls (name = file .name , id = md5 (file .read ()).hexdigest (), docs = [doc ])
9497
9598
You can’t perform that action at this time.
0 commit comments