Skip to content

Commit 6706a5d

Browse files
committed
update lint and tests
1 parent 6cf4e32 commit 6706a5d

File tree

3 files changed

+109
-89
lines changed

3 files changed

+109
-89
lines changed

docs/document_loader.ipynb

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -143,10 +143,7 @@
143143
"outputs": [],
144144
"source": [
145145
"custom_content_loader = SpannerLoader(\n",
146-
" instance_id,\n",
147-
" database_id,\n",
148-
" query,\n",
149-
" content_columns = [\"custom_content\"]\n",
146+
" instance_id, database_id, query, content_columns=[\"custom_content\"]\n",
150147
")"
151148
]
152149
},
@@ -173,10 +170,7 @@
173170
"outputs": [],
174171
"source": [
175172
"custom_metadata_loader = SpannerLoader(\n",
176-
" instance_id,\n",
177-
" database_id,\n",
178-
" query,\n",
179-
" metadata_columns = [\"column1\", \"column2\"]\n",
173+
" instance_id, database_id, query, metadata_columns=[\"column1\", \"column2\"]\n",
180174
")"
181175
]
182176
},
@@ -196,10 +190,7 @@
196190
"outputs": [],
197191
"source": [
198192
"custom_metadata_json_loader = SpannerLoader(\n",
199-
" instance_id,\n",
200-
" database_id,\n",
201-
" query,\n",
202-
" metadata_json_column = \"another-json-column\"\n",
193+
" instance_id, database_id, query, metadata_json_column=\"another-json-column\"\n",
203194
")"
204195
]
205196
},
@@ -223,7 +214,7 @@
223214
" instance_id,\n",
224215
" database_id,\n",
225216
" query,\n",
226-
" staleness = timestamp,\n",
217+
" staleness=timestamp,\n",
227218
")"
228219
]
229220
},
@@ -238,7 +229,7 @@
238229
" instance_id,\n",
239230
" database_id,\n",
240231
" query,\n",
241-
" staleness = duration,\n",
232+
" staleness=duration,\n",
242233
")"
243234
]
244235
},
@@ -261,7 +252,7 @@
261252
" instance_id,\n",
262253
" database_id,\n",
263254
" query,\n",
264-
" databoost = True,\n",
255+
" databoost=True,\n",
265256
")"
266257
]
267258
},
@@ -404,6 +395,8 @@
404395
"metadata": {},
405396
"outputs": [],
406397
"source": [
398+
"from langchain_google_spanner import Column\n",
399+
"\n",
407400
"new_table_name = \"my_new_table\"\n",
408401
"\n",
409402
"SpannerDocumentSaver.init_document_table(\n",
@@ -412,8 +405,8 @@
412405
" new_table_name,\n",
413406
" content_column=\"my-page-content\",\n",
414407
" metadata_columns=[\n",
415-
" ('category', 'STRING(36)', True),\n",
416-
" ('price', 'FLOAT64', False),\n",
408+
" Column(\"category\", \"STRING(36)\", True),\n",
409+
" Column(\"price\", \"FLOAT64\", False),\n",
417410
" ],\n",
418411
")"
419412
]

src/langchain_google_spanner/document_loader.py

Lines changed: 17 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import datetime
1616
import json
1717
from dataclasses import dataclass
18-
from typing import Any, Dict, Iterator, List, Optional, Union
18+
from typing import Any, Dict, Iterator, List, Union
1919

2020
from google.cloud.spanner import Client, KeySet # type: ignore
2121
from google.cloud.spanner_admin_database_v1.types import DatabaseDialect # type: ignore
@@ -107,7 +107,7 @@ def _load_doc_to_row(
107107
del doc_metadata[metadata_json_column]
108108
metadata_json = {**metadata_json, **doc_metadata}
109109
j = json.dumps(metadata_json) if parse_json else metadata_json
110-
row.append(j)
110+
row.append(j) # type: ignore
111111
return tuple(row)
112112

113113

@@ -131,7 +131,7 @@ def __init__(
131131
format: str = "text",
132132
databoost: bool = False,
133133
metadata_json_column: str = METADATA_COL_NAME,
134-
staleness: Union[float, datetime.datetime] = 15.0,
134+
staleness: Union[float, datetime.datetime] = 0.0,
135135
):
136136
"""Initialize Spanner document loader.
137137
@@ -162,13 +162,11 @@ def __init__(
162162
self.databoost = databoost
163163
self.client = client
164164
self.staleness = staleness
165-
if not self.client.instance(self.instance_id).exists():
165+
instance = self.client.instance(instance_id)
166+
if not instance.exists():
166167
raise Exception("Instance doesn't exist.")
167-
if (
168-
not self.client.instance(self.instance_id)
169-
.database(self.database_id)
170-
.exists()
171-
):
168+
database = instance.database(database_id)
169+
if not database.exists():
172170
raise Exception("Database doesn't exist.")
173171

174172
def load(self) -> List[Document]:
@@ -235,9 +233,9 @@ def __init__(
235233
database_id: str,
236234
table_name: str,
237235
client: Client = Client(),
238-
content_column: Optional[str] = "",
239-
metadata_columns: Optional[List[str]] = [],
240-
metadata_json_column: Optional[str] = METADATA_COL_NAME,
236+
content_column: str = "",
237+
metadata_columns: List[str] = [],
238+
metadata_json_column: str = METADATA_COL_NAME,
241239
):
242240
"""Initialize Spanner document saver.
243241
@@ -246,11 +244,10 @@ def __init__(
246244
database_id: The Spanner database to load data to.
247245
table_name: The table name to load data to.
248246
client: The connection object to use. This can be used to customize the project ID and credentials.
249-
content_column: Optional. The name of the content column. Defaulted to the first column.
250-
metadata_columns: Optional. This is for user to opt-in a selection of columns to use. Defaulted to use
247+
content_column: The name of the content column. Defaults to the first column.
248+
metadata_columns: Lets the user opt in to a selection of columns to use. Defaults to using
251249
all columns.
252-
store_metadata: If true, extra metadata will be stored in the "langchain_metadata" column.
253-
metadata_json_column: Optional. The name of the special JSON column. Defaulted to use "langchain_metadata".
250+
metadata_json_column: The name of the special JSON column. Defaults to "langchain_metadata".
254251
"""
255252
self.instance_id = instance_id
256253
self.database_id = database_id
@@ -329,7 +326,7 @@ def init_document_table(
329326
metadata_columns: List[Column] = [],
330327
primary_key: str = "",
331328
store_metadata: bool = True,
332-
metadata_json_column: Optional[str] = None,
329+
metadata_json_column: str = METADATA_COL_NAME,
333330
):
334331
"""
335332
Create a new table to store docs with a custom schema.
@@ -343,13 +340,11 @@ def init_document_table(
343340
primary_key: The name of the primary key.
344341
store_metadata: If true, extra metadata will be stored in the "langchain_metadata" column.
345342
Defaulted to true.
346-
metadata_json_column: Optional. The name of the special JSON column. Defaulted to use "langchain_metadata".
343+
metadata_json_column: The name of the special JSON column. Defaults to "langchain_metadata".
347344
"""
348345
primary_key = primary_key or content_column
349-
metadata_json_column = (
350-
(metadata_json_column or METADATA_COL_NAME) if store_metadata else None
351-
)
352346
client = Client()
347+
metadata_json_column = metadata_json_column if store_metadata else ""
353348
instance = client.instance(instance_id)
354349
if not instance.exists():
355350
raise Exception("Instance doesn't exist.")
@@ -375,7 +370,7 @@ def create_table(
375370
database_id: str,
376371
table_name: str,
377372
primary_key: str,
378-
metadata_json_column: Optional[str],
373+
metadata_json_column: str,
379374
content_column: str,
380375
metadata_columns: List[Column],
381376
):

0 commit comments

Comments
 (0)