Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20230317-144957.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Added support for configuring the delimiter of a seed file; defaults to comma
time: 2023-03-17T14:49:57.564210866+01:00
custom:
Author: ramonvermeulen
Issue: "3990"
4 changes: 2 additions & 2 deletions core/dbt/clients/agate_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,12 +135,12 @@ def as_matrix(table):
return [r.values() for r in table.rows.values()]


def from_csv(abspath, text_columns):
def from_csv(abspath, text_columns, delimiter = ","):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Question: Should we add support for more than just `delimiter` here, i.e. any/all `**kwargs` supported by `agate.Table.from_csv` / Python's stdlib CSV reader?

Thoughts:

  • That can be out of scope for now / for this PR
  • It would risk locking us further into using agate (which I don't love), or at least the Python stdlib csv reader (which I don't mind as much)

type_tester = build_type_tester(text_columns=text_columns)
with open(abspath, encoding="utf-8") as fp:
if fp.read(1) != BOM:
fp.seek(0)
return agate.Table.from_csv(fp, column_types=type_tester)
return agate.Table.from_csv(fp, column_types=type_tester, delimiter=delimiter)


class _NullMarker:
Expand Down
3 changes: 2 additions & 1 deletion core/dbt/context/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -791,8 +791,9 @@ def load_agate_table(self) -> agate.Table:
assert self.model.root_path
path = os.path.join(self.model.root_path, self.model.original_file_path)
column_types = self.model.config.column_types
delimiter = self.model.config.delimiter
try:
table = agate_helper.from_csv(path, text_columns=column_types)
table = agate_helper.from_csv(path, text_columns=column_types, delimiter=delimiter)
except ValueError as e:
raise LoadAgateTableValueError(e, node=self.model)
table.original_abspath = os.path.abspath(path)
Expand Down
1 change: 1 addition & 0 deletions core/dbt/contracts/graph/model_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,7 @@ def field_mapping(cls):
@dataclass
class SeedConfig(NodeConfig):
materialized: str = "seed"
delimiter: str = ","
quote_columns: Optional[bool] = None

@classmethod
Expand Down
3 changes: 3 additions & 0 deletions test/unit/test_contracts_graph_parsed.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,7 @@ def basic_parsed_seed_dict():
'alias': 'foo',
'config': {
'column_types': {},
'delimiter': ',',
'enabled': True,
'materialized': 'seed',
'persist_docs': {},
Expand Down Expand Up @@ -542,6 +543,7 @@ def complex_parsed_seed_dict():
'alias': 'foo',
'config': {
'column_types': {},
'delimiter': ',',
'enabled': True,
'materialized': 'seed',
'persist_docs': {'relation': True, 'columns': True},
Expand Down Expand Up @@ -588,6 +590,7 @@ def complex_parsed_seed_object():
alias='foo',
config=SeedConfig(
quote_columns=True,
delimiter=',',
persist_docs={'relation': True, 'columns': True},
),
deferred=False,
Expand Down