Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
60 commits
Select commit Hold shift + click to select a range
334a485
Update README
kevinhu Jun 9, 2021
8580c54
Merge branch 'master' of github.com:kevinhu/datahub into glue-etl
kevinhu Jun 9, 2021
499d308
Merge branch 'linkedin:master' into glue-etl
kevinhu Jun 10, 2021
520b099
Merge branch 'glue-etl' of github.com:kevinhu/datahub into glue-etl
kevinhu Jun 10, 2021
1941e93
Merge branch 'linkedin:master' into glue-etl
kevinhu Jun 10, 2021
5270fac
Read transformation DAGs
kevinhu Jun 11, 2021
67c0807
Extract node sources
kevinhu Jun 11, 2021
3ffdb1a
Init glue MCEs
kevinhu Jun 11, 2021
bdae7c1
Refactor job and flow wus
kevinhu Jun 11, 2021
4acc825
Resolve source and sink datasets
kevinhu Jun 11, 2021
68ed8e5
Merge branch 'linkedin:master' into glue-etl
kevinhu Jun 11, 2021
2fd692b
Set URNs correctly
kevinhu Jun 11, 2021
a0bc357
Isort and update snapshot JSONs
kevinhu Jun 11, 2021
e4b4d64
Successful ingestion
kevinhu Jun 11, 2021
7f0eb42
Refactor job listing
kevinhu Jun 11, 2021
fe63ce6
Glue ETL comments
kevinhu Jun 11, 2021
218338e
Clean up s3 naming
kevinhu Jun 11, 2021
5c1c9f2
Add job properties
kevinhu Jun 12, 2021
5e4873a
Fix lint errors
kevinhu Jun 14, 2021
4739367
Temp disable extract_transform in tests
kevinhu Jun 14, 2021
ce58f7b
Fix S3 URN
kevinhu Jun 14, 2021
a37b1db
Stubs for S3
kevinhu Jun 14, 2021
5050b8f
Fix lint errors
kevinhu Jun 14, 2021
ff682a8
Create Glue golden MCE json
kevinhu Jun 15, 2021
5144ffc
Trim Glue golden MCE
kevinhu Jun 15, 2021
ab419a4
Reapply freeze to Glue files
kevinhu Jun 15, 2021
ecd89a6
Fix golden path
kevinhu Jun 15, 2021
06d6c8a
Merge
kevinhu Jun 15, 2021
51ae22a
Fix duplicate MCEs
kevinhu Jun 15, 2021
108a203
Fix outputDatasets
kevinhu Jun 15, 2021
d9d8bc5
Remove S3 URIs
kevinhu Jun 15, 2021
e4a96b4
Expand job names
kevinhu Jun 15, 2021
db9d8ca
Expand job custom props
kevinhu Jun 15, 2021
7e6333b
Update golden
kevinhu Jun 15, 2021
4026917
Remove ownership classes
kevinhu Jun 15, 2021
c436149
Clean up redundant properties
kevinhu Jun 15, 2021
8619918
Merge branch 'linkedin:master' into glue-etl
kevinhu Jun 15, 2021
33dd0ea
Fix topological sort
kevinhu Jun 15, 2021
e0eaf0c
Fix S3 browse paths
kevinhu Jun 15, 2021
6f7b74a
Restore feast
kevinhu Jun 15, 2021
c738a74
Smaller stubs
kevinhu Jun 15, 2021
1d89927
Update README
kevinhu Jun 16, 2021
fe5b087
Resolve golden script conflict
kevinhu Jun 17, 2021
948dd24
Regenerate snapshot JSONs
kevinhu Jun 17, 2021
8a681cd
Merge
kevinhu Jun 17, 2021
95efa7e
Rebuild
kevinhu Jun 17, 2021
55e6619
Refactor node processing
kevinhu Jun 17, 2021
51bdb88
Add links to boto docs
kevinhu Jun 17, 2021
59bbf15
Fix sequence type error
kevinhu Jun 18, 2021
2a4501b
Fix Id typo
kevinhu Jun 18, 2021
8feb874
Merge branch 'linkedin:master' into glue-etl
kevinhu Jun 18, 2021
d173d6c
Types for process_dataflow_graph
kevinhu Jun 18, 2021
1b44baf
Include extension type in glue imports
kevinhu Jun 18, 2021
d80f529
S3 deduplication logic
kevinhu Jun 19, 2021
db1c638
Fix type annotation
kevinhu Jun 19, 2021
8eb3d50
Add comments for deduplication
kevinhu Jun 19, 2021
5b66074
Fix dataset IDs for Glue
kevinhu Jun 21, 2021
9955e06
Merge branch 'linkedin:master' into glue-etl
kevinhu Jun 21, 2021
6335f95
Update golden files
kevinhu Jun 21, 2021
09b9b60
Merge
kevinhu Jun 22, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Fix lint errors
  • Loading branch information
kevinhu committed Jun 14, 2021
commit 5050b8fe83704e3ad3fcb17bf127564f4eb17e22
2 changes: 1 addition & 1 deletion metadata-ingestion/src/datahub/ingestion/source/glue.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ class GlueSourceConfig(ConfigModel):
aws_role: Optional[Union[str, List[str]]] = None
aws_region: str

def get_client(self, service: str):
def get_client(self, service: str) -> boto3.client:
if (
self.aws_access_key_id
and self.aws_secret_access_key
Expand Down
8 changes: 4 additions & 4 deletions metadata-ingestion/tests/unit/test_glue_source.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import json
import unittest
from datetime import datetime

Expand Down Expand Up @@ -32,11 +31,11 @@
get_databases_response,
get_dataflow_graph_response_1,
get_dataflow_graph_response_2,
get_jobs_response,
get_object_body_1,
get_object_body_2,
get_object_response_1,
get_object_response_2,
get_jobs_response,
get_tables_response_1,
get_tables_response_2,
)
Expand Down Expand Up @@ -97,7 +96,7 @@ def test_get_column_type_not_contained(self):
],
)

# @freeze_time(FROZEN_TIME)
@freeze_time(FROZEN_TIME)
def test_turn_boto_glue_data_to_metadata_event(self):
stringy_timestamp = datetime.strptime(FROZEN_TIME, "%Y-%m-%d %H:%M:%S")
timestamp = int(datetime.timestamp(stringy_timestamp) * 1000)
Expand Down Expand Up @@ -149,7 +148,8 @@ def test_turn_boto_glue_data_to_metadata_event(self):
for wu in self.glue_source.get_workunits():
print(wu.mce.to_obj)

expected_metadata_work_unit = create_metadata_work_unit(timestamp)
create_metadata_work_unit(timestamp)
# expected_metadata_work_unit = create_metadata_work_unit(timestamp)

# self.assertEqual(expected_metadata_work_unit, actual_work_unit)

Expand Down
11 changes: 4 additions & 7 deletions metadata-ingestion/tests/unit/test_glue_source_stubs.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import io
import datetime
import botocore.session
from botocore.stub import Stubber
import io
from typing import Any, Dict

from botocore.response import StreamingBody

get_databases_response = {
Expand Down Expand Up @@ -875,7 +875,7 @@
"""


def mock_get_object_response(raw_body: str):
def mock_get_object_response(raw_body: str) -> Dict[str, Any]:
"""
Mock s3 client get_object() response object.

Expand All @@ -895,6 +895,3 @@ def mock_get_object_response(raw_body: str):

get_object_response_1 = mock_get_object_response(get_object_body_1)
get_object_response_2 = mock_get_object_response(get_object_body_2)

s3 = botocore.session.get_session().create_client("s3")
s3_stubber = Stubber(s3)