-
Notifications
You must be signed in to change notification settings - Fork 3.3k
feat(ingest): SageMaker feature store ingestion #2758
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
shirshanka
merged 28 commits into
datahub-project:master
from
kevinhu:sagemaker-features
Jun 30, 2021
Merged
Changes from 1 commit
Commits
Show all changes
28 commits
Select commit
Hold shift + click to select a range
ec8a385
Create common AWS config
kevinhu d3bf612
Init sagemaker
kevinhu 3db0a58
Common AWS dependencies
kevinhu d282559
Get features in feature group
kevinhu 8f455c9
Ingest feature groups
kevinhu 2bfb882
Add example ingestion config
kevinhu 5bff9f4
Fix feature ingestion
kevinhu 44ecb58
Append Glue data catalog source
kevinhu d660a9b
Handle primary key ingestion
kevinhu 0259845
Init tests and stubs
kevinhu cd4d233
Add sagemaker golden
kevinhu 4ff8434
Clean up golden
kevinhu 8971109
Add descriptions and filter primary keys
kevinhu 9133c85
Include custom fields in feature tables
kevinhu 777f7df
Add sagemaker custom properties
kevinhu 3722726
Merge
kevinhu 149584a
Cleanup
kevinhu fb70c0b
Fix old references
kevinhu 1c248c3
Add test stub with offline store
kevinhu 3a4012e
Update custom properties
kevinhu 3b575b1
Merge
kevinhu ffcd8cc
Merge branch 'master' of github.com:kevinhu/datahub into sagemaker-features
kevinhu 768393e
Refactor
kevinhu 4bc4601
Merge branch 'master' of github.com:kevinhu/datahub into sagemaker-features
kevinhu 63841e4
Update comments
kevinhu 30564cc
Merge branch 'master' of github.com:kevinhu/datahub into sagemaker-features
kevinhu 0bbe932
Merge
kevinhu 8f96239
Fix imports order
kevinhu File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Init tests and stubs
- Loading branch information
commit 0259845c0a94ff6f6b66e592fc380a1e36453878
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,69 @@ | ||
| import json | ||
|
|
||
| from botocore.stub import Stubber | ||
| from freezegun import freeze_time | ||
|
|
||
| from datahub.ingestion.api.common import PipelineContext | ||
| from datahub.ingestion.source.glue import ( | ||
| SagemakerSource, | ||
| SagemakerSourceConfig, | ||
| get_column_type, | ||
| ) | ||
| from datahub.metadata.com.linkedin.pegasus2avro.schema import ( | ||
| ArrayTypeClass, | ||
| MapTypeClass, | ||
| SchemaFieldDataType, | ||
| StringTypeClass, | ||
| ) | ||
| from tests.test_helpers import mce_helpers | ||
| from tests.unit.test_sagemaker_source_stubs import ( | ||
| describe_feature_group_response_1, | ||
| describe_feature_group_response_2, | ||
| list_feature_groups_response, | ||
| ) | ||
|
|
||
| FROZEN_TIME = "2020-04-14 07:00:00" | ||
|
|
||
|
|
||
def sagemaker_source() -> SagemakerSource:
    """Build the SagemakerSource instance exercised by the tests in this module.

    Returns:
        A ``SagemakerSource`` constructed from a fresh ``PipelineContext`` and
        a ``SagemakerSourceConfig`` whose ``aws_region`` is ``us-west-2``.
    """
    return SagemakerSource(
        # The run id labels this pipeline run as the SageMaker source test,
        # so it is not confused with the Glue source test run.
        ctx=PipelineContext(run_id="sagemaker-source-test"),
        config=SagemakerSourceConfig(aws_region="us-west-2"),
    )
|
|
||
|
|
||
@freeze_time(FROZEN_TIME)
def test_sagemaker_ingest(tmp_path, pytestconfig):
    """Ingest stubbed SageMaker feature groups and compare against the golden file.

    The source's ``sagemaker_client`` is wrapped in a botocore ``Stubber`` so no
    AWS call is made. The emitted work units are serialized to JSON under
    ``tmp_path``, read back with ``mce_helpers.load_json_file`` and compared to
    ``tests/unit/sagemaker/sagemaker_mces_golden.json`` under the pytest root.

    Args:
        tmp_path: pytest fixture providing a per-test temporary directory.
        pytestconfig: pytest fixture; its ``rootpath`` locates the golden file.
    """
    source = sagemaker_source()
    output_path = str(tmp_path / "sagemaker_mces.json")

    # (operation name, canned response, expected request parameters)
    # NOTE(review): the listing stub returns "test-1" before "test", while the
    # describe stubs are queued for "test" first and then "test-1" — confirm
    # this matches the order in which the source issues its describe calls.
    stubbed_calls = (
        ("list_feature_groups", list_feature_groups_response, {}),
        (
            "describe_feature_group",
            describe_feature_group_response_1,
            {"FeatureGroupName": "test"},
        ),
        (
            "describe_feature_group",
            describe_feature_group_response_2,
            {"FeatureGroupName": "test-1"},
        ),
    )

    with Stubber(source.sagemaker_client) as stubber:
        for operation, response, expected_params in stubbed_calls:
            stubber.add_response(operation, response, expected_params)

        # Work units must be drained while the stubber is active.
        mce_objects = []
        for work_unit in source.get_workunits():
            mce_objects.append(work_unit.mce.to_obj())

        with open(output_path, "w") as out_file:
            json.dump(mce_objects, out_file, indent=2)

    output = mce_helpers.load_json_file(output_path)

    golden_dir = pytestconfig.rootpath / "tests/unit/sagemaker"
    golden_path = str(golden_dir / "sagemaker_mces_golden.json")
    golden = mce_helpers.load_json_file(golden_path)

    mce_helpers.assert_mces_equal(output, golden)
57 changes: 57 additions & 0 deletions
57
metadata-ingestion/tests/unit/test_sagemaker_source_stubs.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,57 @@ | ||
| import datetime | ||
|
|
||
# Canned ``list_feature_groups`` response: two feature groups, "test-1"
# followed by "test", both with status "Created".
list_feature_groups_response = {
    "FeatureGroupSummaries": [
        {
            "FeatureGroupName": "test-1",
            "FeatureGroupArn": (
                "arn:aws:sagemaker:us-west-2:123412341234:feature-group/test-1"
            ),
            "CreationTime": datetime.datetime(2021, 6, 23, 13, 58, 10, 264000),
            "FeatureGroupStatus": "Created",
        },
        {
            "FeatureGroupName": "test",
            "FeatureGroupArn": (
                "arn:aws:sagemaker:us-west-2:123412341234:feature-group/test"
            ),
            "CreationTime": datetime.datetime(2021, 6, 14, 11, 3, 0, 803000),
            "FeatureGroupStatus": "Created",
        },
    ],
}
|
|
||
# Canned ``describe_feature_group`` response for the feature group named
# "test": three features, with "feature_1" as the record identifier and
# "feature_3" as the event-time feature. No "Description" key is present.
describe_feature_group_response_1 = {
    "FeatureGroupArn": "arn:aws:sagemaker:us-west-2:123412341234:feature-group/test",
    "FeatureGroupName": "test",
    "RecordIdentifierFeatureName": "feature_1",
    "EventTimeFeatureName": "feature_3",
    "FeatureDefinitions": [
        {
            "FeatureName": "feature_1",
            "FeatureType": "String",
        },
        {
            "FeatureName": "feature_2",
            "FeatureType": "Integral",
        },
        {
            "FeatureName": "feature_3",
            "FeatureType": "Fractional",
        },
    ],
    "CreationTime": datetime.datetime(2021, 6, 14, 11, 3, 0, 803000),
    "OnlineStoreConfig": {
        "EnableOnlineStore": True,
    },
    "FeatureGroupStatus": "Created",
}
# Canned ``describe_feature_group`` response for the feature group named
# "test-1": four features, with "id" as the record identifier and "time" as
# the event-time feature. Unlike the "test" group's response, this one also
# carries a "Description".
describe_feature_group_response_2 = {
    "FeatureGroupArn": "arn:aws:sagemaker:us-west-2:123412341234:feature-group/test-1",
    "FeatureGroupName": "test-1",
    "RecordIdentifierFeatureName": "id",
    "EventTimeFeatureName": "time",
    "FeatureDefinitions": [
        {
            "FeatureName": "name",
            "FeatureType": "String",
        },
        {
            "FeatureName": "id",
            "FeatureType": "Integral",
        },
        {
            "FeatureName": "height",
            "FeatureType": "Fractional",
        },
        {
            "FeatureName": "time",
            "FeatureType": "String",
        },
    ],
    "CreationTime": datetime.datetime(2021, 6, 23, 13, 58, 10, 264000),
    "OnlineStoreConfig": {
        "EnableOnlineStore": True,
    },
    "FeatureGroupStatus": "Created",
    "Description": "First test feature group",
}
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.