Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
ec8a385
Create common AWS config
kevinhu Jun 23, 2021
d3bf612
Init sagemaker
kevinhu Jun 23, 2021
3db0a58
Common AWS dependencies
kevinhu Jun 23, 2021
d282559
Get features in feature group
kevinhu Jun 23, 2021
8f455c9
Ingest feature groups
kevinhu Jun 23, 2021
2bfb882
Add example ingestion config
kevinhu Jun 23, 2021
5bff9f4
Fix feature ingestion
kevinhu Jun 23, 2021
44ecb58
Append Glue data catalog source
kevinhu Jun 23, 2021
d660a9b
Handle primary key ingestion
kevinhu Jun 24, 2021
0259845
Init tests and stubs
kevinhu Jun 24, 2021
cd4d233
Add sagemaker golden
kevinhu Jun 24, 2021
4ff8434
Clean up golden
kevinhu Jun 24, 2021
8971109
Add descriptions and filter primary keys
kevinhu Jun 24, 2021
9133c85
Include custom fields in feature tables
kevinhu Jun 24, 2021
777f7df
Add sagemaker custom properties
kevinhu Jun 24, 2021
3722726
Merge
kevinhu Jun 24, 2021
149584a
Cleanup
kevinhu Jun 24, 2021
fb70c0b
Fix old references
kevinhu Jun 24, 2021
1c248c3
Add test stub with offline store
kevinhu Jun 24, 2021
3a4012e
Update custom properties
kevinhu Jun 24, 2021
3b575b1
Merge
kevinhu Jun 25, 2021
ffcd8cc
Merge branch 'master' of github.com:kevinhu/datahub into sagemaker-fe…
kevinhu Jun 25, 2021
768393e
Refactor
kevinhu Jun 25, 2021
4bc4601
Merge branch 'master' of github.com:kevinhu/datahub into sagemaker-fe…
kevinhu Jun 28, 2021
63841e4
Update comments
kevinhu Jun 28, 2021
30564cc
Merge branch 'master' of github.com:kevinhu/datahub into sagemaker-fe…
kevinhu Jun 29, 2021
0bbe932
Merge
kevinhu Jun 29, 2021
8f96239
Fix imports order
kevinhu Jun 29, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add test stub with offline store
  • Loading branch information
kevinhu committed Jun 24, 2021
commit 1c248c38b17f63c5ecfa9d1921aad65bc92af520
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ def get_feature_group_details(self, feature_group_name: str) -> Dict[str, Any]:
FeatureGroupName=feature_group_name
)

# use falsy fallback since AWS stubs require this to be a string in tests
next_token = feature_group.get("NextToken", "")

# paginate over feature group features
Expand Down
94 changes: 94 additions & 0 deletions metadata-ingestion/tests/unit/sagemaker/sagemaker_mces_golden.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,98 @@
[
{
"auditHeader": null,
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": {
"urn": "urn:li:mlFeature:(test-2,some-feature-1)",
"aspects": [
{
"com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": {
"description": null,
"dataType": "TEXT",
"version": null,
"sources": [
"urn:li:dataset:(urn:li:dataPlatform:s3,datahub-sagemaker-outputs,PROD)",
"urn:li:dataset:(urn:li:dataPlatform:glue,sagemaker_featurestore.test-2-123412341234,PROD)"
]
}
}
]
}
},
"proposedDelta": null
},
{
"auditHeader": null,
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.MLPrimaryKeySnapshot": {
"urn": "urn:li:mlPrimaryKey:(test-2,some-feature-2)",
"aspects": [
{
"com.linkedin.pegasus2avro.ml.metadata.MLPrimaryKeyProperties": {
"description": null,
"dataType": "ORDINAL",
"version": null,
"sources": [
"urn:li:dataset:(urn:li:dataPlatform:s3,datahub-sagemaker-outputs,PROD)",
"urn:li:dataset:(urn:li:dataPlatform:glue,sagemaker_featurestore.test-2-123412341234,PROD)"
]
}
}
]
}
},
"proposedDelta": null
},
{
"auditHeader": null,
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": {
"urn": "urn:li:mlFeature:(test-2,some-feature-3)",
"aspects": [
{
"com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": {
"description": null,
"dataType": "CONTINUOUS",
"version": null,
"sources": [
"urn:li:dataset:(urn:li:dataPlatform:s3,datahub-sagemaker-outputs,PROD)",
"urn:li:dataset:(urn:li:dataPlatform:glue,sagemaker_featurestore.test-2-123412341234,PROD)"
]
}
}
]
}
},
"proposedDelta": null
},
{
"auditHeader": null,
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureTableSnapshot": {
"urn": "urn:li:mlFeatureTable:(urn:li:dataPlatform:sagemaker,test-2)",
"aspects": [
{
"com.linkedin.pegasus2avro.ml.metadata.MLFeatureTableProperties": {
"customProperties": {
"arn": "arn:aws:sagemaker:us-west-2:123412341234:feature-group/test-2",
"creation_time": "2021-06-24 09:48:37.035000",
"status": "Created"
},
"description": "Yet another test feature group",
"mlFeatures": [
"urn:li:mlFeature:(test-2,some-feature-1)",
"urn:li:mlFeature:(test-2,some-feature-3)"
],
"mlPrimaryKeys": [
"urn:li:mlPrimaryKey:(test-2,some-feature-2)"
]
}
}
]
}
},
"proposedDelta": null
},
{
"auditHeader": null,
"proposedSnapshot": {
Expand Down
10 changes: 9 additions & 1 deletion metadata-ingestion/tests/unit/test_sagemaker_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from tests.unit.test_sagemaker_source_stubs import (
describe_feature_group_response_1,
describe_feature_group_response_2,
describe_feature_group_response_3,
list_feature_groups_response,
)

Expand All @@ -34,6 +35,13 @@ def test_sagemaker_ingest(tmp_path, pytestconfig):
list_feature_groups_response,
{},
)
sagemaker_stubber.add_response(
"describe_feature_group",
describe_feature_group_response_1,
{
"FeatureGroupName": "test-2",
},
)
sagemaker_stubber.add_response(
"describe_feature_group",
describe_feature_group_response_2,
Expand All @@ -43,7 +51,7 @@ def test_sagemaker_ingest(tmp_path, pytestconfig):
)
sagemaker_stubber.add_response(
"describe_feature_group",
describe_feature_group_response_1,
describe_feature_group_response_3,
{
"FeatureGroupName": "test",
},
Expand Down
68 changes: 52 additions & 16 deletions metadata-ingestion/tests/unit/test_sagemaker_source_stubs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

list_feature_groups_response = {
"FeatureGroupSummaries": [
{
"FeatureGroupName": "test-2",
"FeatureGroupArn": "arn:aws:sagemaker:us-west-2:123412341234:feature-group/test-2",
"CreationTime": datetime.datetime(2021, 6, 24, 9, 48, 37, 35000),
"FeatureGroupStatus": "Created",
},
{
"FeatureGroupName": "test-1",
"FeatureGroupArn": "arn:aws:sagemaker:us-west-2:123412341234:feature-group/test-1",
Expand All @@ -19,26 +25,32 @@
}

describe_feature_group_response_1 = {
"FeatureGroupArn": "arn:aws:sagemaker:us-west-2:123412341234:feature-group/test",
"FeatureGroupName": "test",
"RecordIdentifierFeatureName": "feature_1",
"EventTimeFeatureName": "feature_3",
"FeatureGroupArn": "arn:aws:sagemaker:us-west-2:123412341234:feature-group/test-2",
"FeatureGroupName": "test-2",
"RecordIdentifierFeatureName": "some-feature-2",
"EventTimeFeatureName": "some-feature-3",
"FeatureDefinitions": [
{"FeatureName": "feature_1", "FeatureType": "String"},
{"FeatureName": "feature_2", "FeatureType": "Integral"},
{"FeatureName": "feature_3", "FeatureType": "Fractional"},
{"FeatureName": "some-feature-1", "FeatureType": "String"},
{"FeatureName": "some-feature-2", "FeatureType": "Integral"},
{"FeatureName": "some-feature-3", "FeatureType": "Fractional"},
],
"CreationTime": datetime.datetime(
2021,
6,
14,
11,
3,
0,
803000,
),
"CreationTime": datetime.datetime(2021, 6, 24, 9, 48, 37, 35000),
"OnlineStoreConfig": {"EnableOnlineStore": True},
"OfflineStoreConfig": {
"S3StorageConfig": {
"S3Uri": "s3://datahub-sagemaker-outputs",
"ResolvedOutputS3Uri": "s3://datahub-sagemaker-outputs/123412341234/sagemaker/us-west-2/offline-store/test-2-123412341234/data",
},
"DisableGlueTableCreation": False,
"DataCatalogConfig": {
"TableName": "test-2-123412341234",
"Catalog": "AwsDataCatalog",
"Database": "sagemaker_featurestore",
},
},
"RoleArn": "arn:aws:iam::123412341234:role/service-role/AmazonSageMaker-ExecutionRole-20210614T104201",
"FeatureGroupStatus": "Created",
"Description": "Yet another test feature group",
"NextToken": "",
}

Expand All @@ -59,3 +71,27 @@
"Description": "First test feature group",
"NextToken": "",
}

# Stubbed describe_feature_group response for the feature group named "test".
# This payload carries only an online-store config: it has no
# "OfflineStoreConfig" and no "Description" entry.
describe_feature_group_response_3 = {
    "FeatureGroupArn": "arn:aws:sagemaker:us-west-2:123412341234:feature-group/test",
    "FeatureGroupName": "test",
    # feature_1 is the record identifier, feature_3 the event-time feature
    "RecordIdentifierFeatureName": "feature_1",
    "EventTimeFeatureName": "feature_3",
    "FeatureDefinitions": [
        {"FeatureName": "feature_1", "FeatureType": "String"},
        {"FeatureName": "feature_2", "FeatureType": "Integral"},
        {"FeatureName": "feature_3", "FeatureType": "Fractional"},
    ],
    # naive (tz-less) timestamp: 2021-06-14 11:03:00.803
    "CreationTime": datetime.datetime(2021, 6, 14, 11, 3, 0, 803000),
    "OnlineStoreConfig": {"EnableOnlineStore": True},
    "FeatureGroupStatus": "Created",
    # present but empty, i.e. a string value rather than a missing key
    "NextToken": "",
}