Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions docs/how/delete-metadata.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,15 @@ will allow you to customize the datahub instance you are communicating with.

_Note: Provide your GMS instance's host when the prompt asks you for the DataHub host._

Alternatively, you can set the following env variables if you don't want to use a config file
```
DATAHUB_SKIP_CONFIG=True
DATAHUB_GMS_HOST=http://localhost:8080
DATAHUB_GMS_TOKEN=
```

The env variables take precendence over what is in the config

## Delete By Urn

To delete all the data related to a single entity, run
Expand Down
54 changes: 48 additions & 6 deletions metadata-ingestion/src/datahub/cli/cli_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
import sys
import typing
from datetime import datetime
from typing import Optional
from typing import List, Optional

import click
import requests
import yaml
from pydantic import BaseModel, ValidationError

DEFAULT_GMS_HOST = "http://localhost:8080"
CONDENSED_DATAHUB_CONFIG_PATH = "~/.datahubenv"
DATAHUB_CONFIG_PATH = os.path.expanduser(CONDENSED_DATAHUB_CONFIG_PATH)

Expand All @@ -35,18 +36,23 @@ def write_datahub_config(host: str, token: Optional[str]) -> None:
return None


def get_session_and_host():
session = requests.Session()
def should_skip_config() -> bool:
try:
return os.environ["DATAHUB_SKIP_CONFIG"] == "True"
except KeyError:
return False

gms_host = "http://localhost:8080"
gms_token = None

def ensure_datahub_config() -> None:
if not os.path.isfile(DATAHUB_CONFIG_PATH):
click.secho(
f"No {CONDENSED_DATAHUB_CONFIG_PATH} file found, generating one for you...",
bold=True,
)
write_datahub_config(gms_host, gms_token)
write_datahub_config(DEFAULT_GMS_HOST, None)


def get_details_from_config():
with open(DATAHUB_CONFIG_PATH, "r") as stream:
try:
config_json = yaml.safe_load(stream)
Expand All @@ -63,8 +69,44 @@ def get_session_and_host():

gms_host = gms_config.server
gms_token = gms_config.token
return gms_host, gms_token
except yaml.YAMLError as exc:
click.secho(f"{DATAHUB_CONFIG_PATH} malformatted, error: {exc}", bold=True)
return None, None


def get_details_from_env():
gms_host: Optional[str] = None
try:
gms_host = os.environ["DATAHUB_GMS_HOST"]
except KeyError:
pass

gms_token: Optional[str] = None
try:
gms_token = os.environ["DATAHUB_GMS_TOKEN"]
except KeyError:
pass

return gms_host, gms_token


def first_non_null(ls: List[Optional[str]]) -> Optional[str]:
return next((el for el in ls if el is not None), None)


def get_session_and_host():
session = requests.Session()

gms_host_env, gms_token_env = get_details_from_env()
if not should_skip_config():
ensure_datahub_config()
gms_host_conf, gms_token_conf = get_details_from_config()
gms_host = first_non_null([gms_host_env, gms_host_conf])
gms_token = first_non_null([gms_token_env, gms_token_conf])
else:
gms_host = gms_host_env
gms_token = gms_token_env

session.headers.update(
{
Expand Down