Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
feat(ingest): support for env variable in cli
  • Loading branch information
aseembansal-gogo committed Sep 9, 2021
commit dd5489b44aee15908dd1faa5f4ea8145e03fc7f8
9 changes: 9 additions & 0 deletions docs/how/delete-metadata.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,15 @@ will allow you to customize the datahub instance you are communicating with.

_Note: Provide your GMS instance's host when the prompt asks you for the DataHub host._

Alternatively, if you don't want to use a config file, you can set the following environment variables:
```
DATAHUB_SKIP_CONFIG=True
DATAHUB_GMS_HOST=http://localhost:8080
DATAHUB_GMS_TOKEN=
```

The environment variables take precedence over the values in the config file.

## Delete By Urn

To delete all the data related to a single entity, run
Expand Down
54 changes: 48 additions & 6 deletions metadata-ingestion/src/datahub/cli/cli_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
import sys
import typing
from datetime import datetime
from typing import Optional
from typing import List, Optional

import click
import requests
import yaml
from pydantic import BaseModel, ValidationError

DEFAULT_GMS_HOST = "http://localhost:8080"
CONDENSED_DATAHUB_CONFIG_PATH = "~/.datahubenv"
DATAHUB_CONFIG_PATH = os.path.expanduser(CONDENSED_DATAHUB_CONFIG_PATH)

Expand All @@ -35,18 +36,23 @@ def write_datahub_config(host: str, token: Optional[str]) -> None:
return None


def get_session_and_host():
session = requests.Session()
def should_skip_config() -> bool:
    """Report whether the on-disk DataHub config file should be bypassed.

    Returns:
        True only when the ``DATAHUB_SKIP_CONFIG`` environment variable is
        set to the exact string ``"True"``; False when the variable is unset
        or holds any other value.
    """
    # The comparison is exact (case-sensitive). An unset variable yields
    # None, which never equals "True".
    skip_flag = os.environ.get("DATAHUB_SKIP_CONFIG")
    return skip_flag == "True"

gms_host = "http://localhost:8080"
gms_token = None

def ensure_datahub_config() -> None:
if not os.path.isfile(DATAHUB_CONFIG_PATH):
click.secho(
f"No {CONDENSED_DATAHUB_CONFIG_PATH} file found, generating one for you...",
bold=True,
)
write_datahub_config(gms_host, gms_token)
write_datahub_config(DEFAULT_GMS_HOST, None)


def get_details_from_config():
with open(DATAHUB_CONFIG_PATH, "r") as stream:
try:
config_json = yaml.safe_load(stream)
Expand All @@ -63,8 +69,44 @@ def get_session_and_host():

gms_host = gms_config.server
gms_token = gms_config.token
return gms_host, gms_token
except yaml.YAMLError as exc:
click.secho(f"{DATAHUB_CONFIG_PATH} malformatted, error: {exc}", bold=True)
return None, None


def get_details_from_env():
    """Read the GMS connection details from the process environment.

    Returns:
        A ``(gms_host, gms_token)`` tuple taken from the ``DATAHUB_GMS_HOST``
        and ``DATAHUB_GMS_TOKEN`` environment variables. Each element is
        None when its variable is not set; a variable set to an empty string
        is returned as ``""`` rather than None.
    """
    # Mapping.get returns None for a missing key, which mirrors catching
    # KeyError and leaving the value at its None default.
    env_host: Optional[str] = os.environ.get("DATAHUB_GMS_HOST")
    env_token: Optional[str] = os.environ.get("DATAHUB_GMS_TOKEN")
    return env_host, env_token


def first_non_null(ls: List[Optional[str]]) -> Optional[str]:
    """Return the first element of *ls* that is not None.

    Args:
        ls: Candidate values in priority order; earlier entries win.

    Returns:
        The first entry that is not None, or None when *ls* is empty or
        contains only None. Only None is skipped: an empty string counts
        as a value and is returned.
    """
    for candidate in ls:
        # Identity check against None, so falsy strings are still accepted.
        if candidate is not None:
            return candidate
    return None


def get_session_and_host():
session = requests.Session()

gms_host_env, gms_token_env = get_details_from_env()
if not should_skip_config():
ensure_datahub_config()
gms_host_conf, gms_token_conf = get_details_from_config()
gms_host = first_non_null([gms_host_env, gms_host_conf])
gms_token = first_non_null([gms_token_env, gms_token_conf])
else:
gms_host = gms_host_env
gms_token = gms_token_env

session.headers.update(
{
Expand Down