Skip to content

Commit a089b2d

Browse files
committed
Merge branch 'master' of github.com:kevinhu/datahub into sagemaker-models
2 parents 233c00f + 725abf5 commit a089b2d

File tree

102 files changed

+1729
-387
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

102 files changed

+1729
-387
lines changed

README.md

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ HOSTED_DOCS_ONLY-->
4848
[Features](https://datahubproject.io/docs/features) |
4949
[Roadmap](https://datahubproject.io/docs/roadmap) |
5050
[Adoption](#adoption) |
51-
[FAQ](https://datahubproject.io/docs/faq) |
5251
[Demo](https://datahubproject.io/docs/demo) |
5352
[Town Hall](https://datahubproject.io/docs/townhalls)
5453

@@ -97,10 +96,6 @@ We have documentation available at [https://datahubproject.io/docs/](https://dat
9796

9897
See [Releases](https://github.com/linkedin/datahub/releases) page for more details. We follow the [SemVer Specification](https://semver.org) when versioning the releases and adopt the [Keep a Changelog convention](https://keepachangelog.com/) for the changelog format.
9998

100-
## FAQs
101-
102-
Frequently Asked Questions about DataHub can be found [here](docs/faq.md).
103-
10499
## Features & Roadmap
105100

106101
Check out DataHub's [Features](docs/features.md) & [Roadmap](docs/roadmap.md).

build.gradle

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ project.ext.externalDependency = [
6262
'guice': 'com.google.inject:guice:4.2.2',
6363
'guava': 'com.google.guava:guava:27.0.1-jre',
6464
'h2': 'com.h2database:h2:1.4.196',
65+
'hadoopClient': 'org.apache.hadoop:hadoop-client:3.1.0',
6566
'hibernateCore': 'org.hibernate:hibernate-core:5.2.16.Final',
6667
'httpClient': 'org.apache.httpcomponents:httpclient:4.5.9',
6768
'iStackCommons': 'com.sun.istack:istack-commons-runtime:4.0.1',
@@ -87,6 +88,7 @@ project.ext.externalDependency = [
8788
'neo4jHarness': 'org.neo4j.test:neo4j-harness:3.4.11',
8889
'neo4jJavaDriver': 'org.neo4j.driver:neo4j-java-driver:4.0.1',
8990
'parseqTest': 'com.linkedin.parseq:parseq:3.0.7:test',
91+
'parquet': 'org.apache.parquet:parquet-avro:1.12.0',
9092
'picocli': 'info.picocli:picocli:4.5.0',
9193
'playCache': 'com.typesafe.play:play-cache_2.11:2.6.18',
9294
'playDocs': 'com.typesafe.play:play-docs_2.11:2.6.18',

datahub-kubernetes/datahub/Chart.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@ description: A Helm chart for LinkedIn DataHub
44
type: application
55
# This is the chart version. This version number should be incremented each time you make changes
66
# to the chart and its templates, including the app version.
7-
version: 0.2.1
7+
version: 0.2.2
88
# This is the version number of the application being deployed. This version number should be
99
# incremented each time you make changes to the application.
10-
appVersion: 0.8.3 #0.3.1
10+
appVersion: 0.8.4 #0.3.1
1111
dependencies:
1212
- name: datahub-gms
1313
version: 0.2.1

datahub-kubernetes/datahub/templates/datahub-upgrade/datahub-cleanup-job-template.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,9 @@ spec:
8585
name: "{{ .password.secretRef }}"
8686
key: "{{ .password.secretKey }}"
8787
{{- end }}
88+
- name: GRAPH_SERVICE_IMPL
89+
value: {{ .Values.global.graph_service_impl }}
90+
{{- if eq .Values.global.graph_service_impl "neo4j" }}
8891
- name: NEO4J_HOST
8992
value: "{{ .Values.global.neo4j.host }}"
9093
- name: NEO4J_URI
@@ -96,6 +99,7 @@ spec:
9699
secretKeyRef:
97100
name: "{{ .Values.global.neo4j.password.secretRef }}"
98101
key: "{{ .Values.global.neo4j.password.secretKey }}"
102+
{{- end }}
99103
{{- with .Values.datahubUpgrade.extraEnvs }}
100104
{{- toYaml . | nindent 16 }}
101105
{{- end }}
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
{{- if .Values.datahubUpgrade.enabled -}}
2+
# Job template for restoring indices by sending MAE corresponding to all entities in the local db
3+
# Creates a suspended CronJob that you can use to create an ad hoc job when ready to restore indices.
4+
# Run the following command to do so
5+
# kubectl create job --from=cronjob/<<release-name>>-datahub-restore-indices-job-template datahub-restore-indices-job
6+
apiVersion: batch/v1beta1
7+
kind: CronJob
8+
metadata:
9+
name: {{ .Release.Name }}-datahub-restore-indices-job-template
10+
labels:
11+
app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
12+
app.kubernetes.io/instance: {{ .Release.Name | quote }}
13+
app.kubernetes.io/version: {{ .Chart.AppVersion }}
14+
helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
15+
spec:
16+
schedule: "* * * * *"
17+
suspend: true
18+
jobTemplate:
19+
spec:
20+
template:
21+
spec:
22+
{{- with .Values.global.hostAliases }}
23+
hostAliases:
24+
{{- toYaml . | nindent 12 }}
25+
{{- end }}
26+
{{- with .Values.datahubUpgrade.serviceAccount }}
27+
serviceAccountName: {{ . }}
28+
{{- end }}
29+
{{- with .Values.imagePullSecrets }}
30+
imagePullSecrets:
31+
{{- toYaml . | nindent 12 }}
32+
{{- end }}
33+
volumes:
34+
{{- with .Values.datahubUpgrade.extraVolumes }}
35+
{{- toYaml . | nindent 12 }}
36+
{{- end }}
37+
restartPolicy: Never
38+
securityContext:
39+
runAsUser: 1000
40+
fsGroup: 1000
41+
containers:
42+
- name: datahub-upgrade-job
43+
image: "{{ .Values.datahubUpgrade.image.repository }}:{{ .Values.datahubUpgrade.image.tag }}"
44+
args:
45+
- "-u"
46+
- "RestoreIndices"
47+
env:
48+
- name: DATAHUB_GMS_HOST
49+
value: {{ printf "%s-%s" .Release.Name "datahub-gms" }}
50+
- name: DATAHUB_GMS_PORT
51+
value: "{{ .Values.global.datahub.gms.port }}"
52+
- name: DATAHUB_MAE_CONSUMER_HOST
53+
value: {{ printf "%s-%s" .Release.Name "datahub-mae-consumer" }}
54+
- name: DATAHUB_MAE_CONSUMER_PORT
55+
value: "{{ .Values.global.datahub.mae_consumer.port }}"
56+
- name: EBEAN_DATASOURCE_USERNAME
57+
value: "{{ .Values.global.sql.datasource.username }}"
58+
- name: EBEAN_DATASOURCE_PASSWORD
59+
valueFrom:
60+
secretKeyRef:
61+
name: "{{ .Values.global.sql.datasource.password.secretRef }}"
62+
key: "{{ .Values.global.sql.datasource.password.secretKey }}"
63+
- name: EBEAN_DATASOURCE_HOST
64+
value: "{{ .Values.global.sql.datasource.host }}"
65+
- name: EBEAN_DATASOURCE_URL
66+
value: "{{ .Values.global.sql.datasource.url }}"
67+
- name: EBEAN_DATASOURCE_DRIVER
68+
value: "{{ .Values.global.sql.datasource.driver }}"
69+
- name: KAFKA_BOOTSTRAP_SERVER
70+
value: "{{ .Values.global.kafka.bootstrap.server }}"
71+
- name: KAFKA_SCHEMAREGISTRY_URL
72+
value: "{{ .Values.global.kafka.schemaregistry.url }}"
73+
- name: ELASTICSEARCH_HOST
74+
value: {{ .Values.global.elasticsearch.host | quote }}
75+
- name: ELASTICSEARCH_PORT
76+
value: {{ .Values.global.elasticsearch.port | quote }}
77+
{{- with .Values.global.elasticsearch.useSSL }}
78+
- name: ELASTICSEARCH_USE_SSL
79+
value: {{ . | quote }}
80+
{{- end }}
81+
{{- with .Values.global.elasticsearch.auth }}
82+
- name: ELASTICSEARCH_USERNAME
83+
value: {{ .username }}
84+
- name: ELASTICSEARCH_PASSWORD
85+
valueFrom:
86+
secretKeyRef:
87+
name: "{{ .password.secretRef }}"
88+
key: "{{ .password.secretKey }}"
89+
{{- end }}
90+
- name: GRAPH_SERVICE_IMPL
91+
value: {{ .Values.global.graph_service_impl }}
92+
{{- if eq .Values.global.graph_service_impl "neo4j" }}
93+
- name: NEO4J_HOST
94+
value: "{{ .Values.global.neo4j.host }}"
95+
- name: NEO4J_URI
96+
value: "{{ .Values.global.neo4j.uri }}"
97+
- name: NEO4J_USERNAME
98+
value: "{{ .Values.global.neo4j.username }}"
99+
- name: NEO4J_PASSWORD
100+
valueFrom:
101+
secretKeyRef:
102+
name: "{{ .Values.global.neo4j.password.secretRef }}"
103+
key: "{{ .Values.global.neo4j.password.secretKey }}"
104+
{{- end }}
105+
{{- with .Values.datahubUpgrade.extraEnvs }}
106+
{{- toYaml . | nindent 16 }}
107+
{{- end }}
108+
volumeMounts:
109+
{{- with .Values.datahubUpgrade.extraVolumeMounts }}
110+
{{- toYaml . | nindent 16 }}
111+
{{- end }}
112+
resources:
113+
limits:
114+
cpu: 500m
115+
memory: 512Mi
116+
requests:
117+
cpu: 300m
118+
memory: 256Mi
119+
{{- end -}}

datahub-kubernetes/datahub/templates/datahub-upgrade/datahub-upgrade-job.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,9 @@ spec:
9191
name: "{{ .password.secretRef }}"
9292
key: "{{ .password.secretKey }}"
9393
{{- end }}
94+
- name: GRAPH_SERVICE_IMPL
95+
value: {{ .Values.global.graph_service_impl }}
96+
{{- if eq .Values.global.graph_service_impl "neo4j" }}
9497
- name: NEO4J_HOST
9598
value: "{{ .Values.global.neo4j.host }}"
9699
- name: NEO4J_URI
@@ -102,6 +105,7 @@ spec:
102105
secretKeyRef:
103106
name: "{{ .Values.global.neo4j.password.secretRef }}"
104107
key: "{{ .Values.global.neo4j.password.secretKey }}"
108+
{{- end }}
105109
{{- with .Values.datahubUpgrade.extraEnvs }}
106110
{{- toYaml . | nindent 12 }}
107111
{{- end }}

datahub-upgrade/build.gradle

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,13 @@ dependencies {
77
compile project(':metadata-io')
88
compile project(':gms:impl')
99
compile project(':gms:factories')
10+
compile project(':gms:client')
1011

1112
compile externalDependency.javaxInject
13+
compile externalDependency.hadoopClient
1214
compile externalDependency.lombok
1315
compile externalDependency.picocli
16+
compile externalDependency.parquet
1417
compile externalDependency.springBeans
1518
compile externalDependency.springBootAutoconfigure
1619
compile externalDependency.springCore

datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeCli.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22

33
import com.linkedin.datahub.upgrade.impl.DefaultUpgradeManager;
44
import com.linkedin.datahub.upgrade.nocode.NoCodeUpgrade;
5-
import java.util.List;
65
import com.linkedin.datahub.upgrade.nocodecleanup.NoCodeCleanupUpgrade;
6+
import com.linkedin.datahub.upgrade.restorebackup.RestoreBackup;
7+
import com.linkedin.datahub.upgrade.restoreindices.RestoreIndices;
8+
import java.util.List;
79
import javax.inject.Inject;
810
import javax.inject.Named;
911
import lombok.extern.slf4j.Slf4j;
@@ -34,10 +36,20 @@ private static final class Args {
3436
@Named("noCodeCleanup")
3537
private NoCodeCleanupUpgrade noCodeCleanup;
3638

39+
@Inject
40+
@Named("restoreIndices")
41+
private RestoreIndices restoreIndices;
42+
43+
@Inject
44+
@Named("restoreBackup")
45+
private RestoreBackup restoreBackup;
46+
3747
@Override
3848
public void run(String... cmdLineArgs) {
3949
_upgradeManager.register(noCodeUpgrade);
4050
_upgradeManager.register(noCodeCleanup);
51+
_upgradeManager.register(restoreIndices);
52+
_upgradeManager.register(restoreBackup);
4153

4254
final Args args = new Args();
4355
new CommandLine(args).setCaseInsensitiveEnumValuesAllowed(true).parseArgs(cmdLineArgs);

datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeCliApplication.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
@SuppressWarnings("checkstyle:HideUtilityClassConstructor")
99
@SpringBootApplication(exclude = {RestClientAutoConfiguration.class}, scanBasePackages = {
10-
"com.linkedin.gms.factory.common", "com.linkedin.datahub.upgrade.config", "com.linkedin.gms.factory.entity"})
10+
"com.linkedin.gms.factory.common", "com.linkedin.gms.factory.search", "com.linkedin.datahub.upgrade.config", "com.linkedin.gms.factory.entity"})
1111
public class UpgradeCliApplication {
1212
public static void main(String[] args) {
1313
new SpringApplicationBuilder(UpgradeCliApplication.class, UpgradeCli.class).web(WebApplicationType.NONE).run(args);

datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeStep.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import java.util.function.Function;
44

5+
56
/**
67
* Represents a single executable step in an {@link Upgrade}.
78
*/
@@ -31,4 +32,10 @@ default boolean isOptional() {
3132
return false;
3233
}
3334

35+
/**
36+
* Returns whether to skip this step, based on the given {@link UpgradeContext}.
37+
*/
38+
default boolean skip(UpgradeContext context) {
39+
return false;
40+
}
3441
}

0 commit comments

Comments
 (0)