Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
fix(tracing): handle buildbot tracing integration problems
  • Loading branch information
JGAntunes committed Jun 28, 2023
commit 9d9debdb35e2c57a36e9206f0499538ab10c5e77
8 changes: 4 additions & 4 deletions packages/build/src/core/build.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@ export const startBuild = function (flags: Partial<BuildFlags>) {
logBuildStart(logs)
}

const { bugsnagKey, tracing, ...flagsA } = normalizeFlags(flags, logs)
const errorMonitor = startErrorMonitor({ flags: { tracing, ...flagsA }, logs, bugsnagKey })
startTracing(tracing)
const { bugsnagKey, tracingOpts, debug, systemLogFile, ...flagsA } = normalizeFlags(flags, logs)
const errorMonitor = startErrorMonitor({ flags: { tracingOpts, debug, systemLogFile, ...flagsA }, logs, bugsnagKey })
startTracing(tracingOpts, getSystemLogger(logs, debug, systemLogFile))

return { ...flagsA, errorMonitor, logs, timers }
return { ...flagsA, debug, systemLogFile, errorMonitor, logs, timers }
}

const tExecBuild = async function ({
Expand Down
3 changes: 2 additions & 1 deletion packages/build/src/core/normalize_flags.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ export type ResolvedFlags = {
context: 'production' | string
statsdOpts: { host?: number; port: number }
bugsnagKey?: string
tracing: TracingOptions
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We actually perform the transformation and apply the default values to `tracingOpts` 🤷, so this makes sure we use that.

systemLogFile?: number
tracingOpts: TracingOptions
}

/** Normalize CLI flags */
Expand Down
1 change: 1 addition & 0 deletions packages/build/src/log/messages/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ const INTERNAL_FLAGS = [
'sendStatus',
'saveConfig',
'statsd',
'tracing',
'framework',
'featureFlags',
'buildbotServerSocket',
Expand Down
38 changes: 32 additions & 6 deletions packages/build/src/tracing/main.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { HoneycombSDK } from '@honeycombio/opentelemetry-node'
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't really need the HoneycombSDK since we're using a collector. Plus it was logging warnings I couldn't suppress:

import { context, trace, propagation, SpanStatusCode } from '@opentelemetry/api'
import { context, trace, propagation, SpanStatusCode, diag, DiagLogLevel, DiagLogger } from '@opentelemetry/api'
import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-proto'
import { HttpInstrumentation } from '@opentelemetry/instrumentation-http'
import { NodeSDK } from '@opentelemetry/sdk-node'

Expand All @@ -8,17 +8,37 @@ import { ROOT_PACKAGE_JSON } from '../utils/json.js'

let sdk: NodeSDK

/**
 * Adapts a plain logging function into a `DiagLogger`, so that our system logger can be used as the diag logger.
 * Every log level forwards its arguments to `logger`, prefixed with `[otel-traces]`.
 */
const getOtelLogger = function (logger: (...args: any[]) => void): DiagLogger {
  // One forwarding function is shared by all levels: the level itself is not included in the output
  function forward(...messageParts: any[]) {
    return logger('[otel-traces]', ...messageParts)
  }

  const diagLogger: DiagLogger = {
    verbose: forward,
    debug: forward,
    info: forward,
    warn: forward,
    error: forward,
  }
  return diagLogger
}

/** Starts the tracing SDK, if there's already a tracing service this will be a no-op */
export const startTracing = function (options: TracingOptions) {
export const startTracing = function (options: TracingOptions, logger: (...args: any[]) => void) {
if (!options.enabled) return
if (sdk) return

sdk = new HoneycombSDK({
const traceExporter = new OTLPTraceExporter({
url: `http://${options.host}:${options.port}`,
})

sdk = new NodeSDK({
serviceName: ROOT_PACKAGE_JSON.name,
endpoint: `http://${options.host}:${options.port}`,
traceExporter,
instrumentations: [new HttpInstrumentation()],
})

// Set the diagnostics logger to our system logger. We also need to suppress the override msg
// in case there's a default console logger already registered (it would log a msg to it)
diag.setLogger(getOtelLogger(logger), { logLevel: DiagLogLevel.INFO, suppressOverrideMessage: true })

Comment on lines +38 to +41
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This sets the internal diagnostics logger for OpenTelemetry to our system logger.

Running this locally renders something like this if there are any errors and we're running in debug mode (in Buildbot prod this should still land in our backend without showing up in customers' builds):

node packages/build/bin.js --tracing.enabled=true --tracing.host=127.0.0.1 --debug ../../github/www-and-blog/

Netlify Build
────────────────────────────────────────────────────────────────

❯ Version
  @netlify/build 29.14.0

❯ Flags
  debug: true
  doest:
    not:
      exist: yolo
  siteId: 1e16aaa6-91e2-4bbc-bbe6-134250b9d81d
  tracing:
    enabled: true
    host: 127.0.0.1

❯ Current directory
  /Users/jgantunes/workspace/netlify/build

❯ Config file
  No config file was defined: using default values.

❯ Resolved config
  build:
    publish: /Users/jgantunes/workspace/netlify/build
    publishOrigin: default

❯ Context
  production

Netlify Build Complete
────────────────────────────────────────────────────────────────

(Netlify Build completed in 66ms)
Build step duration: Netlify Build completed in 66ms
[otel-traces] Service request {"resourceSpans":[{"resource":{"attributes":[{"key":"service.name","value":{"stringValue":"@netlify/build"}},{"key":"telemetry.sdk.language","value":{"stringValue":"nodejs"}},{"key":"telemetry.sdk.name","value":{"stringValue":"opentelemetry"}},{"key":"telemetry.sdk.version","value":{"stringValue":"1.14.0"}},{"key":"process.pid","value":{"intValue":80030}},{"key":"process.executable.name","value":{"stringValue":"/Users/jgantunes/.nvm/versions/node/v16.4.2/bin/node"}},{"key":"process.executable.path","value":{"stringValue":"/Users/jgantunes/.nvm/versions/node/v16.4.2/bin/node"}},{"key":"process.command_args","value":{"arrayValue":{"values":[{"stringValue":"/Users/jgantunes/.nvm/versions/node/v16.4.2/bin/node"},{"stringValue":"/Users/jgantunes/workspace/netlify/build/packages/build/bin.js"},{"stringValue":"--tracing.enabled=true"},{"stringValue":"--tracing.host=127.0.0.1"},{"stringValue":"--debug"},{"stringValue":"--token"},{"stringValue":"-AaVbRmHW9hv0iftbfZESEsxLhUpsGjj7SQWhGjIPp4"},{"stringValue":"--siteId"},{"stringValue":"1e16aaa6-91e2-4bbc-bbe6-134250b9d81d"},{"stringValue":"--doest.not.exist"},{"stringValue":"yolo"},{"stringValue":"../../github/www-and-blog/"}]}}},{"key":"process.runtime.version","value":{"stringValue":"16.4.2"}},{"key":"process.runtime.name","value":{"stringValue":"nodejs"}},{"key":"process.runtime.description","value":{"stringValue":"Node.js"}},{"key":"process.command","value":{"stringValue":"/Users/jgantunes/workspace/netlify/build/packages/build/bin.js"}},{"key":"process.owner","value":{"stringValue":"jgantunes"}}],"droppedAttributesCount":0},"scopeSpans":[{"scope":{"name":"steps"},"spans":[{"traceId":"o9LynihymCNYSXun71/HPg==","spanId":"5Z/uq69ryis=","parentSpanId":"dofsu5pRMqI=","name":"run-step-build_command","kind":1,"startTimeUnixNano":1687953946533000000,"endTimeUnixNano":1687953946533486600,"attributes":[{"key":"build.execution.step.should_run","value":{"boolValue":false}}],"droppedAttributesCount":0,"eve
nts":[],"droppedEventsCount":0,"status":{"code":0},"links":[],"droppedLinksCount":0},{"traceId":"o9LynihymCNYSXun71/HPg==","spanId":"G3vEo9dyLMY=","parentSpanId":"dofsu5pRMqI=","name":"run-step-functions_bundling","kind":1,"startTimeUnixNano":1687953946534000000,"endTimeUnixNano":1687953946534406700,"attributes":[{"key":"build.execution.step.should_run","value":{"boolValue":false}}],"droppedAttributesCount":0,"events":[],"droppedEventsCount":0,"status":{"code":0},"links":[],"droppedLinksCount":0},{"traceId":"o9LynihymCNYSXun71/HPg==","spanId":"K0ojzneQpxc=","parentSpanId":"dofsu5pRMqI=","name":"run-step-edge_functions_bundling","kind":1,"startTimeUnixNano":1687953946534000000,"endTimeUnixNano":1687953946534243300,"attributes":[{"key":"build.execution.step.should_run","value":{"boolValue":false}}],"droppedAttributesCount":0,"events":[],"droppedEventsCount":0,"status":{"code":0},"links":[],"droppedLinksCount":0},{"traceId":"o9LynihymCNYSXun71/HPg==","spanId":"nDLvgSE0q5U=","parentSpanId":"dofsu5pRMqI=","name":"run-step-deploy_site","kind":1,"startTimeUnixNano":1687953946535000000,"endTimeUnixNano":1687953946535169800,"attributes":[{"key":"build.execution.step.should_run","value":{"boolValue":false}}],"droppedAttributesCount":0,"events":[],"droppedEventsCount":0,"status":{"code":0},"links":[],"droppedLinksCount":0},{"traceId":"o9LynihymCNYSXun71/HPg==","spanId":"Urb73yGcZqQ=","parentSpanId":"dofsu5pRMqI=","name":"run-step-save_artifacts","kind":1,"startTimeUnixNano":1687953946535000000,"endTimeUnixNano":1687953946535169000,"attributes":[{"key":"build.execution.step.should_run","value":{"boolValue":false}}],"droppedAttributesCount":0,"events":[],"droppedEventsCount":0,"status":{"code":0},"links":[],"droppedLinksCount":0}]},{"scope":{"name":"core"},"spans":[{"traceId":"o9LynihymCNYSXun71/HPg==","spanId":"dofsu5pRMqI=","name":"exec-build","kind":1,"startTimeUnixNano":1687953946469000000,"endTimeUnixNano":1687953946537037300,"attributes":[{"key":"build.execution.success","
value":{"boolValue":true}},{"key":"build.execution.code","value":{"intValue":0}},{"key":"build.execution.status","value":{"stringValue":"success"}}],"droppedAttributesCount":0,"events":[],"droppedEventsCount":0,"status":{"code":0},"links":[],"droppedLinksCount":0}]}]}]}
[otel-traces] 14 UNAVAILABLE: No connection established Error: 14 UNAVAILABLE: No connection established
    at callErrorFromStatus (/Users/jgantunes/workspace/netlify/build/node_modules/@grpc/grpc-js/build/src/call.js:31:19)
    at Object.onReceiveStatus (/Users/jgantunes/workspace/netlify/build/node_modules/@grpc/grpc-js/build/src/client.js:192:76)
    at Object.onReceiveStatus (/Users/jgantunes/workspace/netlify/build/node_modules/@grpc/grpc-js/build/src/client-interceptors.js:360:141)
    at Object.onReceiveStatus (/Users/jgantunes/workspace/netlify/build/node_modules/@grpc/grpc-js/build/src/client-interceptors.js:323:181)
    at /Users/jgantunes/workspace/netlify/build/node_modules/@grpc/grpc-js/build/src/resolving-call.js:94:78
    at processTicksAndRejections (node:internal/process/task_queues:78:11)
for call at

Removing the debug flag clears these logs.

sdk.start()

// Sets the current trace ID and span ID based on the options received
Expand All @@ -34,7 +54,13 @@ export const startTracing = function (options: TracingOptions) {
/** Stops the tracing service if there's one running. This will flush any ongoing events */
export const stopTracing = async function () {
if (!sdk) return
return sdk.shutdown()
try {
// The shutdown method might return an error if we fail to flush the traces
// We handle it and use our diagnostics logger
await sdk.shutdown()
} catch (e) {
diag.error(e)
}
}

/** Sets attributes to be propagated across child spans under the current context */
Expand Down