Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions experimental/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ All notable changes to experimental packages in this project will be documented

### :bug: (Bug Fix)

* fix(instrumentation-http): drop url.parse in favor of URL constructor [#5091](https://github.com/open-telemetry/opentelemetry-js/pull/5091) @pichlermarc
* fixes a bug where using cyrillic characters in a client request string URL would throw an exception, whereas an un-instrumented client would accept the same input without throwing an exception

### :books: (Refine Doc)

### :house: (Internal)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -632,15 +632,19 @@ export class HttpInstrumentation extends InstrumentationBase<HttpInstrumentation

const headers = request.headers;

const spanAttributes = getIncomingRequestAttributes(request, {
component: component,
serverName: instrumentation.getConfig().serverName,
hookAttributes: instrumentation._callStartSpanHook(
request,
instrumentation.getConfig().startIncomingSpanHook
),
semconvStability: instrumentation._semconvStability,
});
const spanAttributes = getIncomingRequestAttributes(
request,
{
component: component,
serverName: instrumentation.getConfig().serverName,
hookAttributes: instrumentation._callStartSpanHook(
request,
instrumentation.getConfig().startIncomingSpanHook
),
semconvStability: instrumentation._semconvStability,
},
instrumentation._diag
);

const spanOptions: SpanOptions = {
kind: SpanKind.SERVER,
Expand Down Expand Up @@ -757,7 +761,11 @@ export class HttpInstrumentation extends InstrumentationBase<HttpInstrumentation
(typeof options === 'string' || options instanceof url.URL)
? (args.shift() as http.RequestOptions)
: undefined;
const { method, optionsParsed } = getRequestInfo(options, extraOptions);
const { method, invalidUrl, optionsParsed } = getRequestInfo(
instrumentation._diag,
options,
extraOptions
);
/**
* Node 8's https module directly call the http one so to avoid creating
* 2 span for the same request we need to check that the protocol is correct
Expand Down Expand Up @@ -859,7 +867,16 @@ export class HttpInstrumentation extends InstrumentationBase<HttpInstrumentation
}

const request: http.ClientRequest = safeExecuteInTheMiddle(
() => original.apply(this, [optionsParsed, ...args]),
() => {
if (invalidUrl) {
// we know that the url is invalid, there's no point in injecting context as it will fail validation.
// Passing in what the user provided will give the user an error that matches what they'd see without
// the instrumentation.
return original.apply(this, [options, ...args]);
} else {
return original.apply(this, [optionsParsed, ...args]);
}
},
error => {
if (error) {
setSpanWithError(span, error, instrumentation._semconvStability);
Expand Down
171 changes: 146 additions & 25 deletions experimental/packages/opentelemetry-instrumentation-http/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import {
Span,
context,
SpanKind,
DiagLogger,
} from '@opentelemetry/api';
import {
ATTR_CLIENT_ADDRESS,
Expand Down Expand Up @@ -235,27 +236,104 @@ export const isCompressed = (
return !!encoding && encoding !== 'identity';
};

/**
* Mimics Node.js conversion of URL strings to RequestOptions expected by
* `http.request` and `https.request` APIs.
*
* See https://github.com/nodejs/node/blob/2505e217bba05fc581b572c685c5cf280a16c5a3/lib/internal/url.js#L1415-L1437
*
* @param stringUrl
* @throws TypeError if the URL is not valid.
*/
function stringUrlToHttpOptions(
stringUrl: string
): RequestOptions & { pathname: string } {
// This is heavily inspired by Node.js handling of the same situation, trying
// to follow it as closely as possible while keeping in mind that we only
// deal with string URLs, not URL objects.
const {
hostname,
pathname,
port,
username,
password,
search,
protocol,
hash,
href,
origin,
host,
} = new URL(stringUrl);

const options: RequestOptions & {
pathname: string;
hash: string;
search: string;
href: string;
origin: string;
} = {
protocol: protocol,
hostname:
hostname && hostname[0] === '[' ? hostname.slice(1, -1) : hostname,
hash: hash,
search: search,
pathname: pathname,
path: `${pathname || ''}${search || ''}`,
href: href,
origin: origin,
host: host,
};
if (port !== '') {
options.port = Number(port);
}
if (username || password) {
options.auth = `${decodeURIComponent(username)}:${decodeURIComponent(
password
)}`;
}
return options;
}

/**
* Makes sure options is an url object
* return an object with default value and parsed options
* @param logger component logger
* @param options original options for the request
* @param [extraOptions] additional options for the request
*/
export const getRequestInfo = (
logger: DiagLogger,
options: url.URL | RequestOptions | string,
extraOptions?: RequestOptions
): {
origin: string;
pathname: string;
method: string;
invalidUrl: boolean;
optionsParsed: RequestOptions;
} => {
let pathname = '/';
let origin = '';
let pathname: string;
let origin: string;
let optionsParsed: RequestOptions;
let invalidUrl = false;
if (typeof options === 'string') {
optionsParsed = url.parse(options);
pathname = (optionsParsed as url.UrlWithStringQuery).pathname || '/';
try {
const convertedOptions = stringUrlToHttpOptions(options);
optionsParsed = convertedOptions;
pathname = convertedOptions.pathname || '/';
} catch (e) {
invalidUrl = true;
logger.verbose(
'Unable to parse URL provided to HTTP request, using fallback to determine path. Original error:',
e
);
// for backward compatibility with how url.parse() behaved.
optionsParsed = {
path: options,
};
pathname = optionsParsed.path || '/';
}

origin = `${optionsParsed.protocol || 'http:'}//${optionsParsed.host}`;
if (extraOptions !== undefined) {
Object.assign(optionsParsed, extraOptions);
Expand Down Expand Up @@ -285,16 +363,23 @@ export const getRequestInfo = (
{ protocol: options.host ? 'http:' : undefined },
options
);
pathname = (options as url.URL).pathname;
if (!pathname && optionsParsed.path) {
pathname = url.parse(optionsParsed.path).pathname || '/';
}

const hostname =
optionsParsed.host ||
(optionsParsed.port != null
? `${optionsParsed.hostname}${optionsParsed.port}`
: optionsParsed.hostname);
origin = `${optionsParsed.protocol || 'http:'}//${hostname}`;

pathname = (options as url.URL).pathname;
if (!pathname && optionsParsed.path) {
try {
const parsedUrl = new URL(optionsParsed.path, origin);
pathname = parsedUrl.pathname || '/';
} catch (e) {
pathname = '/';
}
}
}

// some packages return method in lowercase..
Expand All @@ -303,7 +388,7 @@ export const getRequestInfo = (
? optionsParsed.method.toUpperCase()
: 'GET';

return { origin, pathname, method, optionsParsed };
return { origin, pathname, method, optionsParsed, invalidUrl };
};

/**
Expand Down Expand Up @@ -657,6 +742,42 @@ export function getRemoteClientAddress(
return null;
}

function getInfoFromIncomingMessage(
component: 'http' | 'https',
request: IncomingMessage,
logger: DiagLogger
): { pathname?: string; search?: string; toString: () => string } {
try {
if (request.headers.host) {
return new URL(
request.url ?? '/',
`${component}://${request.headers.host}`
);
} else {
const unsafeParsedUrl = new URL(
request.url ?? '/',
// using localhost as a workaround to still use the URL constructor for parsing
`${component}://localhost`
);
// since we use localhost as a workaround, ensure we hide the rest of the properties to avoid
// our workaround leaking though.
return {
pathname: unsafeParsedUrl.pathname,
search: unsafeParsedUrl.search,
toString: function () {
// we cannot use the result of unsafeParsedUrl.toString as it's potentially wrong.
return unsafeParsedUrl.pathname + unsafeParsedUrl.search;
},
};
}
} catch (e) {
// something is wrong, use undefined - this *should* never happen, logging
// for troubleshooting in case it does happen.
logger.verbose('Unable to get URL from request', e);
return {};
}
}

/**
* Returns incoming request attributes scoped to the request data
* @param {IncomingMessage} request the request object
Expand All @@ -670,18 +791,15 @@ export const getIncomingRequestAttributes = (
serverName?: string;
hookAttributes?: Attributes;
semconvStability: SemconvStability;
}
},
logger: DiagLogger
): Attributes => {
const headers = request.headers;
const userAgent = headers['user-agent'];
const ips = headers['x-forwarded-for'];
const httpVersion = request.httpVersion;
const requestUrl = request.url ? url.parse(request.url) : null;
const host = requestUrl?.host || headers.host;
const hostname =
requestUrl?.hostname ||
host?.replace(/^(.*)(:[0-9]{1,5})/, '$1') ||
'localhost';
const host = headers.host;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the host header guaranteed to be there? what about previous check to get the host from the URL?, it is not there anymore for host and hostname calculation, maybe I'm missing something still going through the changes

Copy link
Member Author

@pichlermarc pichlermarc Nov 6, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAIK, IncomingMessage.url never includes the host or protocol. (https://nodejs.org/en/learn/modules/anatomy-of-an-http-transaction#method-url-and-headers, the actual API docs don't seem to state that though)

Under the assumption that this is true, I derived that in the old implementation - when parsing the URL with url.parse on L679 - there can never be a hostname that was parsed from the URL. On L680 of the old code, we'll therefore always fall back to IncomingMessage.headers.host because the requestUrl.hostname it's always null.

For the new implementation that means: we don't try to parse the URL to get the host because it'll never be there, our only chance to get it is the host header. If it's not there, we can fall back to localhost as the old code did.

const hostname = host?.replace(/^(.*)(:[0-9]{1,5})/, '$1') || 'localhost';
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
const hostname = host?.replace(/^(.*)(:[0-9]{1,5})/, '$1') || 'localhost';
const hostname = host?.replace(/^(.*)(:[0-9]{1,5})$/, '$1') || 'localhost';

Perhaps anchor at the end of the string? I'm not sure if an IPv6 address (with :[0-9]+ segments) could get in here as host. If not, ignore this comment.

Ah, I see that this is just re-using code that was already there. I think you can ignore this comment.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I see that this is just re-using code that was already there. I think you can ignore this comment.

Yep, I just re-used it 🙂 Let's keep it for now and adjust it later if necessary.


const method = request.method;
const normalizedMethod = normalizeMethod(method);
Expand All @@ -701,8 +819,14 @@ export const getIncomingRequestAttributes = (
[ATTR_USER_AGENT_ORIGINAL]: userAgent,
};

if (requestUrl?.pathname != null) {
newAttributes[ATTR_URL_PATH] = requestUrl.pathname;
const parsedUrl = getInfoFromIncomingMessage(
options.component,
request,
logger
);

if (parsedUrl?.pathname != null) {
newAttributes[ATTR_URL_PATH] = parsedUrl.pathname;
}

if (remoteClientAddress != null) {
Expand All @@ -719,11 +843,7 @@ export const getIncomingRequestAttributes = (
}

const oldAttributes: Attributes = {
[SEMATTRS_HTTP_URL]: getAbsoluteUrl(
requestUrl,
headers,
`${options.component}:`
),
[SEMATTRS_HTTP_URL]: parsedUrl.toString(),
[SEMATTRS_HTTP_HOST]: host,
[SEMATTRS_NET_HOST_NAME]: hostname,
[SEMATTRS_HTTP_METHOD]: method,
Expand All @@ -738,8 +858,9 @@ export const getIncomingRequestAttributes = (
oldAttributes[SEMATTRS_HTTP_SERVER_NAME] = serverName;
}

if (requestUrl) {
oldAttributes[SEMATTRS_HTTP_TARGET] = requestUrl.path || '/';
if (parsedUrl?.pathname) {
oldAttributes[SEMATTRS_HTTP_TARGET] =
parsedUrl?.pathname + parsedUrl?.search || '/';
}

if (userAgent !== undefined) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,15 @@ describe('HttpInstrumentation', () => {
assert.strictEqual(rpcData.route, undefined);
rpcData.route = 'TheRoute';
}
if (request.url?.includes('/login')) {
assert.strictEqual(
request.headers.authorization,
'Basic ' + Buffer.from('username:password').toString('base64')
);
}
if (request.url?.includes('/withQuery')) {
assert.match(request.url, /withQuery\?foo=bar$/);
}
response.end('Test Server Response');
});

Expand Down Expand Up @@ -597,7 +606,7 @@ describe('HttpInstrumentation', () => {
assert.strictEqual(spans.length, 2);
});

for (const arg of ['string', {}, new Date()]) {
for (const arg of [{}, new Date()]) {
it(`should be traceable and not throw exception in ${protocol} instrumentation when passing the following argument ${JSON.stringify(
arg
)}`, async () => {
Expand Down Expand Up @@ -1005,6 +1014,34 @@ describe('HttpInstrumentation', () => {

assert.deepStrictEqual(warnMessages, []);
});

it('should not throw with cyrillic characters in the request path', async () => {
// see https://github.com/open-telemetry/opentelemetry-js/issues/5060
await httpRequest.get(`${protocol}://${hostname}:${serverPort}/привет`);
});

it('should keep username and password in the request', async () => {
await httpRequest.get(
`${protocol}://username:password@${hostname}:${serverPort}/login`
);
});

it('should keep query in the request', async () => {
await httpRequest.get(
`${protocol}://${hostname}:${serverPort}/withQuery?foo=bar`
);
});

it('using an invalid url does throw from client but still creates a span', async () => {
try {
await httpRequest.get(`http://instrumentation.test:string-as-port/`);
} catch (e) {
assert.match(e.message, /Invalid URL/);
}

const spans = memoryExporter.getFinishedSpans();
assert.strictEqual(spans.length, 1);
});
});

describe('with semconv stability set to http', () => {
Expand Down
Loading
Loading