Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[backport to release/3.3.x] fix(tracing): time precision, sampling rate, span hierarchy (#11484) #11650

Merged
merged 3 commits into from
Sep 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG/unreleased/kong/11468.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
message: "**Opentelemetry**: fix an issue that resulted in invalid parent IDs in the propagated tracing headers"
type: bugfix
scope: Plugin
prs:
- 11468
jiras:
- "KAG-2281"
9 changes: 9 additions & 0 deletions CHANGELOG/unreleased/kong/11484.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
message: "Tracing: fix an issue that resulted in some parent spans to end before their children due to different precision of their timestamps"
type: bugfix
scope: PDK
prs:
- 11484
jiras:
- "KAG-2336"
issues:
- 11294
2 changes: 2 additions & 0 deletions kong/init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -1467,6 +1467,7 @@ function Kong.log()
local ctx = ngx.ctx
if not ctx.KONG_LOG_START then
ctx.KONG_LOG_START = now() * 1000
ctx.KONG_LOG_START_NS = time_ns()
if is_stream_module then
if not ctx.KONG_PROCESSING_START then
ctx.KONG_PROCESSING_START = start_time() * 1000
Expand Down Expand Up @@ -1535,6 +1536,7 @@ function Kong.log()

if ctx.KONG_BODY_FILTER_START and not ctx.KONG_BODY_FILTER_ENDED_AT then
ctx.KONG_BODY_FILTER_ENDED_AT = ctx.KONG_LOG_START
ctx.KONG_BODY_FILTER_ENDED_AT_NS = ctx.KONG_LOG_START_NS
ctx.KONG_BODY_FILTER_TIME = ctx.KONG_BODY_FILTER_ENDED_AT -
ctx.KONG_BODY_FILTER_START
end
Expand Down
10 changes: 3 additions & 7 deletions kong/pdk/tracing.lua
Original file line number Diff line number Diff line change
Expand Up @@ -195,17 +195,13 @@ local function create_span(tracer, options)
sampled = tracer and tracer.sampler(trace_id)
end

if not sampled then
return noop_span
end

span.parent_id = span.parent and span.parent.span_id
or options.parent_id
span.tracer = span.tracer or tracer
span.span_id = generate_span_id()
span.trace_id = trace_id
span.kind = options.span_kind or SPAN_KIND.INTERNAL
span.should_sample = true
span.should_sample = sampled

setmetatable(span, span_mt)
return span
Expand Down Expand Up @@ -275,8 +271,8 @@ end
-- local time = ngx.now()
-- span:finish(time * 100000000)
function span_mt:finish(end_time_ns)
if self.end_time_ns ~= nil then
-- span is finished, and processed already
if self.end_time_ns ~= nil or not self.should_sample then
-- span is finished, and already processed or not sampled
return
end

Expand Down
21 changes: 9 additions & 12 deletions kong/plugins/opentelemetry/handler.lua
Original file line number Diff line number Diff line change
Expand Up @@ -103,34 +103,31 @@ function OpenTelemetryHandler:access(conf)
kong.ctx.plugin.should_sample = false
end

local injected_parent_span = ngx.ctx.tracing and
ngx.ctx.tracing.injected.balancer_span or root_span

local header_type, trace_id, span_id, parent_id, should_sample, _ = propagation_parse(headers, conf.header_type)
if should_sample == false then
tracer:set_should_sample(should_sample)
injected_parent_span.should_sample = should_sample
end

-- overwrite trace id
-- as we are in a chain of existing trace
if trace_id then
root_span.trace_id = trace_id
injected_parent_span.trace_id = trace_id
kong.ctx.plugin.trace_id = trace_id
end

-- overwrite root span's parent_id
-- overwrite parent span's parent_id
if span_id then
root_span.parent_id = span_id
injected_parent_span.parent_id = span_id

elseif parent_id then
root_span.parent_id = parent_id
injected_parent_span.parent_id = parent_id
end

-- propagate the span that identifies the "last" balancer try
-- This has to happen "in advance". The span will be activated (linked)
-- after the OpenResty balancer results are available
local balancer_span = tracer.create_span(nil, {
span_kind = 3,
parent = root_span,
})
propagation_set(conf.header_type, header_type, balancer_span, "w3c", true)
propagation_set(conf.header_type, header_type, injected_parent_span, "w3c")
end


Expand Down
3 changes: 3 additions & 0 deletions kong/runloop/handler.lua
Original file line number Diff line number Diff line change
Expand Up @@ -1137,6 +1137,9 @@ return {

-- trace router
local span = instrumentation.router()
-- create the balancer span "in advance" so its ID is available
-- to plugins in the access phase for doing headers propagation
instrumentation.precreate_balancer_span(ctx)

-- routing request
local router = get_updated_router()
Expand Down
30 changes: 24 additions & 6 deletions kong/tracing/instrumentation.lua
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
local pdk_tracer = require "kong.pdk.tracing".new()
local propagation = require "kong.tracing.propagation"
local utils = require "kong.tools.utils"
local tablepool = require "tablepool"
local tablex = require "pl.tablex"
Expand Down Expand Up @@ -85,10 +84,10 @@ function _M.balancer(ctx)

local last_try_balancer_span
do
local propagated = propagation.get_propagated()
local balancer_span = ctx.tracing and ctx.tracing.injected.balancer_span
-- pre-created balancer span was not linked yet
if propagated and not propagated.linked then
last_try_balancer_span = propagated
if balancer_span and not balancer_span.linked then
last_try_balancer_span = balancer_span
end
end

Expand Down Expand Up @@ -210,6 +209,10 @@ _M.available_types = available_types

-- Record inbound request
function _M.request(ctx)
ctx.tracing = {
injected = {},
}

local client = kong.client

local method = get_method()
Expand Down Expand Up @@ -245,6 +248,22 @@ function _M.request(ctx)
end


function _M.precreate_balancer_span(ctx)
if _M.balancer == NOOP then
-- balancer instrumentation not enabled
return
end

local root_span = ctx.KONG_SPANS and ctx.KONG_SPANS[1]
if ctx.tracing then
ctx.tracing.injected.balancer_span = tracer.create_span(nil, {
span_kind = 3,
parent = root_span,
})
end
end


local patch_dns_query
do
local raw_func
Expand Down Expand Up @@ -305,8 +324,7 @@ function _M.runloop_log_before(ctx)
local active_span = tracer.active_span()
-- check root span type to avoid encounter error
if active_span and type(active_span.finish) == "function" then
local end_time = ctx.KONG_BODY_FILTER_ENDED_AT
and ctx.KONG_BODY_FILTER_ENDED_AT * 1e6
local end_time = ctx.KONG_BODY_FILTER_ENDED_AT_NS
active_span:finish(end_time)
end
end
Expand Down
18 changes: 1 addition & 17 deletions kong/tracing/propagation.lua
Original file line number Diff line number Diff line change
Expand Up @@ -440,33 +440,18 @@ local function parse(headers, conf_header_type)
end


local function get_propagated()
return ngx.ctx.propagated_span
end


local function set_propagated(span)
ngx.ctx.propagated_span = span
end


-- set outgoing propagation headers
--
-- @tparam string conf_header_type type of tracing header to use
-- @tparam string found_header_type type of tracing header found in request
-- @tparam table proxy_span span to be propagated
-- @tparam string conf_default_header_type used when conf_header_type=ignore
-- @tparam bool reuse if true any existing propagated_span is reused instead of proxy_span
local function set(conf_header_type, found_header_type, proxy_span, conf_default_header_type, reuse)
if reuse then
proxy_span = get_propagated() or proxy_span
end
local function set(conf_header_type, found_header_type, proxy_span, conf_default_header_type)
-- proxy_span can be noop, in which case it should not be propagated.
if proxy_span.is_recording == false then
kong.log.debug("skipping propagation of noop span")
return
end
set_propagated(proxy_span)

local set_header = kong.service.request.set_header

Expand Down Expand Up @@ -542,5 +527,4 @@ return {
parse = parse,
set = set,
from_hex = from_hex,
get_propagated = get_propagated,
}
2 changes: 1 addition & 1 deletion spec/02-integration/14-tracing/02-propagation_spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ for _, strategy in helpers.each_strategy() do
assert.is_same(0, #spans, res)

local traceparent = assert(body.headers.traceparent)
assert.equals("00-" .. trace_id .. "-" .. span_id .. "-00", traceparent)
assert.matches("00%-" .. trace_id .. "%-%x+%-00", traceparent)
end)
end)
end)
Expand Down
123 changes: 123 additions & 0 deletions spec/02-integration/14-tracing/03-tracer-pdk_spec.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
local helpers = require "spec.helpers"
local cjson = require "cjson"
local join = require "pl.stringx".join


local TCP_PORT = helpers.get_available_port()
local tcp_trace_plugin_name = "tcp-trace-exporter"


local function get_parent(span, spans)
for _, s in ipairs(spans) do
if s.span_id == span.parent_id then
return s
end
end
end

for _, strategy in helpers.each_strategy() do
local proxy_client

describe("tracer pdk spec #" .. strategy, function()

local function setup_instrumentations(types, custom_spans, sampling_rate)
local bp, _ = assert(helpers.get_db_utils(strategy, {
"services",
"routes",
"plugins",
}, { tcp_trace_plugin_name }))

local http_srv = assert(bp.services:insert {
name = "mock-service",
host = helpers.mock_upstream_host,
port = helpers.mock_upstream_port,
})

bp.routes:insert({ service = http_srv,
protocols = { "http" },
paths = { "/" }})

bp.plugins:insert({
name = tcp_trace_plugin_name,
config = {
host = "127.0.0.1",
port = TCP_PORT,
custom_spans = custom_spans or false,
}
})

assert(helpers.start_kong {
database = strategy,
nginx_conf = "spec/fixtures/custom_nginx.template",
plugins = "tcp-trace-exporter",
tracing_instrumentations = types,
tracing_sampling_rate = sampling_rate or 1,
})

proxy_client = helpers.proxy_client()
end

describe("sampling rate", function ()
local instrumentations = { "request", "router", "balancer" }
lazy_setup(function()
setup_instrumentations(join(",", instrumentations), false, 0.5)
end)

lazy_teardown(function()
helpers.stop_kong()
end)

it("results in either all or none of the spans in a trace to be sampled", function ()
for _ = 1, 100 do
local thread = helpers.tcp_server(TCP_PORT)
local r = assert(proxy_client:send {
method = "GET",
path = "/",
})
assert.res_status(200, r)

local ok, res = thread:join()
assert.True(ok)
assert.is_string(res)

local spans = cjson.decode(res)
assert.True(#spans == 0 or #spans == #instrumentations)
end
end)
end)

describe("spans start/end times are consistent with their hierarchy", function ()
lazy_setup(function()
setup_instrumentations("all", false, 1)
end)

lazy_teardown(function()
helpers.stop_kong()
end)

it("sets child lifespan within parent's lifespan", function ()
for _ = 1, 100 do
local thread = helpers.tcp_server(TCP_PORT)
local r = assert(proxy_client:send {
method = "GET",
path = "/",
})
assert.res_status(200, r)

local ok, res = thread:join()
assert.True(ok)
assert.is_string(res)

local spans = cjson.decode(res)
for i = 2, #spans do -- skip the root span (no parent)
local span = spans[i]
local parent = get_parent(span, spans)
assert.is_not_nil(parent)
assert.True(span.start_time_ns >= parent.start_time_ns)
assert.True(span.end_time_ns <= parent.end_time_ns)
end
end
end)
end)
end)
end
Loading
Loading