Skip to content

Commit 365e011

Browse files
Internal Metrics (#45)
* extend span recorder with metrics properties * increment dropped span count when spans fail to send * add max span per buffer option
1 parent 757121a commit 365e011

12 files changed

+210
-45
lines changed

CHANGELOG.md

+6
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
# Changelog
22

33
_vNext_
4+
- All value types are now properly handled in span serialization.
5+
- The platform version detection code has been improved and should be resilient to dynamic injection scenarios.
6+
- Logging now exists for many operations via the LibLog library. See the README for more details.
7+
- Error handling around dropped reports has been improved, and we now report dropped spans to LightStep.
8+
- The `LightStepHttpClient` now respects the `ReportTimeout` value. **Breaking Change**
9+
- Previously, this value was not respected by the HTTP client and was using the default 100s timeout.
410
- The `Options` class has been refactored to support a more fluent style of configuration. **Breaking Change**
511
- By default, `SatelliteOptions` are now created when `Options` is created that points to the LightStep public satellite pool.
612
- Please see the readme for more information on configuring the tracer.

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ There's several options that can be adjusted when instantiating a `LightStepTrac
4242
| WithSatellite(SatelliteOptions) | A SatelliteOptions object that specifies the host, port, and if we should use HTTPS |
4343
| WithHttp2(bool) | If this is true, we use HTTP/2 to communicate with the Satellite |
4444
| WithAutomaticReporting(bool) | If false, disables the automatic flushing of buffered spans. |
45+
| WithMaxBufferedSpans(int) | The maximum amount of spans to record in a single buffer. |
4546

4647
## `SatelliteOptions`
4748
| Property | Description |

examples/LightStep.CSharpDITestApp/LightStep.CSharpDITestApp.csproj

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
<AssemblyName>LightStep.CSharpDITestApp</AssemblyName>
1212
<TargetFrameworkVersion>v4.7.1</TargetFrameworkVersion>
1313
<FileAlignment>512</FileAlignment>
14+
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
1415
</PropertyGroup>
1516
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
1617
<PlatformTarget>AnyCPU</PlatformTarget>

src/LightStep/Collector/LightStepHttpClient.cs

+25-10
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
using System.Threading.Tasks;
99
using Google.Protobuf;
1010
using LightStep.Logging;
11+
using Google.Protobuf.WellKnownTypes;
12+
using OpenTracing.Tag;
1113

1214
namespace LightStep.Collector
1315
{
@@ -30,7 +32,7 @@ public LightStepHttpClient(string url, Options options)
3032
{
3133
_url = url;
3234
_options = options;
33-
_client = new HttpClient();
35+
_client = new HttpClient {Timeout = _options.ReportTimeout};
3436
}
3537

3638
/// <summary>
@@ -42,7 +44,7 @@ public async Task<ReportResponse> SendReport(ReportRequest report)
4244
{
4345
// force net45 to attempt tls12 first and fallback appropriately
4446
ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12 | SecurityProtocolType.Tls11 | SecurityProtocolType.Tls;
45-
47+
4648
_client.DefaultRequestHeaders.Accept.Add(
4749
new MediaTypeWithQualityHeaderValue("application/octet-stream"));
4850
var reportsByteArray = report.ToByteArray();
@@ -54,9 +56,9 @@ public async Task<ReportResponse> SendReport(ReportRequest report)
5456
};
5557

5658
request.Content.Headers.ContentType = new MediaTypeHeaderValue("application/octet-stream");
57-
58-
ReportResponse responseValue = new ReportResponse();
59-
59+
60+
ReportResponse responseValue;
61+
6062
try
6163
{
6264
var response = await _client.SendAsync(request);
@@ -69,7 +71,13 @@ public async Task<ReportResponse> SendReport(ReportRequest report)
6971
{
7072
_logger.WarnException("Exception caught while sending report, resetting HttpClient", ex);
7173
_client.Dispose();
72-
_client = new HttpClient();
74+
_client = new HttpClient {Timeout = _options.ReportTimeout};
75+
throw;
76+
}
77+
catch (Exception ex) when (ex is TaskCanceledException || ex is OperationCanceledException)
78+
{
79+
_logger.WarnException("Timed out sending report to satellite", ex);
80+
throw;
7381
}
7482

7583
return responseValue;
@@ -80,23 +88,30 @@ public async Task<ReportResponse> SendReport(ReportRequest report)
8088
/// </summary>
8189
/// <param name="spans">An enumerable of <see cref="SpanData" /></param>
8290
/// <returns>A <see cref="ReportRequest" /></returns>
83-
public ReportRequest Translate(IEnumerable<SpanData> spans)
91+
public ReportRequest Translate(ISpanRecorder spanBuffer)
8492
{
8593
_logger.Debug($"Serializing spans to proto.");
8694
var timer = new Stopwatch();
8795
timer.Start();
8896

97+
var metrics = new InternalMetrics
98+
{
99+
StartTimestamp = Timestamp.FromDateTime(spanBuffer.ReportStartTime.ToUniversalTime()),
100+
DurationMicros = Convert.ToUInt64((spanBuffer.ReportEndTime - spanBuffer.ReportStartTime).Ticks / 10),
101+
Counts = { new MetricsSample() { Name = "spans.dropped", IntValue = spanBuffer.DroppedSpanCount } }
102+
};
103+
89104
var request = new ReportRequest
90105
{
91106
Reporter = new Reporter
92107
{
93108
ReporterId = _options.TracerGuid
94109
},
95-
Auth = new Auth {AccessToken = _options.AccessToken}
110+
Auth = new Auth {AccessToken = _options.AccessToken},
111+
InternalMetrics = metrics
96112
};
97113
_options.Tags.ToList().ForEach(t => request.Reporter.Tags.Add(new KeyValue().MakeKeyValueFromKvp(t)));
98-
spans.ToList().ForEach(span => request.Spans.Add(new Span().MakeSpanFromSpanData(span)));
99-
114+
spanBuffer.GetSpans().ToList().ForEach(span => request.Spans.Add(new Span().MakeSpanFromSpanData(span)));
100115
timer.Stop();
101116
_logger.Debug($"Serialization complete in {timer.ElapsedMilliseconds}ms. Request size: {request.CalculateSize()}b.");
102117
return request;

src/LightStep/ISpanRecorder.cs

+31-5
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
using System.Collections.Generic;
1+
using System;
2+
using System.Collections.Generic;
23

34
namespace LightStep
45
{
@@ -8,20 +9,45 @@ namespace LightStep
89
public interface ISpanRecorder
910
{
1011
/// <summary>
11-
/// Saves a span.
12+
/// The start (creation) time of the span buffer.
13+
/// </summary>
14+
DateTime ReportStartTime { get; }
15+
/// <summary>
16+
/// The finishing (flushing) time of the span buffer.
17+
/// </summary>
18+
DateTime ReportEndTime { get; set; }
19+
/// <summary>
20+
/// The count of dropped spans for this, or a prior, span buffer.
21+
/// </summary>
22+
int DroppedSpanCount { get; set; }
23+
24+
/// <summary>
25+
/// Saves a span.
1226
/// </summary>
1327
/// <param name="span"></param>
1428
void RecordSpan(SpanData span);
1529

1630
/// <summary>
17-
/// Gets the current record of spans.
31+
/// Returns this instance of the span buffer.
1832
/// </summary>
1933
/// <returns></returns>
20-
List<SpanData> GetSpanBuffer();
34+
ISpanRecorder GetSpanBuffer();
2135

2236
/// <summary>
23-
/// Clears the span record.
37+
/// Clears the span record.
2438
/// </summary>
2539
void ClearSpanBuffer();
40+
41+
/// <summary>
42+
/// Increments the dropped span count.
43+
/// </summary>
44+
/// <param name="count"></param>
45+
void RecordDroppedSpans(int count);
46+
47+
/// <summary>
48+
/// Gets the spans stored in the buffer.
49+
/// </summary>
50+
/// <returns></returns>
51+
IEnumerable<SpanData> GetSpans();
2652
}
2753
}

src/LightStep/LightStepSpanRecorder.cs

+27-9
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,16 @@ public sealed class LightStepSpanRecorder : ISpanRecorder
1010
{
1111
private List<SpanData> Spans { get; } = new List<SpanData>();
1212
private static readonly ILog _logger = LogProvider.GetCurrentClassLogger();
13+
14+
/// <inheritdoc />
15+
public DateTime ReportStartTime { get; } = DateTime.Now;
16+
17+
/// <inheritdoc />
18+
public DateTime ReportEndTime { get; set; }
1319

20+
/// <inheritdoc />
21+
public int DroppedSpanCount { get; set; }
22+
1423
/// <inheritdoc />
1524
public void RecordSpan(SpanData span)
1625
{
@@ -20,21 +29,16 @@ public void RecordSpan(SpanData span)
2029
_logger.Trace($"Lock freed, adding new span: {span}");
2130
Spans.Add(span);
2231
}
23-
2432
}
2533

2634
/// <inheritdoc />
27-
public List<SpanData> GetSpanBuffer()
35+
public ISpanRecorder GetSpanBuffer()
2836
{
29-
_logger.Trace("Waiting for lock in SpanRecorder");
3037
lock (Spans)
3138
{
32-
_logger.Trace("Lock freed, getting span buffer.");
33-
var currentSpans = Spans.ToList();
34-
Spans.Clear();
35-
return currentSpans;
39+
ReportEndTime = DateTime.Now;
40+
return this;
3641
}
37-
3842
}
3943

4044
/// <inheritdoc />
@@ -46,7 +50,21 @@ public void ClearSpanBuffer()
4650
_logger.Trace("Lock freed, clearing span buffer.");
4751
Spans.Clear();
4852
}
49-
53+
}
54+
55+
/// <inheritdoc />
56+
public void RecordDroppedSpans(int count)
57+
{
58+
DroppedSpanCount += count;
59+
}
60+
61+
/// <inheritdoc />
62+
public IEnumerable<SpanData> GetSpans()
63+
{
64+
lock (Spans)
65+
{
66+
return Spans.ToList();
67+
}
5068
}
5169
}
5270
}

src/LightStep/Options.cs

+13
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,11 @@ public class Options
4949
/// Timeout for sending spans to a LightStep Satellite.
5050
/// </summary>
5151
public TimeSpan ReportTimeout { get; private set; }
52+
53+
/// <summary>
54+
/// Maximum amount of spans to buffer in a single report.
55+
/// </summary>
56+
public int ReportMaxSpans { get; private set;}
5257

5358
/// <summary>
5459
/// Tags that should be applied to each span generated by this tracer.
@@ -103,6 +108,13 @@ public Options WithAutomaticReporting(bool shouldRun)
103108
Run = shouldRun;
104109
return this;
105110
}
111+
112+
public Options WithMaxBufferedSpans(int count)
113+
{
114+
_logger.Debug($"Setting max spans per buffer to {count}");
115+
ReportMaxSpans = count;
116+
return this;
117+
}
106118

107119
/// <summary>
108120
/// Creates a new set of options for the LightStep tracer.
@@ -120,6 +132,7 @@ public Options(string token)
120132
Satellite = new SatelliteOptions("collector.lightstep.com");
121133
UseHttp2 = false;
122134
Run = true;
135+
ReportMaxSpans = int.MaxValue;
123136
}
124137

125138
private IDictionary<string, object> MergeTags(IDictionary<string, object> input)

src/LightStep/Tracer.cs

+54-11
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
using System;
22
using System.Collections.Generic;
33
using System.Linq;
4+
using System.Net.Http;
45
using System.Threading;
56
using System.Threading.Tasks;
67
using LightStep.Collector;
@@ -19,9 +20,10 @@ public sealed class Tracer : ITracer
1920
private readonly Options _options;
2021
private readonly IPropagator _propagator;
2122
private readonly LightStepHttpClient _httpClient;
22-
private readonly ISpanRecorder _spanRecorder;
23+
private ISpanRecorder _spanRecorder;
2324
private readonly Timer _reportLoop;
2425
private static readonly ILog _logger = LogProvider.GetCurrentClassLogger();
26+
private int currentDroppedSpanCount;
2527

2628
/// <inheritdoc />
2729
public Tracer(Options options) : this(new AsyncLocalScopeManager(), Propagators.TextMap, options,
@@ -96,26 +98,67 @@ public async void Flush()
9698
if (_options.Run)
9799
{
98100
// save current spans and clear the buffer
99-
// TODO: add retry logic so as to not drop spans on unreachable satellite
100-
_logger.Debug($"Flushing SpanData");
101-
List<SpanData> currentSpans = new List<SpanData>();
101+
ISpanRecorder currentBuffer;
102102
lock (_lock)
103103
{
104-
_logger.Debug($"Lock freed, getting current buffer.");
105-
currentSpans = _spanRecorder.GetSpanBuffer();
106-
_logger.Debug($"{currentSpans.Count} spans copied from buffer.");
104+
currentBuffer = _spanRecorder.GetSpanBuffer();
105+
_spanRecorder = new LightStepSpanRecorder();
106+
_logger.Debug($"{currentBuffer.GetSpans().Count()} spans in buffer.");
107107
}
108-
var data = _httpClient.Translate(currentSpans);
109-
var resp = await _httpClient.SendReport(data);
110-
if (resp.Errors.Count > 0) _logger.Warn($"Errors sending report to LightStep: {resp.Errors}");
108+
109+
/**
110+
* there are two ways spans can be dropped:
111+
* 1. the buffer drops a span because it's too large, malformed, etc.
112+
* 2. the report failed to be sent to the satellite.
113+
* since flush is async and there can potentially be any number of buffers in flight to the satellite,
114+
* we need to set the current drop count on the tracer to be the amount of dropped spans from the buffer
115+
* plus the existing dropped spans, then mutate the current buffer to this new total value.
116+
*/
117+
currentDroppedSpanCount += currentBuffer.DroppedSpanCount;
118+
currentBuffer.DroppedSpanCount = currentDroppedSpanCount;
119+
var data = _httpClient.Translate(currentBuffer);
120+
121+
try
122+
{
123+
var resp = await _httpClient.SendReport(data);
124+
if (resp.Errors.Count > 0)
125+
{
126+
_logger.Warn($"Errors in report: {resp.Errors}");
127+
}
128+
129+
lock (_lock)
130+
{
131+
_logger.Debug($"Resetting tracer dropped span count as the last report was successful.");
132+
currentDroppedSpanCount = 0;
133+
}
134+
135+
}
136+
catch (Exception ex) when (ex is HttpRequestException || ex is TaskCanceledException || ex is OperationCanceledException)
137+
{
138+
lock (_lock)
139+
{
140+
_logger.Warn($"Adding {currentBuffer.GetSpans().Count()} spans to dropped span count (current total: {currentDroppedSpanCount})");
141+
currentDroppedSpanCount += currentBuffer.GetSpans().Count();
142+
}
143+
}
144+
145+
111146
}
112147
}
113148

114149
internal void AppendFinishedSpan(SpanData span)
115150
{
116151
lock (_lock)
117152
{
118-
_spanRecorder.RecordSpan(span);
153+
if (_spanRecorder.GetSpans().Count() < _options.ReportMaxSpans )
154+
{
155+
_spanRecorder.RecordSpan(span);
156+
}
157+
else
158+
{
159+
_spanRecorder.RecordDroppedSpans(1);
160+
_logger.Warn($"Dropping span due to too many spans in buffer.");
161+
}
119162
}
120163
}
121164
}

0 commit comments

Comments
 (0)