Skip to content

Commit 708399c

Browse files
committed
HPCC4J-636 DFSClient: Improve OpenTelemetry tracing
- Improved span names
- Transitioned read request events to read spans
- Moved events to spans for connect, version and close
- Added span batch support

Signed-off-by: James McMullan [email protected]
1 parent 9e73d7c commit 708399c

File tree

4 files changed

+400
-159
lines changed

4 files changed

+400
-159
lines changed

dfsclient/src/main/java/org/hpccsystems/dfs/client/HPCCRemoteFileWriter.java

+31-4
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import io.opentelemetry.api.common.AttributeKey;
2222
import io.opentelemetry.api.common.Attributes;
2323
import io.opentelemetry.api.trace.Span;
24+
import io.opentelemetry.api.trace.StatusCode;
2425
import io.opentelemetry.semconv.ServerAttributes;
2526

2627
import org.apache.logging.log4j.Logger;
@@ -142,8 +143,9 @@ public HPCCRemoteFileWriter(FileWriteContext ctx, DataPartition dp, IRecordAcces
142143

143144
this.recordAccessor = recordAccessor;
144145

145-
this.writeSpanName = "HPCCRemoteFileWriter.RowService/Write_" + dp.getFileName() + "_" + dp.getThisPart();
146+
this.writeSpanName = "HPCCRemoteFileWriter/Write_" + dp.getFileName() + "_" + dp.getThisPart();
146147
this.writeSpan = Utils.createChildSpan(context.parentSpan, writeSpanName);
148+
this.writeSpan.setStatus(StatusCode.OK);
147149

148150
String primaryIP = dp.getCopyIP(0);
149151
String secondaryIP = "";
@@ -181,8 +183,20 @@ public HPCCRemoteFileWriter(FileWriteContext ctx, DataPartition dp, IRecordAcces
181183
*/
182184
public void writeRecord(T record) throws Exception
183185
{
184-
this.binaryRecordWriter.writeRecord(record);
185-
this.recordsWritten++;
186+
try
187+
{
188+
this.binaryRecordWriter.writeRecord(record);
189+
this.recordsWritten++;
190+
}
191+
catch (Exception e)
192+
{
193+
log.error("HPCCRemoteFileWriter: Error writing record: " + e.getMessage());
194+
this.writeSpan.recordException(e);
195+
this.writeSpan.setStatus(StatusCode.ERROR);
196+
this.writeSpan.end();
197+
198+
throw e;
199+
}
186200
}
187201

188202
/**
@@ -197,7 +211,20 @@ public void writeRecords(Iterator<T> it) throws Exception
197211
{
198212
while (it.hasNext())
199213
{
200-
this.binaryRecordWriter.writeRecord(it.next());
214+
try
215+
{
216+
this.binaryRecordWriter.writeRecord(it.next());
217+
this.recordsWritten++;
218+
}
219+
catch (Exception e)
220+
{
221+
log.error("HPCCRemoteFileWriter: Error writing record: " + e.getMessage());
222+
this.writeSpan.recordException(e);
223+
this.writeSpan.setStatus(StatusCode.ERROR);
224+
this.writeSpan.end();
225+
226+
throw e;
227+
}
201228
this.recordsWritten++;
202229
}
203230
}

dfsclient/src/main/java/org/hpccsystems/dfs/client/HpccRemoteFileReader.java

+46-25
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import io.opentelemetry.api.common.AttributeKey;
2222
import io.opentelemetry.api.common.Attributes;
2323
import io.opentelemetry.api.trace.Span;
24+
import io.opentelemetry.api.trace.StatusCode;
2425
import io.opentelemetry.semconv.ServerAttributes;
2526

2627
import org.apache.logging.log4j.Logger;
@@ -73,6 +74,7 @@ public static class FileReadContext
7374
public int recordReadLimit = -1;
7475
public boolean createPrefetchThread = true;
7576
public int readSizeKB = -1;
77+
public int readRequestSpanBatchSize = -1; // The number of read requests before creating a new span
7678
public Span parentSpan = null;
7779
};
7880

@@ -266,21 +268,7 @@ public HpccRemoteFileReader(FileReadContext ctx, DataPartition dp, IRecordBuilde
266268
this.dataPartition = dp;
267269
this.recordBuilder = recBuilder;
268270

269-
String readSpanName = "HPCCRemoteFileReader.RowService/Read_" + dataPartition.getFileName() + "_" + dataPartition.getThisPart();
270-
this.readSpan = Utils.createChildSpan(context.parentSpan, readSpanName);
271-
272-
String primaryIP = dp.getCopyIP(0);
273-
String secondaryIP = "";
274-
if (dp.getCopyCount() > 1)
275-
{
276-
secondaryIP = dp.getCopyIP(1);
277-
}
278-
279-
Attributes attributes = Attributes.of( AttributeKey.stringKey("server.0.address"), primaryIP,
280-
AttributeKey.stringKey("server.1.address"), secondaryIP,
281-
ServerAttributes.SERVER_PORT, Long.valueOf(dp.getPort()),
282-
AttributeKey.longKey("read.size"), Long.valueOf(context.readSizeKB*1000));
283-
this.readSpan.setAllAttributes(attributes);
271+
this.readSpan = createReadSpan(ctx, dp);
284272

285273
if (context.originalRD == null)
286274
{
@@ -304,6 +292,7 @@ public HpccRemoteFileReader(FileReadContext ctx, DataPartition dp, IRecordBuilde
304292
this.inputStream = new RowServiceInputStream(this.dataPartition, context.originalRD, projectedRecordDefinition, context.connectTimeout,
305293
context.recordReadLimit, context.createPrefetchThread, context.readSizeKB, null,
306294
false, context.socketOpTimeoutMS, this.readSpan);
295+
this.inputStream.setReadRequestSpanBatchSize(context.readRequestSpanBatchSize);
307296
this.binaryRecordReader = new BinaryRecordReader(this.inputStream);
308297
this.binaryRecordReader.initialize(this.recordBuilder);
309298

@@ -321,13 +310,15 @@ public HpccRemoteFileReader(FileReadContext ctx, DataPartition dp, IRecordBuilde
321310
this.inputStream = new RowServiceInputStream(this.dataPartition, context.originalRD, projectedRecordDefinition, context.connectTimeout,
322311
context.recordReadLimit, context.createPrefetchThread, context.readSizeKB, restartInfo,
323312
false, context.socketOpTimeoutMS, this.readSpan);
313+
this.inputStream.setReadRequestSpanBatchSize(context.readRequestSpanBatchSize);
324314

325315
long bytesToSkip = resumeInfo.recordReaderStreamPos - resumeInfo.inputStreamPos;
326316
if (bytesToSkip < 0)
327317
{
328318
Exception e = new Exception("Unable to restart read stream, unexpected stream position in record reader.");
329319
this.readSpan.recordException(e);
330320
this.readSpan.end();
321+
throw e;
331322
}
332323
this.inputStream.skip(bytesToSkip);
333324

@@ -344,6 +335,35 @@ public HpccRemoteFileReader(FileReadContext ctx, DataPartition dp, IRecordBuilde
344335
openTimeMs = System.currentTimeMillis();
345336
}
346337

338+
private static Span createReadSpan(FileReadContext context, DataPartition dp)
339+
{
340+
String readSpanName = "HPCCRemoteFileReader/Read_" + dp.getFileName() + "_" + dp.getThisPart();
341+
Span readSpan = Utils.createChildSpan(context.parentSpan, readSpanName);
342+
readSpan.setStatus(StatusCode.OK);
343+
344+
String primaryIP = dp.getCopyIP(0);
345+
String secondaryIP = "";
346+
if (dp.getCopyCount() > 1)
347+
{
348+
secondaryIP = dp.getCopyIP(1);
349+
}
350+
351+
long readSize = context.readSizeKB;
352+
if (readSize < 0)
353+
{
354+
readSize = RowServiceInputStream.DEFAULT_MAX_READ_SIZE_KB;
355+
}
356+
readSize *= 1000;
357+
358+
Attributes attributes = Attributes.of( AttributeKey.stringKey("server.0.address"), primaryIP,
359+
AttributeKey.stringKey("server.1.address"), secondaryIP,
360+
ServerAttributes.SERVER_PORT, Long.valueOf(dp.getPort()),
361+
AttributeKey.longKey("read.size"), Long.valueOf(readSize));
362+
readSpan.setAllAttributes(attributes);
363+
364+
return readSpan;
365+
}
366+
347367
private boolean retryRead()
348368
{
349369
if (retryCount < maxReadRetries)
@@ -364,20 +384,12 @@ private boolean retryRead()
364384

365385
try
366386
{
367-
String readSpanName = "HPCCRemoteFileReader.RowService/Read_" + dataPartition.getFileName() + "_" + dataPartition.getThisPart();
368-
if (context.parentSpan != null)
369-
{
370-
this.readSpan = Utils.createChildSpan(context.parentSpan, readSpanName);
371-
}
372-
else
373-
{
374-
this.readSpan = Utils.createSpan(readSpanName);
375-
}
387+
this.readSpan = createReadSpan(context, dataPartition);
376388

377389
this.inputStream = new RowServiceInputStream(this.dataPartition, context.originalRD,this.recordBuilder.getRecordDefinition(),
378390
context.connectTimeout, context.recordReadLimit, context.createPrefetchThread,
379391
context.readSizeKB, restartInfo, false, context.socketOpTimeoutMS, this.readSpan);
380-
392+
this.inputStream.setReadRequestSpanBatchSize(context.readRequestSpanBatchSize);
381393
long bytesToSkip = resumeInfo.recordReaderStreamPos - resumeInfo.inputStreamPos;
382394
if (bytesToSkip < 0)
383395
{
@@ -391,6 +403,7 @@ private boolean retryRead()
391403
catch (Exception e)
392404
{
393405
this.readSpan.recordException(e);
406+
this.readSpan.setStatus(StatusCode.ERROR);
394407
this.readSpan.end();
395408
log.error("Failed to retry read for " + this.dataPartition.toString() + " " + e.getMessage(), e);
396409
return false;
@@ -529,6 +542,10 @@ public boolean hasNext()
529542
}
530543
catch (HpccFileException e)
531544
{
545+
this.readSpan.recordException(e);
546+
this.readSpan.setStatus(StatusCode.ERROR);
547+
this.readSpan.end();
548+
532549
if (!retryRead())
533550
{
534551
canReadNext = false;
@@ -564,6 +581,10 @@ public T next()
564581
}
565582
catch (HpccFileException e)
566583
{
584+
this.readSpan.recordException(e);
585+
this.readSpan.setStatus(StatusCode.ERROR);
586+
this.readSpan.end();
587+
567588
if (!retryRead())
568589
{
569590
log.error("Read failure for " + this.dataPartition.toString() + " " + e.getMessage(), e);

0 commit comments

Comments
 (0)