diff --git a/tools/dtrace/get-ds-state.sh b/tools/dtrace/get-ds-state.sh index ca09e41cd..d7d28d5ca 100755 --- a/tools/dtrace/get-ds-state.sh +++ b/tools/dtrace/get-ds-state.sh @@ -7,7 +7,7 @@ filename='/tmp/get-ds-state.out' # Clear out any previous state echo "" > "$filename" # Gather state on all running propolis servers, record summary to a file -dtrace -s /opt/oxide/dtrace/crucible/get-ds-state.d | sort -n | uniq | awk 'NF' > "$filename" +dtrace -s /opt/oxide/crucible_dtrace/get-ds-state.d | sort -n | uniq | awk 'NF' > "$filename" # Walk the lines in the file, append the zone name to each line. while read -r p; do # For each line in the file, pull out the PID we are looking at and diff --git a/tools/dtrace/get-lr-state.sh b/tools/dtrace/get-lr-state.sh index 90b98a879..537036591 100755 --- a/tools/dtrace/get-lr-state.sh +++ b/tools/dtrace/get-lr-state.sh @@ -7,7 +7,7 @@ filename='/tmp/get-lr-state.out' # Clear out any previous state echo "" > "$filename" # Gather state on all running propolis servers, record summary to a file -dtrace -s /opt/oxide/dtrace/crucible/get-lr-state.d | sort -n | uniq | awk 'NF' > "$filename" +dtrace -s /opt/oxide/crucible_dtrace/get-lr-state.d | sort -n | uniq | awk 'NF' > "$filename" # Walk the lines in the file, append the zone name to each line. while read -r p; do # For each line in the file, pull out the PID we are looking at and diff --git a/tools/dtrace/get-up-state.sh b/tools/dtrace/get-up-state.sh index deaeac353..d1d819df1 100755 --- a/tools/dtrace/get-up-state.sh +++ b/tools/dtrace/get-up-state.sh @@ -7,7 +7,7 @@ final='/tmp/get-up-state.final' rm -f $final # Gather our output first. -dtrace -s /opt/oxide/dtrace/crucible/get-up-state.d | awk 'NF' > "$filename" +dtrace -s /opt/oxide/crucible_dtrace/get-up-state.d | awk 'NF' > "$filename" if [[ $? -ne 0 ]]; then exit 1 fi diff --git a/tools/dtrace/upstairs_count.d b/tools/dtrace/upstairs_count.d index 6954e3440..c16b3af69 100755 --- a/tools/dtrace/upstairs_count.d +++ b/tools/dtrace/upstairs_count.d @@ -78,7 +78,7 @@ crucible_upstairs*:::gw-barrier-done tick-1s /show > 20/ { - printf("%4s %4s %4s %4s %5s %5s %4s %4s %4s %4s", + printf("%5s %5s %5s %5s %5s %5s %5s %5s %5s %5s", "F>", "F<", "W>", "W<", "R>", "R<", "WU>", "WU<", "B>", "B<"); printf("\n"); show = 0; @@ -86,7 +86,7 @@ tick-1s tick-1s { - printa("%@4u %@4u %@4u %@4u %@5u %@5u %@4u %@4u %@4u %@4u", + printa("%@5u %@5u %@5u %@5u %@5u %@5u %@5u %@5u %@5u %@5u", @flush_start, @flush_done, @write_start, @write_done, @read_start, @read_done, @write_unwritten_start, @write_unwritten_done, @barrier_start, @barrier_done diff --git a/upstairs/src/downstairs.rs b/upstairs/src/downstairs.rs index fa4e70c87..a4fee50f0 100644 --- a/upstairs/src/downstairs.rs +++ b/upstairs/src/downstairs.rs @@ -537,6 +537,14 @@ impl Downstairs { _ => (), } self.ack_job(ds_id); + } else if ack_ready && job.work.is_write() { + // We already acked this job, but, we should update dtrace probes + Self::update_io_done_stats( + &self.stats, + job.work.clone(), + ds_id, + job.io_size(), + ); } if complete { @@ -558,14 +566,52 @@ impl Downstairs { // Fire DTrace probes and update stats let io_size = done.io_size(); - match &done.work { + let work = done.work.clone(); + Self::update_io_done_stats(&self.stats, work, ds_id, io_size); + + debug!(self.log, "[A] ack job {}", ds_id); + + if let Some(r) = &mut self.repair { + r.on_job_complete(ds_id, done); + } + + // Copy (if present) read data back to the guest buffer they + // provided to us, and notify any waiters. + if let Some(res) = done.res.take() { + let data = done + .data + .as_mut() + .map(|v| (v.blocks.as_slice(), &mut v.data)); + res.transfer_and_notify(data, r); + } + + if self.gw_active.remove(&ds_id) { + self.acked_ids.push(ds_id); + } else { + panic!("job {ds_id} not on gw_active list"); + } + } + + /// Update oximeter stats for a write operation. + pub fn update_write_done_metrics(&mut self, size: usize) { + self.stats.add_write(size as i64); + } + + /// Update dtrace and oximeter metrics for a completed IO + pub fn update_io_done_stats( + stats: &DownstairsStatOuter, + work: IOop, + ds_id: JobId, + io_size: usize, + ) { + match work { IOop::Read { .. } => { cdt::gw__read__done!(|| (ds_id.0)); - self.stats.add_read(io_size as i64); + stats.add_read(io_size as i64); } IOop::Write { .. } => { cdt::gw__write__done!(|| (ds_id.0)); - self.stats.add_write(io_size as i64); + // We already updated metrics right after the fast ack. } IOop::WriteUnwritten { .. } => { cdt::gw__write__unwritten__done!(|| (ds_id.0)); @@ -574,50 +620,29 @@ impl Downstairs { } IOop::Flush { .. } => { cdt::gw__flush__done!(|| (ds_id.0)); - self.stats.add_flush(); + stats.add_flush(); } IOop::Barrier { .. } => { cdt::gw__barrier__done!(|| (ds_id.0)); - self.stats.add_barrier(); + stats.add_barrier(); } IOop::ExtentFlushClose { extent, .. } => { cdt::gw__close__done!(|| (ds_id.0, extent.0)); - self.stats.add_flush_close(); + stats.add_flush_close(); } IOop::ExtentLiveRepair { extent, .. } => { cdt::gw__repair__done!(|| (ds_id.0, extent.0)); - self.stats.add_extent_repair(); + stats.add_extent_repair(); } IOop::ExtentLiveNoOp { .. } => { cdt::gw__noop__done!(|| (ds_id.0)); - self.stats.add_extent_noop(); + stats.add_extent_noop(); } IOop::ExtentLiveReopen { extent, .. } => { cdt::gw__reopen__done!(|| (ds_id.0, extent.0)); - self.stats.add_extent_reopen(); + stats.add_extent_reopen(); } } - debug!(self.log, "[A] ack job {}", ds_id); - - if let Some(r) = &mut self.repair { - r.on_job_complete(ds_id, done); - } - - // Copy (if present) read data back to the guest buffer they - // provided to us, and notify any waiters. - if let Some(res) = done.res.take() { - let data = done - .data - .as_mut() - .map(|v| (v.blocks.as_slice(), &mut v.data)); - res.transfer_and_notify(data, r); - } - - if self.gw_active.remove(&ds_id) { - self.acked_ids.push(ds_id); - } else { - panic!("job {ds_id} not on gw_active list"); - } } /// Helper function to calculate pruned deps for a given job diff --git a/upstairs/src/upstairs.rs b/upstairs/src/upstairs.rs index e037c0a27..16e9b1376 100644 --- a/upstairs/src/upstairs.rs +++ b/upstairs/src/upstairs.rs @@ -1487,6 +1487,11 @@ impl Upstairs { // Fast-ack, pretending to be done immediately operations res.send_ok(()); + // Update Oximeter stats for this write. + if !is_write_unwritten { + self.downstairs.update_write_done_metrics(data.len()); + } + Some(DeferredWrite { ddef, impacted_blocks,