Support nvidia-smi v13 schema, plus a few other issues. #18176
+9,427
−163
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Summary
Checklist
I am not spending any time reading some policy. GitHub Copilot was certainly auto-completing a few lines here and there. If that is OK, accept the PR. If not, ¯\_(ツ)_/¯
Related issues
resolves:
or a bit easier to read...
{
  "fields": {
    "clocks_current_graphics": 210,
    "clocks_current_memory": 405,
    "clocks_current_sm": 210,
    "clocks_current_video": 555,
    "compute_mode": "Default",
    "cuda_version": "13.1",
    "current_ecc": "Enabled",
    "display_active": "Disabled",
    "driver_version": "590.44.01",
    "ecc_errors_aggregate_dram_correctable": 0,
    "ecc_errors_aggregate_dram_uncorrectable": 0,
    "ecc_errors_aggregate_sram_correctable": 0,
    "ecc_errors_aggregate_sram_threshold_exceeded": "No",
    "ecc_errors_aggregate_sram_uncorrectable_l2": 0,
    "ecc_errors_aggregate_sram_uncorrectable_microcontroller": 0,
    "ecc_errors_aggregate_sram_uncorrectable_other": 0,
    "ecc_errors_aggregate_sram_uncorrectable_parity": 0,
    "ecc_errors_aggregate_sram_uncorrectable_pcie": 0,
    "ecc_errors_aggregate_sram_uncorrectable_secded": 0,
    "ecc_errors_aggregate_sram_uncorrectable_sm": 0,
    "ecc_errors_channel_repair_pending": "No",
    "ecc_errors_tpc_repair_pending": "No",
    "ecc_errors_volatile_dram_correctable": 0,
    "ecc_errors_volatile_dram_uncorrectable": 0,
    "ecc_errors_volatile_sram_correctable": 0,
    "ecc_errors_volatile_sram_uncorrectable_parity": 0,
    "ecc_errors_volatile_sram_uncorrectable_secded": 0,
    "encoder_stats_average_fps": 0,
    "encoder_stats_average_latency": 0,
    "encoder_stats_session_count": 0,
    "fan_speed": 30,
    "fbc_stats_average_fps": 0,
    "fbc_stats_average_latency": 0,
    "fbc_stats_session_count": 0,
    "memory_free": 555,
    "memory_reserved": 441,
    "memory_total": 23028,
    "memory_used": 22033,
    "pcie_link_gen_current": 1,
    "pcie_link_width_current": 16,
    "power_draw": 6.25,
    "power_limit": 230,
    "pstate": "P8",
    "remapped_rows_correctable": 0,
    "remapped_rows_failure": "No",
    "remapped_rows_pending": "No",
    "remapped_rows_uncorrectable": 0,
    "temperature_gpu": 11,
    "utilization_decoder": 0,
    "utilization_encoder": 0,
    "utilization_gpu": 0,
    "utilization_jpeg": 0,
    "utilization_memory": 0,
    "utilization_ofa": 0,
    "vbios_version": "94.02.6D.00.0D"
  },
  "name": "nvidia_smi",
  "tags": {
    "arch": "Ampere",
    "host": "ava",
    "index": "0",
    "name": "NVIDIA RTX A5000",
    "serial": "1234567890123",
    "uuid": "GPU-12345678-aaaa-bbbb-cccc-0123456789ab"
  },
  "timestamp": 1767202870
}
{
  "fields": {
    "pid": 2623,
    "used_memory": 22024
  },
  "name": "nvidia_smi_process",
  "tags": {
    "host": "ava",
    "name": "/root/src/llama.cpp/build/bin/llama-server",
    "type": "C"
  },
  "timestamp": 1767202870
}