Skip to content

Commit

Permalink
Add and increment counter for RX FIFO stuck condition detected
Browse files Browse the repository at this point in the history
Summary: Introduce a new counter to track the RX FIFO stuck condition detected in the ASIC/SDK.

Reviewed By: jasmeetbagga

Differential Revision: D69682501

fbshipit-source-id: 6c3b127284e232a0edecf5b9d7d62c9aa7a515fa
  • Loading branch information
Nivin Lawrence authored and facebook-github-bot committed Feb 15, 2025
1 parent 60388d0 commit 914ba68
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 5 deletions.
2 changes: 0 additions & 2 deletions fboss/agent/BUCK
Original file line number Diff line number Diff line change
Expand Up @@ -806,7 +806,6 @@ cpp_library(
"//fboss/agent/capture:capture",
"//fboss/agent/hw:hw_cpu_fb303_stats",
"//fboss/agent/hw:hw_switch_fb303_stats",
"//fboss/agent/hw:hw_switch_warmboot_helper",
"//fboss/agent/hw/switch_asics:switch_asics",
"//fboss/agent/if:ctrl-cpp2-services",
"//fboss/agent/if:ctrl-cpp2-types",
Expand Down Expand Up @@ -844,7 +843,6 @@ cpp_library(
"//fboss/fsdb/if:fsdb_model",
"//fboss/fsdb/if:fsdb_oper-cpp2-types",
"//fboss/lib:alert_logger",
"//fboss/lib:common_file_utils",
"//fboss/lib:common_utils",
"//fboss/lib:exponential_back_off",
"//fboss/lib:hw_write_behavior",
Expand Down
10 changes: 10 additions & 0 deletions fboss/agent/hw/HwSwitchFb303Stats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,11 @@ HwSwitchFb303Stats::HwSwitchFb303Stats(
getCounterPrefix() + vendor + ".isolationFirmwareCrash",
SUM,
RATE),
rxFifoStuckDetected_(
map,
getCounterPrefix() + vendor + ".rxFifoStuckDetected.errors",
SUM,
RATE),
hwInitializedTimeMs_(
map,
getCounterPrefix() + vendor + ".hw_initialized_time_ms",
Expand Down Expand Up @@ -424,6 +429,10 @@ int64_t HwSwitchFb303Stats::getIsolationFirmwareCrashes() const {
return getCumulativeValue(isolationFirmwareCrashes_);
}

// Reports the value getCumulativeValue() yields for the
// rxFifoStuckDetected_ timeseries.
int64_t HwSwitchFb303Stats::getRxFifoStuckDetected() const {
  const int64_t stuckDetections = getCumulativeValue(rxFifoStuckDetected_);
  return stuckDetections;
}

// Reads packetIntegrityDrops from currentDropStats_, falling back to 0 when
// the field holds no value.
int64_t HwSwitchFb303Stats::getPacketIntegrityDrops() const {
  const auto integrityDrops = currentDropStats_.packetIntegrityDrops();
  return integrityDrops.value_or(0);
}
Expand Down Expand Up @@ -485,6 +494,7 @@ HwAsicErrors HwSwitchFb303Stats::getHwAsicErrors() const {
asicErrors.allReassemblyContextsTaken() =
getAllReassemblyContextsTakenError();
asicErrors.isolationFirmwareCrashes() = getIsolationFirmwareCrashes();
asicErrors.rxFifoStuckDetected() = getRxFifoStuckDetected();
return asicErrors;
}

Expand Down
5 changes: 5 additions & 0 deletions fboss/agent/hw/HwSwitchFb303Stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,9 @@ class HwSwitchFb303Stats {
// Records one isolation firmware crash by adding 1 to the
// isolationFirmwareCrashes_ timeseries.
void isolationFirmwareCrash() {
isolationFirmwareCrashes_.addValue(1);
}
// Records one RX FIFO stuck detection by adding 1 to the
// rxFifoStuckDetected_ timeseries. The accumulated value is read back via
// getRxFifoStuckDetected().
void rxFifoStuckDetected() {
rxFifoStuckDetected_.addValue(1);
}
void fabricReachabilityMissingCount(int64_t value);
void fabricReachabilityMismatchCount(int64_t value);
void virtualDevicesWithAsymmetricConnectivity(int64_t value);
Expand Down Expand Up @@ -182,6 +185,7 @@ class HwSwitchFb303Stats {
int64_t getAllReassemblyContextsTakenError() const;
// FW Errors
int64_t getIsolationFirmwareCrashes() const;
int64_t getRxFifoStuckDetected() const;

// Switch drops
int64_t getPacketIntegrityDrops() const;
Expand Down Expand Up @@ -275,6 +279,7 @@ class HwSwitchFb303Stats {
TLTimeseries forwardingQueueProcessorErrors_;
TLTimeseries allReassemblyContextsTaken_;
TLTimeseries isolationFirmwareCrashes_;
TLTimeseries rxFifoStuckDetected_;
TLTimeseries hwInitializedTimeMs_;
TLTimeseries bootTimeMs_;
TLTimeseries coldBoot_;
Expand Down
1 change: 1 addition & 0 deletions fboss/agent/hw/hardware_stats.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ struct HwAsicErrors {
12: optional i64 fdrFifoOverflowErrors;
13: optional i64 fdaFifoOverflowErrors;
14: optional i64 isolationFirmwareCrashes;
15: optional i64 rxFifoStuckDetected;
}

struct HwTeFlowStats {
Expand Down
10 changes: 10 additions & 0 deletions fboss/agent/hw/sai/switch/npu/bcm/SaiSwitch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -697,6 +697,16 @@ void SaiSwitch::switchEventCallback(
// TODO(zecheng): Handle and log remote link change
break;
}
#endif
// TODO: Support for 12.0 is pending, move this to BRCM_SAI_SDK_DNX_GTE_11_7
// once 12.0 support is available.
#if defined(SAI_VERSION_11_7_0_0_DNX_ODP)
case SAI_SWITCH_EVENT_TYPE_RX_FIFO_STUCK_DETECTED: {
XLOG(ERR) << "RX FIFO stuck seen on link: " << eventInfo->index
<< ", pipe: " << eventInfo->index2;
getSwitchStats()->rxFifoStuckDetected();
break;
}
#endif
}
}
Expand Down
11 changes: 8 additions & 3 deletions fboss/fsdb/if/oss/fsdb_model_thriftpath.h
Original file line number Diff line number Diff line change
Expand Up @@ -23574,7 +23574,8 @@ std::pair<strings::allReassemblyContextsTaken, Child<::std::int64_t, ::apache::t
std::pair<strings::reassemblyErrors, Child<::std::int64_t, ::apache::thrift::type_class::integral, ::apache::thrift::type::i64_t>>,
std::pair<strings::fdrFifoOverflowErrors, Child<::std::int64_t, ::apache::thrift::type_class::integral, ::apache::thrift::type::i64_t>>,
std::pair<strings::fdaFifoOverflowErrors, Child<::std::int64_t, ::apache::thrift::type_class::integral, ::apache::thrift::type::i64_t>>,
std::pair<strings::isolationFirmwareCrashes, Child<::std::int64_t, ::apache::thrift::type_class::integral, ::apache::thrift::type::i64_t>>>;
std::pair<strings::isolationFirmwareCrashes, Child<::std::int64_t, ::apache::thrift::type_class::integral, ::apache::thrift::type::i64_t>>,
std::pair<strings::rxFifoStuckDetected, Child<::std::int64_t, ::apache::thrift::type_class::integral, ::apache::thrift::type::i64_t>>>;
using ChildrenById = fatal::tuple< std::pair<std::integral_constant<apache::thrift::field_id_t, 1>, Child<::std::int64_t, ::apache::thrift::type_class::integral, ::apache::thrift::type::i64_t>>,
std::pair<std::integral_constant<apache::thrift::field_id_t, 2>, Child<::std::int64_t, ::apache::thrift::type_class::integral, ::apache::thrift::type::i64_t>>,
std::pair<std::integral_constant<apache::thrift::field_id_t, 3>, Child<::std::int64_t, ::apache::thrift::type_class::integral, ::apache::thrift::type::i64_t>>,
Expand All @@ -23588,7 +23589,8 @@ std::pair<strings::isolationFirmwareCrashes, Child<::std::int64_t, ::apache::thr
std::pair<std::integral_constant<apache::thrift::field_id_t, 11>, Child<::std::int64_t, ::apache::thrift::type_class::integral, ::apache::thrift::type::i64_t>>,
std::pair<std::integral_constant<apache::thrift::field_id_t, 12>, Child<::std::int64_t, ::apache::thrift::type_class::integral, ::apache::thrift::type::i64_t>>,
std::pair<std::integral_constant<apache::thrift::field_id_t, 13>, Child<::std::int64_t, ::apache::thrift::type_class::integral, ::apache::thrift::type::i64_t>>,
std::pair<std::integral_constant<apache::thrift::field_id_t, 14>, Child<::std::int64_t, ::apache::thrift::type_class::integral, ::apache::thrift::type::i64_t>>>;
std::pair<std::integral_constant<apache::thrift::field_id_t, 14>, Child<::std::int64_t, ::apache::thrift::type_class::integral, ::apache::thrift::type::i64_t>>,
std::pair<std::integral_constant<apache::thrift::field_id_t, 15>, Child<::std::int64_t, ::apache::thrift::type_class::integral, ::apache::thrift::type::i64_t>>>;
template <typename Name>
using NameToId = fatal::tuple<std::pair<strings::parityErrors, std::integral_constant<apache::thrift::field_id_t, 1>>,
std::pair<strings::correctedParityErrors, std::integral_constant<apache::thrift::field_id_t, 2>>,
Expand All @@ -23603,7 +23605,8 @@ std::pair<strings::allReassemblyContextsTaken, std::integral_constant<apache::th
std::pair<strings::reassemblyErrors, std::integral_constant<apache::thrift::field_id_t, 11>>,
std::pair<strings::fdrFifoOverflowErrors, std::integral_constant<apache::thrift::field_id_t, 12>>,
std::pair<strings::fdaFifoOverflowErrors, std::integral_constant<apache::thrift::field_id_t, 13>>,
std::pair<strings::isolationFirmwareCrashes, std::integral_constant<apache::thrift::field_id_t, 14>>>::template type_of<Name>;
std::pair<strings::isolationFirmwareCrashes, std::integral_constant<apache::thrift::field_id_t, 14>>,
std::pair<strings::rxFifoStuckDetected, std::integral_constant<apache::thrift::field_id_t, 15>>>::template type_of<Name>;

template <typename Name>
using TypeFor = typename Children::template type_of<Name>;
Expand All @@ -23623,6 +23626,7 @@ std::pair<strings::isolationFirmwareCrashes, std::integral_constant<apache::thri
STRUCT_CHILD_GETTERS(fdrFifoOverflowErrors, 12);
STRUCT_CHILD_GETTERS(fdaFifoOverflowErrors, 13);
STRUCT_CHILD_GETTERS(isolationFirmwareCrashes, 14);
STRUCT_CHILD_GETTERS(rxFifoStuckDetected, 15);

template <apache::thrift::field_id_t __id>
auto operator()(const std::integral_constant<apache::thrift::field_id_t, __id>&) {
Expand All @@ -23640,6 +23644,7 @@ std::pair<strings::isolationFirmwareCrashes, std::integral_constant<apache::thri
else if constexpr (__id == 12) { return fdrFifoOverflowErrors(); }
else if constexpr (__id == 13) { return fdaFifoOverflowErrors(); }
else if constexpr (__id == 14) { return isolationFirmwareCrashes(); }
else if constexpr (__id == 15) { return rxFifoStuckDetected(); }
}

template <typename T, T... Values>
Expand Down

0 comments on commit 914ba68

Please sign in to comment.