Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
…daemons into vdm_read_through_xcvrd
  • Loading branch information
mihirpat1 committed Feb 14, 2025
2 parents f88fbed + 29e65fe commit 8a8efc6
Show file tree
Hide file tree
Showing 9 changed files with 292 additions and 60 deletions.
38 changes: 20 additions & 18 deletions sonic-chassisd/scripts/chassisd
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ try:
import time
import json
import glob
from datetime import datetime
from datetime import datetime, timezone

from sonic_py_common import daemon_base, logger, device_info
from sonic_py_common.task_base import ProcessTaskBase
Expand Down Expand Up @@ -136,6 +136,16 @@ def get_chassis():
self.log_error("Failed to load chassis due to {}".format(repr(e)))
sys.exit(CHASSIS_LOAD_ERROR)

def get_formatted_time(datetimeobj=None, op_format=None):
    """
    Render a timestamp as text.

    :param datetimeobj: Optional - datetime object to render; when it is not
                        supplied the current time in UTC is used
    :param op_format: Optional - strftime pattern for the output; when it is
                      not supplied (or is empty) a 12-hour pattern carrying a
                      literal "UTC" label is used
    :returns the rendered time as a string
    """
    # NOTE(review): a caller-supplied datetime is rendered as-is, with no
    # timezone conversion, even though the default pattern prints "UTC".
    stamp = datetimeobj or datetime.now(timezone.utc)
    pattern = op_format or "%a %b %d %I:%M:%S %p UTC %Y"
    return stamp.strftime(pattern)

#
# Module Config Updater ========================================================
#
Expand Down Expand Up @@ -765,28 +775,22 @@ class SmartSwitchModuleUpdater(ModuleUpdater):

def update_dpu_state(self, key, state):
"""
Update DPU state in chassisStateDB using the given key.
Update specific DPU state fields in chassisStateDB using the given key.
"""
try:
# Connect to the CHASSIS_STATE_DB using daemon_base
if not self.chassis_state_db:
self.chassis_state_db = daemon_base.db_connect("CHASSIS_STATE_DB")

# Fetch the current data for the given key and convert it to a dict
current_data = self._convert_to_dict(self.chassis_state_db.hgetall(key))

if current_data:
self.chassis_state_db.delete(key)

# Prepare the updated data
# Prepare the fields to update
updates = {
"dpu_midplane_link_state": state,
"dpu_midplane_link_reason": "",
"dpu_midplane_link_time": datetime.now().strftime("%a %b %d %I:%M:%S %p UTC %Y"),
"dpu_midplane_link_time": get_formatted_time(),
}
current_data.update(updates)

for field, value in current_data.items():
# Update each field directly
for field, value in updates.items():
self.chassis_state_db.hset(key, field, value)

except Exception as e:
Expand Down Expand Up @@ -818,7 +822,7 @@ class SmartSwitchModuleUpdater(ModuleUpdater):

def _get_current_time_str(self):
    """Returns the current time as a string in 'YYYY_MM_DD_HH_MM_SS' format.

    The value comes from get_formatted_time(), which uses the current time
    in UTC when no datetime object is passed.
    """
    # Single return through the shared helper; the earlier direct
    # datetime.now() return made this line unreachable.
    return get_formatted_time(op_format="%Y_%m_%d_%H_%M_%S")

def _get_history_path(self, module, file_name):
"""Generates the full path for history files."""
Expand Down Expand Up @@ -880,11 +884,9 @@ class SmartSwitchModuleUpdater(ModuleUpdater):

file_path = self._get_history_path(module, file_name)
try:
dt_obj = datetime.strptime(prev_reboot_time, "%Y_%m_%d_%H_%M_%S")
formatted_time = get_formatted_time(datetimeobj=datetime.strptime(prev_reboot_time, "%Y_%m_%d_%H_%M_%S"))
except ValueError:
dt_obj = datetime.now()

formatted_time = dt_obj.strftime("%a %b %d %I:%M:%S %p UTC %Y")
formatted_time = get_formatted_time()

reboot_cause_dict = {
"cause": cause,
Expand Down Expand Up @@ -1188,7 +1190,7 @@ class DpuStateUpdater(logger.Logger):
return True

def _time_now(self):
    """Return the current time as text in get_formatted_time()'s default format.

    That default is "%a %b %d %I:%M:%S %p UTC %Y", taken from the current
    time in UTC.
    """
    # Single return through the shared helper; the earlier direct
    # datetime.now() return used a different format and shadowed this one.
    return get_formatted_time()

def _update_dp_dpu_state(self, state):
self.dpu_state_table.hset(self.name, self.DP_STATE, state)
Expand Down
24 changes: 24 additions & 0 deletions sonic-chassisd/tests/test_chassisd.py
Original file line number Diff line number Diff line change
Expand Up @@ -1515,3 +1515,27 @@ def test_chassis_db_bootup_with_empty_slot():
assert status == fvs[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD]
assert down_module_lc1_key in sup_module_updater.down_modules.keys()


def test_smartswitch_time_format():
    """Check that update_dpu_state() writes dpu_midplane_link_time in the expected format."""
    date_format = "%a %b %d %I:%M:%S %p UTC %Y"

    def is_valid_date(date_str):
        """Return True when date_str parses with date_format."""
        try:
            datetime.strptime(date_str, date_format)
        except ValueError:
            # Parsing failed and we are unable to obtain the time
            return False
        return True

    chassis = MockSmartSwitchChassis()
    chassis_state_db = MagicMock()
    mod_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis)
    # Swap in a mock DB handle so the hset() calls can be inspected afterwards
    mod_updater.chassis_state_db = chassis_state_db
    mod_updater.chassis_state_db.hgetall = MagicMock(return_value={})
    mod_updater.chassis_state_db.hset = MagicMock()

    mod_updater.update_dpu_state("DPU1", 'up')

    # Pick the value written for the link-time field (last matching call wins)
    date_value = None
    for call in mod_updater.chassis_state_db.hset.call_args_list:
        positional = call[0]
        if positional[0] == "DPU1" and positional[1] == "dpu_midplane_link_time":
            date_value = positional[2]

    # Fail explicitly when the field was never written; previously the
    # AssertionError was only constructed, never raised.
    assert date_value, "Date is not set!"
    assert is_valid_date(date_value)
73 changes: 50 additions & 23 deletions sonic-chassisd/tests/test_dpu_chassisd.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import signal
import threading
from imp import load_source
import re

from mock import MagicMock
from sonic_py_common import daemon_base
Expand Down Expand Up @@ -75,14 +76,14 @@ def test_dpu_state_update_api(state, expected_state):

@pytest.mark.parametrize('dpu_id, dp_state, cp_state, expected_state', [
(0, False, False, {'DPU0':
{'dpu_data_plane_state': 'down', 'dpu_data_plane_time': '2000-01-01 00:00:00',
'dpu_control_plane_state': 'down', 'dpu_control_plane_time': '2000-01-01 00:00:00'}}),
{'dpu_data_plane_state': 'down', 'dpu_data_plane_time': 'Sat Jan 01 12:00:00 AM UTC 2000',
'dpu_control_plane_state': 'down', 'dpu_control_plane_time': 'Sat Jan 01 12:00:00 AM UTC 2000'}}),
(0, False, True, {'DPU0':
{'dpu_data_plane_state': 'down', 'dpu_data_plane_time': '2000-01-01 00:00:00',
'dpu_control_plane_state': 'up', 'dpu_control_plane_time': '2000-01-01 00:00:00'}}),
{'dpu_data_plane_state': 'down', 'dpu_data_plane_time': 'Sat Jan 01 12:00:00 AM UTC 2000',
'dpu_control_plane_state': 'up', 'dpu_control_plane_time': 'Sat Jan 01 12:00:00 AM UTC 2000'}}),
(0, True, True, {'DPU0':
{'dpu_data_plane_state': 'up', 'dpu_data_plane_time': '2000-01-01 00:00:00',
'dpu_control_plane_state': 'up', 'dpu_control_plane_time': '2000-01-01 00:00:00'}}),
{'dpu_data_plane_state': 'up', 'dpu_data_plane_time': 'Sat Jan 01 12:00:00 AM UTC 2000',
'dpu_control_plane_state': 'up', 'dpu_control_plane_time': 'Sat Jan 01 12:00:00 AM UTC 2000'}}),
])
def test_dpu_state_update(dpu_id, dp_state, cp_state, expected_state):
chassis = MockDpuChassis()
Expand All @@ -102,7 +103,7 @@ def hset(key, field, value):

with mock.patch.object(swsscommon.Table, 'hset', side_effect=hset) as hset_mock:
dpu_updater = DpuStateUpdater(SYSLOG_IDENTIFIER, chassis)
dpu_updater._time_now = MagicMock(return_value='2000-01-01 00:00:00')
dpu_updater._time_now = MagicMock(return_value='Sat Jan 01 12:00:00 AM UTC 2000')

dpu_updater.update_state()

Expand All @@ -112,20 +113,20 @@ def hset(key, field, value):

# After the deinit we assume that the DPU state is down.
assert chassis_state_db == {'DPU0':
{'dpu_data_plane_state': 'down', 'dpu_data_plane_time': '2000-01-01 00:00:00',
'dpu_control_plane_state': 'down', 'dpu_control_plane_time': '2000-01-01 00:00:00'}}
{'dpu_data_plane_state': 'down', 'dpu_data_plane_time': 'Sat Jan 01 12:00:00 AM UTC 2000',
'dpu_control_plane_state': 'down', 'dpu_control_plane_time': 'Sat Jan 01 12:00:00 AM UTC 2000'}}


@pytest.mark.parametrize('dpu_id, dp_state, cp_state, expected_state', [
(0, False, False, {'DPU0':
{'dpu_data_plane_state': 'down', 'dpu_data_plane_time': '2000-01-01 00:00:00',
'dpu_control_plane_state': 'down', 'dpu_control_plane_time': '2000-01-01 00:00:00'}}),
{'dpu_data_plane_state': 'down', 'dpu_data_plane_time': 'Sat Jan 01 12:00:00 AM UTC 2000',
'dpu_control_plane_state': 'down', 'dpu_control_plane_time': 'Sat Jan 01 12:00:00 AM UTC 2000'}}),
(0, False, True, {'DPU0':
{'dpu_data_plane_state': 'down', 'dpu_data_plane_time': '2000-01-01 00:00:00',
'dpu_control_plane_state': 'up', 'dpu_control_plane_time': '2000-01-01 00:00:00'}}),
{'dpu_data_plane_state': 'down', 'dpu_data_plane_time': 'Sat Jan 01 12:00:00 AM UTC 2000',
'dpu_control_plane_state': 'up', 'dpu_control_plane_time': 'Sat Jan 01 12:00:00 AM UTC 2000'}}),
(0, True, True, {'DPU0':
{'dpu_data_plane_state': 'up', 'dpu_data_plane_time': '2000-01-01 00:00:00',
'dpu_control_plane_state': 'up', 'dpu_control_plane_time': '2000-01-01 00:00:00'}}),
{'dpu_data_plane_state': 'up', 'dpu_data_plane_time': 'Sat Jan 01 12:00:00 AM UTC 2000',
'dpu_control_plane_state': 'up', 'dpu_control_plane_time': 'Sat Jan 01 12:00:00 AM UTC 2000'}}),
])
def test_dpu_state_manager(dpu_id, dp_state, cp_state, expected_state):
chassis = MockDpuChassis()
Expand All @@ -146,7 +147,7 @@ def hset(key, field, value):
with mock.patch.object(swsscommon.Table, 'hset', side_effect=hset):
with mock.patch.object(swsscommon.Select, 'select', side_effect=((swsscommon.Select.OBJECT, None), (swsscommon.Select.OBJECT, None), KeyboardInterrupt)):
dpu_updater = DpuStateUpdater(SYSLOG_IDENTIFIER, chassis)
dpu_updater._time_now = MagicMock(return_value='2000-01-01 00:00:00')
dpu_updater._time_now = MagicMock(return_value='Sat Jan 01 12:00:00 AM UTC 2000')

dpu_state_mng = DpuStateManagerTask(SYSLOG_IDENTIFIER, dpu_updater)

Expand All @@ -158,8 +159,8 @@ def hset(key, field, value):

# After the deinit we assume that the DPU state is down.
assert chassis_state_db == {'DPU0':
{'dpu_data_plane_state': 'down', 'dpu_data_plane_time': '2000-01-01 00:00:00',
'dpu_control_plane_state': 'down', 'dpu_control_plane_time': '2000-01-01 00:00:00'}}
{'dpu_data_plane_state': 'down', 'dpu_data_plane_time': 'Sat Jan 01 12:00:00 AM UTC 2000',
'dpu_control_plane_state': 'down', 'dpu_control_plane_time': 'Sat Jan 01 12:00:00 AM UTC 2000'}}


def test_dpu_chassis_daemon():
Expand All @@ -180,7 +181,7 @@ def hset(key, field, value):
chassis_state_db[key][field] = value

with mock.patch.object(swsscommon.Table, 'hset', side_effect=hset) as hset_mock:
with mock.patch.object(DpuStateUpdater, '_time_now', side_effect=lambda: '2000-01-01 00:00:00') as mock_time_now:
with mock.patch.object(DpuStateUpdater, '_time_now', side_effect=lambda: 'Sat Jan 01 12:00:00 AM UTC 2000') as mock_time_now:

daemon_chassisd = DpuChassisdDaemon(SYSLOG_IDENTIFIER, chassis)
daemon_chassisd.CHASSIS_INFO_UPDATE_PERIOD_SECS = MagicMock(return_value=1)
Expand All @@ -195,14 +196,40 @@ def hset(key, field, value):
time.sleep(3)

assert chassis_state_db == {'DPU1':
{'dpu_data_plane_state': 'up', 'dpu_data_plane_time': '2000-01-01 00:00:00',
'dpu_control_plane_state': 'up', 'dpu_control_plane_time': '2000-01-01 00:00:00'}}
{'dpu_data_plane_state': 'up', 'dpu_data_plane_time': 'Sat Jan 01 12:00:00 AM UTC 2000',
'dpu_control_plane_state': 'up', 'dpu_control_plane_time': 'Sat Jan 01 12:00:00 AM UTC 2000'}}

daemon_chassisd.signal_handler(signal.SIGINT, None)
daemon_chassisd.stop.wait.return_value = True

thread.join()

assert chassis_state_db == {'DPU1':
{'dpu_data_plane_state': 'down', 'dpu_data_plane_time': '2000-01-01 00:00:00',
'dpu_control_plane_state': 'down', 'dpu_control_plane_time': '2000-01-01 00:00:00'}}
{'dpu_data_plane_state': 'down', 'dpu_data_plane_time': 'Sat Jan 01 12:00:00 AM UTC 2000',
'dpu_control_plane_state': 'down', 'dpu_control_plane_time': 'Sat Jan 01 12:00:00 AM UTC 2000'}}
with mock.patch.object(swsscommon.Table, 'hset', side_effect=hset):
daemon_chassisd = DpuChassisdDaemon(SYSLOG_IDENTIFIER, chassis)
daemon_chassisd.CHASSIS_INFO_UPDATE_PERIOD_SECS = MagicMock(return_value=1)

daemon_chassisd.stop = MagicMock()
daemon_chassisd.stop.wait.return_value = False

thread = threading.Thread(target=daemon_chassisd.run)
thread.start()
# Wait for thread to start and update DB
time.sleep(3)
date_format = "%a %b %d %I:%M:%S %p UTC %Y"

def is_valid_date(date_str):
    """Report whether date_str can be parsed with date_format."""
    try:
        datetime.strptime(date_str, date_format)
    except ValueError:
        # strptime rejected the text, so it is not in the expected format
        parsed = False
    else:
        parsed = True
    return parsed
assert is_valid_date(chassis_state_db['DPU1']['dpu_data_plane_time'])
assert is_valid_date(chassis_state_db['DPU1']['dpu_control_plane_time'])
daemon_chassisd.signal_handler(signal.SIGINT, None)
daemon_chassisd.stop.wait.return_value = True

thread.join()
39 changes: 39 additions & 0 deletions sonic-pcied/scripts/pcied
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ SYSLOG_IDENTIFIER = "pcied"
PCIE_RESULT_REGEX = "PCIe Device Checking All Test"
PCIE_DEVICE_TABLE_NAME = "PCIE_DEVICE"
PCIE_STATUS_TABLE_NAME = "PCIE_DEVICES"
PCIE_DETACH_INFO_TABLE = "PCIE_DETACH_INFO"

PCIE_DETACH_BUS_INFO_FIELD = "bus_info"
PCIE_DETACH_DPU_STATE_FIELD = "dpu_state"

PCIED_MAIN_THREAD_SLEEP_SECS = 60

Expand Down Expand Up @@ -92,6 +96,7 @@ class DaemonPcied(daemon_base.DaemonBase):
self.state_db = daemon_base.db_connect("STATE_DB")
self.device_table = swsscommon.Table(self.state_db, PCIE_DEVICE_TABLE_NAME)
self.status_table = swsscommon.Table(self.state_db, PCIE_STATUS_TABLE_NAME)
self.detach_info = swsscommon.Table(self.state_db, PCIE_DETACH_INFO_TABLE)

def __del__(self):
if self.device_table:
Expand All @@ -102,6 +107,10 @@ class DaemonPcied(daemon_base.DaemonBase):
stable_keys = self.status_table.getKeys()
for stk in stable_keys:
self.status_table._del(stk)
if self.detach_info:
detach_info_keys = self.detach_info.getKeys()
for dk in detach_info_keys:
self.detach_info._del(dk)

# load aer-fields into statedb
def update_aer_to_statedb(self):
Expand Down Expand Up @@ -151,6 +160,28 @@ class DaemonPcied(daemon_base.DaemonBase):

self.status_table.set("status", fvs)

# Check whether the given PCIe device is recorded in state_db as a detaching DPU
def is_dpu_in_detaching_mode(self, pcie_dev):
    """
    Tell whether pcie_dev is listed in the detach-info table as detaching.

    :param pcie_dev: bus info string to look for; compared for equality
                     against each entry's "bus_info" field
    :returns True if some entry has a matching bus_info and a dpu_state of
             "detaching"; False otherwise, including when self.detach_info
             is None
    """
    # Ensure detach_info is not None
    if self.detach_info is None:
        self.log_debug("detach_info is None")
        return False

    # Query the state_db for the device detaching status
    for key in self.detach_info.getKeys():
        dpu_info = self.detach_info.get(key)

        # NOTE(review): swsscommon.Table.get() is expected to hand back a
        # (status, field-value pairs) sequence rather than a mapping - confirm.
        # Unpack that shape here so the .get() lookups below do not fail on
        # it; mapping results are used unchanged.
        if isinstance(dpu_info, (list, tuple)):
            if len(dpu_info) != 2 or not dpu_info[0]:
                continue
            dpu_info = dict(dpu_info[1])

        if not dpu_info:
            continue

        bus_info = dpu_info.get(PCIE_DETACH_BUS_INFO_FIELD)
        dpu_state = dpu_info.get(PCIE_DETACH_DPU_STATE_FIELD)
        if bus_info == pcie_dev and dpu_state == "detaching":
            return True

    return False

# Check the PCIe devices
def check_pcie_devices(self):
self.resultInfo = platform_pcieutil.get_pcie_check()
Expand All @@ -160,6 +191,14 @@ class DaemonPcied(daemon_base.DaemonBase):

for result in self.resultInfo:
if result["result"] == "Failed":
# Convert bus, device, and function to a bus_info format like "0000:03:00.0"
pcie_dev = "0000:{int(result['bus'], 16):02x}:{int(result['dev'], 16):02x}.{int(result['fn'], 16)}"

# Check if the device is in detaching mode
if device_info.is_smartswitch() and self.is_dpu_in_detaching_mode(pcie_dev):
self.log_debug("PCIe Device: {} is in detaching mode, skipping warning.".format(pcie_dev))
continue

self.log_warning("PCIe Device: " + result["name"] + " Not Found")
err += 1
else:
Expand Down
Loading

0 comments on commit 8a8efc6

Please sign in to comment.