-
Notifications
You must be signed in to change notification settings - Fork 163
Description
Excellent job!
When I use run same policy in play and deploy ,show different behavior ,it's really strange ,
Here is the detail,
1. run the official policy demo_ckpt.pt file and demo_motion.npz in play seems very good
when run
uv run src/mjlab/scripts/play3.py Mjlab-Tracking-Flat-Unitree-G1-Play --model_path /tmp/mjlab_cache/demo_ckpt.pt --motion_file /tmp/mjlab_cache/demo_motion.npz

2. get the onnx file exported from official demo_ckpt.pt and I write code to preparing for deploy and sim,it seems very strange and behaving very strangely, completely unable to stabilize
Please help me check the deploy sim code and the XML to see if there's anything wrong.
this is the deploy code I use,
import time
import mujoco.viewer
import mujoco
import numpy as np
# from legged_gym import LEGGED_GYM_ROOT_DIR
import torch
import yaml
import onnxruntime
import numpy as np
import mujoco
xml_path = "./unitree_description/mjcf/g1_liao.xml"
# Total simulation time
simulation_duration = 300.0
# Simulation time step
simulation_dt = 0.002
# Controller update frequency (meets the requirement of simulation_dt * controll_decimation=0.02; 50Hz)
control_decimation = 10
def quat_rotate_inverse_np(q: np.ndarray, v: np.ndarray) -> np.ndarray:
"""Rotate a vector by the inverse of a quaternion along the last dimension of q and v (NumPy version).
Args:
q: The quaternion in (w, x, y, z). Shape is (..., 4).
v: The vector in (x, y, z). Shape is (..., 3).
Returns:
The rotated vector in (x, y, z). Shape is (..., 3).
"""
q_w = q[..., 0]
q_vec = q[..., 1:]
# Component a: v * (2.0 * q_w^2 - 1.0)
a = v * np.expand_dims(2.0 * q_w**2 - 1.0, axis=-1)
# Component b: cross(q_vec, v) * q_w * 2.0
b = np.cross(q_vec, v, axis=-1) * np.expand_dims(q_w, axis=-1) * 2.0
# Component c: q_vec * dot(q_vec, v) * 2.0
# For efficient computation, handle different dimensionalities
if q_vec.ndim == 2:
# For 2D case: use matrix multiplication for better performance
dot_product = np.sum(q_vec * v, axis=-1, keepdims=True)
c = q_vec * dot_product * 2.0
else:
# For general case: use Einstein summation
dot_product = np.expand_dims(np.einsum('...i,...i->...', q_vec, v), axis=-1)
c = q_vec * dot_product * 2.0
return a - b + c
def subtract_frame_transforms_mujoco(pos_a, quat_a, pos_b, quat_b):
rotm_a = np.zeros(9)
mujoco.mju_quat2Mat(rotm_a, quat_a)
rotm_a = rotm_a.reshape(3, 3)
rel_pos = rotm_a.T @ (pos_b - pos_a)
rel_quat = quaternion_multiply(quaternion_conjugate(quat_a), quat_b)
rel_quat = rel_quat / np.linalg.norm(rel_quat)
return rel_pos, rel_quat
def quaternion_conjugate(q):
"""[w, x, y, z] -> [w, -x, -y, -z]"""
return np.array([q[0], -q[1], -q[2], -q[3]])
def quaternion_multiply(q1, q2):
""": q1 ⊗ q2"""
w1, x1, y1, z1 = q1
w2, x2, y2, z2 = q2
w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2
x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2
y = w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2
z = w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2
return np.array([w, x, y, z])
def get_all_body_poses(d, m):
"""
d: mujoco.MjData
m: mujoco.MjModel
"""
body_poses = {}
for body_id in range(1, m.nbody):
body_name = mujoco.mj_id2name(m, mujoco.mjtObj.mjOBJ_BODY, body_id)
if body_name:
position = d.body(body_id).xpos.copy()
quaternion = d.body(body_id).xquat.copy()
rotation_matrix = d.body(body_id).xmat.reshape(3, 3).copy()
body_poses[body_name] = {
'body_id': body_id,
'position': position,
'quaternion': quaternion,
'rotation_matrix': rotation_matrix,
'xmat_flat': d.body(body_id).xmat.copy()
}
return body_poses
def get_gravity_orientation(quaternion):
qw = quaternion[0]
qx = quaternion[1]
qy = quaternion[2]
qz = quaternion[3]
gravity_orientation = np.zeros(3)
gravity_orientation[0] = 2 * (-qz * qx + qw * qy)
gravity_orientation[1] = -2 * (qz * qy + qw * qx)
gravity_orientation[2] = 1 - 2 * (qw * qw + qz * qz)
return gravity_orientation
def pd_control(target_q, q, kp, target_dq, dq, kd):
"""Calculates torques from position commands"""
return (target_q - q) * kp + (target_dq - dq) * kd
joint_xml = [
"left_hip_pitch_joint",
"left_hip_roll_joint",
"left_hip_yaw_joint",
"left_knee_joint",
"left_ankle_pitch_joint",
"left_ankle_roll_joint",
"right_hip_pitch_joint",
"right_hip_roll_joint",
"right_hip_yaw_joint",
"right_knee_joint",
"right_ankle_pitch_joint",
"right_ankle_roll_joint",
"waist_yaw_joint",
"waist_roll_joint",
"waist_pitch_joint",
"left_shoulder_pitch_joint",
"left_shoulder_roll_joint",
"left_shoulder_yaw_joint",
"left_elbow_joint",
"left_wrist_roll_joint",
"left_wrist_pitch_joint",
"left_wrist_yaw_joint",
"right_shoulder_pitch_joint",
"right_shoulder_roll_joint",
"right_shoulder_yaw_joint",
"right_elbow_joint",
"right_wrist_roll_joint",
"right_wrist_pitch_joint",
"right_wrist_yaw_joint"
]
if __name__ == "__main__":
# get config file name from command line
import argparse
# motion_file = "./policy/mjlab_2025-10-18_17-47-25/g1_dance1_subject1.npz"#raw
# policy_path ="./policy/mjlab_2025-10-18_17-47-25/2025-10-18_17-47-25.onnx"
motion_file = "./policy/mjlab_demo/demo_motion.npz"#raw
policy_path ="./policy/mjlab_demo/policy _demo.onnx"
motion = np.load(motion_file)
motionpos = motion["body_pos_w"]
motionquat = motion["body_quat_w"]
motioninputpos = motion["joint_pos"]
motioninputvel = motion["joint_vel"]
i = 0
total_length = motionpos.shape[0]
num_actions = 29
num_obs = 160
import onnx
model = onnx.load(policy_path)
for prop in model.metadata_props:
if prop.key == "joint_names":
joint_seq = prop.value.split(",")
if prop.key == "default_joint_pos":
joint_pos_array_seq = np.array([float(x) for x in prop.value.split(",")])
joint_pos_array = np.array([joint_pos_array_seq[joint_seq.index(joint)] for joint in joint_xml])
if prop.key == "joint_stiffness":
stiffness_array_seq = np.array([float(x) for x in prop.value.split(",")])
stiffness_array = np.array([stiffness_array_seq[joint_seq.index(joint)] for joint in joint_xml])
# stiffness_array = np.array([])
if prop.key == "joint_damping":
damping_array_seq = np.array([float(x) for x in prop.value.split(",")])
damping_array = np.array([damping_array_seq[joint_seq.index(joint)] for joint in joint_xml])
if prop.key == "action_scale":
action_scale = np.array([float(x) for x in prop.value.split(",")])
print(f"{prop.key}: {prop.value}")
action = np.zeros(num_actions, dtype=np.float32)
# target_dof_pos = default_angles.copy()
obs = np.zeros(num_obs, dtype=np.float32)
counter = 0
# Load robot model
m = mujoco.MjModel.from_xml_path(xml_path)
d = mujoco.MjData(m)
m.opt.timestep = simulation_dt
# load policy
# policy = torch.jit.load(policy_path)
policy = onnxruntime.InferenceSession(policy_path)
input_name = policy.get_inputs()[0].name
output_name = policy.get_outputs()[0].name
action_buffer = np.zeros((num_actions,), dtype=np.float32)
timestep = 0
motioninput = np.concatenate((motioninputpos[timestep,:],motioninputvel[timestep,:]), axis=0)
motionposcurrent = motionpos[timestep,9,:] #here 9 means torso body index in g1
motionquatcurrent = motionquat[timestep,9,:]#here 9 means torso body index in g1
target_dof_pos = joint_pos_array.copy()
d.qpos[7:] = target_dof_pos
# target_dof_pos = joint_pos_array_seq
body_name = "torso_link" # robot_ref_body_index=3 motion_ref_body_index=7
# body_name = "pelvis"
body_id = mujoco.mj_name2id(m, mujoco.mjtObj.mjOBJ_BODY, body_name)
if body_id == -1:
raise ValueError(f"Body {body_name} not found in model")
print('body_id:',body_id)
# exit(-1)
with mujoco.viewer.launch_passive(m, d) as viewer:
# Close the viewer automatically after simulation_duration wall-seconds.
start = time.time()
while viewer.is_running() and time.time() - start < simulation_duration:
step_start = time.time()
mujoco.mj_step(m, d)
tau = pd_control(target_dof_pos, d.qpos[7:], stiffness_array, np.zeros_like(damping_array), d.qvel[6:], damping_array)# xml
d.ctrl[:] = tau
counter += 1
# timestep = total_length-1
if counter % control_decimation == 0:
# Apply control signal here.
position = d.xpos[body_id]
quaternion = d.xquat[body_id]
motioninput = np.concatenate((motioninputpos[timestep,:],motioninputvel[timestep,:]),axis=0)
motionposcurrent = motionpos[timestep,9,:]
motionquatcurrent = motionquat[timestep,9,:]
anchor_pos,anchor_quat = subtract_frame_transforms_mujoco(position,quaternion,motionposcurrent,motionquatcurrent)
anchor_ori = np.zeros(9)
mujoco.mju_quat2Mat(anchor_ori, anchor_quat)
anchor_ori = anchor_ori.reshape(3, 3)[:, :2]
anchor_ori = anchor_ori.reshape(-1,)
# create observation
offset = 0
obs[offset:offset + 58] = motioninput
offset += 58
obs[offset:offset + 3] = anchor_pos
offset += 3 #add anchor position
obs[offset:offset + 6] = anchor_ori
offset += 6
linvel = quat_rotate_inverse_np(d.qpos[3:7], d.qvel[0 : 3])
# obs[offset:offset + 3] = d.qvel[0 : 3].copy() # linear velocity
obs[offset:offset + 3] = linvel # linear velocity
offset += 3
angvel = quat_rotate_inverse_np(d.qpos[3:7], d.qvel[3 : 6])
obs[offset:offset + 3] = d.qvel[3 : 6]
# obs[offset:offset + 3] = angvel
offset += 3
qpos_xml = d.qpos[7 : 7 + num_actions] # joint positions
qpos_seq = np.array([qpos_xml[joint_xml.index(joint)] for joint in joint_seq])
obs[offset:offset + num_actions] = qpos_seq - joint_pos_array_seq # joint positions
offset += num_actions
qvel_xml = d.qvel[6 : 6 + num_actions] # joint positions
qvel_seq = np.array([qvel_xml[joint_xml.index(joint)] for joint in joint_seq])
obs[offset:offset + num_actions] = qvel_seq # joint velocities
offset += num_actions
obs[offset:offset + num_actions] = action_buffer
obs_tensor = torch.from_numpy(obs).unsqueeze(0)
action = policy.run(['actions'], {'obs': obs_tensor.numpy(),'time_step':np.array([timestep], dtype=np.float32).reshape(1,1)})[0]
#
action = np.asarray(action).reshape(-1)
action_buffer = action.copy()
target_dof_pos = action * action_scale + joint_pos_array_seq
target_dof_pos = target_dof_pos.reshape(-1,)
target_dof_pos = np.array([target_dof_pos[joint_seq.index(joint)] for joint in joint_xml])
timestep+=1
# Pick up changes to the physics state, apply perturbations, update options from GUI.
viewer.sync()
# Rudimentary time keeping, will drift relative to wall clock.
time_until_next_step = m.opt.timestep - (time.time() - step_start)
if time_until_next_step > 0:
time.sleep(time_until_next_step)
and the xml ,demo_motion.npz,policy _dem.onnx, I use in depoly is:
g1_liao.xml
Different shown result when play and deploy in sim,please take a look,and could you please provide the deploy code in sim or real robot ,many thanks!
