different shown result when play and deploy in sim,please take a look 

Excellent job!

**When I use run same policy in play and deploy ,show different behavior ,it's really strange ,**
Here is the detail,

**1. run the official policy  demo_ckpt.pt file and demo_motion.npz in play seems very good**
when run 
uv run src/mjlab/scripts/play3.py Mjlab-Tracking-Flat-Unitree-G1-Play   --model_path /tmp/mjlab_cache/demo_ckpt.pt  --motion_file /tmp/mjlab_cache/demo_motion.npz
![Image](https://github.com/user-attachments/assets/cc8aa438-bd08-4fab-b6ca-57d85f0360fe)


**2. get the onnx file exported from official demo_ckpt.pt  and I write code to preparing for deploy and sim,it  seems very strange and behaving very strangely, completely unable to stabilize**


![Image](https://github.com/user-attachments/assets/205e39a0-8378-4ea0-b0d2-a75c09c12053)

 **Please help me check the deploy sim code and the XML to see if there's anything wrong.
this is the deploy code I use,**

```
import time

import mujoco.viewer
import mujoco
import numpy as np
# from legged_gym import LEGGED_GYM_ROOT_DIR
import torch
import yaml

import onnxruntime

import numpy as np
import mujoco

xml_path = "./unitree_description/mjcf/g1_liao.xml"

# Total simulation time
simulation_duration = 300.0
# Simulation time step
simulation_dt = 0.002
# Controller update frequency (meets the requirement of simulation_dt * controll_decimation=0.02; 50Hz)
control_decimation = 10
def quat_rotate_inverse_np(q: np.ndarray, v: np.ndarray) -> np.ndarray:
    """Rotate a vector by the inverse of a quaternion along the last dimension of q and v (NumPy version).

    Args:
        q: The quaternion in (w, x, y, z). Shape is (..., 4).
        v: The vector in (x, y, z). Shape is (..., 3).

    Returns:
        The rotated vector in (x, y, z). Shape is (..., 3).
    """
    q_w = q[..., 0]
    q_vec = q[..., 1:]

    # Component a: v * (2.0 * q_w^2 - 1.0)
    a = v * np.expand_dims(2.0 * q_w**2 - 1.0, axis=-1)

    # Component b: cross(q_vec, v) * q_w * 2.0
    b = np.cross(q_vec, v, axis=-1) * np.expand_dims(q_w, axis=-1) * 2.0

    # Component c: q_vec * dot(q_vec, v) * 2.0
    # For efficient computation, handle different dimensionalities
    if q_vec.ndim == 2:
        # For 2D case: use matrix multiplication for better performance
        dot_product = np.sum(q_vec * v, axis=-1, keepdims=True)
        c = q_vec * dot_product * 2.0
    else:
        # For general case: use Einstein summation
        dot_product = np.expand_dims(np.einsum('...i,...i->...', q_vec, v), axis=-1)
        c = q_vec * dot_product * 2.0

    return a - b + c
def subtract_frame_transforms_mujoco(pos_a, quat_a, pos_b, quat_b):

    rotm_a = np.zeros(9)
    mujoco.mju_quat2Mat(rotm_a, quat_a)
    rotm_a = rotm_a.reshape(3, 3)

    rel_pos = rotm_a.T @ (pos_b - pos_a)

    rel_quat = quaternion_multiply(quaternion_conjugate(quat_a), quat_b)

    rel_quat = rel_quat / np.linalg.norm(rel_quat)

    return rel_pos, rel_quat

def quaternion_conjugate(q):
    """[w, x, y, z] -> [w, -x, -y, -z]"""
    return np.array([q[0], -q[1], -q[2], -q[3]])

def quaternion_multiply(q1, q2):
    """: q1 ⊗ q2"""
    w1, x1, y1, z1 = q1
    w2, x2, y2, z2 = q2

    w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2
    x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2
    y = w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2
    z = w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2

    return np.array([w, x, y, z])
def get_all_body_poses(d, m):
    """

        d: mujoco.MjData 
        m: mujoco.MjModel 

    """
    body_poses = {}

    for body_id in range(1, m.nbody):
        body_name = mujoco.mj_id2name(m, mujoco.mjtObj.mjOBJ_BODY, body_id)

        if body_name:  
            position = d.body(body_id).xpos.copy()      
            quaternion = d.body(body_id).xquat.copy() 
            rotation_matrix = d.body(body_id).xmat.reshape(3, 3).copy() 

            body_poses[body_name] = {
                'body_id': body_id,
                'position': position,
                'quaternion': quaternion,
                'rotation_matrix': rotation_matrix,
                'xmat_flat': d.body(body_id).xmat.copy()  
            }

    return body_poses

def get_gravity_orientation(quaternion):
    qw = quaternion[0]
    qx = quaternion[1]
    qy = quaternion[2]
    qz = quaternion[3]

    gravity_orientation = np.zeros(3)

    gravity_orientation[0] = 2 * (-qz * qx + qw * qy)
    gravity_orientation[1] = -2 * (qz * qy + qw * qx)
    gravity_orientation[2] = 1 - 2 * (qw * qw + qz * qz)

    return gravity_orientation


def pd_control(target_q, q, kp, target_dq, dq, kd):
    """Calculates torques from position commands"""
    return (target_q - q) * kp + (target_dq - dq) * kd


joint_xml = [
        "left_hip_pitch_joint",
    "left_hip_roll_joint",
    "left_hip_yaw_joint",
    "left_knee_joint",
    "left_ankle_pitch_joint",
    "left_ankle_roll_joint",
    "right_hip_pitch_joint",
    "right_hip_roll_joint",
    "right_hip_yaw_joint",
    "right_knee_joint",
    "right_ankle_pitch_joint",
    "right_ankle_roll_joint",
    "waist_yaw_joint",
    "waist_roll_joint",
    "waist_pitch_joint",
    "left_shoulder_pitch_joint",
    "left_shoulder_roll_joint",
    "left_shoulder_yaw_joint",
    "left_elbow_joint",
    "left_wrist_roll_joint",
    "left_wrist_pitch_joint",
    "left_wrist_yaw_joint",
    "right_shoulder_pitch_joint",
    "right_shoulder_roll_joint",
    "right_shoulder_yaw_joint",
    "right_elbow_joint",
    "right_wrist_roll_joint",
    "right_wrist_pitch_joint",
    "right_wrist_yaw_joint"
]





if __name__ == "__main__":
    # get config file name from command line

    import argparse

    # motion_file = "./policy/mjlab_2025-10-18_17-47-25/g1_dance1_subject1.npz"#raw
    # policy_path ="./policy/mjlab_2025-10-18_17-47-25/2025-10-18_17-47-25.onnx"

    motion_file = "./policy/mjlab_demo/demo_motion.npz"#raw
    policy_path ="./policy/mjlab_demo/policy _demo.onnx"



    motion =  np.load(motion_file)
    motionpos = motion["body_pos_w"]
    motionquat = motion["body_quat_w"]
    motioninputpos = motion["joint_pos"]
    motioninputvel = motion["joint_vel"]
    i = 0
    total_length = motionpos.shape[0]





    num_actions = 29
    num_obs = 160
    import onnx
    model = onnx.load(policy_path)
    for prop in model.metadata_props:
        if prop.key == "joint_names":
            joint_seq = prop.value.split(",")
        if prop.key == "default_joint_pos":   
            joint_pos_array_seq = np.array([float(x) for x in prop.value.split(",")])
            joint_pos_array = np.array([joint_pos_array_seq[joint_seq.index(joint)] for joint in joint_xml])
        if prop.key == "joint_stiffness":
            stiffness_array_seq = np.array([float(x) for x in prop.value.split(",")])
            stiffness_array = np.array([stiffness_array_seq[joint_seq.index(joint)] for joint in joint_xml])
            # stiffness_array = np.array([])

        if prop.key == "joint_damping":
            damping_array_seq = np.array([float(x) for x in prop.value.split(",")])
            damping_array = np.array([damping_array_seq[joint_seq.index(joint)] for joint in joint_xml])        

        if prop.key == "action_scale":
            action_scale = np.array([float(x) for x in prop.value.split(",")])
        print(f"{prop.key}: {prop.value}")
    action = np.zeros(num_actions, dtype=np.float32)
    # target_dof_pos = default_angles.copy()
    obs = np.zeros(num_obs, dtype=np.float32)

    counter = 0

    # Load robot model
    m = mujoco.MjModel.from_xml_path(xml_path)
    d = mujoco.MjData(m)
    m.opt.timestep = simulation_dt

    # load policy
    # policy = torch.jit.load(policy_path)

    policy = onnxruntime.InferenceSession(policy_path)
    input_name = policy.get_inputs()[0].name
    output_name = policy.get_outputs()[0].name

    action_buffer = np.zeros((num_actions,), dtype=np.float32)
    timestep = 0
    motioninput = np.concatenate((motioninputpos[timestep,:],motioninputvel[timestep,:]), axis=0)
    motionposcurrent = motionpos[timestep,9,:] #here 9 means torso body index in g1
    motionquatcurrent = motionquat[timestep,9,:]#here 9 means torso body index in g1
    target_dof_pos = joint_pos_array.copy()
    d.qpos[7:] = target_dof_pos
    # target_dof_pos = joint_pos_array_seq
    body_name = "torso_link"  # robot_ref_body_index=3 motion_ref_body_index=7
    # body_name = "pelvis"
    body_id = mujoco.mj_name2id(m, mujoco.mjtObj.mjOBJ_BODY, body_name)
    if body_id == -1:
        raise ValueError(f"Body {body_name} not found in model")
    print('body_id:',body_id)
    # exit(-1)
    with mujoco.viewer.launch_passive(m, d) as viewer:
        # Close the viewer automatically after simulation_duration wall-seconds.
        start = time.time()
        while viewer.is_running() and time.time() - start < simulation_duration:
            step_start = time.time()

            mujoco.mj_step(m, d)
            tau = pd_control(target_dof_pos, d.qpos[7:], stiffness_array, np.zeros_like(damping_array), d.qvel[6:], damping_array)# xml

            d.ctrl[:] = tau
            counter += 1
            # timestep = total_length-1
            if counter % control_decimation == 0:
                # Apply control signal here.
                position = d.xpos[body_id]
                quaternion = d.xquat[body_id]
                motioninput = np.concatenate((motioninputpos[timestep,:],motioninputvel[timestep,:]),axis=0)
                motionposcurrent = motionpos[timestep,9,:]
                motionquatcurrent = motionquat[timestep,9,:]
                anchor_pos,anchor_quat = subtract_frame_transforms_mujoco(position,quaternion,motionposcurrent,motionquatcurrent)
                anchor_ori = np.zeros(9)
                mujoco.mju_quat2Mat(anchor_ori, anchor_quat)
                anchor_ori = anchor_ori.reshape(3, 3)[:, :2]
                anchor_ori = anchor_ori.reshape(-1,)
                # create observation
                offset = 0
                obs[offset:offset + 58] = motioninput
                offset += 58

                obs[offset:offset + 3] = anchor_pos  
                offset += 3 #add anchor position

                obs[offset:offset + 6] = anchor_ori  
                offset += 6   


                linvel = quat_rotate_inverse_np(d.qpos[3:7], d.qvel[0 : 3])
                # obs[offset:offset + 3] = d.qvel[0 : 3].copy() # linear velocity
                obs[offset:offset + 3] = linvel # linear velocity

                offset += 3

                angvel = quat_rotate_inverse_np(d.qpos[3:7], d.qvel[3 : 6])
                obs[offset:offset + 3] = d.qvel[3 : 6]
                # obs[offset:offset + 3] = angvel

                offset += 3
                qpos_xml = d.qpos[7 : 7 + num_actions]  # joint positions
                qpos_seq = np.array([qpos_xml[joint_xml.index(joint)] for joint in joint_seq])
                obs[offset:offset + num_actions] = qpos_seq - joint_pos_array_seq  # joint positions
                offset += num_actions
                qvel_xml = d.qvel[6 : 6 + num_actions]  # joint positions
                qvel_seq = np.array([qvel_xml[joint_xml.index(joint)] for joint in joint_seq])
                obs[offset:offset + num_actions] = qvel_seq  # joint velocities
                offset += num_actions   
                obs[offset:offset + num_actions] = action_buffer


                obs_tensor = torch.from_numpy(obs).unsqueeze(0)
                action = policy.run(['actions'], {'obs': obs_tensor.numpy(),'time_step':np.array([timestep], dtype=np.float32).reshape(1,1)})[0]
                # 
                action = np.asarray(action).reshape(-1)
                action_buffer = action.copy()
                target_dof_pos = action * action_scale + joint_pos_array_seq
                target_dof_pos = target_dof_pos.reshape(-1,)
                target_dof_pos = np.array([target_dof_pos[joint_seq.index(joint)] for joint in joint_xml])
                timestep+=1


            # Pick up changes to the physics state, apply perturbations, update options from GUI.
            viewer.sync()

            # Rudimentary time keeping, will drift relative to wall clock.
            time_until_next_step = m.opt.timestep - (time.time() - step_start)
            if time_until_next_step > 0:
                time.sleep(time_until_next_step)
```
and the xml ，demo_motion.npz,policy _dem.onnx, I use in depoly is:
[g1_liao.xml](https://github.com/user-attachments/files/22988285/g1_liao.xml)

[policy _demo_onnx.zip](https://github.com/user-attachments/files/22988353/policy._demo_onnx.zip)

[demo_motion_npz.zip](https://github.com/user-attachments/files/22988354/demo_motion_npz.zip)

**Different shown result when play and deploy in sim,please take a look,and could you please provide the deploy code in sim or real robot ,many thanks!**

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

different shown result when play and deploy in sim,please take a look #201

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

different shown result when play and deploy in sim,please take a look #201

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions