From 452291cb9a25850289d3f65aa1c314edc8b7e78b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentin=20Boittiaux?= Date: Wed, 26 Jun 2024 11:38:55 +0200 Subject: [PATCH] Add depth map support for camera sensor --- .../Editor/CameraSensorComponentEditor.cs | 1 + com.unity.ml-agents/Runtime/Resources.meta | 8 +++ .../Runtime/Resources/DepthShader.shader | 64 +++++++++++++++++++ .../Runtime/Resources/DepthShader.shader.meta | 9 +++ .../Runtime/Sensors/CameraSensor.cs | 37 ++++++++--- .../Runtime/Sensors/CameraSensorComponent.cs | 48 +++++++++++++- .../Runtime/Sensors/CompressionSpec.cs | 7 +- .../Runtime/Sensors/ObservationWriter.cs | 22 ++++--- ml-agents-envs/mlagents_envs/rpc_utils.py | 34 +++++++--- ml-agents-envs/setup.py | 1 + ml-agents-envs/tests/test_rpc_utils.py | 6 +- 11 files changed, 202 insertions(+), 35 deletions(-) create mode 100644 com.unity.ml-agents/Runtime/Resources.meta create mode 100644 com.unity.ml-agents/Runtime/Resources/DepthShader.shader create mode 100644 com.unity.ml-agents/Runtime/Resources/DepthShader.shader.meta diff --git a/com.unity.ml-agents/Editor/CameraSensorComponentEditor.cs b/com.unity.ml-agents/Editor/CameraSensorComponentEditor.cs index 1df66ee3c9..15c6fd446f 100644 --- a/com.unity.ml-agents/Editor/CameraSensorComponentEditor.cs +++ b/com.unity.ml-agents/Editor/CameraSensorComponentEditor.cs @@ -24,6 +24,7 @@ public override void OnInspectorGUI() EditorGUILayout.PropertyField(so.FindProperty("m_Width"), true); EditorGUILayout.PropertyField(so.FindProperty("m_Height"), true); EditorGUILayout.PropertyField(so.FindProperty("m_Grayscale"), true); + EditorGUILayout.PropertyField(so.FindProperty("m_RGBD"), true); EditorGUILayout.PropertyField(so.FindProperty("m_ObservationStacks"), true); EditorGUILayout.PropertyField(so.FindProperty("m_ObservationType"), true); } diff --git a/com.unity.ml-agents/Runtime/Resources.meta b/com.unity.ml-agents/Runtime/Resources.meta new file mode 100644 index 0000000000..ef98dc1a06 --- /dev/null +++ 
b/com.unity.ml-agents/Runtime/Resources.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: ca0aab04b837598dc99f548d13baf0c6 +folderAsset: yes +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/com.unity.ml-agents/Runtime/Resources/DepthShader.shader b/com.unity.ml-agents/Runtime/Resources/DepthShader.shader new file mode 100644 index 0000000000..a6b429f916 --- /dev/null +++ b/com.unity.ml-agents/Runtime/Resources/DepthShader.shader @@ -0,0 +1,64 @@ +Shader "Custom/DepthShader" +{ + Properties + { + _MainTex ("Texture", 2D) = "white" {} + } + SubShader + { + Pass + { + CGPROGRAM + #pragma vertex vert + #pragma fragment frag + + #include "UnityCG.cginc" + + struct appdata + { + float4 vertex : POSITION; + float2 uv : TEXCOORD0; + }; + + struct v2f + { + float2 uv : TEXCOORD0; + float4 vertex : SV_POSITION; + float4 screenPos: TEXCOORD1; + }; + + v2f vert (appdata v) + { + v2f o; + o.vertex = UnityObjectToClipPos(v.vertex); + o.screenPos = ComputeScreenPos(o.vertex); + o.uv = v.uv; + return o; + } + + sampler2D _MainTex, _CameraDepthTexture; + + float4 frag (v2f i) : SV_Target + { + // Extract color from texture + float4 color = tex2D(_MainTex, i.uv); + + // Extract depth from camera depth texture (ComputeScreenPos output needs the divide by w; depth is in the red channel) + float depth = LinearEyeDepth(tex2D(_CameraDepthTexture, i.screenPos.xy / i.screenPos.w).r); + + // Clip depth to far plane + float farPlane = _ProjectionParams.z; + if (depth > farPlane) depth = 0; + + // Convert color from linear to sRGB + color.rgb = LinearToGammaSpace(saturate(color.rgb)); + + // Store depth in alpha channel + color.a = depth; + + return color; + } + ENDCG + } + } +} diff --git a/com.unity.ml-agents/Runtime/Resources/DepthShader.shader.meta b/com.unity.ml-agents/Runtime/Resources/DepthShader.shader.meta new file mode 100644 index 0000000000..1b967c47fd --- /dev/null +++ b/com.unity.ml-agents/Runtime/Resources/DepthShader.shader.meta @@ -0,0 +1,9 @@ +fileFormatVersion: 2 +guid: 8c36e1786391089c18743562d1d2de06 
+ShaderImporter: + externalObjects: {} + defaultTextures: [] + nonModifiableTextures: [] + userData: + assetBundleName: + assetBundleVariant: diff --git a/com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs b/com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs index 12dc651387..be05f8ed54 100644 --- a/com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs +++ b/com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs @@ -13,11 +13,19 @@ public class CameraSensor : ISensor, IBuiltInSensor, IDisposable int m_Width; int m_Height; bool m_Grayscale; + bool m_RGBD; string m_Name; private ObservationSpec m_ObservationSpec; SensorCompressionType m_CompressionType; Texture2D m_Texture; + /// + /// Indicates whether or not the Render method is being executed by CameraSensor. + /// This boolean is checked in CameraSensorComponent.OnRenderImage method to avoid + /// applying the depth shader outside of the camera sensor scope. + /// + public bool m_InCameraSensorRender { get; private set; } + /// /// The Camera used for rendering the sensor observations. /// @@ -47,17 +55,19 @@ public SensorCompressionType CompressionType /// The compression to apply to the generated image. /// The type of observation. public CameraSensor( - Camera camera, int width, int height, bool grayscale, string name, SensorCompressionType compression, ObservationType observationType = ObservationType.Default) + Camera camera, int width, int height, bool grayscale, bool rgbd, string name, SensorCompressionType compression, ObservationType observationType = ObservationType.Default) { m_Camera = camera; m_Width = width; m_Height = height; m_Grayscale = grayscale; + m_RGBD = rgbd; m_Name = name; - var channels = grayscale ? 1 : 3; + var channels = rgbd ? 4 : grayscale ? 
1 : 3; // RGBD has priority over Grayscale m_ObservationSpec = ObservationSpec.Visual(channels, height, width, observationType); m_CompressionType = compression; - m_Texture = new Texture2D(width, height, TextureFormat.RGB24, false); + m_Texture = new Texture2D(width, height, rgbd ? TextureFormat.RGBAFloat : TextureFormat.RGB24, false); + m_InCameraSensorRender = false; } /// @@ -90,8 +100,11 @@ public byte[] GetCompressedObservation() using (TimerStack.Instance.Scoped("CameraSensor.GetCompressedObservation")) { // TODO support more types here, e.g. JPG - var compressed = m_Texture.EncodeToPNG(); - return compressed; + if (m_CompressionType == SensorCompressionType.OPENEXR) + { + return m_Texture.EncodeToEXR(); + } + return m_Texture.EncodeToPNG(); } } @@ -104,7 +117,7 @@ public int Write(ObservationWriter writer) { using (TimerStack.Instance.Scoped("CameraSensor.WriteToTensor")) { - var numWritten = writer.WriteTexture(m_Texture, m_Grayscale); + var numWritten = writer.WriteTexture(m_Texture, m_Grayscale, m_RGBD); return numWritten; } } @@ -131,7 +144,7 @@ public CompressionSpec GetCompressionSpec() /// Texture2D to render to. /// Width of resulting 2D texture. /// Height of resulting 2D texture. - public static void ObservationToTexture(Camera obsCamera, Texture2D texture2D, int width, int height) + public void ObservationToTexture(Camera obsCamera, Texture2D texture2D, int width, int height) { if (SystemInfo.graphicsDeviceType == GraphicsDeviceType.Null) { @@ -140,9 +153,9 @@ public static void ObservationToTexture(Camera obsCamera, Texture2D texture2D, i var oldRec = obsCamera.rect; obsCamera.rect = new Rect(0f, 0f, 1f, 1f); - var depth = 24; - var format = RenderTextureFormat.Default; - var readWrite = RenderTextureReadWrite.Default; + var depth = m_RGBD ? 32 : 24; + var format = m_RGBD ? RenderTextureFormat.ARGBFloat : RenderTextureFormat.Default; + var readWrite = m_RGBD ? 
RenderTextureReadWrite.Linear : RenderTextureReadWrite.Default; var tempRt = RenderTexture.GetTemporary(width, height, depth, format, readWrite); @@ -154,8 +167,12 @@ public static void ObservationToTexture(Camera obsCamera, Texture2D texture2D, i RenderTexture.active = tempRt; obsCamera.targetTexture = tempRt; + m_InCameraSensorRender = true; + obsCamera.Render(); + m_InCameraSensorRender = false; + texture2D.ReadPixels(new Rect(0, 0, texture2D.width, texture2D.height), 0, 0); obsCamera.targetTexture = prevCameraRt; diff --git a/com.unity.ml-agents/Runtime/Sensors/CameraSensorComponent.cs b/com.unity.ml-agents/Runtime/Sensors/CameraSensorComponent.cs index f6b53f087e..d70fa4d1e8 100644 --- a/com.unity.ml-agents/Runtime/Sensors/CameraSensorComponent.cs +++ b/com.unity.ml-agents/Runtime/Sensors/CameraSensorComponent.cs @@ -67,13 +67,26 @@ public int Height bool m_Grayscale; /// - /// Whether to generate grayscale images or color. + /// Whether to generate grayscale images or color. Disable RGBD to use it. /// Note that changing this after the sensor is created has no effect. /// public bool Grayscale { get { return m_Grayscale; } - set { m_Grayscale = value; } + set { m_Grayscale = value; UpdateSensor(); } + } + + [HideInInspector, SerializeField, FormerlySerializedAs("rgbd")] + bool m_RGBD; + + /// + /// Whether to generate color+depth images. RGBD has priority over Grayscale. + /// Note that changing this after the sensor is created has no effect. + /// + public bool RGBD + { + get { return m_RGBD; } + set { m_RGBD = value; UpdateSensor(); } } [HideInInspector, SerializeField] @@ -130,9 +143,15 @@ public int ObservationStacks set { m_ObservationStacks = value; } } + /// + /// The material used to render the depth image. 
+ /// + private Material m_DepthMaterial; + void Start() { UpdateSensor(); + m_DepthMaterial = new Material(Shader.Find("Custom/DepthShader")); } /// @@ -142,7 +161,7 @@ void Start() public override ISensor[] CreateSensors() { Dispose(); - m_Sensor = new CameraSensor(m_Camera, m_Width, m_Height, Grayscale, m_SensorName, m_Compression, m_ObservationType); + m_Sensor = new CameraSensor(m_Camera, m_Width, m_Height, Grayscale, RGBD, m_SensorName, m_Compression, m_ObservationType); if (ObservationStacks != 1) { @@ -158,6 +177,14 @@ internal void UpdateSensor() { if (m_Sensor != null) { + // Update depth settings before camera settings because m_Compression might change + if (m_RGBD) + { + m_Grayscale = false; + m_Compression = SensorCompressionType.OPENEXR; + } + + // Update camera settings m_Sensor.Camera = m_Camera; m_Sensor.CompressionType = m_Compression; m_Sensor.Camera.enabled = m_RuntimeCameraEnable; @@ -175,5 +202,20 @@ public void Dispose() m_Sensor = null; } } + + /// + /// Apply the depth material to the camera image if the sensor is set to RGBD. + /// + void OnRenderImage(RenderTexture src, RenderTexture dest) + { + if (m_RGBD && m_Sensor != null && m_Sensor.m_InCameraSensorRender) + { + Graphics.Blit(src, dest, m_DepthMaterial); + } + else + { + Graphics.Blit(src, dest); + } + } } } diff --git a/com.unity.ml-agents/Runtime/Sensors/CompressionSpec.cs b/com.unity.ml-agents/Runtime/Sensors/CompressionSpec.cs index 76e283a14a..74c0c2b362 100644 --- a/com.unity.ml-agents/Runtime/Sensors/CompressionSpec.cs +++ b/com.unity.ml-agents/Runtime/Sensors/CompressionSpec.cs @@ -14,7 +14,12 @@ public enum SensorCompressionType /// /// PNG format. Data will be stored in binary format. /// - PNG + PNG, + + /// + /// OpenEXR format. 
+ /// + OPENEXR } /// diff --git a/com.unity.ml-agents/Runtime/Sensors/ObservationWriter.cs b/com.unity.ml-agents/Runtime/Sensors/ObservationWriter.cs index 24ed9fa5ba..d3074f1a4f 100644 --- a/com.unity.ml-agents/Runtime/Sensors/ObservationWriter.cs +++ b/com.unity.ml-agents/Runtime/Sensors/ObservationWriter.cs @@ -296,7 +296,8 @@ public static class ObservationWriterExtension public static int WriteTexture( this ObservationWriter obsWriter, Texture2D texture, - bool grayScale) + bool grayScale, + bool rgbd = false) { if (texture.format == TextureFormat.RGB24) { @@ -306,7 +307,7 @@ public static int WriteTexture( var width = texture.width; var height = texture.height; - var texturePixels = texture.GetPixels32(); + var texturePixels = texture.GetPixels(); // During training, we convert from Texture to PNG before sending to the trainer, which has the // effect of flipping the image. We need another flip here at inference time to match this. @@ -316,22 +317,25 @@ public static int WriteTexture( { var currentPixel = texturePixels[(height - h - 1) * width + w]; - if (grayScale) + if (grayScale && !rgbd) { obsWriter[0, h, w] = - (currentPixel.r + currentPixel.g + currentPixel.b) / 3f / 255.0f; + (currentPixel.r + currentPixel.g + currentPixel.b) / 3f; } else { - // For Color32, the r, g and b values are between 0 and 255. - obsWriter[0, h, w] = currentPixel.r / 255.0f; - obsWriter[1, h, w] = currentPixel.g / 255.0f; - obsWriter[2, h, w] = currentPixel.b / 255.0f; + obsWriter[0, h, w] = currentPixel.r; + obsWriter[1, h, w] = currentPixel.g; + obsWriter[2, h, w] = currentPixel.b; + if (rgbd) + { + obsWriter[3, h, w] = currentPixel.a; + } } } } - return height * width * (grayScale ? 1 : 3); + return height * width * (rgbd ? 4 : grayScale ? 
1 : 3); } internal static int WriteTextureRGB24( diff --git a/ml-agents-envs/mlagents_envs/rpc_utils.py b/ml-agents-envs/mlagents_envs/rpc_utils.py index f8df94896a..cca2911880 100644 --- a/ml-agents-envs/mlagents_envs/rpc_utils.py +++ b/ml-agents-envs/mlagents_envs/rpc_utils.py @@ -13,10 +13,13 @@ from mlagents_envs.communicator_objects.observation_pb2 import ( ObservationProto, NONE as COMPRESSION_TYPE_NONE, + PNG as COMPRESSION_TYPE_PNG, ) from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto import numpy as np +import OpenEXR as exr import io +import Imath from typing import cast, List, Tuple, Collection, Optional, Iterable from PIL import Image @@ -104,7 +107,7 @@ def original_tell(self) -> int: @timed def process_pixels( - image_bytes: bytes, expected_channels: int, mappings: Optional[List[int]] = None + image_bytes: bytes, compression_type: int, expected_channels: int, mappings: Optional[List[int]] = None ) -> np.ndarray: """ Converts byte array observation image into numpy array, re-sizes it, @@ -118,13 +121,26 @@ def process_pixels( image_arrays = [] # Read the images back from the bytes (without knowing the sizes). while True: - with hierarchical_timer("image_decompress"): - image = Image.open(image_fp) - # Normally Image loads lazily, load() forces it to do loading in the timer scope. - image.load() - image_arrays.append( - np.moveaxis(np.array(image, dtype=np.float32) / 255.0, -1, 0) - ) + if compression_type == COMPRESSION_TYPE_PNG: + with hierarchical_timer("image_decompress"): + image = Image.open(image_fp) + # Normally Image loads lazily, load() forces it to do loading in the timer scope. 
+ image.load() + image_arrays.append( + np.moveaxis(np.array(image, dtype=np.float32) / 255.0, -1, 0) + ) + else: + with hierarchical_timer("image_decompress"): + file = exr.InputFile(image_fp) + header = file.header() + dw = header["dataWindow"] + channels = "RGBA" if "A" in header["channels"] else "RGB" + image_size = (dw.max.y - dw.min.y + 1, dw.max.x - dw.min.x + 1) + image_data = file.channels(channels, Imath.PixelType(Imath.PixelType.FLOAT)) + image = np.stack([ + np.frombuffer(channel, dtype=np.float32) for channel in image_data + ]).reshape(-1, *image_size) + image_arrays.append(image) # Look for the next header, starting from the current stream location try: @@ -234,7 +250,7 @@ def _observation_to_np_array( return img else: img = process_pixels( - obs.compressed_data, expected_channels, list(obs.compressed_channel_mapping) + obs.compressed_data, obs.compression_type, expected_channels, list(obs.compressed_channel_mapping) ) # Compare decompressed image size to observation shape and make sure they match if list(obs.shape) != list(img.shape): diff --git a/ml-agents-envs/setup.py b/ml-agents-envs/setup.py index fcbee96151..7b2937dc25 100644 --- a/ml-agents-envs/setup.py +++ b/ml-agents-envs/setup.py @@ -62,6 +62,7 @@ def run(self): "pettingzoo==1.15.0", "numpy>=1.23.5,<1.24.0", "filelock>=3.4.0", + "OpenEXR==3.2.4", ], python_requires=">=3.10.1,<=3.10.12", # TODO: Remove this once mypy stops having spurious setuptools issues. 
diff --git a/ml-agents-envs/tests/test_rpc_utils.py b/ml-agents-envs/tests/test_rpc_utils.py index 8440d6586a..ad193a4278 100644 --- a/ml-agents-envs/tests/test_rpc_utils.py +++ b/ml-agents-envs/tests/test_rpc_utils.py @@ -236,7 +236,7 @@ def proto_from_steps_and_action( def test_process_pixels(): in_array = np.random.rand(3, 128, 64) byte_arr = generate_compressed_data(in_array) - out_array = process_pixels(byte_arr, 3) + out_array = process_pixels(byte_arr, PNG, 3) assert out_array.shape == (3, 128, 64) assert np.sum(in_array - out_array) / np.prod(in_array.shape) < 0.01 assert np.allclose(in_array, out_array, atol=0.01) @@ -248,7 +248,7 @@ def test_process_pixels_multi_png(): num_channels = 7 in_array = np.random.rand(num_channels, height, width) byte_arr = generate_compressed_data(in_array) - out_array = process_pixels(byte_arr, num_channels) + out_array = process_pixels(byte_arr, PNG, num_channels) assert out_array.shape == (num_channels, height, width) assert np.sum(in_array - out_array) / np.prod(in_array.shape) < 0.01 assert np.allclose(in_array, out_array, atol=0.01) @@ -257,7 +257,7 @@ def test_process_pixels_multi_png(): def test_process_pixels_gray(): in_array = np.random.rand(3, 128, 64) byte_arr = generate_compressed_data(in_array) - out_array = process_pixels(byte_arr, 1) + out_array = process_pixels(byte_arr, PNG, 1) assert out_array.shape == (1, 128, 64) assert np.mean(in_array.mean(axis=0, keepdims=True) - out_array) < 0.01 assert np.allclose(in_array.mean(axis=0, keepdims=True), out_array, atol=0.01)