Skip to content

Commit 89529d7

Browse files
authored
Merge pull request #3828 from cudawarped:cudacodec_videoreader_fix_yuv_to_color_conversion
`cudacodec::VideoReader` fix YUV color conversion
2 parents 3e776c8 + d21e42b commit 89529d7

13 files changed

+1595
-484
lines changed

Diff for: modules/cudacodec/CMakeLists.txt

-3
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,6 @@ if(HAVE_NVCUVID OR HAVE_NVCUVENC)
3838
endif()
3939
if(HAVE_NVCUVID)
4040
list(APPEND extra_libs ${CUDA_nvcuvid_LIBRARY})
41-
if(ENABLE_CUDA_FIRST_CLASS_LANGUAGE)
42-
list(APPEND extra_libs CUDA::nppicc${CUDA_LIB_EXT})
43-
endif()
4441
endif()
4542
if(HAVE_NVCUVENC)
4643
if(WIN32)

Diff for: modules/cudacodec/include/opencv2/cudacodec.hpp

+79-25
Original file line numberDiff line numberDiff line change
@@ -93,19 +93,19 @@ enum Codec
9393

9494
/** @brief ColorFormat for the frame returned by VideoReader::nextFrame() and VideoReader::retrieve() or used to initialize a VideoWriter.
9595
*/
96-
enum class ColorFormat {
96+
enum ColorFormat {
9797
UNDEFINED = 0,
98-
BGRA = 1, //!< OpenCV color format, can be used with both VideoReader and VideoWriter.
99-
BGR = 2, //!< OpenCV color format, can be used with both VideoReader and VideoWriter.
100-
GRAY = 3, //!< OpenCV color format, can be used with both VideoReader and VideoWriter.
101-
NV_NV12 = 4, //!< Nvidia color format - equivalent to YUV - Semi-Planar YUV [Y plane followed by interleaved UV plane], can be used with both VideoReader and VideoWriter.
102-
103-
RGB = 5, //!< OpenCV color format, can only be used with VideoWriter.
104-
RGBA = 6, //!< OpenCV color format, can only be used with VideoWriter.
105-
NV_YV12 = 8, //!< Nvidia Buffer Format - Planar YUV [Y plane followed by V and U planes], use with VideoReader, can only be used with VideoWriter.
106-
NV_IYUV = 9, //!< Nvidia Buffer Format - Planar YUV [Y plane followed by U and V planes], use with VideoReader, can only be used with VideoWriter.
107-
NV_YUV444 = 10, //!< Nvidia Buffer Format - Planar YUV [Y plane followed by U and V planes], use with VideoReader, can only be used with VideoWriter.
108-
NV_AYUV = 11, //!< Nvidia Buffer Format - 8 bit Packed A8Y8U8V8. This is a word-ordered format where a pixel is represented by a 32-bit word with V in the lowest 8 bits, U in the next 8 bits, Y in the 8 bits after that and A in the highest 8 bits, can only be used with VideoWriter.
98+
BGRA = 1, //!< OpenCV color format. VideoReader and VideoWriter.
99+
BGR = 2, //!< OpenCV color format. VideoReader and VideoWriter.
100+
GRAY = 3, //!< OpenCV color format. VideoReader and VideoWriter.
101+
RGB = 5, //!< OpenCV color format. VideoReader and VideoWriter.
102+
RGBA = 6, //!< OpenCV color format. VideoReader and VideoWriter.
103+
NV_YUV_SURFACE_FORMAT = 7, //!< Nvidia YUV Surface Format output by the Nvidia decoder, see @ref SurfaceFormat. VideoReader only.
104+
NV_NV12 = 4, //!< Nvidia Buffer Format - Semi-Planar YUV [Y plane followed by interleaved UV plane]. VideoWriter only. @deprecated Deprecated for use with VideoReader, use @ref NV_YUV_SURFACE_FORMAT instead.
105+
NV_YV12 = 8, //!< Nvidia Buffer Format - Planar YUV [Y plane followed by V and U planes]. VideoWriter only.
106+
NV_IYUV = 9, //!< Nvidia Buffer Format - Planar YUV [Y plane followed by U and V planes]. VideoWriter only.
107+
NV_YUV444 = 10, //!< Nvidia Buffer Format - Planar YUV [Y plane followed by U and V planes]. VideoWriter only.
108+
NV_AYUV = 11, //!< Nvidia Buffer Format - 8 bit Packed A8Y8U8V8. This is a word-ordered format where a pixel is represented by a 32-bit word with V in the lowest 8 bits, U in the next 8 bits, Y in the 8 bits after that and A in the highest 8 bits. VideoWriter only.
109109
#ifndef CV_DOXYGEN
110110
PROP_NOT_SUPPORTED
111111
#endif
@@ -298,16 +298,41 @@ enum ChromaFormat
298298
NumFormats
299299
};
300300

301-
/** @brief Deinterlacing mode used by decoder.
302-
* @param Weave Weave both fields (no deinterlacing). For progressive content and for content that doesn't need deinterlacing.
303-
* @param Bob Drop one field.
304-
* @param Adaptive Adaptive deinterlacing needs more video memory than other deinterlacing modes.
305-
* */
301+
/** @brief Deinterlacing mode used by decoder. */
306302
enum DeinterlaceMode
307303
{
308-
Weave = 0,
309-
Bob = 1,
310-
Adaptive = 2
304+
Weave = 0, //!< Weave both fields (no deinterlacing). For progressive content and for content that doesn't need deinterlacing.
305+
Bob = 1, //!< Drop one field.
306+
Adaptive = 2 //!< Adaptive deinterlacing needs more video memory than other deinterlacing modes.
307+
};
308+
309+
/** @brief Video Signal Description Color Primaries of the VideoReader source (section E.2.1 VUI parameters semantics of H265 spec file) */
310+
enum class ColorSpaceStandard {
311+
BT709 = 1, //!< ITU-R BT.709 standard for high-definition television.
312+
Unspecified = 2, //!< Unspecified color space standard.
313+
Reserved = 3, //!< Reserved for future use.
314+
FCC = 4, //!< FCC color space standard.
315+
BT470 = 5, //!< ITU-R BT.470, used for older analog television systems.
316+
BT601 = 6, //!< ITU-R BT.601, used for standard definition television.
317+
SMPTE240M = 7, //!< SMPTE 240M, used for early HDTV systems.
318+
YCgCo = 8, //!< YCgCo color space, used in some video compression algorithms.
319+
BT2020 = 9, //!< ITU-R BT.2020, used for ultra-high-definition television.
320+
BT2020C = 10 //!< ITU-R BT.2020 Constant Luminance, used for ultra-high-definition television.
321+
};
322+
323+
/** @brief Video surface formats output by the decoder */
324+
enum SurfaceFormat {
325+
SF_NV12 = 0, //!< Semi-Planar YUV [Y plane followed by interleaved UV plane]
326+
SF_P016 = 1, //!< 16 bit Semi-Planar YUV [Y plane followed by interleaved UV plane]. Can be used for 10 bit (6 LSB bits 0), 12 bit (4 LSB bits 0).
327+
SF_YUV444 = 2, //!< Planar YUV [Y plane followed by U and V planes]
328+
SF_YUV444_16Bit = 3 //!< 16 bit Planar YUV [Y plane followed by U and V planes]. Can be used for 10 bit (6 LSB bits 0), 12 bit (4 LSB bits 0).
329+
};
330+
331+
/** @brief Bit depth of the frame returned by VideoReader::nextFrame() and VideoReader::retrieve() */
332+
enum BitDepth {
333+
EIGHT = 0, //!< 8 bit depth.
334+
SIXTEEN = 1, //!< 16 bit depth.
335+
UNCHANGED = 2 //!< Use source bit depth.
311336
};
312337

313338
/** @brief Utility function demonstrating how to map the luma histogram when FormatInfo::videoFullRangeFlag == false
@@ -316,7 +341,7 @@ enum DeinterlaceMode
316341
317342
@note
318343
- This function demonstrates how to map the luma histogram back so that it is equivalent to the result obtained from cuda::calcHist()
319-
if the returned frame was colorFormat::GRAY.
344+
if the returned frame was ColorFormat::GRAY.
320345
*/
321346
CV_EXPORTS_W void MapHist(const cuda::GpuMat& hist, CV_OUT Mat& histFull);
322347

@@ -325,10 +350,11 @@ CV_EXPORTS_W void MapHist(const cuda::GpuMat& hist, CV_OUT Mat& histFull);
325350
struct CV_EXPORTS_W_SIMPLE FormatInfo
326351
{
327352
CV_WRAP FormatInfo() : nBitDepthMinus8(-1), ulWidth(0), ulHeight(0), width(0), height(0), ulMaxWidth(0), ulMaxHeight(0), valid(false),
328-
fps(0), ulNumDecodeSurfaces(0), videoFullRangeFlag(false), enableHistogram(false), nCounterBitDepth(0), nMaxHistogramBins(0){};
353+
fps(0), ulNumDecodeSurfaces(0), videoFullRangeFlag(false), colorSpaceStandard(ColorSpaceStandard::BT601), enableHistogram(false), nCounterBitDepth(0), nMaxHistogramBins(0){};
329354

330355
CV_PROP_RW Codec codec;
331356
CV_PROP_RW ChromaFormat chromaFormat;
357+
CV_PROP_RW SurfaceFormat surfaceFormat; //!< Surface format of the decoded frame.
332358
CV_PROP_RW int nBitDepthMinus8;
333359
CV_PROP_RW int nBitDepthChromaMinus8;
334360
CV_PROP_RW int ulWidth;//!< Coded sequence width in pixels.
@@ -345,12 +371,36 @@ struct CV_EXPORTS_W_SIMPLE FormatInfo
345371
CV_PROP_RW cv::Size targetSz;//!< Post-processed size of the output frame.
346372
CV_PROP_RW cv::Rect srcRoi;//!< Region of interest decoded from video source.
347373
CV_PROP_RW cv::Rect targetRoi;//!< Region of interest in the output frame containing the decoded frame.
348-
CV_PROP_RW bool videoFullRangeFlag;//!< Output value indicating if the black level, luma and chroma of the source are represented using the full or limited range (AKA TV or "analogue" range) of values as defined in Annex E of the ITU-T Specification. Internally the conversion from NV12 to BGR obeys ITU 709.
374+
CV_PROP_RW bool videoFullRangeFlag;//!< Output value indicating if the black level, luma and chroma of the source are represented using the full or limited range (AKA TV or "analogue" range) of values as defined in Annex E of the ITU-T Specification.
375+
CV_PROP_RW ColorSpaceStandard colorSpaceStandard; //!< Video Signal Description Color Primaries of the VideoReader source (section E.2.1 VUI parameters semantics of H265 spec file)
349376
CV_PROP_RW bool enableHistogram;//!< Flag requesting histogram output if supported. Exception will be thrown when requested but not supported.
350377
CV_PROP_RW int nCounterBitDepth;//!< Bit depth of histogram bins if histogram output is requested and supported.
351378
CV_PROP_RW int nMaxHistogramBins;//!< Max number of histogram bins if histogram output is requested and supported.
352379
};
353380

381+
/** @brief Class for converting the raw YUV Surface output from VideoReader if output color format is set to ColorFormat::NV_YUV_SURFACE_FORMAT (VideoReader::set(ColorFormat::NV_YUV_SURFACE_FORMAT)) to the requested @ref ColorFormat.
382+
*/
383+
class CV_EXPORTS_W NVSurfaceToColorConverter {
384+
public:
385+
/** @brief Performs the conversion from the raw YUV Surface output from VideoReader to the requested color format. Use this function when you want to convert the raw YUV Surface output from VideoReader to more than one color format or you want both the raw Surface output in addition to a color frame.
386+
* @param yuv The raw YUV Surface output from VideoReader see @ref SurfaceFormat.
387+
* @param color The converted frame.
388+
* @param surfaceFormat The surface format of the input YUV data.
389+
* @param outputFormat The requested output color format.
390+
* @param bitDepth The requested bit depth of the output frame.
391+
* @param planar Request separate planes for each color plane.
392+
* @param videoFullRangeFlag Indicates if the black level, luma and chroma of the source are represented using the full or limited range (AKA TV or "analogue" range) of values as defined in Annex E of the ITU-T Specification.
393+
* @param stream Stream for the asynchronous version.
394+
*/
395+
virtual bool convert(InputArray yuv, OutputArray color, const SurfaceFormat surfaceFormat, const ColorFormat outputFormat, const BitDepth bitDepth = BitDepth::UNCHANGED, const bool planar = false, const bool videoFullRangeFlag = false, cuda::Stream& stream = cuda::Stream::Null()) = 0;
396+
};
397+
398+
/** @brief Creates a NVSurfaceToColorConverter.
399+
* @param colorSpace The requested @ref ColorSpaceStandard for the converter.
400+
* @param videoFullRangeFlag Indicates if the black level, luma and chroma of the source are represented using the full or limited range (AKA TV or "analogue" range) of values as defined in Annex E of the ITU-T Specification.
401+
*/
402+
CV_EXPORTS_W Ptr<NVSurfaceToColorConverter> createNVSurfaceToColorConverter(const ColorSpaceStandard colorSpace, const bool videoFullRangeFlag = false);
403+
354404
/** @brief cv::cudacodec::VideoReader generic properties identifier.
355405
*/
356406
enum class VideoReaderProps {
@@ -360,9 +410,11 @@ enum class VideoReaderProps {
360410
PROP_NUMBER_OF_RAW_PACKAGES_SINCE_LAST_GRAB = 3, //!< Number of raw packages received since the last call to grab().
361411
PROP_RAW_MODE = 4, //!< Status of raw mode.
362412
PROP_LRF_HAS_KEY_FRAME = 5, //!< FFmpeg source only - Indicates whether the Last Raw Frame (LRF), output from VideoReader::retrieve() when VideoReader is initialized in raw mode, contains encoded data for a key frame.
363-
PROP_COLOR_FORMAT = 6, //!< Set the ColorFormat of the decoded frame. This can be changed before every call to nextFrame() and retrieve().
413+
PROP_COLOR_FORMAT = 6, //!< ColorFormat of the decoded frame. This can be changed before every call to nextFrame() and retrieve().
364414
PROP_UDP_SOURCE = 7, //!< Status of VideoReaderInitParams::udpSource initialization.
365415
PROP_ALLOW_FRAME_DROP = 8, //!< Status of VideoReaderInitParams::allowFrameDrop initialization.
416+
PROP_BIT_DEPTH = 9, //!< Bit depth of the decoded frame. This can be changed before every call to nextFrame() and retrieve().
417+
PROP_PLANAR = 10, //!< Planar when true, packed when false. This can be changed before every call to nextFrame() and retrieve().
366418
#ifndef CV_DOXYGEN
367419
PROP_NOT_SUPPORTED
368420
#endif
@@ -481,9 +533,11 @@ class CV_EXPORTS_W VideoReader
481533
/** @brief Set the desired ColorFormat for the frame returned by nextFrame()/retrieve().
482534
483535
@param colorFormat Value of the ColorFormat.
536+
@param bitDepth Requested bit depth of the frame.
537+
@param planar Set to true for planar and false for packed color format.
484538
@return `true` unless the colorFormat is not supported.
485539
*/
486-
CV_WRAP virtual bool set(const ColorFormat colorFormat) = 0;
540+
CV_WRAP virtual bool set(const ColorFormat colorFormat, const BitDepth bitDepth = BitDepth::UNCHANGED, const bool planar = false) = 0;
487541

488542
/** @brief Returns the specified VideoReader property
489543

0 commit comments

Comments
 (0)