Skip to content

Commit 54e357a

Browse files
committed
Enable Intel GPU support in torchcodec on Linux (xpu device)
This commit enables support for Intel GPUs in torchcodec. It adds: * ffmpeg-vaapi for decoding * VAAPI based color space conversion (decoding output to RGBA) * RGBA surface import as torch tensor (on torch xpu device) * RGBA to RGB24 tensor slicing To build torchcodec with Intel GPU support: * Install pytorch with XPU backend support. For example, with: ``` pip3 install torch --index-url https://download.pytorch.org/whl/xpu ``` * Install oneAPI development environment following https://github.com/pytorch/pytorch?tab=readme-ov-file#intel-gpu-support * Build and install FFmpeg with `--enable-vaapi` * Install torcheval (for tests): `pip3 install torcheval` * Build torchcodec with: `ENABLE_XPU=1 python3 setup.py devel` Notes: * RGB24 is not a supported color format on current Intel GPUs (as it is considered to be suboptimal due to odd alignments) * Intel media and compute APIs can't seamlessly work with each other's memory. For example, Intel compute's Unified Shared Memory pointers are not recognized by media APIs. Thus, lower level sharing via dma fds is needed. This also makes this part of the solution OS dependent. * Color space conversion algorithms might be quite different, as is the case for Intel. This requires checking PSNR values instead of per-pixel atol/rtol differences. * Installing oneAPI environment is needed due to pytorch/pytorch#149075 This commit was primarily verified on Intel Battlemage G21 (0xe20b) and Intel Data Center GPU Flex (0x56c0). Signed-off-by: Dmitry Rogozhkin <[email protected]>
1 parent 1fd20b2 commit 54e357a

File tree

11 files changed

+634
-51
lines changed

11 files changed

+634
-51
lines changed

setup.py

+2
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ def _build_all_extensions_with_cmake(self):
112112
torch_dir = Path(torch.utils.cmake_prefix_path) / "Torch"
113113
cmake_build_type = os.environ.get("CMAKE_BUILD_TYPE", "Release")
114114
enable_cuda = os.environ.get("ENABLE_CUDA", "")
115+
enable_xpu = os.environ.get("ENABLE_XPU", "")
115116
python_version = sys.version_info
116117
cmake_args = [
117118
f"-DCMAKE_INSTALL_PREFIX={self._install_prefix}",
@@ -120,6 +121,7 @@ def _build_all_extensions_with_cmake(self):
120121
f"-DCMAKE_BUILD_TYPE={cmake_build_type}",
121122
f"-DPYTHON_VERSION={python_version.major}.{python_version.minor}",
122123
f"-DENABLE_CUDA={enable_cuda}",
124+
f"-DENABLE_XPU={enable_xpu}",
123125
]
124126

125127
Path(self.build_temp).mkdir(parents=True, exist_ok=True)

src/torchcodec/decoders/_core/CMakeLists.txt

+17-2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,15 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
55

66
find_package(Torch REQUIRED)
77
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic -Werror ${TORCH_CXX_FLAGS}")
8+
if(ENABLE_CUDA)
9+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DENABLE_CUDA")
10+
endif()
11+
if(ENABLE_XPU)
12+
find_package(PkgConfig REQUIRED)
13+
pkg_check_modules(L0 REQUIRED IMPORTED_TARGET level-zero)
14+
pkg_check_modules(LIBVA REQUIRED IMPORTED_TARGET libva)
15+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DENABLE_XPU")
16+
endif()
817
find_package(Python3 ${PYTHON_VERSION} EXACT COMPONENTS Development)
918

1019
function(make_torchcodec_library library_name ffmpeg_target)
@@ -17,11 +26,13 @@ function(make_torchcodec_library library_name ffmpeg_target)
1726
VideoDecoderOps.h
1827
VideoDecoderOps.cpp
1928
DeviceInterface.h
29+
CPUOnlyDevice.cpp
2030
)
2131
if(ENABLE_CUDA)
2232
list(APPEND sources CudaDevice.cpp)
23-
else()
24-
list(APPEND sources CPUOnlyDevice.cpp)
33+
endif()
34+
if(ENABLE_XPU)
35+
list(APPEND sources XpuDevice.cpp)
2536
endif()
2637
add_library(${library_name} SHARED ${sources})
2738
set_property(TARGET ${library_name} PROPERTY CXX_STANDARD 17)
@@ -40,6 +51,10 @@ function(make_torchcodec_library library_name ffmpeg_target)
4051
list(APPEND NEEDED_LIBRARIES
4152
${CUDA_nppi_LIBRARY} ${CUDA_nppicc_LIBRARY} )
4253
endif()
54+
if(ENABLE_XPU)
55+
list(APPEND NEEDED_LIBRARIES
56+
PkgConfig::L0 PkgConfig::LIBVA )
57+
endif()
4358
target_link_libraries(
4459
${library_name}
4560
PUBLIC

src/torchcodec/decoders/_core/CPUOnlyDevice.cpp

+31
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ namespace facebook::torchcodec {
1414
TORCH_CHECK(false, "Unsupported device: " + device.str());
1515
}
1616

17+
#ifndef ENABLE_CUDA
1718
void convertAVFrameToFrameOutputOnCuda(
1819
const torch::Device& device,
1920
[[maybe_unused]] const VideoDecoder::VideoStreamOptions& videoStreamOptions,
@@ -40,5 +41,35 @@ std::optional<const AVCodec*> findCudaCodec(
4041
[[maybe_unused]] const AVCodecID& codecId) {
4142
throwUnsupportedDeviceError(device);
4243
}
44+
#endif // ENABLE_CUDA
45+
46+
#ifndef ENABLE_XPU
47+
void convertAVFrameToFrameOutputOnXpu(
48+
const torch::Device& device,
49+
[[maybe_unused]] const VideoDecoder::VideoStreamOptions& videoStreamOptions,
50+
[[maybe_unused]] VideoDecoder::AVFrameStream& avFrameStream,
51+
[[maybe_unused]] VideoDecoder::FrameOutput& frameOutput,
52+
[[maybe_unused]] std::optional<torch::Tensor> preAllocatedOutputTensor) {
53+
throwUnsupportedDeviceError(device);
54+
}
55+
56+
void initializeContextOnXpu(
57+
const torch::Device& device,
58+
[[maybe_unused]] AVCodecContext* codecContext) {
59+
throwUnsupportedDeviceError(device);
60+
}
61+
62+
void releaseContextOnXpu(
63+
const torch::Device& device,
64+
[[maybe_unused]] AVCodecContext* codecContext) {
65+
throwUnsupportedDeviceError(device);
66+
}
67+
68+
std::optional<const AVCodec*> findXpuCodec(
69+
const torch::Device& device,
70+
[[maybe_unused]] const AVCodecID& codecId) {
71+
throwUnsupportedDeviceError(device);
72+
}
73+
#endif // ENABLE_XPU
4374

4475
} // namespace facebook::torchcodec

src/torchcodec/decoders/_core/DeviceInterface.h

+19
Original file line numberDiff line numberDiff line change
@@ -29,19 +29,38 @@ void initializeContextOnCuda(
2929
const torch::Device& device,
3030
AVCodecContext* codecContext);
3131

32+
void initializeContextOnXpu(
33+
const torch::Device& device,
34+
AVCodecContext* codecContext);
35+
3236
void convertAVFrameToFrameOutputOnCuda(
3337
const torch::Device& device,
3438
const VideoDecoder::VideoStreamOptions& videoStreamOptions,
3539
VideoDecoder::AVFrameStream& avFrameStream,
3640
VideoDecoder::FrameOutput& frameOutput,
3741
std::optional<torch::Tensor> preAllocatedOutputTensor = std::nullopt);
3842

43+
void convertAVFrameToFrameOutputOnXpu(
44+
const torch::Device& device,
45+
const VideoDecoder::VideoStreamOptions& videoStreamOptions,
46+
VideoDecoder::AVFrameStream& avFrameStream,
47+
VideoDecoder::FrameOutput& frameOutput,
48+
std::optional<torch::Tensor> preAllocatedOutputTensor = std::nullopt);
49+
3950
void releaseContextOnCuda(
4051
const torch::Device& device,
4152
AVCodecContext* codecContext);
4253

54+
void releaseContextOnXpu(
55+
const torch::Device& device,
56+
AVCodecContext* codecContext);
57+
4358
std::optional<const AVCodec*> findCudaCodec(
4459
const torch::Device& device,
4560
const AVCodecID& codecId);
4661

62+
std::optional<const AVCodec*> findXpuCodec(
63+
const torch::Device& device,
64+
const AVCodecID& codecId);
65+
4766
} // namespace facebook::torchcodec

src/torchcodec/decoders/_core/VideoDecoder.cpp

+27-7
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,8 @@ VideoDecoder::~VideoDecoder() {
118118
if (device.type() == torch::kCPU) {
119119
} else if (device.type() == torch::kCUDA) {
120120
releaseContextOnCuda(device, streamInfo.codecContext.get());
121+
} else if (device.type() == torch::kXPU) {
122+
releaseContextOnXpu(device, streamInfo.codecContext.get());
121123
} else {
122124
TORCH_CHECK(false, "Invalid device type: " + device.str());
123125
}
@@ -449,10 +451,16 @@ void VideoDecoder::addStream(
449451

450452
// TODO_CODE_QUALITY it's pretty meh to have a video-specific logic within
451453
// addStream() which is supposed to be generic
452-
if (mediaType == AVMEDIA_TYPE_VIDEO && device.type() == torch::kCUDA) {
453-
avCodec = makeAVCodecOnlyUseForCallingAVFindBestStream(
454-
findCudaCodec(device, streamInfo.stream->codecpar->codec_id)
455-
.value_or(avCodec));
454+
if (mediaType == AVMEDIA_TYPE_VIDEO) {
455+
if (device.type() == torch::kCUDA) {
456+
avCodec = makeAVCodecOnlyUseForCallingAVFindBestStream(
457+
findCudaCodec(device, streamInfo.stream->codecpar->codec_id)
458+
.value_or(avCodec));
459+
} else if (device.type() == torch::kXPU) {
460+
avCodec = makeAVCodecOnlyUseForCallingAVFindBestStream(
461+
findXpuCodec(device, streamInfo.stream->codecpar->codec_id)
462+
.value_or(avCodec));
463+
}
456464
}
457465

458466
AVCodecContext* codecContext = avcodec_alloc_context3(avCodec);
@@ -466,8 +474,12 @@ void VideoDecoder::addStream(
466474
streamInfo.codecContext->thread_count = ffmpegThreadCount.value_or(0);
467475

468476
// TODO_CODE_QUALITY same as above.
469-
if (mediaType == AVMEDIA_TYPE_VIDEO && device.type() == torch::kCUDA) {
470-
initializeContextOnCuda(device, codecContext);
477+
if (mediaType == AVMEDIA_TYPE_VIDEO) {
478+
if (device.type() == torch::kCUDA) {
479+
initializeContextOnCuda(device, codecContext);
480+
} else if (device.type() == torch::kXPU) {
481+
initializeContextOnXpu(device, codecContext);
482+
}
471483
}
472484

473485
retVal = avcodec_open2(streamInfo.codecContext.get(), avCodec, nullptr);
@@ -495,7 +507,8 @@ void VideoDecoder::addVideoStream(
495507
const VideoStreamOptions& videoStreamOptions) {
496508
TORCH_CHECK(
497509
videoStreamOptions.device.type() == torch::kCPU ||
498-
videoStreamOptions.device.type() == torch::kCUDA,
510+
videoStreamOptions.device.type() == torch::kCUDA ||
511+
videoStreamOptions.device.type() == torch::kXPU,
499512
"Invalid device type: " + videoStreamOptions.device.str());
500513

501514
addStream(
@@ -1165,6 +1178,13 @@ VideoDecoder::FrameOutput VideoDecoder::convertAVFrameToFrameOutput(
11651178
avFrameStream,
11661179
frameOutput,
11671180
preAllocatedOutputTensor);
1181+
} else if (streamInfo.videoStreamOptions.device.type() == torch::kXPU) {
1182+
convertAVFrameToFrameOutputOnXpu(
1183+
streamInfo.videoStreamOptions.device,
1184+
streamInfo.videoStreamOptions,
1185+
avFrameStream,
1186+
frameOutput,
1187+
preAllocatedOutputTensor);
11681188
} else {
11691189
TORCH_CHECK(
11701190
false,

src/torchcodec/decoders/_core/VideoDecoderOps.cpp

+4-1
Original file line numberDiff line numberDiff line change
@@ -207,10 +207,13 @@ void _add_video_stream(
207207
} else if (device.value().rfind("cuda", 0) == 0) { // starts with "cuda"
208208
std::string deviceStr(device.value());
209209
videoStreamOptions.device = torch::Device(deviceStr);
210+
} else if (device.value().rfind("xpu", 0) == 0) { // starts with "xpu"
211+
std::string deviceStr(device.value());
212+
videoStreamOptions.device = torch::Device(deviceStr);
210213
} else {
211214
throw std::runtime_error(
212215
"Invalid device=" + std::string(device.value()) +
213-
". device must be either cpu or cuda.");
216+
". device must be either cpu, cuda or xpu.");
214217
}
215218
}
216219

0 commit comments

Comments
 (0)