10
10
11
11
#include " program.hpp"
12
12
13
+ #ifdef SYCL_ENABLE_KERNEL_FUSION
14
+ #include < amd_comgr/amd_comgr.h>
15
+ namespace {
16
+ template <typename ReleaseType, ReleaseType Release, typename T>
17
+ struct COMgrObjCleanUp {
18
+ COMgrObjCleanUp (T Obj) : Obj{Obj} {}
19
+ ~COMgrObjCleanUp () { Release (Obj); }
20
+ T Obj;
21
+ };
22
+
23
+ using COMgrDataTCleanUp =
24
+ COMgrObjCleanUp<decltype(&amd_comgr_release_data), &amd_comgr_release_data,
25
+ amd_comgr_data_t >;
26
+ using COMgrDataSetTCleanUp =
27
+ COMgrObjCleanUp<decltype(&amd_comgr_destroy_data_set),
28
+ &amd_comgr_destroy_data_set, amd_comgr_data_set_t >;
29
+ using COMgrActionInfoCleanUp =
30
+ COMgrObjCleanUp<decltype(&amd_comgr_destroy_action_info),
31
+ &amd_comgr_destroy_action_info, amd_comgr_action_info_t >;
32
+
33
+ void getCoMgrBuildLog (const amd_comgr_data_set_t BuildDataSet, char *BuildLog,
34
+ size_t MaxLogSize) {
35
+ size_t count = 0 ;
36
+ amd_comgr_status_t status = amd_comgr_action_data_count (
37
+ BuildDataSet, AMD_COMGR_DATA_KIND_LOG, &count);
38
+
39
+ if (status != AMD_COMGR_STATUS_SUCCESS || count == 0 ) {
40
+ std::strcpy (BuildLog, " extracting build log failed (no log)." );
41
+ return ;
42
+ }
43
+
44
+ amd_comgr_data_t LogBinaryData;
45
+
46
+ if (amd_comgr_action_data_get_data (BuildDataSet, AMD_COMGR_DATA_KIND_LOG, 0 ,
47
+ &LogBinaryData) !=
48
+ AMD_COMGR_STATUS_SUCCESS) {
49
+ std::strcpy (BuildLog, " extracting build log failed (no data)." );
50
+ return ;
51
+ }
52
+ COMgrDataTCleanUp LogDataCleanup{LogBinaryData};
53
+
54
+ size_t binarySize = 0 ;
55
+ if (amd_comgr_get_data (LogBinaryData, &binarySize, NULL ) !=
56
+ AMD_COMGR_STATUS_SUCCESS) {
57
+ std::strcpy (BuildLog, " extracting build log failed (no log size)." );
58
+ return ;
59
+ }
60
+
61
+ if (binarySize == 0 ) {
62
+ std::strcpy (BuildLog, " no log." );
63
+ return ;
64
+ }
65
+
66
+ size_t bufSize = binarySize < MaxLogSize ? binarySize : MaxLogSize;
67
+
68
+ if (amd_comgr_get_data (LogBinaryData, &bufSize, BuildLog) !=
69
+ AMD_COMGR_STATUS_SUCCESS) {
70
+ std::strcpy (BuildLog, " extracting build log failed (cannot copy log)." );
71
+ return ;
72
+ }
73
+ }
74
+ } // namespace
75
+ #endif
76
+
13
77
ur_program_handle_t_::ur_program_handle_t_ (ur_context_handle_t Ctxt)
14
78
: Module{nullptr }, Binary{}, BinarySizeInBytes{0 }, RefCount{1 }, Context{
15
79
Ctxt} {
@@ -18,6 +82,22 @@ ur_program_handle_t_::ur_program_handle_t_(ur_context_handle_t Ctxt)
18
82
19
83
/// Calls urContextRelease on the Context member when the program handle is
/// destroyed.
ur_program_handle_t_::~ur_program_handle_t_() {
  urContextRelease(Context);
}
20
84
85
+ ur_result_t
86
+ ur_program_handle_t_::setMetadata (const ur_program_metadata_t *Metadata,
87
+ size_t Length) {
88
+ for (size_t i = 0 ; i < Length; ++i) {
89
+ const ur_program_metadata_t MetadataElement = Metadata[i];
90
+ std::string MetadataElementName{MetadataElement.pName };
91
+
92
+ if (MetadataElementName ==
93
+ __SYCL_UR_PROGRAM_METADATA_TAG_NEED_FINALIZATION) {
94
+ assert (MetadataElement.type == UR_PROGRAM_METADATA_TYPE_UINT32);
95
+ IsRelocatable = MetadataElement.value .data32 ;
96
+ }
97
+ }
98
+ return UR_RESULT_SUCCESS;
99
+ }
100
+
21
101
ur_result_t ur_program_handle_t_::setBinary (const char *Source, size_t Length) {
22
102
// Do not re-set program binary data which has already been set as that will
23
103
// delete the old binary data.
@@ -28,7 +108,80 @@ ur_result_t ur_program_handle_t_::setBinary(const char *Source, size_t Length) {
28
108
return UR_RESULT_SUCCESS;
29
109
}
30
110
111
+ ur_result_t ur_program_handle_t_::finalizeRelocatable () {
112
+ #ifndef SYCL_ENABLE_KERNEL_FUSION
113
+ assert (false && " Relocation only available with fusion" );
114
+ return UR_RESULT_ERROR_UNKNOWN;
115
+ #else
116
+ assert (IsRelocatable && " Not a relocatable input" );
117
+ amd_comgr_data_t ComgrData;
118
+ amd_comgr_data_set_t RelocatableData;
119
+ UR_CHECK_ERROR (amd_comgr_create_data_set (&RelocatableData));
120
+ COMgrDataSetTCleanUp RelocatableDataCleanup{RelocatableData};
121
+
122
+ UR_CHECK_ERROR (
123
+ amd_comgr_create_data (AMD_COMGR_DATA_KIND_RELOCATABLE, &ComgrData));
124
+ // RAII for auto clean-up
125
+ COMgrDataTCleanUp DataCleanup{ComgrData};
126
+ UR_CHECK_ERROR (amd_comgr_set_data (ComgrData, BinarySizeInBytes, Binary));
127
+ UR_CHECK_ERROR (amd_comgr_set_data_name (ComgrData, " jit_obj.o" ));
128
+
129
+ UR_CHECK_ERROR (amd_comgr_data_set_add (RelocatableData, ComgrData));
130
+
131
+ amd_comgr_action_info_t Action;
132
+
133
+ UR_CHECK_ERROR (amd_comgr_create_action_info (&Action));
134
+ COMgrActionInfoCleanUp ActionCleanUp{Action};
135
+
136
+ std::string ISA = " amdgcn-amd-amdhsa--" ;
137
+ hipDeviceProp_t Props;
138
+ detail::ur::assertion (hipGetDeviceProperties (
139
+ &Props, Context->getDevice ()->get ()) == hipSuccess);
140
+ ISA += Props.gcnArchName ;
141
+ UR_CHECK_ERROR (amd_comgr_action_info_set_isa_name (Action, ISA.data ()));
142
+
143
+ UR_CHECK_ERROR (amd_comgr_action_info_set_logging (Action, true ));
144
+
145
+ amd_comgr_data_set_t Output;
146
+ UR_CHECK_ERROR (amd_comgr_create_data_set (&Output));
147
+ COMgrDataSetTCleanUp OutputDataCleanup{Output};
148
+
149
+ if (amd_comgr_do_action (AMD_COMGR_ACTION_LINK_RELOCATABLE_TO_EXECUTABLE,
150
+ Action, RelocatableData,
151
+ Output) != AMD_COMGR_STATUS_SUCCESS) {
152
+ getCoMgrBuildLog (Output, ErrorLog, MAX_LOG_SIZE);
153
+ return UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE;
154
+ }
155
+ amd_comgr_data_t binaryData;
156
+
157
+ UR_CHECK_ERROR (amd_comgr_action_data_get_data (
158
+ Output, AMD_COMGR_DATA_KIND_EXECUTABLE, 0 , &binaryData));
159
+ {
160
+ COMgrDataTCleanUp binaryDataCleanUp{binaryData};
161
+
162
+ size_t binarySize = 0 ;
163
+ UR_CHECK_ERROR (amd_comgr_get_data (binaryData, &binarySize, NULL ));
164
+
165
+ ExecutableCache.resize (binarySize);
166
+
167
+ UR_CHECK_ERROR (
168
+ amd_comgr_get_data (binaryData, &binarySize, ExecutableCache.data ()));
169
+ }
170
+ Binary = ExecutableCache.data ();
171
+ BinarySizeInBytes = ExecutableCache.size ();
172
+ return UR_RESULT_SUCCESS;
173
+ #endif
174
+ }
175
+
31
176
ur_result_t ur_program_handle_t_::buildProgram (const char *BuildOptions) {
177
+ if (IsRelocatable) {
178
+ if (finalizeRelocatable () != UR_RESULT_SUCCESS) {
179
+ BuildStatus = UR_PROGRAM_BUILD_STATUS_ERROR;
180
+ return UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE;
181
+ }
182
+ IsRelocatable = false ;
183
+ }
184
+
32
185
if (BuildOptions) {
33
186
this ->BuildOptions = BuildOptions;
34
187
}
@@ -246,7 +399,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle(
246
399
/// Note: Only supports one device
247
400
UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary (
248
401
ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size,
249
- const uint8_t *pBinary, const ur_program_properties_t *,
402
+ const uint8_t *pBinary, const ur_program_properties_t *pProperties ,
250
403
ur_program_handle_t *phProgram) {
251
404
UR_ASSERT (pBinary != nullptr && size != 0 , UR_RESULT_ERROR_INVALID_BINARY);
252
405
UR_ASSERT (hContext->getDevice ()->get () == hDevice->get (),
@@ -259,6 +412,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary(
259
412
260
413
// TODO: Set metadata here and use reqd_work_group_size information.
261
414
// See urProgramCreateWithBinary in CUDA adapter.
415
+ if (pProperties) {
416
+ if (pProperties->count > 0 && pProperties->pMetadatas == nullptr ) {
417
+ return UR_RESULT_ERROR_INVALID_NULL_POINTER;
418
+ } else if (pProperties->count == 0 && pProperties->pMetadatas != nullptr ) {
419
+ return UR_RESULT_ERROR_INVALID_SIZE;
420
+ }
421
+ Result =
422
+ RetProgram->setMetadata (pProperties->pMetadatas , pProperties->count );
423
+ }
424
+ UR_ASSERT (Result == UR_RESULT_SUCCESS, Result);
262
425
263
426
auto pBinary_string = reinterpret_cast <const char *>(pBinary);
264
427
if (size == 0 ) {
0 commit comments