Qrack  9.0
General classical-emulating-quantum development framework
oclengine.hpp
Go to the documentation of this file.
1 //
3 // (C) Daniel Strano and the Qrack contributors 2017-2023. All rights reserved.
4 //
5 // This is a multithreaded, universal quantum register simulation, allowing
6 // (nonphysical) register cloning and direct measurement of probability and
7 // phase, to leverage what advantages classical emulation of qubits can have.
8 //
9 // Licensed under the GNU Lesser General Public License V3.
10 // See LICENSE.md in the project root or https://www.gnu.org/licenses/lgpl-3.0.en.html
11 // for details.
12 
13 #pragma once
14 
15 #include "oclapi.hpp"
16 
17 #if !ENABLE_OPENCL
18 #error OpenCL has not been enabled
19 #endif
20 
21 #if defined(_WIN32) && !defined(__CYGWIN__)
22 #include <direct.h>
23 #endif
24 
25 #include <cstdint>
26 #include <map>
27 #include <memory>
28 #include <mutex>
29 #include <string>
30 #include <sys/stat.h>
31 
32 #if defined(__APPLE__)
33 #define CL_SILENCE_DEPRECATION
34 #include <OpenCL/cl.hpp>
35 #elif defined(_WIN32) || ENABLE_SNUCL
36 #include <CL/cl.hpp>
37 #elif defined(OPENCL_V3)
38 #include <CL/opencl.hpp>
39 #else
40 #include <CL/cl2.hpp>
41 #endif
42 
43 namespace Qrack {
44 
45 class OCLDeviceCall;
46 
47 class OCLDeviceContext;
48 
49 typedef std::shared_ptr<OCLDeviceContext> DeviceContextPtr;
50 typedef std::vector<cl::Event> EventVec;
51 typedef std::shared_ptr<EventVec> EventVecPtr;
52 
55  std::string kernelname;
56 
57  OCLKernelHandle(OCLAPI o, std::string kn)
58  : oclapi(o)
59  , kernelname(kn)
60  {
61  }
62 };
63 
65 protected:
66  std::lock_guard<std::mutex> guard;
67 
68 public:
69  // A cl::Kernel is a unique object which should always be taken by reference, or the OCLDeviceContext will lose
70  // ownership.
71  cl::Kernel& call;
73 
74 protected:
75  OCLDeviceCall(std::mutex& m, cl::Kernel& c)
76  : guard(m)
77  , call(c)
78  {
79  }
80 
81  friend class OCLDeviceContext;
82 
83 private:
85 };
86 
88 public:
89  const cl::Platform platform;
90  const cl::Device device;
91  const cl::Context context;
92  const int64_t context_id;
93  const int64_t device_id;
94  const bool is_gpu;
95  const bool is_cpu;
96  const bool use_host_mem;
97  cl::CommandQueue queue;
99 
100 protected:
101  std::mutex waitEventsMutex;
102  std::map<OCLAPI, cl::Kernel> calls;
103  std::map<OCLAPI, std::unique_ptr<std::mutex>> mutexes;
104 
105 private:
106  const size_t procElemCount = device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
107  const size_t maxWorkItems = device.getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>()[0];
108  const size_t maxWorkGroupSize = device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
109  const size_t maxAlloc = device.getInfo<CL_DEVICE_MAX_MEM_ALLOC_SIZE>();
110  const size_t globalSize = device.getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>();
111  const size_t localSize = device.getInfo<CL_DEVICE_LOCAL_MEM_SIZE>();
112  size_t globalLimit;
115 
116 public:
// Construct a device context from an OpenCL platform/device/context triple and create its command queue.
//
// p, d, c      : copied into the const members platform, device and context.
// dev_id       : stored in device_id.
// cntxt_id     : stored in context_id.
// maxAlloc     : constructor parameter (int64_t) that shadows the member of the same name inside this
//                constructor; a non-negative value becomes globalLimit, a negative value selects the default.
// isGpu/isCpu/useHostMem : stored in is_gpu, is_cpu and use_host_mem.
//
// Throws std::runtime_error if no command queue can be created.
// NOTE(review): listing lines 133-134 (between the initializer list and the body) are absent from this extract.
117  OCLDeviceContext(cl::Platform& p, cl::Device& d, cl::Context& c, int64_t dev_id, int64_t cntxt_id, int64_t maxAlloc,
118  bool isGpu, bool isCpu, bool useHostMem)
119  : platform(p)
120  , device(d)
121  , context(c)
122  , context_id(cntxt_id)
123  , device_id(dev_id)
124  , is_gpu(isGpu)
125  , is_cpu(isCpu)
126  , use_host_mem(useHostMem)
127  , wait_events(new EventVec())
128 #if ENABLE_OCL_MEM_GUARDS
// With memory guards, a negative maxAlloc defaults the limit to three quarters of the device's global memory size.
129  , globalLimit((maxAlloc >= 0) ? maxAlloc : ((3U * globalSize) >> 2U))
130 #else
// Without memory guards, a negative maxAlloc yields -1, which converts to the largest size_t value.
131  , globalLimit((maxAlloc >= 0) ? maxAlloc : -1)
132 #endif
135  {
136  cl_int error;
// First attempt: request a queue with out-of-order execution enabled.
137 #if defined(_WIN32)
138  cl_command_queue_properties props = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
139  queue = cl::CommandQueue(c, d, &props, &error);
140 #else
141  queue = cl::CommandQueue(c, d, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error);
142 #endif
// Fallback: retry with no queue properties (0) before giving up.
143  if (error != CL_SUCCESS) {
144  queue = cl::CommandQueue(c, d, 0, &error);
145  if (error != CL_SUCCESS) {
146  throw std::runtime_error("Failed to create OpenCL command queue!");
147  }
148  }
149  }
150 
151  OCLDeviceCall Reserve(OCLAPI call) { return OCLDeviceCall(*(mutexes[call]), calls[call]); }
152 
// Body of ResetWaitEvents() (its signature, listing line 153, is not shown in this extract).
// Under waitEventsMutex, moves the current wait_events pointer out and returns it to the caller.
154  {
155  std::lock_guard<std::mutex> guard(waitEventsMutex);
156  EventVecPtr waitVec = std::move(wait_events);
// NOTE(review): listing line 157 is missing here. After the move above wait_events is empty, so that line
// presumably re-creates it — confirm against the original header.
158  return waitVec;
159  }
160 
161  template <typename Fn> void EmplaceEvent(Fn fn)
162  {
163  std::lock_guard<std::mutex> guard(waitEventsMutex);
164  wait_events->emplace_back();
165  fn(wait_events->back());
166  }
167 
// Body of WaitOnAllEvents() (its signature, listing line 168, is not shown in this extract).
// Under waitEventsMutex: if any events are queued, wait on all of them, then clear the list.
169  {
170  std::lock_guard<std::mutex> guard(waitEventsMutex);
// An empty list is skipped entirely, so waitForEvents is never called with no events.
171  if ((wait_events.get())->size()) {
172  cl::Event::waitForEvents((const EventVec&)*(wait_events.get()));
173  wait_events->clear();
174  }
175  }
176 
// Body of GetPreferredSizeMultiple() (its signature, listing line 177, and listing lines 180-181 are not shown
// in this extract, so the expression below is incomplete as printed).
// What remains reads preferredSizeMultiple and queries CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE for the
// OCL_API_APPLY2X2_NORM_SINGLE kernel on this device.
// NOTE(review): how the two are combined is on the missing lines (likely a cached-value check) — confirm.
178  {
179  return preferredSizeMultiple
182  calls[OCL_API_APPLY2X2_NORM_SINGLE].getWorkGroupInfo<CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE>(
183  device);
184  }
185 
// Body of GetPreferredConcurrency() (its signature, listing line 186, is not shown in this extract; listing
// lines 200, 204 and 207 are also absent, so the statements they carry are not visible below).
187  {
// A non-zero preferredConcurrency is returned as-is, without recomputation.
188  if (preferredConcurrency) {
189  return preferredConcurrency;
190  }
191 
// Shift amount applied at the end; defaults to 3.
192  int hybridOffset = 3U;
193 #if ENABLE_ENV_VARS
// QRACK_GPU_OFFSET_QB, when set, overrides the default; std::stoi throws if the value is not numeric.
194  if (getenv("QRACK_GPU_OFFSET_QB")) {
195  hybridOffset = std::stoi(std::string(getenv("QRACK_GPU_OFFSET_QB")));
196  }
197 #endif
198 
// Target value: compute-unit count times the preferred work-group size multiple.
199  const size_t pc = procElemCount * GetPreferredSizeMultiple();
// Double preferredConcurrency until it is at least pc.
// NOTE(review): the missing listing line 200 presumably seeds it with a non-zero value first — confirm.
201  while (preferredConcurrency < pc) {
202  preferredConcurrency <<= 1U;
203  }
// Right-hand side of a statement whose start (listing line 204) is missing: a positive offset shifts left,
// a non-positive offset shifts right by its magnitude.
205  hybridOffset > 0 ? (preferredConcurrency << hybridOffset) : (preferredConcurrency >> -hybridOffset);
// NOTE(review): the body of this guard (listing line 207) is missing; presumably it clamps to a minimum of 1
// — confirm.
206  if (preferredConcurrency < 1U) {
208  }
209 
210  return preferredConcurrency;
211  }
212 
213  size_t GetProcElementCount() { return procElemCount; }
214  size_t GetMaxWorkItems() { return maxWorkItems; }
216  size_t GetMaxAlloc() { return maxAlloc; }
217  size_t GetGlobalSize() { return globalSize; }
218  size_t GetLocalSize() { return localSize; }
219  size_t GetGlobalAllocLimit() { return globalLimit; }
220 
221  friend class OCLEngine;
222 };
223 
225  std::vector<DeviceContextPtr> all_dev_contexts;
227 
229  : all_dev_contexts()
230  , default_dev_context(NULL)
231  {
232  // Intentionally left blank
233  }
234 
235  InitOClResult(std::vector<DeviceContextPtr> adc, DeviceContextPtr ddc)
236  : all_dev_contexts(adc)
237  , default_dev_context(ddc)
238  {
239  // Intentionally left blank
240  }
241 };
242 
244 class OCLEngine {
245 public:
246  // See https://stackoverflow.com/questions/1008019/c-singleton-design-pattern
248  static OCLEngine& Instance()
249  {
250  static OCLEngine instance;
251  return instance;
252  }
/// Get the default location for precompiled binaries.
///
/// When built with ENABLE_ENV_VARS and QRACK_OCL_PATH is set to a non-empty value, that value is returned with
/// a trailing path separator appended if it does not already end in '/' or '\\'. An empty QRACK_OCL_PATH is
/// treated as unset. Otherwise the result is "<home>/.qrack/" ("<HOMEDRIVE><HOMEPATH>\\.qrack\\" on native
/// Windows), where unset home variables contribute an empty string.
///
/// @return Directory path ending in a path separator.
static std::string GetDefaultBinaryPath()
{
#if ENABLE_ENV_VARS
    if (const char* oclPath = getenv("QRACK_OCL_PATH")) {
        std::string toRet(oclPath);
        // back() on an empty string is undefined behavior, so an empty value falls through to the default.
        if (!toRet.empty()) {
            if ((toRet.back() != '/') && (toRet.back() != '\\')) {
#if defined(_WIN32) && !defined(__CYGWIN__)
                toRet += "\\";
#else
                toRet += "/";
#endif
            }
            return toRet;
        }
    }
#endif
#if defined(_WIN32) && !defined(__CYGWIN__)
    const char* homeDrive = getenv("HOMEDRIVE");
    const char* homePath = getenv("HOMEPATH");
    return std::string(homeDrive ? homeDrive : "") + std::string(homePath ? homePath : "") + "\\.qrack\\";
#else
    const char* home = getenv("HOME");
    return std::string(home ? home : "") + "/.qrack/";
#endif
}
279  static InitOClResult InitOCL(bool buildFromSource = false, bool saveBinaries = false, std::string home = "*",
280  std::vector<int64_t> maxAllocVec = { -1 });
281 
283  DeviceContextPtr GetDeviceContextPtr(const int64_t& dev = -1);
285  std::vector<DeviceContextPtr> GetDeviceContextPtrVector();
290  void SetDeviceContextPtrVector(std::vector<DeviceContextPtr> vec, DeviceContextPtr dcp = nullptr);
292  int GetDeviceCount() { return all_device_contexts.size(); }
294  size_t GetDefaultDeviceID() { return default_device_context->device_id; }
297 
298  size_t GetActiveAllocSize(const int64_t& dev)
299  {
300  if (dev > ((int64_t)activeAllocSizes.size())) {
301  throw std::invalid_argument("OCLEngine::GetActiveAllocSize device ID is too high!");
302  }
303  return (dev < 0) ? activeAllocSizes[GetDefaultDeviceID()] : activeAllocSizes[(size_t)dev];
304  }
305  size_t AddToActiveAllocSize(const int64_t& dev, size_t size)
306  {
307  if (dev > ((int64_t)activeAllocSizes.size())) {
308  throw std::invalid_argument("OCLEngine::GetActiveAllocSize device ID is too high!");
309  }
310 
311  size_t lDev = (dev < 0) ? GetDefaultDeviceID() : dev;
312 
313  if (size == 0) {
314  return activeAllocSizes[lDev];
315  }
316 
317  std::lock_guard<std::mutex> lock(allocMutex);
318  activeAllocSizes[lDev] += size;
319 
320  return activeAllocSizes[lDev];
321  }
322  size_t SubtractFromActiveAllocSize(const int64_t& dev, size_t size)
323  {
324  if (dev > ((int64_t)activeAllocSizes.size())) {
325  throw std::invalid_argument("OCLEngine::GetActiveAllocSize device ID is too high!");
326  }
327 
328  size_t lDev = (dev < 0) ? GetDefaultDeviceID() : dev;
329 
330  if (size == 0) {
331  return activeAllocSizes[lDev];
332  }
333 
334  std::lock_guard<std::mutex> lock(allocMutex);
335  if (size < activeAllocSizes[lDev]) {
336  activeAllocSizes[lDev] -= size;
337  } else {
338  activeAllocSizes[lDev] = 0;
339  }
340  return activeAllocSizes[lDev];
341  }
342  void ResetActiveAllocSize(const int64_t& dev)
343  {
344  if (dev > ((int64_t)activeAllocSizes.size())) {
345  throw std::invalid_argument("OCLEngine::GetActiveAllocSize device ID is too high!");
346  }
347  size_t lDev = (dev < 0) ? GetDefaultDeviceID() : dev;
348  std::lock_guard<std::mutex> lock(allocMutex);
349  // User code should catch std::bad_alloc and reset:
350  activeAllocSizes[lDev] = 0;
351  }
352 
353  OCLEngine(OCLEngine const&) = delete;
354  void operator=(OCLEngine const&) = delete;
355 
356 private:
357  static const std::vector<OCLKernelHandle> kernelHandles;
358  static const std::string binary_file_prefix;
359  static const std::string binary_file_ext;
360 
361  std::vector<size_t> activeAllocSizes;
362  std::vector<int64_t> maxActiveAllocSizes;
363  std::mutex allocMutex;
364  std::vector<DeviceContextPtr> all_device_contexts;
366 
367  OCLEngine(); // Private so that it can not be called
368 
370  static cl::Program MakeProgram(bool buildFromSource, std::string path, std::shared_ptr<OCLDeviceContext> devCntxt);
372  static void SaveBinary(cl::Program program, std::string path, std::string fileName);
373 };
374 
375 } // namespace Qrack
Definition: oclengine.hpp:64
std::lock_guard< std::mutex > guard
Definition: oclengine.hpp:66
OCLDeviceCall & operator=(const OCLDeviceCall &)=delete
cl::Kernel & call
Definition: oclengine.hpp:71
OCLDeviceCall(std::mutex &m, cl::Kernel &c)
Definition: oclengine.hpp:75
OCLDeviceCall(const OCLDeviceCall &)
Definition: oclengine.hpp:87
OCLDeviceContext(cl::Platform &p, cl::Device &d, cl::Context &c, int64_t dev_id, int64_t cntxt_id, int64_t maxAlloc, bool isGpu, bool isCpu, bool useHostMem)
Definition: oclengine.hpp:117
const cl::Device device
Definition: oclengine.hpp:90
const size_t globalSize
Definition: oclengine.hpp:110
size_t globalLimit
Definition: oclengine.hpp:112
size_t GetPreferredSizeMultiple()
Definition: oclengine.hpp:177
const size_t localSize
Definition: oclengine.hpp:111
std::mutex waitEventsMutex
Definition: oclengine.hpp:101
const bool use_host_mem
Definition: oclengine.hpp:96
const int64_t context_id
Definition: oclengine.hpp:92
std::map< OCLAPI, cl::Kernel > calls
Definition: oclengine.hpp:102
void EmplaceEvent(Fn fn)
Definition: oclengine.hpp:161
size_t GetGlobalSize()
Definition: oclengine.hpp:217
size_t GetMaxAlloc()
Definition: oclengine.hpp:216
std::map< OCLAPI, std::unique_ptr< std::mutex > > mutexes
Definition: oclengine.hpp:103
const bool is_cpu
Definition: oclengine.hpp:95
OCLDeviceCall Reserve(OCLAPI call)
Definition: oclengine.hpp:151
size_t GetMaxWorkGroupSize()
Definition: oclengine.hpp:215
void WaitOnAllEvents()
Definition: oclengine.hpp:168
size_t GetLocalSize()
Definition: oclengine.hpp:218
const int64_t device_id
Definition: oclengine.hpp:93
const size_t maxWorkGroupSize
Definition: oclengine.hpp:108
cl::CommandQueue queue
Definition: oclengine.hpp:97
size_t preferredConcurrency
Definition: oclengine.hpp:114
size_t GetMaxWorkItems()
Definition: oclengine.hpp:214
const size_t procElemCount
Definition: oclengine.hpp:106
EventVecPtr ResetWaitEvents()
Definition: oclengine.hpp:153
const cl::Context context
Definition: oclengine.hpp:91
size_t GetPreferredConcurrency()
Definition: oclengine.hpp:186
size_t preferredSizeMultiple
Definition: oclengine.hpp:113
const bool is_gpu
Definition: oclengine.hpp:94
size_t GetGlobalAllocLimit()
Definition: oclengine.hpp:219
size_t GetProcElementCount()
Definition: oclengine.hpp:213
EventVecPtr wait_events
Definition: oclengine.hpp:98
const size_t maxWorkItems
Definition: oclengine.hpp:107
const cl::Platform platform
Definition: oclengine.hpp:89
const size_t maxAlloc
Definition: oclengine.hpp:109
"Qrack::OCLEngine" manages the single OpenCL context.
Definition: oclengine.hpp:244
size_t AddToActiveAllocSize(const int64_t &dev, size_t size)
Definition: oclengine.hpp:305
static const std::string binary_file_ext
Definition: oclengine.hpp:359
std::vector< int64_t > maxActiveAllocSizes
Definition: oclengine.hpp:362
int GetDeviceCount()
Get the count of devices in the current list.
Definition: oclengine.hpp:292
std::vector< DeviceContextPtr > all_device_contexts
Definition: oclengine.hpp:364
OCLEngine(OCLEngine const &)=delete
std::vector< size_t > activeAllocSizes
Definition: oclengine.hpp:361
void ResetActiveAllocSize(const int64_t &dev)
Definition: oclengine.hpp:342
void operator=(OCLEngine const &)=delete
static const std::vector< OCLKernelHandle > kernelHandles
Definition: oclengine.hpp:357
std::vector< DeviceContextPtr > GetDeviceContextPtrVector()
Get the list of all available devices (and their supporting objects).
Definition: oclengine.cpp:150
size_t SubtractFromActiveAllocSize(const int64_t &dev, size_t size)
Definition: oclengine.hpp:322
OCLEngine()
Definition: oclengine.cpp:442
static const std::string binary_file_prefix
Definition: oclengine.hpp:358
std::mutex allocMutex
Definition: oclengine.hpp:363
static OCLEngine & Instance()
Get a reference to the singleton instance. (The instance is instantiated on first use.)
Definition: oclengine.hpp:248
static void SaveBinary(cl::Program program, std::string path, std::string fileName)
Save the program binary:
Definition: oclengine.cpp:240
static InitOClResult InitOCL(bool buildFromSource=false, bool saveBinaries=false, std::string home="*", std::vector< int64_t > maxAllocVec={ -1 })
Initialize the OCL environment, with the option to save the generated binaries.
Definition: oclengine.cpp:278
void SetDefaultDeviceContext(DeviceContextPtr dcp)
Pick a default device, for QEngineOCL instances that don't specify a preferred device.
Definition: oclengine.cpp:159
DeviceContextPtr default_device_context
Definition: oclengine.hpp:365
size_t GetActiveAllocSize(const int64_t &dev)
Definition: oclengine.hpp:298
static cl::Program MakeProgram(bool buildFromSource, std::string path, std::shared_ptr< OCLDeviceContext > devCntxt)
Make the program, from either source or binary.
Definition: oclengine.cpp:161
static std::string GetDefaultBinaryPath()
Get default location for precompiled binaries:
Definition: oclengine.hpp:254
size_t GetDefaultDeviceID()
Get default device ID.
Definition: oclengine.hpp:294
DeviceContextPtr GetDeviceContextPtr(const int64_t &dev=-1)
Get a pointer to one of the available OpenCL contexts, by its index in the list of all contexts.
Definition: oclengine.cpp:54
void SetDeviceContextPtrVector(std::vector< DeviceContextPtr > vec, DeviceContextPtr dcp=nullptr)
Set the list of DeviceContextPtr objects available for use.
Definition: oclengine.cpp:151
Definition: complex16x2simd.hpp:25
std::shared_ptr< OCLDeviceContext > DeviceContextPtr
Definition: oclengine.hpp:47
std::shared_ptr< EventVec > EventVecPtr
Definition: oclengine.hpp:51
OCLAPI
Definition: oclapi.hpp:19
@ OCL_API_APPLY2X2_NORM_SINGLE
Definition: oclapi.hpp:23
std::vector< cl::Event > EventVec
Definition: oclengine.hpp:50
MICROSOFT_QUANTUM_DECL void U(_In_ uintq sid, _In_ uintq q, _In_ double theta, _In_ double phi, _In_ double lambda)
(External API) 3-parameter unitary gate
Definition: pinvoke_api.cpp:1362
Definition: oclengine.hpp:224
std::vector< DeviceContextPtr > all_dev_contexts
Definition: oclengine.hpp:225
InitOClResult()
Definition: oclengine.hpp:228
InitOClResult(std::vector< DeviceContextPtr > adc, DeviceContextPtr ddc)
Definition: oclengine.hpp:235
DeviceContextPtr default_dev_context
Definition: oclengine.hpp:226
Definition: oclengine.hpp:53
std::string kernelname
Definition: oclengine.hpp:55
OCLAPI oclapi
Definition: oclengine.hpp:54
OCLKernelHandle(OCLAPI o, std::string kn)
Definition: oclengine.hpp:57