Qrack  10.0
General classical-emulating-quantum development framework
qengine_opencl.hpp
Go to the documentation of this file.
1 //
3 // (C) Daniel Strano and the Qrack contributors 2017-2023. All rights reserved.
4 //
5 // This is a multithreaded, universal quantum register simulation, allowing
6 // (nonphysical) register cloning and direct measurement of probability and
7 // phase, to leverage what advantages classical emulation of qubits can have.
8 //
9 // Licensed under the GNU Lesser General Public License V3.
10 // See LICENSE.md in the project root or https://www.gnu.org/licenses/lgpl-3.0.en.html
11 // for details.
12 
13 #pragma once
14 
15 #include "common/oclengine.hpp"
16 #include "qengine.hpp"
17 #include "qengine_gpu_util.hpp"
18 
19 #if !ENABLE_OPENCL
20 #error OpenCL has not been enabled
21 #endif
22 
23 #include <list>
24 #include <mutex>
25 
26 #define BCI_ARG_LEN 10
27 #define CMPLX_NORM_LEN 6
28 #define REAL_ARG_LEN 2
29 
30 namespace Qrack {
31 
32 typedef std::shared_ptr<cl::Buffer> BufferPtr;
33 
34 class QEngineOCL;
35 typedef std::shared_ptr<QEngineOCL> QEngineOCLPtr;
36 
37 struct QueueItem {
39  size_t workItemCount;
40  size_t localGroupSize;
41  size_t deallocSize;
42  std::vector<BufferPtr> buffers;
43  size_t localBuffSize;
44  bool isSetDoNorm;
45  bool isSetRunningNorm;
46  bool doNorm;
48 
50  : api_call()
51  , workItemCount(0U)
52  , localGroupSize(0U)
53  , deallocSize(0U)
54  , buffers()
55  , localBuffSize(0U)
56  , isSetDoNorm(false)
57  , isSetRunningNorm(true)
58  , doNorm(false)
60  {
61  }
62 
63  QueueItem(OCLAPI ac, size_t wic, size_t lgs, size_t ds, std::vector<BufferPtr> b, size_t lbs)
64  : api_call(ac)
65  , workItemCount(wic)
66  , localGroupSize(lgs)
67  , deallocSize(ds)
68  , buffers(b)
69  , localBuffSize(lbs)
70  , isSetDoNorm(false)
71  , isSetRunningNorm(false)
72  , doNorm(false)
74  {
75  }
76 
77  QueueItem(bool doNrm)
78  : api_call()
79  , workItemCount(0U)
80  , localGroupSize(0U)
81  , deallocSize(0U)
82  , buffers()
83  , localBuffSize(0U)
84  , isSetDoNorm(true)
85  , isSetRunningNorm(false)
86  , doNorm(doNrm)
88  {
89  }
90 
91  QueueItem(real1_f runningNrm)
92  : api_call()
93  , workItemCount(0U)
94  , localGroupSize(0U)
95  , deallocSize(0U)
96  , buffers()
97  , localBuffSize(0U)
98  , isSetDoNorm(false)
99  , isSetRunningNorm(true)
100  , doNorm(false)
101  , runningNorm(runningNrm)
102  {
103  }
104 };
105 
106 class PoolItem {
107 public:
111 
112  std::shared_ptr<real1> probArray;
113  std::shared_ptr<real1> angleArray;
114 
115  PoolItem(cl::Context& context)
116  : probArray{ nullptr }
117  , angleArray{ nullptr }
118  {
119  cmplxBuffer = MakeBuffer(context, sizeof(complex) * CMPLX_NORM_LEN);
120  realBuffer = MakeBuffer(context, sizeof(real1) * REAL_ARG_LEN);
121  ulongBuffer = MakeBuffer(context, sizeof(bitCapIntOcl) * BCI_ARG_LEN);
122  }
123 
125 
126 protected:
127  BufferPtr MakeBuffer(const cl::Context& context, size_t size)
128  {
129  cl_int error;
130  BufferPtr toRet = std::make_shared<cl::Buffer>(context, CL_MEM_READ_ONLY, size, (void*)nullptr, &error);
131  if (error != CL_SUCCESS) {
132  if (error == CL_MEM_OBJECT_ALLOCATION_FAILURE) {
133  throw bad_alloc("CL_MEM_OBJECT_ALLOCATION_FAILURE in PoolItem::MakeBuffer()");
134  }
135  if (error == CL_OUT_OF_HOST_MEMORY) {
136  throw bad_alloc("CL_OUT_OF_HOST_MEMORY in PoolItem::MakeBuffer()");
137  }
138  if (error == CL_INVALID_BUFFER_SIZE) {
139  throw bad_alloc("CL_INVALID_BUFFER_SIZE in PoolItem::MakeBuffer()");
140  }
141  throw std::runtime_error("OpenCL error code on buffer allocation attempt: " + std::to_string(error));
142  }
143 
144  return toRet;
145  }
146 };
147 
148 typedef std::shared_ptr<PoolItem> PoolItemPtr;
149 
168 class QEngineOCL : public QEngine {
169 protected:
170  bool didInit;
175  size_t nrmGroupSize;
177  int64_t deviceID;
180  std::shared_ptr<complex> stateVec;
181  std::mutex queue_mutex;
182  cl::CommandQueue queue;
183  cl::Context context;
184  // stateBuffer is allocated as a shared_ptr, because it's the only buffer that will be acted on outside of
185  // QEngineOCL itself, specifically by QEngineOCLMulti.
189  std::vector<EventVecPtr> wait_refs;
190  std::list<QueueItem> wait_queue_items;
191  std::vector<PoolItemPtr> poolItems;
192  std::unique_ptr<real1[], void (*)(real1*)> nrmArray;
193 
194 #if defined(__APPLE__)
195  real1* _aligned_nrm_array_alloc(bitCapIntOcl allocSize)
196  {
197  void* toRet;
198  posix_memalign(&toRet, QRACK_ALIGN_SIZE, allocSize);
199  return (real1*)toRet;
200  }
201 #endif
202 
204  {
205  if (callbackError == CL_SUCCESS) {
206  return;
207  }
208 
209  wait_queue_items.clear();
210  wait_refs.clear();
211 
212  throw std::runtime_error("Failed to enqueue kernel, error code: " + std::to_string(callbackError));
213  }
214 
215  // For std::function, cl_int use might discard int qualifiers.
216  void tryOcl(std::string message, std::function<int()> oclCall)
217  {
219 
220  if (oclCall() == CL_SUCCESS) {
221  // Success
222  return;
223  }
224 
225  // Soft finish (just for this QEngineOCL)
226  clFinish();
227 
228  if (oclCall() == CL_SUCCESS) {
229  // Success after clearing QEngineOCL queue
230  return;
231  }
232 
233  // Hard finish (for the unique OpenCL device)
234  clFinish(true);
235 
236  cl_int error = oclCall();
237  if (error == CL_SUCCESS) {
238  // Success after clearing all queues for the OpenCL device
239  return;
240  }
241 
242  wait_queue_items.clear();
243  wait_refs.clear();
244 
245  // We're fatally blocked. Throw to exit.
246  throw std::runtime_error(message + ", error code: " + std::to_string(error));
247  }
248 
249  using QEngine::Copy;
250  void Copy(QInterfacePtr orig) { Copy(std::dynamic_pointer_cast<QEngineOCL>(orig)); }
251  void Copy(QEngineOCLPtr orig)
252  {
253  QEngine::Copy(std::dynamic_pointer_cast<QEngine>(orig));
254  didInit = orig->didInit;
255  usingHostRam = orig->usingHostRam;
256  unlockHostMem = orig->unlockHostMem;
257  callbackError = orig->callbackError;
258  nrmGroupCount = orig->nrmGroupCount;
259  nrmGroupSize = orig->nrmGroupSize;
260  AddAlloc(orig->totalOclAllocSize);
261  deviceID = orig->deviceID;
262  lockSyncFlags = orig->lockSyncFlags;
263  permutationAmp = orig->permutationAmp;
264  stateVec = orig->stateVec;
265  // queue_mutex = orig->queue_mutex;
266  queue = orig->queue;
267  context = orig->context;
268  stateBuffer = orig->stateBuffer;
269  nrmBuffer = orig->nrmBuffer;
270  device_context = orig->device_context;
271  wait_refs = orig->wait_refs;
272  wait_queue_items = orig->wait_queue_items;
273  poolItems = orig->poolItems;
274  }
275 
276 public:
279  static const bitCapIntOcl OclMemDenom = 3U;
280 
299  QEngineOCL(bitLenInt qBitCount, const bitCapInt& initState, qrack_rand_gen_ptr rgp = nullptr,
300  const complex& phaseFac = CMPLX_DEFAULT_ARG, bool doNorm = false, bool randomGlobalPhase = true,
301  bool useHostMem = false, int64_t devID = -1, bool useHardwareRNG = true, bool ignored = false,
302  real1_f norm_thresh = REAL1_EPSILON, std::vector<int64_t> ignored2 = {}, bitLenInt ignored4 = 0U,
303  real1_f ignored3 = _qrack_qunit_sep_thresh);
304 
306  {
307  // Theoretically, all user output is blocking, so don't throw in destructor.
308  callbackError = CL_SUCCESS;
309  // Make sure we track device allocation.
310  FreeAll();
311  }
312 
313  virtual bool isOpenCL() { return true; }
314 
315  bool IsZeroAmplitude() { return !stateBuffer; }
317  {
318  if (!stateBuffer) {
319  return ZERO_R1_F;
320  }
321 
323  }
324 
325  void ZeroAmplitudes();
326  void CopyStateVec(QEnginePtr src);
327 
328  void GetAmplitudePage(complex* pagePtr, bitCapIntOcl offset, bitCapIntOcl length);
329  void SetAmplitudePage(const complex* pagePtr, bitCapIntOcl offset, bitCapIntOcl length);
330  void SetAmplitudePage(
331  QEnginePtr pageEnginePtr, bitCapIntOcl srcOffset, bitCapIntOcl dstOffset, bitCapIntOcl length);
332  void ShuffleBuffers(QEnginePtr engine);
334 
335  bitCapIntOcl GetMaxSize() { return device_context->GetMaxAlloc() / sizeof(complex); };
336 
337  void SetPermutation(const bitCapInt& perm, const complex& phaseFac = CMPLX_DEFAULT_ARG);
338 
340  void UniformlyControlledSingleBit(const std::vector<bitLenInt>& controls, bitLenInt qubitIndex,
341  const complex* mtrxs, const std::vector<bitCapInt>& mtrxSkipPowers, const bitCapInt& mtrxSkipValueMask);
342  void UniformParityRZ(const bitCapInt& mask, real1_f angle);
343  void CUniformParityRZ(const std::vector<bitLenInt>& controls, const bitCapInt& mask, real1_f angle);
344 
345  /* Operations that have an improved implementation. */
346  using QEngine::X;
347  void X(bitLenInt target);
348  using QEngine::Z;
349  void Z(bitLenInt target);
350  using QEngine::Invert;
351  void Invert(const complex& topRight, const complex& bottomLeft, bitLenInt qubitIndex);
352  using QEngine::Phase;
353  void Phase(const complex& topLeft, const complex& bottomRight, bitLenInt qubitIndex);
354  void XMask(const bitCapInt& mask);
355  void PhaseParity(real1_f radians, const bitCapInt& mask);
356  void PhaseRootNMask(bitLenInt n, const bitCapInt& mask);
357 
358  using QEngine::Compose;
360  bitLenInt Compose(QInterfacePtr toCopy) { return Compose(std::dynamic_pointer_cast<QEngineOCL>(toCopy)); }
361  bitLenInt Compose(QEngineOCLPtr toCopy, bitLenInt start);
363  {
364  return Compose(std::dynamic_pointer_cast<QEngineOCL>(toCopy), start);
365  }
366  using QEngine::Decompose;
367  void Decompose(bitLenInt start, QInterfacePtr dest);
368  void Dispose(bitLenInt start, bitLenInt length);
369  void Dispose(bitLenInt start, bitLenInt length, const bitCapInt& disposedPerm);
370  using QEngine::Allocate;
371  bitLenInt Allocate(bitLenInt start, bitLenInt length);
372 
373  void ROL(bitLenInt shift, bitLenInt start, bitLenInt length);
374 
375 #if ENABLE_ALU
376  void INC(const bitCapInt& toAdd, bitLenInt start, bitLenInt length);
377  void CINC(const bitCapInt& toAdd, bitLenInt inOutStart, bitLenInt length, const std::vector<bitLenInt>& controls);
378  void INCS(const bitCapInt& toAdd, bitLenInt start, bitLenInt length, bitLenInt carryIndex);
379 #if ENABLE_BCD
380  void INCBCD(const bitCapInt& toAdd, bitLenInt start, bitLenInt length);
381 #endif
382  void MUL(const bitCapInt& toMul, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length);
383  void DIV(const bitCapInt& toDiv, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length);
384  void MULModNOut(
385  const bitCapInt& toMul, const bitCapInt& modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length);
386  void IMULModNOut(
387  const bitCapInt& toMul, const bitCapInt& modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length);
388  void POWModNOut(
389  const bitCapInt& base, const bitCapInt& modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length);
390  void CMUL(const bitCapInt& toMul, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length,
391  const std::vector<bitLenInt>& controls);
392  void CDIV(const bitCapInt& toDiv, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length,
393  const std::vector<bitLenInt>& controls);
394  void CMULModNOut(const bitCapInt& toMul, const bitCapInt& modN, bitLenInt inStart, bitLenInt outStart,
395  bitLenInt length, const std::vector<bitLenInt>& controls);
396  void CIMULModNOut(const bitCapInt& toMul, const bitCapInt& modN, bitLenInt inStart, bitLenInt outStart,
397  bitLenInt length, const std::vector<bitLenInt>& controls);
398  void CPOWModNOut(const bitCapInt& base, const bitCapInt& modN, bitLenInt inStart, bitLenInt outStart,
399  bitLenInt length, const std::vector<bitLenInt>& controls);
400  void FullAdd(bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut);
401  void IFullAdd(bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut);
402 
403  bitCapInt IndexedLDA(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength,
404  const unsigned char* values, bool resetValue = true);
405  bitCapInt IndexedADC(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength,
406  bitLenInt carryIndex, const unsigned char* values);
407  bitCapInt IndexedSBC(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength,
408  bitLenInt carryIndex, const unsigned char* values);
409  void Hash(bitLenInt start, bitLenInt length, const unsigned char* values);
410 
411  void CPhaseFlipIfLess(const bitCapInt& greaterPerm, bitLenInt start, bitLenInt length, bitLenInt flagIndex);
412  void PhaseFlipIfLess(const bitCapInt& greaterPerm, bitLenInt start, bitLenInt length);
413 #endif
414 
415  real1_f Prob(bitLenInt qubit);
416  real1_f CtrlOrAntiProb(bool controlState, bitLenInt control, bitLenInt target);
417  real1_f ProbReg(bitLenInt start, bitLenInt length, const bitCapInt& permutation);
418  void ProbRegAll(bitLenInt start, bitLenInt length, real1* probsArray);
419  real1_f ProbMask(const bitCapInt& mask, const bitCapInt& permutation);
420  void ProbMaskAll(const bitCapInt& mask, real1* probsArray);
421  real1_f ProbParity(const bitCapInt& mask);
422  bool ForceMParity(const bitCapInt& mask, bool result, bool doForce = true);
423  real1_f ExpectationBitsAll(const std::vector<bitLenInt>& bits, const bitCapInt& offset = ZERO_BCI);
424 
425  void SetDevice(int64_t dID);
426  int64_t GetDevice() { return deviceID; }
427  void SetDeviceList(std::vector<int64_t> dIDs);
428  std::vector<int64_t> GetDeviceList() { return std::vector<int64_t>{ deviceID }; }
429 
430  void SetQuantumState(const complex* inputState);
431  void GetQuantumState(complex* outputState);
432  void GetProbs(real1* outputProbs);
434  bitCapInt MAll();
435  complex GetAmplitude(const bitCapInt& perm);
436  void SetAmplitude(const bitCapInt& perm, const complex& amp);
437 
438  real1_f SumSqrDiff(QInterfacePtr toCompare) { return SumSqrDiff(std::dynamic_pointer_cast<QEngineOCL>(toCompare)); }
439  real1_f SumSqrDiff(QEngineOCLPtr toCompare);
440 
441  void NormalizeState(
442  real1_f nrm = REAL1_DEFAULT_ARG, real1_f norm_thresh = REAL1_DEFAULT_ARG, real1_f phaseArg = ZERO_R1_F);
443  void UpdateRunningNorm(real1_f norm_thresh = REAL1_DEFAULT_ARG);
444  void Finish() { clFinish(); };
445  bool isFinished() { return wait_queue_items.empty(); };
446 
449 
450  void PopQueue(bool isDispatch);
451  void DispatchQueue();
452 
453 protected:
454  void AddAlloc(size_t size)
455  {
456  size_t currentAlloc = OCLEngine::Instance().AddToActiveAllocSize(deviceID, size);
457  if (device_context && (currentAlloc > device_context->GetGlobalAllocLimit())) {
459  throw bad_alloc("VRAM limits exceeded in QEngineOCL::AddAlloc()");
460  }
461  totalOclAllocSize += size;
462  }
463  void SubtractAlloc(size_t size)
464  {
466  totalOclAllocSize -= size;
467  }
468 
469  BufferPtr MakeBuffer(cl_mem_flags flags, size_t size, void* host_ptr = nullptr)
470  {
472 
473  cl_int error;
474  BufferPtr toRet = std::make_shared<cl::Buffer>(context, flags, size, host_ptr, &error);
475  if (error == CL_SUCCESS) {
476  // Success
477  return toRet;
478  }
479 
480  // Soft finish (just for this QEngineOCL)
481  clFinish();
482 
483  toRet = std::make_shared<cl::Buffer>(context, flags, size, host_ptr, &error);
484  if (error == CL_SUCCESS) {
485  // Success after clearing QEngineOCL queue
486  return toRet;
487  }
488 
489  // Hard finish (for the unique OpenCL device)
490  clFinish(true);
491 
492  toRet = std::make_shared<cl::Buffer>(context, flags, size, host_ptr, &error);
493  if (error != CL_SUCCESS) {
494  if (error == CL_MEM_OBJECT_ALLOCATION_FAILURE) {
495  throw bad_alloc("CL_MEM_OBJECT_ALLOCATION_FAILURE in QEngineOCL::MakeBuffer()");
496  }
497  if (error == CL_OUT_OF_HOST_MEMORY) {
498  throw bad_alloc("CL_OUT_OF_HOST_MEMORY in QEngineOCL::MakeBuffer()");
499  }
500  if (error == CL_INVALID_BUFFER_SIZE) {
501  throw bad_alloc("CL_INVALID_BUFFER_SIZE in QEngineOCL::MakeBuffer()");
502  }
503  throw std::runtime_error("OpenCL error code on buffer allocation attempt: " + std::to_string(error));
504  }
505 
506  return toRet;
507  }
508 
509  void SwitchHostPtr(bool useHostMem)
510  {
511  if (useHostMem == usingHostRam) {
512  return;
513  }
514 
515  std::shared_ptr<complex> copyVec = AllocStateVec(maxQPowerOcl, true);
516  GetQuantumState(copyVec.get());
517 
518  if (useHostMem) {
519  stateVec = copyVec;
521  } else {
522  stateVec = nullptr;
524  tryOcl("Failed to write buffer", [&] {
525  return queue.enqueueWriteBuffer(
526  *stateBuffer, CL_TRUE, 0U, sizeof(complex) * maxQPowerOcl, copyVec.get(), ResetWaitEvents().get());
527  });
528  wait_refs.clear();
529  copyVec = nullptr;
530  }
531 
532  usingHostRam = useHostMem;
533  }
534 
535  void QueueCall(OCLAPI api_call, size_t workItemCount, size_t localGroupSize, std::vector<BufferPtr> args,
536  size_t localBuffSize = 0U, size_t deallocSize = 0U)
537  {
538  if (localBuffSize > device_context->GetLocalSize()) {
539  throw bad_alloc("Local memory limits exceeded in QEngineOCL::QueueCall()");
540  }
541  AddQueueItem(QueueItem(api_call, workItemCount, localGroupSize, deallocSize, args, localBuffSize));
542  }
543 
544  void QueueSetDoNormalize(bool doNorm) { AddQueueItem(QueueItem(doNorm)); }
545  void QueueSetRunningNorm(real1_f runningNrm) { AddQueueItem(QueueItem(runningNrm)); }
546  void AddQueueItem(const QueueItem& item)
547  {
548  bool isBase;
549  // For lock_guard:
550  if (true) {
551  std::lock_guard<std::mutex> lock(queue_mutex);
553  isBase = wait_queue_items.empty();
554  wait_queue_items.push_back(item);
555  }
556 
557  if (isBase) {
558  DispatchQueue();
559  }
560  }
561 
562  real1_f GetExpectation(bitLenInt valueStart, bitLenInt valueLength);
563 
564  std::shared_ptr<complex> AllocStateVec(bitCapIntOcl elemCount, bool doForceAlloc = false);
565  void FreeStateVec() { stateVec = nullptr; }
566  void FreeAll();
567  void ResetStateBuffer(BufferPtr nStateBuffer);
568  BufferPtr MakeStateVecBuffer(std::shared_ptr<complex> nStateVec);
569  void ReinitBuffer();
570 
571  void Compose(OCLAPI apiCall, const bitCapIntOcl* bciArgs, QEngineOCLPtr toCopy);
572 
573  void InitOCL(int64_t devID);
575 
576  real1_f ParSum(real1* toSum, bitCapIntOcl maxI);
577 
588  void LockSync(cl_map_flags flags = (CL_MAP_READ | CL_MAP_WRITE));
599  void UnlockSync();
600 
607  void clFinish(bool doHard = false);
608 
612  void clDump();
613 
614  size_t FixWorkItemCount(size_t maxI, size_t wic)
615  {
616  if (wic > maxI) {
617  // Guaranteed to be a power of two
618  return maxI;
619  }
620 
621  // Otherwise, clamp to a power of two
622  return pow2Ocl(log2Ocl(wic));
623  }
624 
/**
 * Adjust a requested local group size against a work item count.
 *
 * @param wic Work item count.
 * @param gs Requested group size.
 * @return `wic` when `gs` is larger than `wic`; otherwise `gs` minus the remainder of `wic / gs` (so `gs` itself
 * when it divides `wic` evenly).
 *
 * NOTE(review): gs == 0 with wic == 0 reaches the modulo below with a zero divisor - callers presumably never
 * pass that; confirm.
 */
size_t FixGroupSize(size_t wic, size_t gs)
{
    if (gs <= wic) {
        const size_t leftover = wic % gs;
        return gs - leftover;
    }

    return wic;
}
633 
634  void DecomposeDispose(bitLenInt start, bitLenInt length, QEngineOCLPtr dest);
635 
636  using QEngine::Apply2x2;
637  void Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, const complex* mtrx, bitLenInt bitCount,
638  const bitCapIntOcl* qPowersSorted, bool doCalcNorm, real1_f norm_thresh = REAL1_DEFAULT_ARG)
639  {
640  Apply2x2(offset1, offset2, mtrx, bitCount, qPowersSorted, doCalcNorm, SPECIAL_2X2::NONE, norm_thresh);
641  }
642  void Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, const complex* mtrx, bitLenInt bitCount,
643  const bitCapIntOcl* qPowersSorted, bool doCalcNorm, SPECIAL_2X2 special,
644  real1_f norm_thresh = REAL1_DEFAULT_ARG);
645 
646  void BitMask(bitCapIntOcl mask, OCLAPI api_call, real1_f phase = (real1_f)PI_R1);
647 
648  void ApplyM(const bitCapInt& mask, bool result, const complex& nrm);
649  void ApplyM(const bitCapInt& mask, const bitCapInt& result, const complex& nrm);
650 
651  /* Utility functions used by the operations above. */
652  void WaitCall(OCLAPI api_call, size_t workItemCount, size_t localGroupSize, std::vector<BufferPtr> args,
653  size_t localBuffSize = 0U);
654  EventVecPtr ResetWaitEvents(bool waitQueue = true);
655  void ApplyMx(OCLAPI api_call, const bitCapIntOcl* bciArgs, const complex& nrm);
656  real1_f Probx(OCLAPI api_call, const bitCapIntOcl* bciArgs);
657 
658  void ArithmeticCall(OCLAPI api_call, const bitCapIntOcl (&bciArgs)[BCI_ARG_LEN],
659  const unsigned char* values = nullptr, bitCapIntOcl valuesLength = 0U);
660  void CArithmeticCall(OCLAPI api_call, const bitCapIntOcl (&bciArgs)[BCI_ARG_LEN], bitCapIntOcl* controlPowers,
661  bitLenInt controlLen, const unsigned char* values = nullptr, bitCapIntOcl valuesLength = 0U);
662  void ROx(OCLAPI api_call, bitLenInt shift, bitLenInt start, bitLenInt length);
663 
664 #if ENABLE_ALU
665  void INCDECC(const bitCapInt& toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex);
666  void INCDECSC(const bitCapInt& toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex);
667  void INCDECSC(
668  const bitCapInt& toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt overflowIndex, bitLenInt carryIndex);
669 #if ENABLE_BCD
670  void INCDECBCDC(const bitCapInt& toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex);
671 #endif
672 
673  void INT(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length);
674  void CINT(
675  OCLAPI api_call, bitCapIntOcl toMod, bitLenInt start, bitLenInt length, const std::vector<bitLenInt>& controls);
676  void INTC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex);
677  void INTS(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt overflowIndex);
678  void INTSC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex);
679  void INTSC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt overflowIndex,
680  bitLenInt carryIndex);
681 #if ENABLE_BCD
682  void INTBCD(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length);
683  void INTBCDC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex);
684 #endif
685  void xMULx(OCLAPI api_call, const bitCapIntOcl* bciArgs, BufferPtr controlBuffer);
686  void MULx(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length);
687  void MULModx(OCLAPI api_call, bitCapIntOcl toMod, bitCapIntOcl modN, bitLenInt inOutStart, bitLenInt carryStart,
688  bitLenInt length);
689  void CMULx(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length,
690  const std::vector<bitLenInt>& controls);
691  void CMULModx(OCLAPI api_call, bitCapIntOcl toMod, bitCapIntOcl modN, bitLenInt inOutStart, bitLenInt carryStart,
692  bitLenInt length, const std::vector<bitLenInt>& controls);
693  void FullAdx(
694  bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut, OCLAPI api_call);
695  void PhaseFlipX(OCLAPI api_call, const bitCapIntOcl* bciArgs);
696 
697  bitCapIntOcl OpIndexed(OCLAPI api_call, bitCapIntOcl carryIn, bitLenInt indexStart, bitLenInt indexLength,
698  bitLenInt valueStart, bitLenInt valueLength, bitLenInt carryIndex, const unsigned char* values);
699 #endif
700 
701  void ClearBuffer(BufferPtr buff, bitCapIntOcl offset, bitCapIntOcl size);
702 };
703 
704 } // namespace Qrack
size_t AddToActiveAllocSize(const int64_t &dev, size_t size)
Definition: oclengine.hpp:330
size_t SubtractFromActiveAllocSize(const int64_t &dev, size_t size)
Definition: oclengine.hpp:347
static OCLEngine & Instance()
Get a pointer to the Instance of the singleton. (The instance will be instantiated,...
Definition: oclengine.hpp:270
BufferPtr ulongBuffer
Definition: qengine_cuda.hpp:123
~PoolItem()
Definition: qengine_opencl.hpp:124
BufferPtr cmplxBuffer
Definition: qengine_cuda.hpp:121
BufferPtr MakeBuffer(size_t size)
Definition: qengine_cuda.hpp:140
BufferPtr realBuffer
Definition: qengine_cuda.hpp:122
std::shared_ptr< real1 > angleArray
Definition: qengine_cuda.hpp:126
BufferPtr MakeBuffer(const cl::Context &context, size_t size)
Definition: qengine_opencl.hpp:127
PoolItem(cl::Context &context)
Definition: qengine_opencl.hpp:115
std::shared_ptr< real1 > probArray
Definition: qengine_cuda.hpp:125
OpenCL enhanced QEngineCPU implementation.
Definition: qengine_opencl.hpp:168
real1_f ProbMask(const bitCapInt &mask, const bitCapInt &permutation)
Direct measure of masked permutation probability.
Definition: opencl.cpp:1800
void SetQuantumState(const complex *inputState)
Set arbitrary pure quantum state, in unsigned int permutation basis.
Definition: opencl.cpp:2966
cl::Context context
Definition: qengine_opencl.hpp:183
void GetProbs(real1 *outputProbs)
Get all probabilities, in unsigned int permutation basis.
Definition: opencl.cpp:3113
real1_f GetExpectation(bitLenInt valueStart, bitLenInt valueLength)
Definition: opencl.cpp:2024
void IFullAdd(bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut)
Inverse of FullAdd.
Definition: opencl.cpp:2523
void Copy(QInterfacePtr orig)
Definition: qengine_opencl.hpp:250
QEnginePtr CloneEmpty()
Clone this QEngine's settings, with a zeroed state vector.
Definition: opencl.cpp:3225
void checkCallbackError()
Definition: qengine_opencl.hpp:203
void INTBCDC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
Add or Subtract integer (BCD, with carry)
Definition: opencl.cpp:2412
void FullAdd(bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut)
Quantum analog of classical "Full Adder" gate.
Definition: opencl.cpp:2517
size_t nrmGroupSize
Definition: qengine_opencl.hpp:175
int64_t GetDevice()
Get GPU device ID.
Definition: qengine_opencl.hpp:426
std::unique_ptr< real1[], void(*)(real1 *)> nrmArray
Definition: qengine_opencl.hpp:192
bool usingHostRam
Definition: qengine_opencl.hpp:171
real1_f ProbReg(bitLenInt start, bitLenInt length, const bitCapInt &permutation)
Direct measure of register permutation probability.
Definition: opencl.cpp:1750
void AddQueueItem(const QueueItem &item)
Definition: qengine_opencl.hpp:546
size_t FixWorkItemCount(size_t maxI, size_t wic)
Definition: qengine_opencl.hpp:614
void UpdateRunningNorm(real1_f norm_thresh=REAL1_DEFAULT_ARG)
Force a calculation of the norm of the state vector, in order to make it unit length before the next ...
Definition: opencl.cpp:3303
QEngineOCL(bitLenInt qBitCount, const bitCapInt &initState, qrack_rand_gen_ptr rgp=nullptr, const complex &phaseFac=CMPLX_DEFAULT_ARG, bool doNorm=false, bool randomGlobalPhase=true, bool useHostMem=false, int64_t devID=-1, bool useHardwareRNG=true, bool ignored=false, real1_f norm_thresh=REAL1_EPSILON, std::vector< int64_t > ignored2={}, bitLenInt ignored4=0U, real1_f ignored3=_qrack_qunit_sep_thresh)
Initialize a Qrack::QEngineOCL object.
Definition: opencl.cpp:68
void FullAdx(bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut, OCLAPI api_call)
Definition: opencl.cpp:2528
std::list< QueueItem > wait_queue_items
Definition: qengine_opencl.hpp:190
void INCDECC(const bitCapInt &toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
Common driver method behind INCC and DECC.
Definition: opencl.cpp:2258
DeviceContextPtr device_context
Definition: qengine_opencl.hpp:188
size_t nrmGroupCount
Definition: qengine_opencl.hpp:174
void ClearBuffer(BufferPtr buff, bitCapIntOcl offset, bitCapIntOcl size)
Definition: opencl.cpp:3399
std::vector< EventVecPtr > wait_refs
Definition: qengine_opencl.hpp:189
void PhaseRootNMask(bitLenInt n, const bitCapInt &mask)
Masked PhaseRootN gate.
Definition: opencl.cpp:765
void IMULModNOut(const bitCapInt &toMul, const bitCapInt &modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length)
Inverse of multiplication modulo N by integer, (out of place)
Definition: opencl.cpp:2493
bitLenInt Compose(QInterfacePtr toCopy)
Combine another QInterface with this one, after the last bit index of this one.
Definition: qengine_opencl.hpp:360
bool unlockHostMem
Definition: qengine_opencl.hpp:172
void XMask(const bitCapInt &mask)
Masked X gate.
Definition: opencl.cpp:738
void INC(const bitCapInt &toAdd, bitLenInt start, bitLenInt length)
Increment integer (without sign, with carry)
Definition: opencl.cpp:2210
void SwitchHostPtr(bool useHostMem)
Switch to/from host/device state vector buffer.
Definition: qengine_opencl.hpp:509
void CINT(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt start, bitLenInt length, const std::vector< bitLenInt > &controls)
Add or Subtract integer (without sign or carry, with controls)
Definition: opencl.cpp:2171
void INTS(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt overflowIndex)
Add or Subtract integer (with overflow, without carry)
Definition: opencl.cpp:2264
void ApplyMx(OCLAPI api_call, const bitCapIntOcl *bciArgs, const complex &nrm)
Definition: opencl.cpp:1225
size_t totalOclAllocSize
Definition: qengine_opencl.hpp:176
bitCapInt IndexedLDA(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength, const unsigned char *values, bool resetValue=true)
Set 8 bit register bits based on read from classical memory.
Definition: opencl.cpp:2810
void ResetStateBuffer(BufferPtr nStateBuffer)
Definition: opencl.cpp:666
void ROL(bitLenInt shift, bitLenInt start, bitLenInt length)
"Circular shift left" - shift bits left, and carry last bits.
Definition: opencl.cpp:2140
void UniformlyControlledSingleBit(const std::vector< bitLenInt > &controls, bitLenInt qubitIndex, const complex *mtrxs, const std::vector< bitCapInt > &mtrxSkipPowers, const bitCapInt &mtrxSkipValueMask)
Definition: opencl.cpp:1072
real1_f CtrlOrAntiProb(bool controlState, bitLenInt control, bitLenInt target)
Definition: opencl.cpp:1713
void CMUL(const bitCapInt &toMul, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Controlled multiplication by integer.
Definition: opencl.cpp:2553
void WaitCall(OCLAPI api_call, size_t workItemCount, size_t localGroupSize, std::vector< BufferPtr > args, size_t localBuffSize=0U)
Definition: opencl.cpp:400
PoolItemPtr GetFreePoolItem()
Definition: opencl.cpp:371
real1_f SumSqrDiff(QInterfacePtr toCompare)
Calculates (1 - <\psi_e|\psi_c>) between states |\psi_c> and |\psi_e>.
Definition: qengine_opencl.hpp:438
void SetAmplitudePage(const complex *pagePtr, bitCapIntOcl offset, bitCapIntOcl length)
Copy a "page" of amplitudes from pagePtr into this QEngine's internal state.
Definition: opencl.cpp:152
void clDump()
Dumps the remaining asynchronous wait event list or queue of OpenCL events, for the current queue.
Definition: opencl.cpp:351
void ArithmeticCall(OCLAPI api_call, const bitCapIntOcl(&bciArgs)[BCI_ARG_LEN], const unsigned char *values=nullptr, bitCapIntOcl valuesLength=0U)
Definition: opencl.cpp:2044
void ZeroAmplitudes()
Set all amplitudes to 0, and optionally temporarily deallocate state vector RAM.
Definition: opencl.cpp:100
complex GetAmplitude(const bitCapInt &perm)
Get the representational amplitude of a full permutation.
Definition: opencl.cpp:3048
QInterfacePtr Copy()
Copy this QInterface.
Definition: opencl.cpp:3235
void InitOCL(int64_t devID)
Definition: opencl.cpp:664
std::mutex queue_mutex
Definition: qengine_opencl.hpp:181
bitCapInt HighestProbAll()
Get highest probability permutation.
Definition: opencl.cpp:2980
void BitMask(bitCapIntOcl mask, OCLAPI api_call, real1_f phase=(real1_f) PI_R1)
Definition: opencl.cpp:1031
void MULx(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length)
Definition: opencl.cpp:2676
std::shared_ptr< complex > AllocStateVec(bitCapIntOcl elemCount, bool doForceAlloc=false)
Definition: opencl.cpp:3351
void NormalizeState(real1_f nrm=REAL1_DEFAULT_ARG, real1_f norm_thresh=REAL1_DEFAULT_ARG, real1_f phaseArg=ZERO_R1_F)
Apply the normalization factor found by UpdateRunningNorm() or on the fly by a single bit gate.
Definition: opencl.cpp:3246
void CDIV(const bitCapInt &toDiv, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Controlled division by integer.
Definition: opencl.cpp:2575
void PhaseParity(real1_f radians, const bitCapInt &mask)
Parity phase gate.
Definition: opencl.cpp:750
void QueueCall(OCLAPI api_call, size_t workItemCount, size_t localGroupSize, std::vector< BufferPtr > args, size_t localBuffSize=0U, size_t deallocSize=0U)
Definition: qengine_opencl.hpp:535
void SetPermutation(const bitCapInt &perm, const complex &phaseFac=CMPLX_DEFAULT_ARG)
Set to a specific permutation of all qubits.
Definition: opencl.cpp:668
void CIMULModNOut(const bitCapInt &toMul, const bitCapInt &modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Inverse of controlled multiplication modulo N by integer, (out of place)
Definition: opencl.cpp:2616
void CMULModx(OCLAPI api_call, bitCapIntOcl toMod, bitCapIntOcl modN, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Definition: opencl.cpp:2767
void INT(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length)
Add or Subtract integer (without sign or carry)
Definition: opencl.cpp:2144
cl_int callbackError
Definition: qengine_opencl.hpp:173
void CMULx(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Definition: opencl.cpp:2724
bitCapInt IndexedSBC(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength, bitLenInt carryIndex, const unsigned char *values)
Subtract based on an indexed load from classical memory.
Definition: opencl.cpp:2900
void Finish()
If asynchronous work is still running, block until it finishes.
Definition: qengine_opencl.hpp:444
cl_map_flags lockSyncFlags
Definition: qengine_opencl.hpp:178
void CopyStateVec(QEnginePtr src)
Exactly copy the state vector of a different QEngine instance.
Definition: opencl.cpp:115
void CPOWModNOut(const bitCapInt &base, const bitCapInt &modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Controlled, raise a classical base to a quantum power, modulo N, (out of place)
Definition: opencl.cpp:2634
void MULModx(OCLAPI api_call, bitCapIntOcl toMod, bitCapIntOcl modN, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length)
Definition: opencl.cpp:2697
std::shared_ptr< complex > stateVec
Definition: qengine_opencl.hpp:180
void Hash(bitLenInt start, bitLenInt length, const unsigned char *values)
Set 8 bit register bits based on read from classical memory.
Definition: opencl.cpp:2907
void clFinish(bool doHard=false)
Finishes the asynchronous wait event list or queue of OpenCL events.
Definition: opencl.cpp:327
void DIV(const bitCapInt &toDiv, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length)
Divide by integer.
Definition: opencl.cpp:2472
QInterfacePtr Clone()
Clone this QInterface.
Definition: opencl.cpp:3200
void PhaseFlipX(OCLAPI api_call, const bitCapIntOcl *bciArgs)
Definition: opencl.cpp:2916
void CPhaseFlipIfLess(const bitCapInt &greaterPerm, bitLenInt start, bitLenInt length, bitLenInt flagIndex)
The 6502 uses its carry flag also as a greater-than/less-than flag, for the CMP operation.
Definition: opencl.cpp:2936
std::vector< int64_t > GetDeviceList()
Get the device index list.
Definition: qengine_opencl.hpp:428
void INCDECSC(const bitCapInt &toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
Increment integer (with sign, with carry)
Definition: opencl.cpp:2368
void Z(bitLenInt target)
Z gate.
Definition: opencl.cpp:704
void AddAlloc(size_t size)
Definition: qengine_opencl.hpp:454
virtual void Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, const complex *mtrx, bitLenInt bitCount, bitCapIntOcl const *qPowersSorted, bool doCalcNorm, real1_f norm_thresh=REAL1_DEFAULT_ARG)=0
void INTC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
Add or Subtract integer (without sign, with carry)
Definition: opencl.cpp:2226
void INCS(const bitCapInt &toAdd, bitLenInt start, bitLenInt length, bitLenInt carryIndex)
Increment integer (with sign, without carry)
Definition: opencl.cpp:2296
void ProbRegAll(bitLenInt start, bitLenInt length, real1 *probsArray)
Definition: opencl.cpp:1762
bitCapIntOcl GetMaxSize()
Definition: qengine_opencl.hpp:335
void CMULModNOut(const bitCapInt &toMul, const bitCapInt &modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Controlled multiplication modulo N by integer, (out of place)
Definition: opencl.cpp:2595
void Copy(QEngineOCLPtr orig)
Definition: qengine_opencl.hpp:251
size_t FixGroupSize(size_t wic, size_t gs)
Definition: qengine_opencl.hpp:625
void SubtractAlloc(size_t size)
Definition: qengine_opencl.hpp:463
void ReinitBuffer()
Definition: opencl.cpp:3392
void FreeAll()
Definition: opencl.cpp:90
void PhaseFlipIfLess(const bitCapInt &greaterPerm, bitLenInt start, bitLenInt length)
This is an expedient for an adaptive Grover's search for a function's global minimum.
Definition: opencl.cpp:2952
void GetQuantumState(complex *outputState)
Get pure quantum state, in unsigned int permutation basis.
Definition: opencl.cpp:3097
void ROx(OCLAPI api_call, bitLenInt shift, bitLenInt start, bitLenInt length)
Definition: opencl.cpp:2114
void Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, const complex *mtrx, bitLenInt bitCount, const bitCapIntOcl *qPowersSorted, bool doCalcNorm, real1_f norm_thresh=REAL1_DEFAULT_ARG)
Definition: qengine_opencl.hpp:637
virtual QInterfacePtr Decompose(bitLenInt start, bitLenInt length)
Definition: qengine.hpp:293
void CINC(const bitCapInt &toAdd, bitLenInt inOutStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Add integer (without sign, with controls)
Definition: opencl.cpp:2215
cl::CommandQueue queue
Definition: qengine_opencl.hpp:182
void SetAmplitude(const bitCapInt &perm, const complex &amp)
Sets the representational amplitude of a full permutation.
Definition: opencl.cpp:3066
void tryOcl(std::string message, std::function< int()> oclCall)
Definition: qengine_opencl.hpp:216
void QueueSetRunningNorm(real1_f runningNrm)
Add an operation to the (OpenCL) queue, to set the value of runningNorm, which is the normalization c...
Definition: qengine_opencl.hpp:545
void QueueSetDoNormalize(bool doNorm)
Add an operation to the (OpenCL) queue, to set the value of doNormalize, which controls whether to au...
Definition: qengine_opencl.hpp:544
bool IsZeroAmplitude()
Returns "true" only if amplitudes are all totally 0.
Definition: qengine_opencl.hpp:315
bitLenInt Compose(QEngineOCLPtr toCopy)
Definition: opencl.cpp:1368
void CUniformParityRZ(const std::vector< bitLenInt > &controls, const bitCapInt &mask, real1_f angle)
If the controls are set and the target qubit set parity is odd, this applies a phase factor of $e^{i \cdot \mathrm{angle}}$.
Definition: opencl.cpp:1174
BufferPtr stateBuffer
Definition: qengine_opencl.hpp:186
void MUL(const bitCapInt &toMul, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length)
Multiply by integer.
Definition: opencl.cpp:2455
bitCapIntOcl OpIndexed(OCLAPI api_call, bitCapIntOcl carryIn, bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength, bitLenInt carryIndex, const unsigned char *values)
Add or Subtract based on an indexed load from classical memory.
Definition: opencl.cpp:2844
void PopQueue(bool isDispatch)
Definition: opencl.cpp:409
void UnlockSync()
Unlocks synchronization between the state vector buffer and general RAM, so the state vector can be o...
Definition: opencl.cpp:307
EventVecPtr ResetWaitEvents(bool waitQueue=true)
Definition: opencl.cpp:384
void DispatchQueue()
Definition: opencl.cpp:444
bitLenInt Compose(QInterfacePtr toCopy, bitLenInt start)
Compose() a QInterface peer, inserting its qubit into index order at start index.
Definition: qengine_opencl.hpp:362
void MULModNOut(const bitCapInt &toMul, const bitCapInt &modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length)
Multiplication modulo N by integer, (out of place)
Definition: opencl.cpp:2483
void SetDevice(int64_t dID)
Set GPU device ID.
Definition: opencl.cpp:530
bool didInit
Definition: qengine_opencl.hpp:170
void xMULx(OCLAPI api_call, const bitCapIntOcl *bciArgs, BufferPtr controlBuffer)
Definition: opencl.cpp:2648
real1_f ParSum(real1 *toSum, bitCapIntOcl maxI)
Definition: opencl.cpp:652
bool isFinished()
Returns "false" if asynchronous work is still running, and "true" if all previously dispatched asynch...
Definition: qengine_opencl.hpp:445
void GetAmplitudePage(complex *pagePtr, bitCapIntOcl offset, bitCapIntOcl length)
Copy a "page" of amplitudes from this QEngine's internal state, into pagePtr.
Definition: opencl.cpp:138
void UniformParityRZ(const bitCapInt &mask, real1_f angle)
If the target qubit set parity is odd, this applies a phase factor of $e^{i \cdot \mathrm{angle}}$.
Definition: opencl.cpp:1140
void Phase(const complex &topLeft, const complex &bottomRight, bitLenInt qubitIndex)
Apply a single bit transformation that only affects phase.
Definition: opencl.cpp:722
real1_f Probx(OCLAPI api_call, const bitCapIntOcl *bciArgs)
Definition: opencl.cpp:1662
bool ForceMParity(const bitCapInt &mask, bool result, bool doForce=true)
Act as if a measurement of parity of the masked set of qubits was applied, except force the (usual...
Definition: opencl.cpp:1955
real1_f ExpectationBitsAll(const std::vector< bitLenInt > &bits, const bitCapInt &offset=ZERO_BCI)
Get permutation expectation value of bits.
Definition: opencl.cpp:1986
void Dispose(bitLenInt start, bitLenInt length)
Minimally decompose a set of contiguous bits from the separably composed unit, and discard the separa...
Definition: opencl.cpp:1592
static const bitCapIntOcl OclMemDenom
1 / OclMemDenom is the maximum fraction of total OCL device RAM that a single state vector should occ...
Definition: qengine_opencl.hpp:279
int64_t deviceID
Definition: qengine_opencl.hpp:177
bitCapInt MAll()
Measure permutation state of all coherent bits.
Definition: opencl.cpp:3015
void ShuffleBuffers(QEnginePtr engine)
Swap the high half of this engine with the low half of another.
Definition: opencl.cpp:228
void LockSync(cl_map_flags flags=(CL_MAP_READ|CL_MAP_WRITE))
Locks synchronization between the state vector buffer and general RAM, so the state vector can be dir...
Definition: opencl.cpp:284
~QEngineOCL()
Definition: qengine_opencl.hpp:305
void INCDECBCDC(const bitCapInt &toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
Increment integer (BCD, with carry)
Definition: opencl.cpp:2448
complex permutationAmp
Definition: qengine_opencl.hpp:179
BufferPtr nrmBuffer
Definition: qengine_opencl.hpp:187
void INCBCD(const bitCapInt &toAdd, bitLenInt start, bitLenInt length)
Increment integer (BCD)
Definition: opencl.cpp:2406
bitCapInt IndexedADC(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength, bitLenInt carryIndex, const unsigned char *values)
Add based on an indexed load from classical memory.
Definition: opencl.cpp:2893
bitLenInt Allocate(bitLenInt start, bitLenInt length)
Allocate new "length" count of |0> state qubits at specified qubit index start position.
Definition: opencl.cpp:1650
real1_f ProbParity(const bitCapInt &mask)
Overall probability of any odd permutation of the masked set of bits.
Definition: opencl.cpp:1934
virtual bool isOpenCL()
Returns "true" if current simulation is OpenCL-based.
Definition: qengine_opencl.hpp:313
void ApplyM(const bitCapInt &mask, bool result, const complex &nrm)
Definition: opencl.cpp:1249
std::vector< PoolItemPtr > poolItems
Definition: qengine_opencl.hpp:191
void FreeStateVec()
Definition: qengine_opencl.hpp:565
virtual void X(bitLenInt q)
Definition: qengine.hpp:196
void INTBCD(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length)
Add or Subtract integer (BCD)
Definition: opencl.cpp:2375
void INTSC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
Add or Subtract integer (with sign, with carry)
Definition: opencl.cpp:2347
BufferPtr MakeStateVecBuffer(std::shared_ptr< complex > nStateVec)
Definition: opencl.cpp:3379
void POWModNOut(const bitCapInt &base, const bitCapInt &modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length)
Raise a classical base to a quantum power, modulo N, (out of place)
Definition: opencl.cpp:2502
void ProbMaskAll(const bitCapInt &mask, real1 *probsArray)
Direct measure of masked permutation probability.
Definition: opencl.cpp:1850
BufferPtr MakeBuffer(cl_mem_flags flags, size_t size, void *host_ptr=nullptr)
Definition: qengine_opencl.hpp:469
real1_f FirstNonzeroPhase()
Get phase of lowest permutation nonzero amplitude.
Definition: qengine_opencl.hpp:316
void CArithmeticCall(OCLAPI api_call, const bitCapIntOcl(&bciArgs)[BCI_ARG_LEN], bitCapIntOcl *controlPowers, bitLenInt controlLen, const unsigned char *values=nullptr, bitCapIntOcl valuesLength=0U)
Definition: opencl.cpp:2049
void Invert(const complex &topRight, const complex &bottomLeft, bitLenInt qubitIndex)
Apply a single bit transformation that reverses bit probability and might affect phase.
Definition: opencl.cpp:711
void SetDeviceList(std::vector< int64_t > dIDs)
Set the device index list, if more than one device is available.
Definition: opencl.cpp:521
void DecomposeDispose(bitLenInt start, bitLenInt length, QEngineOCLPtr dest)
Definition: opencl.cpp:1413
real1_f Prob(bitLenInt qubit)
PSEUDO-QUANTUM Direct measure of bit probability to be in |1> state.
Definition: opencl.cpp:1693
Abstract QEngine implementation, for all "Schroedinger method" engines.
Definition: qengine.hpp:31
virtual void Copy(QInterfacePtr orig)
Copy this QInterface.
Definition: qinterface.hpp:222
bitCapIntOcl maxQPowerOcl
Definition: qengine.hpp:40
virtual void Decompose(bitLenInt start, QInterfacePtr dest)=0
Minimally decompose a set of contiguous bits from the separably composed unit, into "destination".
virtual void X(bitLenInt qubit)
X gate.
Definition: qinterface.hpp:1091
virtual void Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, const complex *mtrx, bitLenInt bitCount, bitCapIntOcl const *qPowersSorted, bool doCalcNorm, real1_f norm_thresh=REAL1_DEFAULT_ARG)=0
virtual bitLenInt Allocate(bitLenInt length)
Allocate new "length" count of |0> state qubits at end of qubit index position.
Definition: qinterface.hpp:477
virtual bitLenInt Compose(QInterfacePtr toCopy)
Combine another QInterface with this one, after the last bit index of this one.
Definition: qinterface.hpp:371
Definition: qengine_gpu_util.hpp:21
Half-precision floating-point type.
Definition: half.hpp:2222
virtual void UniformlyControlledSingleBit(const std::vector< bitLenInt > &controls, bitLenInt qubit, const complex *mtrxs)
Apply a "uniformly controlled" arbitrary single bit unitary transformation.
Definition: qinterface.hpp:634
virtual void Phase(const complex &topLeft, const complex &bottomRight, bitLenInt qubit)
Apply a single bit transformation that only affects phase.
Definition: qinterface.hpp:523
virtual void Invert(const complex &topRight, const complex &bottomLeft, bitLenInt qubit)
Apply a single bit transformation that reverses bit probability and might affect phase.
Definition: qinterface.hpp:536
virtual void Z(bitLenInt qubit)
Z gate.
Definition: qinterface.hpp:1124
virtual void U(bitLenInt target, real1_f theta, real1_f phi, real1_f lambda)
General unitary gate.
Definition: rotational.cpp:18
virtual real1_f FirstNonzeroPhase()
Get phase of lowest permutation nonzero amplitude.
Definition: qinterface.hpp:3021
GLOSSARY: bitLenInt - "bit-length integer" - unsigned integer ID of qubit position in register bitCap...
Definition: complex16x2simd.hpp:25
std::shared_ptr< QEngine > QEnginePtr
Definition: qrack_types.hpp:159
std::shared_ptr< OCLDeviceContext > DeviceContextPtr
Definition: oclengine.hpp:50
std::shared_ptr< QInterface > QInterfacePtr
Definition: qinterface.hpp:29
const real1_f _qrack_qunit_sep_thresh
Definition: qrack_functions.hpp:249
std::shared_ptr< QEngineOCL > QEngineOCLPtr
Definition: qengine_opencl.hpp:34
std::shared_ptr< EventVec > EventVecPtr
Definition: oclengine.hpp:54
bitLenInt log2Ocl(bitCapIntOcl n)
Definition: qrack_functions.hpp:95
void U(quid sid, bitLenInt q, real1_f theta, real1_f phi, real1_f lambda)
(External API) 3-parameter unitary gate
Definition: wasm_api.cpp:1199
std::complex< real1 > complex
Definition: qrack_types.hpp:136
unsigned long cl_map_flags
Definition: qengine_cuda.hpp:31
QRACK_CONST real1 REAL1_EPSILON
Definition: qrack_types.hpp:208
QRACK_CONST real1 ONE_R1
Definition: qrack_types.hpp:193
float real1_f
Definition: qrack_types.hpp:103
QRACK_CONST complex CMPLX_DEFAULT_ARG
Definition: qrack_types.hpp:267
std::shared_ptr< PoolItem > PoolItemPtr
Definition: qengine_cuda.hpp:162
SPECIAL_2X2
Definition: qengine_gpu_util.hpp:19
@ NONE
Definition: qengine_gpu_util.hpp:19
OCLAPI
Definition: oclapi.hpp:19
std::shared_ptr< void > BufferPtr
Definition: qengine_cuda.hpp:45
QRACK_CONST real1 PI_R1
Definition: qrack_types.hpp:186
unsigned long cl_mem_flags
Definition: qengine_cuda.hpp:32
const bitCapInt ZERO_BCI
Definition: qrack_types.hpp:138
bitCapIntOcl pow2Ocl(const bitLenInt &p)
Definition: qrack_functions.hpp:144
#define CL_MAP_WRITE
Definition: qengine_cuda.hpp:36
#define CL_MEM_READ_ONLY
Definition: qengine_cuda.hpp:40
#define CL_MAP_READ
Definition: qengine_cuda.hpp:35
#define BCI_ARG_LEN
Definition: qengine_opencl.hpp:26
#define CMPLX_NORM_LEN
Definition: qengine_opencl.hpp:27
#define REAL_ARG_LEN
Definition: qengine_opencl.hpp:28
#define REAL1_DEFAULT_ARG
Definition: qrack_types.hpp:185
#define bitLenInt
Definition: qrack_types.hpp:42
#define ZERO_R1_F
Definition: qrack_types.hpp:168
#define qrack_rand_gen_ptr
Definition: qrack_types.hpp:164
#define bitCapInt
Definition: qrack_types.hpp:66
#define bitCapIntOcl
Definition: qrack_types.hpp:54
#define QRACK_ALIGN_SIZE
Definition: qrack_types.hpp:165
Definition: qengine_cuda.hpp:50
QueueItem(OCLAPI ac, size_t wic, size_t lgs, size_t ds, std::vector< BufferPtr > b, size_t lbs)
Definition: qengine_opencl.hpp:63
QueueItem(real1_f runningNrm)
Definition: qengine_opencl.hpp:91
bool doNorm
Definition: qengine_cuda.hpp:59
size_t workItemCount
Definition: qengine_cuda.hpp:52
std::vector< BufferPtr > buffers
Definition: qengine_cuda.hpp:55
size_t deallocSize
Definition: qengine_cuda.hpp:54
QueueItem()
Definition: qengine_opencl.hpp:49
bool isSetRunningNorm
Definition: qengine_cuda.hpp:58
QueueItem(bool doNrm)
Definition: qengine_opencl.hpp:77
size_t localBuffSize
Definition: qengine_cuda.hpp:56
OCLAPI api_call
Definition: qengine_cuda.hpp:51
bool isSetDoNorm
Definition: qengine_cuda.hpp:57
size_t localGroupSize
Definition: qengine_cuda.hpp:53
real1 runningNorm
Definition: qengine_cuda.hpp:60