15 #include "common/cudaengine.cuh"
20 #error CUDA has not been enabled
25 #define BCI_ARG_LEN 10
26 #define CMPLX_NORM_LEN 6
27 #define REAL_ARG_LEN 2
35 #define CL_MAP_READ (1 << 0)
36 #define CL_MAP_WRITE (1 << 1)
38 #define CL_MEM_READ_WRITE (1 << 0)
39 #define CL_MEM_WRITE_ONLY (1 << 1)
40 #define CL_MEM_READ_ONLY (1 << 2)
41 #define CL_MEM_USE_HOST_PTR (1 << 3)
42 #define CL_MEM_COPY_HOST_PTR (1 << 5)
76 QueueItem(
OCLAPI ac,
size_t wic,
size_t lgs,
size_t ds, std::vector<BufferPtr> b,
size_t lbs)
144 BufferPtr toRet = std::shared_ptr<void>(
AllocRaw(size, &error), [](
void* c) { cudaFree(c); });
146 if (error != cudaSuccess) {
147 throw std::runtime_error(
"CUDA error code on buffer allocation attempt: " + std::to_string(error));
156 *errorPtr = cudaMalloc(&toRet, size);
205 void tryCuda(std::string message, std::function<cudaError_t()> oclCall)
207 if (oclCall() == cudaSuccess) {
215 if (oclCall() == cudaSuccess) {
223 cudaError_t error = oclCall();
224 if (error == cudaSuccess) {
232 throw std::runtime_error(message +
", error code: " + std::to_string(error));
260 bool useHostMem =
false, int64_t devID = -1,
bool useHardwareRNG =
true,
bool ignored =
false,
298 tryCuda(
"Failed to write buffer", [&] {
331 void QueueCall(
OCLAPI api_call,
size_t workItemCount,
size_t localGroupSize, std::vector<BufferPtr> args,
332 size_t localBuffSize = 0
U,
size_t deallocSize = 0
U)
335 throw bad_alloc(
"Local memory limits exceeded in QEngineCUDA::QueueCall()");
347 complex const* mtrxs,
const std::vector<bitCapInt>& mtrxSkipPowers,
bitCapInt mtrxSkipValueMask);
369 return Compose(std::dynamic_pointer_cast<QEngineCUDA>(toCopy), start);
393 const std::vector<bitLenInt>& controls);
395 const std::vector<bitLenInt>& controls);
397 const std::vector<bitLenInt>& controls);
399 const std::vector<bitLenInt>& controls);
401 const std::vector<bitLenInt>& controls);
406 const unsigned char* values,
bool resetValue =
true);
408 bitLenInt carryIndex,
const unsigned char* values);
410 bitLenInt carryIndex,
const unsigned char* values);
439 return SumSqrDiff(std::dynamic_pointer_cast<QEngineCUDA>(toCompare));
458 size_t currentAlloc = CUDAEngine::Instance().AddToActiveAllocSize(
deviceID, size);
460 CUDAEngine::Instance().SubtractFromActiveAllocSize(
deviceID, size);
461 throw bad_alloc(
"VRAM limits exceeded in QEngineCUDA::AddAlloc()");
467 CUDAEngine::Instance().SubtractFromActiveAllocSize(
deviceID, size);
476 AllocRaw(flags, host_ptr, size, &error), [
this, flags](
void* c) {
FreeRaw(flags, c); });
478 if (error == cudaSuccess) {
486 toRet = std::shared_ptr<void>(
487 AllocRaw(flags, host_ptr, size, &error), [
this, flags](
void* c) {
FreeRaw(flags, c); });
489 if (error == cudaSuccess) {
497 toRet = std::shared_ptr<void>(
498 AllocRaw(flags, host_ptr, size, &error), [
this, flags](
void* c) {
FreeRaw(flags, c); });
500 if (error != cudaSuccess) {
501 throw std::runtime_error(
"CUDA error code on buffer allocation attempt: " + std::to_string(error));
509 void* toRet = host_ptr;
510 *errorPtr = (flags &
CL_MEM_USE_HOST_PTR) ? cudaHostRegister(host_ptr, size, cudaHostRegisterDefault)
511 : cudaMalloc(&toRet, size);
513 cudaMemcpy(toRet, host_ptr, size, cudaMemcpyHostToDevice);
522 cudaHostUnregister(c);
596 return gs - (wic % gs);
617 void WaitCall(
OCLAPI api_call,
size_t workItemCount,
size_t localGroupSize, std::vector<BufferPtr> args,
618 size_t localBuffSize = 0
U);
655 const std::vector<bitLenInt>& controls);
657 bitLenInt length,
const std::vector<bitLenInt>& controls);
Definition: qengine_cuda.hpp:119
BufferPtr ulongBuffer
Definition: qengine_cuda.hpp:123
~PoolItem()
Definition: qengine_cuda.hpp:137
BufferPtr cmplxBuffer
Definition: qengine_cuda.hpp:121
BufferPtr MakeBuffer(size_t size)
Definition: qengine_cuda.hpp:140
BufferPtr realBuffer
Definition: qengine_cuda.hpp:122
std::shared_ptr< real1 > angleArray
Definition: qengine_cuda.hpp:126
PoolItem()
Definition: qengine_cuda.hpp:128
void * AllocRaw(size_t size, cudaError_t *errorPtr)
Definition: qengine_cuda.hpp:153
std::shared_ptr< real1 > probArray
Definition: qengine_cuda.hpp:125
OpenCL enhanced QEngineCPU implementation.
Definition: qengine_cuda.hpp:182
real1_f Prob(bitLenInt qubit)
Direct measure of bit probability to be in |1> state.
virtual void Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, complex const *mtrx, bitLenInt bitCount, bitCapIntOcl const *qPowersSorted, bool doCalcNorm, real1_f norm_thresh=REAL1_DEFAULT_ARG)=0
real1_f SumSqrDiff(QInterfacePtr toCompare)
Definition: qengine_cuda.hpp:437
void MUL(bitCapInt toMul, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length)
Multiply by integer.
void Compose(OCLAPI apiCall, const bitCapIntOcl *bciArgs, QEngineCUDAPtr toCopy)
void UniformlyControlledSingleBit(const std::vector< bitLenInt > &controls, bitLenInt qubitIndex, complex const *mtrxs, const std::vector< bitCapInt > &mtrxSkipPowers, bitCapInt mtrxSkipValueMask)
void INCBCD(bitCapInt toAdd, bitLenInt start, bitLenInt length)
Add classical BCD integer (without sign)
virtual bool isOpenCL()
Returns "true" if current simulation is OpenCL-based.
Definition: qengine_cuda.hpp:270
void QueueCall(OCLAPI api_call, size_t workItemCount, size_t localGroupSize, std::vector< BufferPtr > args, size_t localBuffSize=0U, size_t deallocSize=0U)
Definition: qengine_cuda.hpp:331
void Decompose(bitLenInt start, QInterfacePtr dest)
Minimally decompose a set of contiguous bits from the separably composed unit, into "destination".
bitLenInt Allocate(bitLenInt start, bitLenInt length)
Allocate new "length" count of |0> state qubits at specified qubit index start position.
real1_f FirstNonzeroPhase()
Get phase of lowest permutation nonzero amplitude.
Definition: qengine_cuda.hpp:273
void ApplyM(bitCapInt mask, bool result, complex nrm)
bool didInit
Definition: qengine_cuda.hpp:184
void ShuffleBuffers(QEnginePtr engine)
Swap the high half of this engine with the low half of another.
void Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, complex const *mtrx, bitLenInt bitCount, const bitCapIntOcl *qPowersSorted, bool doCalcNorm, real1_f norm_thresh=REAL1_DEFAULT_ARG)
Definition: qengine_cuda.hpp:602
void Finish()
If asynchronous work is still running, block until it finishes.
Definition: qengine_cuda.hpp:447
void Phase(complex topLeft, complex bottomRight, bitLenInt qubitIndex)
Apply a single bit transformation that only affects phase.
void CUniformParityRZ(const std::vector< bitLenInt > &controls, bitCapInt mask, real1_f angle)
If the controls are set and the target qubit set parity is odd, this applies a phase factor of $e^{i\,\mathrm{angle}}$.
real1_f ProbReg(bitLenInt start, bitLenInt length, bitCapInt permutation)
Direct measure of register permutation probability.
void CPOWModNOut(bitCapInt base, bitCapInt modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Controlled, raise a classical base to a quantum power, modulo N, (out of place)
void INTBCD(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length)
bitLenInt Compose(QEngineCUDAPtr toCopy)
void CDIV(bitCapInt toDiv, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Controlled division by power of integer.
void PhaseFlipX(OCLAPI api_call, const bitCapIntOcl *bciArgs)
void PhaseFlipIfLess(bitCapInt greaterPerm, bitLenInt start, bitLenInt length)
This is an expedient for an adaptive Grover's search for a function's global minimum.
void SubtractAlloc(size_t size)
Definition: qengine_cuda.hpp:465
void FullAdx(bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut, OCLAPI api_call)
BufferPtr stateBuffer
Definition: qengine_cuda.hpp:197
void INCDECSC(bitCapInt toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt overflowIndex, bitLenInt carryIndex)
Common driver method behind INCSC and DECSC (with overflow flag)
void tryCuda(std::string message, std::function< cudaError_t()> oclCall)
Definition: qengine_cuda.hpp:205
void Dispose(bitLenInt start, bitLenInt length)
Minimally decompose a set of contiguous bits from the separably composed unit, and discard the separa...
void FullAdd(bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut)
Quantum analog of classical "Full Adder" gate.
void SetPermutation(bitCapInt perm, complex phaseFac=CMPLX_DEFAULT_ARG)
Set to a specific permutation of all qubits.
std::shared_ptr< complex > stateVec
Definition: qengine_cuda.hpp:193
bitCapInt IndexedSBC(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength, bitLenInt carryIndex, const unsigned char *values)
Subtract from an entangled 8 bit register state with a superposed index-offset-based read from classi...
void QueueSetRunningNorm(real1_f runningNrm)
Add an operation to the (OpenCL) queue, to set the value of runningNorm, which is the normalization c...
Definition: qengine_cuda.hpp:320
void NormalizeState(real1_f nrm=REAL1_DEFAULT_ARG, real1_f norm_thresh=REAL1_DEFAULT_ARG, real1_f phaseArg=ZERO_R1_F)
Apply the normalization factor found by UpdateRunningNorm() or on the fly by a single bit gate.
void ROL(bitLenInt shift, bitLenInt start, bitLenInt length)
Circular shift left - shift bits left, and carry last bits.
std::unique_ptr< real1[], void(*)(real1 *)> nrmArray
Definition: qengine_cuda.hpp:202
static const bitCapIntOcl OclMemDenom
1 / OclMemDenom is the maximum fraction of total OCL device RAM that a single state vector should occ...
Definition: qengine_cuda.hpp:238
int64_t GetDevice()
Get GPU device ID.
Definition: qengine_cuda.hpp:428
void UnlockSync()
Unlocks synchronization between the state vector buffer and general RAM, so the state vector can be o...
void CPhaseFlipIfLess(bitCapInt greaterPerm, bitLenInt start, bitLenInt length, bitLenInt flagIndex)
The 6502 uses its carry flag also as a greater-than/less-than flag, for the CMP operation.
void AddAlloc(size_t size)
Definition: qengine_cuda.hpp:456
void ArithmeticCall(OCLAPI api_call, const bitCapIntOcl(&bciArgs)[BCI_ARG_LEN], const unsigned char *values=NULL, bitCapIntOcl valuesLength=0U)
void ClearBuffer(BufferPtr buff, bitCapIntOcl offset, bitCapIntOcl size)
void INC(bitCapInt toAdd, bitLenInt start, bitLenInt length)
Add integer (without sign)
size_t FixGroupSize(size_t wic, size_t gs)
Definition: qengine_cuda.hpp:590
size_t totalOclAllocSize
Definition: qengine_cuda.hpp:189
void GetProbs(real1 *outputProbs)
Get the pure quantum state representation.
void CINC(bitCapInt toAdd, bitLenInt inOutStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Add integer (without sign, with controls)
QEnginePtr CloneEmpty()
Clone this QEngine's settings, with a zeroed state vector.
void GetQuantumState(complex *outputState)
Get the pure quantum state representation.
void SetAmplitudePage(const complex *pagePtr, bitCapIntOcl offset, bitCapIntOcl length)
Copy a "page" of amplitudes from pagePtr into this QEngine's internal state.
bitLenInt Compose(QEngineCUDAPtr toCopy, bitLenInt start)
~QEngineCUDA()
Definition: qengine_cuda.hpp:264
bitLenInt Compose(QInterfacePtr toCopy, bitLenInt start)
Definition: qengine_cuda.hpp:367
real1_f ProbMask(bitCapInt mask, bitCapInt permutation)
Direct measure of masked permutation probability.
void SetQuantumState(complex const *inputState)
Set an arbitrary pure quantum state representation.
void MULModNOut(bitCapInt toMul, bitCapInt modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length)
Multiplication modulo N by integer, (out of place)
void IFullAdd(bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut)
Inverse of FullAdd.
void SetAmplitude(bitCapInt perm, complex amp)
Sets the representational amplitude of a full permutation.
QEngineCUDA(bitLenInt qBitCount, bitCapInt initState, qrack_rand_gen_ptr rgp=nullptr, complex phaseFac=CMPLX_DEFAULT_ARG, bool doNorm=false, bool randomGlobalPhase=true, bool useHostMem=false, int64_t devID=-1, bool useHardwareRNG=true, bool ignored=false, real1_f norm_thresh=REAL1_EPSILON, std::vector< int64_t > ignored2={}, bitLenInt ignored4=0U, real1_f ignored3=FP_NORM_EPSILON_F)
Initialize a Qrack::QEngineCUDA object.
int64_t deviceID
Definition: qengine_cuda.hpp:190
void INTBCDC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
void INCS(bitCapInt toAdd, bitLenInt start, bitLenInt length, bitLenInt carryIndex)
Add a classical integer to the register, with sign and without carry.
void INCDECSC(bitCapInt toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
Common driver method behind INCSC and DECSC (without overflow flag)
void ApplyM(bitCapInt mask, bitCapInt result, complex nrm)
void ROx(OCLAPI api_call, bitLenInt shift, bitLenInt start, bitLenInt length)
std::mutex queue_mutex
Definition: qengine_cuda.hpp:194
void CMULx(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length, const std::vector< bitLenInt > &controls)
bool isFinished()
Returns "false" if asynchronous work is still running, and "true" if all previously dispatched asynch...
Definition: qengine_cuda.hpp:448
void ResetStateBuffer(BufferPtr nStateBuffer)
bitCapInt IndexedLDA(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength, const unsigned char *values, bool resetValue=true)
Set 8 bit register bits by a superposed index-offset-based read from classical memory.
void Z(bitLenInt target)
Z gate.
void INT(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length)
void FreeStateVec()
Definition: qengine_cuda.hpp:531
complex permutationAmp
Definition: qengine_cuda.hpp:192
void WaitCall(OCLAPI api_call, size_t workItemCount, size_t localGroupSize, std::vector< BufferPtr > args, size_t localBuffSize=0U)
void ZeroAmplitudes()
Set all amplitudes to 0, and optionally temporarily deallocate state vector RAM.
bitCapIntOcl OpIndexed(OCLAPI api_call, bitCapIntOcl carryIn, bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength, bitLenInt carryIndex, const unsigned char *values)
void SetAmplitudePage(QEnginePtr pageEnginePtr, bitCapIntOcl srcOffset, bitCapIntOcl dstOffset, bitCapIntOcl length)
Copy a "page" of amplitudes from another QEngine, pointed to by pageEnginePtr, into this QEngine's in...
BufferPtr nrmBuffer
Definition: qengine_cuda.hpp:198
void CMUL(bitCapInt toMul, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Controlled multiplication by integer.
bool usingHostRam
Definition: qengine_cuda.hpp:185
std::list< QueueItem > wait_queue_items
Definition: qengine_cuda.hpp:200
void PhaseParity(real1_f radians, bitCapInt mask)
Parity phase gate.
void Invert(complex topRight, complex bottomLeft, bitLenInt qubitIndex)
Apply a single bit transformation that reverses bit probability and might affect phase.
DeviceContextPtr device_context
Definition: qengine_cuda.hpp:199
bool ForceMParity(bitCapInt mask, bool result, bool doForce=true)
Act as if a measurement of parity of the masked set of qubits was applied, except force the (usual...
void XMask(bitCapInt mask)
Masked X gate.
void CINT(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt start, bitLenInt length, const std::vector< bitLenInt > &controls)
void ProbRegAll(bitLenInt start, bitLenInt length, real1 *probsArray)
BufferPtr MakeStateVecBuffer(std::shared_ptr< complex > nStateVec)
real1_f ExpectationBitsAll(const std::vector< bitLenInt > &bits, bitCapInt offset=0)
Get permutation expectation value of bits.
void INCDECBCDC(bitCapInt toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
Common driver method behind INCSC and DECSC (without overflow flag)
bitCapInt IndexedADC(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength, bitLenInt carryIndex, const unsigned char *values)
Add to entangled 8 bit register state with a superposed index-offset-based read from classical memory...
void INTS(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt overflowIndex)
real1_f CtrlOrAntiProb(bool controlState, bitLenInt control, bitLenInt target)
real1_f ProbParity(bitCapInt mask)
Overall probability of any odd permutation of the masked set of bits.
void INCDECC(bitCapInt toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
Common driver method behind INCC and DECC (without sign, with carry)
void POWModNOut(bitCapInt base, bitCapInt modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length)
Raise a classical base to a quantum power, modulo N, (out of place)
void QueueSetDoNormalize(bool doNorm)
Add an operation to the (OpenCL) queue, to set the value of doNormalize, which controls whether to au...
Definition: qengine_cuda.hpp:319
void clDump()
Dumps the remaining asynchronous wait event list or queue of OpenCL events, for the current queue.
void MULModx(OCLAPI api_call, bitCapIntOcl toMod, bitCapIntOcl modN, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length)
bitLenInt Compose(QInterfacePtr toCopy)
Combine another QInterface with this one, after the last bit index of this one.
Definition: qengine_cuda.hpp:365
real1_f ParSum(real1 *toSum, bitCapIntOcl maxI)
void GetAmplitudePage(complex *pagePtr, bitCapIntOcl offset, bitCapIntOcl length)
Copy a "page" of amplitudes from this QEngine's internal state, into pagePtr.
bitCapInt MAll()
Measure permutation state of all coherent bits.
void LockSync(cl_map_flags flags=(CL_MAP_READ|CL_MAP_WRITE))
Locks synchronization between the state vector buffer and general RAM, so the state vector can be dir...
void INTSC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt overflowIndex, bitLenInt carryIndex)
complex GetAmplitude(bitCapInt perm)
Get the representational amplitude of a full permutation.
size_t FixWorkItemCount(size_t maxI, size_t wic)
Definition: qengine_cuda.hpp:579
void xMULx(OCLAPI api_call, const bitCapIntOcl *bciArgs, BufferPtr controlBuffer)
void INTC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
real1_f SumSqrDiff(QEngineCUDAPtr toCompare)
void DIV(bitCapInt toDiv, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length)
Divide by integer.
std::shared_ptr< complex > AllocStateVec(bitCapInt elemCount, bool doForceAlloc=false)
BufferPtr MakeBuffer(cl_mem_flags flags, size_t size, void *host_ptr=NULL)
Definition: qengine_cuda.hpp:471
void InitOCL(int64_t devID)
bool IsZeroAmplitude()
Returns "true" only if amplitudes are all totally 0.
Definition: qengine_cuda.hpp:272
void INTSC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
void MULx(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length)
void FreeRaw(cl_mem_flags flags, void *c)
Definition: qengine_cuda.hpp:519
size_t nrmGroupSize
Definition: qengine_cuda.hpp:188
void UniformParityRZ(bitCapInt mask, real1_f angle)
If the target qubit set parity is odd, this applies a phase factor of $e^{i\,\mathrm{angle}}$.
void UpdateRunningNorm(real1_f norm_thresh=REAL1_DEFAULT_ARG)
Force a calculation of the norm of the state vector, in order to make it unit length before the next ...
EventVecPtr ResetWaitEvents(bool waitQueue=true)
void * AllocRaw(cl_mem_flags flags, void *host_ptr, size_t size, cudaError_t *errorPtr)
Definition: qengine_cuda.hpp:507
size_t nrmGroupCount
Definition: qengine_cuda.hpp:187
void ApplyMx(OCLAPI api_call, const bitCapIntOcl *bciArgs, complex nrm)
bool unlockHostMem
Definition: qengine_cuda.hpp:186
void CMULModx(OCLAPI api_call, bitCapIntOcl toMod, bitCapIntOcl modN, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length, const std::vector< bitLenInt > &controls)
cl_map_flags lockSyncFlags
Definition: qengine_cuda.hpp:191
void SwitchHostPtr(bool useHostMem)
Switch to/from host/device state vector buffer.
Definition: qengine_cuda.hpp:282
void CIMULModNOut(bitCapInt toMul, bitCapInt modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Inverse of controlled multiplication modulo N by integer, (out of place)
void CopyStateVec(QEnginePtr src)
Exactly copy the state vector of a different QEngine instance.
void ProbMaskAll(bitCapInt mask, real1 *probsArray)
Direct measure of masked permutation probability.
void BitMask(bitCapIntOcl mask, OCLAPI api_call, real1_f phase=(real1_f) PI_R1)
void DecomposeDispose(bitLenInt start, bitLenInt length, QEngineCUDAPtr dest)
void IMULModNOut(bitCapInt toMul, bitCapInt modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length)
Inverse of multiplication modulo N by integer, (out of place)
real1_f Probx(OCLAPI api_call, const bitCapIntOcl *bciArgs)
PoolItemPtr GetFreePoolItem()
void Hash(bitLenInt start, bitLenInt length, const unsigned char *values)
Transform a length of qubit register via lookup through a hash table.
bitCapIntOcl GetMaxSize()
Definition: qengine_cuda.hpp:341
real1_f GetExpectation(bitLenInt valueStart, bitLenInt valueLength)
void clFinish(bool doHard=false)
Finishes the asynchronous wait event list or queue of OpenCL events.
void Dispose(bitLenInt start, bitLenInt length, bitCapInt disposedPerm)
Dispose of a contiguous set of qubits that are already in a permutation eigenstate.
std::vector< PoolItemPtr > poolItems
Definition: qengine_cuda.hpp:201
void AddQueueItem(const QueueItem &item)
Definition: qengine_cuda.hpp:321
void CArithmeticCall(OCLAPI api_call, const bitCapIntOcl(&bciArgs)[BCI_ARG_LEN], bitCapIntOcl *controlPowers, bitLenInt controlLen, const unsigned char *values=NULL, bitCapIntOcl valuesLength=0U)
void Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, complex const *mtrx, bitLenInt bitCount, const bitCapIntOcl *qPowersSorted, bool doCalcNorm, SPECIAL_2X2 special, real1_f norm_thresh=REAL1_DEFAULT_ARG)
void CMULModNOut(bitCapInt toMul, bitCapInt modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Controlled multiplication modulo N by integer, (out of place)
QInterfacePtr Clone()
Clone this QInterface.
void SetDevice(int64_t dID)
Set GPU device ID.
Abstract QEngine implementation, for all "Schroedinger method" engines.
Definition: qengine.hpp:31
virtual void Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, complex const *mtrx, bitLenInt bitCount, bitCapIntOcl const *qPowersSorted, bool doCalcNorm, real1_f norm_thresh=REAL1_DEFAULT_ARG)=0
bitCapIntOcl maxQPowerOcl
Definition: qengine.hpp:40
virtual void Decompose(bitLenInt start, QInterfacePtr dest)=0
Minimally decompose a set of contiguous bits from the separably composed unit, into "destination".
virtual void X(bitLenInt qubit)
X gate.
Definition: qinterface.hpp:1054
virtual bitLenInt Allocate(bitLenInt length)
Allocate new "length" count of |0> state qubits at end of qubit index position.
Definition: qinterface.hpp:434
virtual bitLenInt Compose(QInterfacePtr toCopy)
Combine another QInterface with this one, after the last bit index of this one.
Definition: qinterface.hpp:338
Definition: qengine_gpu_util.hpp:21
Half-precision floating-point type.
Definition: half.hpp:2222
virtual void Invert(const complex topRight, const complex bottomLeft, bitLenInt qubitIndex)
Apply a single bit transformation that reverses bit probability and might affect phase.
Definition: qinterface.hpp:493
virtual void UniformlyControlledSingleBit(const std::vector< bitLenInt > &controls, bitLenInt qubitIndex, const complex *mtrxs)
Apply a "uniformly controlled" arbitrary single bit unitary transformation.
Definition: qinterface.hpp:590
virtual void Z(bitLenInt qubit)
Z gate.
Definition: qinterface.hpp:1087
virtual void U(bitLenInt target, real1_f theta, real1_f phi, real1_f lambda)
General unitary gate.
Definition: rotational.cpp:18
virtual void Phase(const complex topLeft, const complex bottomRight, bitLenInt qubitIndex)
Apply a single bit transformation that only affects phase.
Definition: qinterface.hpp:480
virtual real1_f FirstNonzeroPhase()
Get phase of lowest permutation nonzero amplitude.
Definition: qinterface.hpp:2709
Definition: complex16x2simd.hpp:25
std::complex< half_float::half > complex
Definition: qrack_types.hpp:62
std::shared_ptr< QEngine > QEnginePtr
Definition: qrack_types.hpp:141
std::shared_ptr< OCLDeviceContext > DeviceContextPtr
Definition: oclengine.hpp:47
std::shared_ptr< QInterface > QInterfacePtr
Definition: qinterface.hpp:28
constexpr real1_f ZERO_R1_F
Definition: qrack_types.hpp:152
std::shared_ptr< EventVec > EventVecPtr
Definition: oclengine.hpp:51
constexpr real1_f FP_NORM_EPSILON_F
Definition: qrack_types.hpp:245
const real1 ONE_R1
Definition: qrack_types.hpp:153
unsigned long cl_map_flags
Definition: qengine_cuda.hpp:31
bitCapInt pow2(const bitLenInt &p)
Definition: qrack_functions.hpp:22
const real1 REAL1_DEFAULT_ARG
Definition: qrack_types.hpp:155
const real1 PI_R1
Definition: qrack_types.hpp:158
float real1_f
Definition: qrack_types.hpp:64
QRACK_CONST complex CMPLX_DEFAULT_ARG
Definition: qrack_types.hpp:242
std::shared_ptr< QEngineCUDA > QEngineCUDAPtr
Definition: qengine_cuda.hpp:47
std::shared_ptr< PoolItem > PoolItemPtr
Definition: qengine_cuda.hpp:162
SPECIAL_2X2
Definition: qengine_gpu_util.hpp:19
@ NONE
Definition: qengine_gpu_util.hpp:19
OCLAPI
Definition: oclapi.hpp:19
const real1 REAL1_EPSILON
Definition: qrack_types.hpp:157
std::shared_ptr< void > BufferPtr
Definition: qengine_cuda.hpp:45
unsigned long cl_mem_flags
Definition: qengine_cuda.hpp:32
bitLenInt log2(bitCapInt n)
Definition: qrack_functions.hpp:26
MICROSOFT_QUANTUM_DECL void U(_In_ uintq sid, _In_ uintq q, _In_ double theta, _In_ double phi, _In_ double lambda)
(External API) 3-parameter unitary gate
Definition: pinvoke_api.cpp:1362
#define CL_MAP_WRITE
Definition: qengine_cuda.hpp:36
#define BCI_ARG_LEN
Definition: qengine_cuda.hpp:25
#define CL_MEM_USE_HOST_PTR
Definition: qengine_cuda.hpp:41
#define CL_MEM_COPY_HOST_PTR
Definition: qengine_cuda.hpp:42
#define CMPLX_NORM_LEN
Definition: qengine_cuda.hpp:26
#define CL_MAP_READ
Definition: qengine_cuda.hpp:35
#define REAL_ARG_LEN
Definition: qengine_cuda.hpp:27
#define bitLenInt
Definition: qrack_types.hpp:44
#define qrack_rand_gen_ptr
Definition: qrack_types.hpp:146
#define bitCapInt
Definition: qrack_types.hpp:105
#define bitCapIntOcl
Definition: qrack_types.hpp:91
Definition: qengine_cuda.hpp:50
QueueItem(OCLAPI ac, size_t wic, size_t lgs, size_t ds, std::vector< BufferPtr > b, size_t lbs)
Definition: qengine_cuda.hpp:76
QueueItem(real1_f runningNrm)
Definition: qengine_cuda.hpp:104
bool doNorm
Definition: qengine_cuda.hpp:59
size_t workItemCount
Definition: qengine_cuda.hpp:52
std::vector< BufferPtr > buffers
Definition: qengine_cuda.hpp:55
size_t deallocSize
Definition: qengine_cuda.hpp:54
QueueItem()
Definition: qengine_cuda.hpp:62
bool isSetRunningNorm
Definition: qengine_cuda.hpp:58
QueueItem(bool doNrm)
Definition: qengine_cuda.hpp:90
size_t localBuffSize
Definition: qengine_cuda.hpp:56
OCLAPI api_call
Definition: qengine_cuda.hpp:51
bool isSetDoNorm
Definition: qengine_cuda.hpp:57
size_t localGroupSize
Definition: qengine_cuda.hpp:53
real1 runningNorm
Definition: qengine_cuda.hpp:60