Qrack  10.0
General classical-emulating-quantum development framework
complex16x2simd.hpp
Go to the documentation of this file.
1 //
3 // (C) Daniel Strano and the Qrack contributors 2017-2023. All rights reserved.
4 //
5 // This is a SIMD implementation of the double precision complex type.
6 // The API is designed to (almost entirely) mirror that of the C++ standard library
7 // double precision complex type.
8 //
9 // Licensed under the GNU Lesser General Public License V3.
10 // See LICENSE.md in the project root or https://www.gnu.org/licenses/lgpl-3.0.en.html
11 // for details.
12 
13 #pragma once
14 
15 #if defined(_WIN32)
16 #include <intrin.h>
17 #else
18 #include <emmintrin.h>
19 #include <immintrin.h>
20 #include <smmintrin.h>
21 #endif
22 
23 #include <complex>
24 
25 namespace Qrack {
26 
27 static const __m256d SIGNMASK = _mm256_set_pd(-0.0, -0.0, -0.0, -0.0);
28 
30 union complex2 {
31  __m256d c2;
32  double f[4];
33 
34  inline complex2() {}
35  inline complex2(const __m256d& cm2) { c2 = cm2; }
36  inline complex2(const complex2& cm2) { c2 = cm2.c2; }
37  inline complex2(const std::complex<double>& cm1, const std::complex<double>& cm2)
38  {
39  c2 = _mm256_set_pd(cm2.imag(), cm2.real(), cm1.imag(), cm1.real());
40  }
41  inline complex2(const double& r1, const double& i1, const double& r2, const double& i2)
42  {
43  c2 = _mm256_set_pd(i2, r2, i1, r1);
44  }
45  inline std::complex<double> c(const size_t& i) const { return complex(f[i << 1U], f[(i << 1U) + 1U]); }
46  inline complex2 operator+(const complex2& other) const { return _mm256_add_pd(c2, other.c2); }
47  inline complex2 operator+=(const complex2& other)
48  {
49  c2 = _mm256_add_pd(c2, other.c2);
50  return c2;
51  }
52  inline complex2 operator-(const complex2& other) const { return _mm256_sub_pd(c2, other.c2); }
53  inline complex2 operator-=(const complex2& other)
54  {
55  c2 = _mm256_sub_pd(c2, other.c2);
56  return c2;
57  }
58  inline complex2 operator*(const complex2& other) const
59  {
60 #if ENABLE_FMA
61  // FMA proposed by Elara (OpenAI custom GPT)
62  return _mm256_fmadd_pd(_mm256_shuffle_pd(c2, c2, 5),
63  _mm256_shuffle_pd(_mm256_xor_pd(SIGNMASK, other.c2), other.c2, 15),
64  _mm256_mul_pd(c2, _mm256_shuffle_pd(other.c2, other.c2, 0)));
65 #else
66  return _mm256_add_pd(_mm256_mul_pd(_mm256_shuffle_pd(c2, c2, 5),
67  _mm256_shuffle_pd(_mm256_xor_pd(SIGNMASK, other.c2), other.c2, 15)),
68  _mm256_mul_pd(c2, _mm256_shuffle_pd(other.c2, other.c2, 0)));
69 #endif
70  }
71  inline complex2 operator*=(const complex2& other)
72  {
73 #if ENABLE_FMA
74  // FMA proposed by Elara (OpenAI custom GPT)
75  c2 = _mm256_fmadd_pd(_mm256_shuffle_pd(c2, c2, 5),
76  _mm256_shuffle_pd(_mm256_xor_pd(SIGNMASK, other.c2), other.c2, 15),
77  _mm256_mul_pd(c2, _mm256_shuffle_pd(other.c2, other.c2, 0)));
78 #else
79  c2 = _mm256_add_pd(_mm256_mul_pd(_mm256_shuffle_pd(c2, c2, 5),
80  _mm256_shuffle_pd(_mm256_xor_pd(SIGNMASK, other.c2), other.c2, 15)),
81  _mm256_mul_pd(c2, _mm256_shuffle_pd(other.c2, other.c2, 0)));
82 #endif
83  return c2;
84  }
85  inline complex2 operator*(const double& rhs) const { return _mm256_mul_pd(c2, _mm256_set1_pd(rhs)); }
86  inline complex2 operator-() const { return _mm256_mul_pd(_mm256_set1_pd(-1.0), c2); }
87  inline complex2 operator*=(const double& rhs)
88  {
89  c2 = _mm256_mul_pd(c2, _mm256_set1_pd(rhs));
90  return c2;
91  }
92 };
93 
94 inline complex2 mtrxColShuff(const complex2& mtrxCol) { return _mm256_shuffle_pd(mtrxCol.c2, mtrxCol.c2, 5); }
95 inline complex2 matrixMul(const complex2& mtrxCol1, const complex2& mtrxCol2, const complex2& mtrxCol1Shuff,
96  const complex2& mtrxCol2Shuff, const complex2& qubit)
97 {
98  const __m256d dupeLo = _mm256_permute2f128_pd(qubit.c2, qubit.c2, 0);
99  const __m256d dupeHi = _mm256_permute2f128_pd(qubit.c2, qubit.c2, 17);
100 #if ENABLE_FMA
101  // FMA proposed by Elara (OpenAI custom GPT)
102  return _mm256_add_pd(
103  _mm256_fmadd_pd(mtrxCol1Shuff.c2, _mm256_shuffle_pd(_mm256_xor_pd(SIGNMASK, dupeLo), dupeLo, 15),
104  _mm256_mul_pd(mtrxCol1.c2, _mm256_shuffle_pd(dupeLo, dupeLo, 0))),
105  _mm256_fmadd_pd(mtrxCol2Shuff.c2, _mm256_shuffle_pd(_mm256_xor_pd(SIGNMASK, dupeHi), dupeHi, 15),
106  _mm256_mul_pd(mtrxCol2.c2, _mm256_shuffle_pd(dupeHi, dupeHi, 0))));
107 #else
108  return _mm256_add_pd(
109  _mm256_add_pd(_mm256_mul_pd(mtrxCol1Shuff.c2, _mm256_shuffle_pd(_mm256_xor_pd(SIGNMASK, dupeLo), dupeLo, 15)),
110  _mm256_mul_pd(mtrxCol1.c2, _mm256_shuffle_pd(dupeLo, dupeLo, 0))),
111  _mm256_add_pd(_mm256_mul_pd(mtrxCol2Shuff.c2, _mm256_shuffle_pd(_mm256_xor_pd(SIGNMASK, dupeHi), dupeHi, 15)),
112  _mm256_mul_pd(mtrxCol2.c2, _mm256_shuffle_pd(dupeHi, dupeHi, 0))));
113 #endif
114 }
115 inline complex2 matrixMul(const float& nrm, const complex2& mtrxCol1, const complex2& mtrxCol2,
116  const complex2& mtrxCol1Shuff, const complex2& mtrxCol2Shuff, const complex2& qubit)
117 {
118  return matrixMul(mtrxCol1, mtrxCol2, mtrxCol1Shuff, mtrxCol2Shuff, qubit) * nrm;
119 }
120 inline complex2 operator*(const double& lhs, const complex2& rhs) { return _mm256_mul_pd(_mm256_set1_pd(lhs), rhs.c2); }
121 
122 inline double norm(const complex2& c)
123 {
124  // Suggested by Elara (OpenAI custom GPT)
125  const __m256d sq = _mm256_mul_pd(c.c2, c.c2);
126  const __m256d sum = _mm256_hadd_pd(sq, sq);
127  return _mm_cvtsd_f64(_mm_add_pd(_mm256_castpd256_pd128(sum), _mm256_extractf128_pd(sum, 1)));
128 }
129 
130 } // namespace Qrack
GLOSSARY: bitLenInt - "bit-length integer" - unsigned integer ID of qubit position in register bitCap...
Definition: complex16x2simd.hpp:25
static const __m256d SIGNMASK
Definition: complex16x2simd.hpp:27
std::complex< real1 > complex
Definition: qrack_types.hpp:136
double norm(const complex2 &c)
Definition: complex16x2simd.hpp:122
complex2 matrixMul(const complex2 &mtrxCol1, const complex2 &mtrxCol2, const complex2 &mtrxCol1Shuff, const complex2 &mtrxCol2Shuff, const complex2 &qubit)
Definition: complex16x2simd.hpp:95
complex2 mtrxColShuff(const complex2 &mtrxCol)
Definition: complex16x2simd.hpp:94
complex2 operator*(const double &lhs, const complex2 &rhs)
Definition: complex16x2simd.hpp:120
SIMD implementation of the double precision complex vector type of 2 complex numbers,...
Definition: complex16x2simd.hpp:30
complex2 operator*(const complex2 &other) const
Definition: complex16x2simd.hpp:58
complex2(const complex2 &cm2)
Definition: complex16x2simd.hpp:36
double f[4]
Definition: complex16x2simd.hpp:32
complex2 operator*(const double &rhs) const
Definition: complex16x2simd.hpp:85
__m256d c2
Definition: complex16x2simd.hpp:31
complex2 operator*=(const complex2 &other)
Definition: complex16x2simd.hpp:71
complex2()
Definition: complex16x2simd.hpp:34
complex2(const std::complex< double > &cm1, const std::complex< double > &cm2)
Definition: complex16x2simd.hpp:37
complex2 operator-(const complex2 &other) const
Definition: complex16x2simd.hpp:52
complex2(const double &r1, const double &i1, const double &r2, const double &i2)
Definition: complex16x2simd.hpp:41
complex2 operator*=(const double &rhs)
Definition: complex16x2simd.hpp:87
complex2(const __m256d &cm2)
Definition: complex16x2simd.hpp:35
complex2 operator+=(const complex2 &other)
Definition: complex16x2simd.hpp:47
complex2 operator-() const
Definition: complex16x2simd.hpp:86
std::complex< double > c(const size_t &i) const
Definition: complex16x2simd.hpp:45
complex2 operator+(const complex2 &other) const
Definition: complex16x2simd.hpp:46
complex2 operator-=(const complex2 &other)
Definition: complex16x2simd.hpp:53