Intel HE Acceleration Library for FPGAs
Intel Homomorphic Encryption Acceleration Library for FPGAs, accelerating the modular arithmetic operations used in homomorphic encryption on Intel FPGAs.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
fpga.h
Go to the documentation of this file.
1 // Copyright (C) 2020-2021 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 
4 #ifndef __FPGA_H__
5 #define __FPGA_H__
6 
7 #include <atomic>
8 #include <condition_variable>
9 #include <deque>
10 #include <future>
11 #include <memory>
12 #include <mutex>
13 #include <thread>
14 #include <unordered_map>
15 #include <vector>
16 
17 #include "CL/opencl.h"
18 
19 namespace intel {
20 namespace hexl {
21 namespace fpga {
22 
23 __extension__ typedef unsigned __int128 fpga_uint128_t;
30 typedef struct {
31  uint64_t modulus;
32  uint64_t len;
33  uint64_t barr_lo;
35 
40 typedef struct {
41  cl_ulong4 data[8];
43 
48 typedef struct {
49  cl_ulong4 data[8];
51 
// DyadmultKeys1_t: packed bit-field record. The declared field widths are
// 4 x 52 bits + 1 x 48 bits = 256 bits in total.
55 typedef struct {
56  uint64_t key1 : 52;
57  uint64_t key2 : 52;
58  uint64_t key3 : 52;
59  uint64_t key4 : 52;
    // Narrower than key1..key4 (48 bits instead of 52).
60  uint64_t key5 : 48;
    // packed: asks the compiler (GNU extension) not to insert padding.
61 } __attribute__((packed)) DyadmultKeys1_t;
62 
// DyadmultKeys2_t: packed bit-field record. The declared field widths are
// 4 + 4 x 52 + 44 = 256 bits in total.
66 typedef struct {
    // Only 4 bits wide, unlike the 52-bit fields that follow.
67  uint64_t key1 : 4;
68  uint64_t key2 : 52;
69  uint64_t key3 : 52;
70  uint64_t key4 : 52;
71  uint64_t key5 : 52;
72  uint64_t key6 : 44;
    // packed: asks the compiler (GNU extension) not to insert padding.
73 } __attribute__((packed)) DyadmultKeys2_t;
74 
// DyadmultKeys3_t: packed bit-field record. The declared field widths are
// 8 + 4 x 52 + 40 = 256 bits in total.
78 typedef struct {
    // Only 8 bits wide, unlike the 52-bit fields that follow.
79  uint64_t key1 : 8;
80  uint64_t key2 : 52;
81  uint64_t key3 : 52;
82  uint64_t key4 : 52;
83  uint64_t key5 : 52;
    // Filler field, named as unused; brings the declared width to 256 bits.
84  uint64_t NOT_USED : 40;
    // packed: asks the compiler (GNU extension) not to insert padding.
85 } __attribute__((packed)) DyadmultKeys3_t;
86 
// BIT_MASK(BITS): expands to a value with the low BITS bits set.
// NOTE(review): BITS is not parenthesized in the expansion, so an argument
// such as `a + b` does not group as intended; `1UL` has the width of
// unsigned long, which is platform-dependent, and shifting by that width or
// more is undefined behavior.
87 #define BIT_MASK(BITS) ((1UL << BITS) - 1)
// Numeric limit used elsewhere; matches the value of MAX_KEY_MODULUS_SIZE
// declared later in this listing.
88 #define MAX_RNS_MODULUS_SIZE 7
// Flag constant; its consumers are not visible in this listing.
89 #define RWMEM_FLAG 1
90 
95 };
96 
// kernel_t: scoped enumeration naming the kinds of kernel/operation.
// NOTE(review): listing lines 99 and 103 are not shown on this page, so
// the enumerator list below may be incomplete - confirm against the header.
97 enum class kernel_t {
    // Default value used by the Object and FPGAObject constructors.
98  NONE,
100  NTT,
101  INTT,
102  KEYSWITCH,
104 };
105 
// Object: polymorphic base for the request structs below (Object_NTT,
// Object_INTT, Object_DyadicMultiply, Object_KeySwitch all derive from it).
// NOTE(review): listing line 121 is not shown; the definition index at the
// end of this page lists `kernel_t type_` at fpga.h:121.
113 struct Object {
114 public:
    // Both arguments are optional: type defaults to kernel_t::NONE and
    // fence to false.
115  explicit Object(kernel_t type = kernel_t::NONE, bool fence = false);
    // Virtual so derived objects can be destroyed through an Object*.
116  virtual ~Object() = default;
117 
118  bool ready_;
119  int id_;
120 
122  bool fence_;
    // Shared by all Object instances (static); defined outside this header.
123  static unsigned int g_wid_;
124 };
125 
// Object_NTT: Object subtype carrying the parameters of one NTT request.
// Data members mirror the constructor parameters by name.
// NOTE(review): listing line 144 is not shown; the definition index lists
// `precon_root_of_unity_powers_` at fpga.h:144.
136 struct Object_NTT : public Object {
    // coeff_poly is the only non-const pointer parameter; fence defaults
    // to false.
137  explicit Object_NTT(uint64_t* coeff_poly,
138  const uint64_t* root_of_unity_powers,
139  const uint64_t* precon_root_of_unity_powers,
140  uint64_t coeff_modulus, uint64_t n, bool fence = false);
141 
    // Raw, non-const pointer; ownership is not expressed in this header.
142  uint64_t* coeff_poly_;
143  const uint64_t* root_of_unity_powers_;
145  uint64_t coeff_modulus_;
146  uint64_t n_;
147 };
148 
// Object_INTT: Object subtype carrying the parameters of one inverse-NTT
// request. Data members mirror the constructor parameters by name.
// NOTE(review): listing line 172 is not shown; the definition index lists
// `precon_inv_root_of_unity_powers_` at fpga.h:172.
163 struct Object_INTT : public Object {
    // Compared with Object_NTT, this constructor additionally takes inv_n
    // and inv_n_w; fence defaults to false.
164  explicit Object_INTT(uint64_t* coeff_poly,
165  const uint64_t* inv_root_of_unity_powers,
166  const uint64_t* precon_inv_root_of_unity_powers,
167  uint64_t coeff_modulus, uint64_t inv_n,
168  uint64_t inv_n_w, uint64_t n, bool fence = false);
169 
    // Raw, non-const pointer; ownership is not expressed in this header.
170  uint64_t* coeff_poly_;
171  const uint64_t* inv_root_of_unity_powers_;
173  uint64_t coeff_modulus_;
174  uint64_t inv_n_;
175  uint64_t inv_n_w_;
176  uint64_t n_;
177 };
// Object_DyadicMultiply: Object subtype carrying the parameters of one
// multiplication request. Data members mirror the constructor parameters
// by name.
188 struct Object_DyadicMultiply : public Object {
    // results is the only non-const pointer parameter; fence defaults to
    // false.
189  explicit Object_DyadicMultiply(uint64_t* results, const uint64_t* operand1,
190  const uint64_t* operand2, uint64_t n,
191  const uint64_t* moduli, uint64_t n_moduli,
192  bool fence = false);
193 
    // Raw, non-const pointer; ownership is not expressed in this header.
194  uint64_t* results_;
195  const uint64_t* operand1_;
196  const uint64_t* operand2_;
197  uint64_t n_;
198  const uint64_t* moduli_;
199  uint64_t n_moduli_;
200 };
201 
// Object_KeySwitch: Object subtype carrying the parameters of one
// keyswitch request. Data members mirror the constructor parameters by name.
// NOTE(review): listing lines 230-233 are not shown; the definition index
// lists decomp_modulus_size_, key_modulus_size_, rns_modulus_size_ and
// key_component_count_ at fpga.h:230-233.
218 struct Object_KeySwitch : public Object {
    // result is the only non-const pointer parameter; fence defaults to
    // false.
219  explicit Object_KeySwitch(
220  uint64_t* result, const uint64_t* t_target_iter_ptr, uint64_t n,
221  uint64_t decomp_modulus_size, uint64_t key_modulus_size,
222  uint64_t rns_modulus_size, uint64_t key_component_count,
223  const uint64_t* moduli, const uint64_t** k_switch_keys,
224  const uint64_t* modswitch_factors, const uint64_t* twiddle_factors,
225  bool fence = false);
226 
    // Raw, non-const pointer; ownership is not expressed in this header.
227  uint64_t* result_;
228  const uint64_t* t_target_iter_ptr_;
229  uint64_t n_;
234  const uint64_t* moduli_;
    // Pointer to pointer(s) to const data (two levels of indirection).
235  const uint64_t** k_switch_keys_;
236  const uint64_t* modswitch_factors_;
237  const uint64_t* twiddle_factors_;
238 };
239 
// Buffer: holds a deque of Object* plus, for each of the four operation
// kinds (DyadicMultiply, NTT, INTT, KeySwitch), a fixed batch size, a
// "total worksize" and a remaining-count (`num_*`).
// push/front/back/pop/size are only declared here; their definitions (and
// how the mutexes and condition variable are used) are not in this header.
273 class Buffer {
274 public:
    // Stores the capacity and the four batch sizes (all const members).
    // Every total_worksize_* starts at 1 and every num_* starts at 0.
275  Buffer(uint64_t capacity, uint64_t n_batch_dyadic_multiply,
276  uint64_t n_batch_ntt, uint64_t n_batch_intt,
277  uint64_t n_batch_KeySwitch)
278  : capacity_(capacity),
279  n_batch_dyadic_multiply_(n_batch_dyadic_multiply),
280  n_batch_ntt_(n_batch_ntt),
281  n_batch_intt_(n_batch_intt),
282  n_batch_KeySwitch_(n_batch_KeySwitch),
283  total_worksize_DyadicMultiply_(1),
284  num_DyadicMultiply_(0),
285  total_worksize_NTT_(1),
286  num_NTT_(0),
287  total_worksize_INTT_(1),
288  num_INTT_(0),
289  total_worksize_KeySwitch_(1),
290  num_KeySwitch_(0) {}
291 
    // Queue-style interface over Object pointers. Note that pop() returns a
    // vector of pointers rather than a single element.
292  void push(Object* obj);
293  Object* front() const;
294  Object* back() const;
295  std::vector<Object*> pop();
296 
    // Not const-qualified, unlike front()/back().
297  uint64_t size();
298 
    // Getters: return the stored total worksize for each operation kind.
299  uint64_t get_worksize_DyadicMultiply() const {
300  return total_worksize_DyadicMultiply_;
301  }
302  uint64_t get_worksize_NTT() const { return total_worksize_NTT_; }
303  uint64_t get_worksize_INTT() const { return total_worksize_INTT_; }
304  uint64_t get_worksize_KeySwitch() const {
305  return total_worksize_KeySwitch_;
306  }
307 
    // Setters: record ws as the total worksize and reset the matching
    // remaining-count (num_*) to the same value.
308  void set_worksize_DyadicMultiply(uint64_t ws) {
309  total_worksize_DyadicMultiply_ = ws;
310  num_DyadicMultiply_ = total_worksize_DyadicMultiply_;
311  }
312  void set_worksize_NTT(uint64_t ws) {
313  total_worksize_NTT_ = ws;
314  num_NTT_ = total_worksize_NTT_;
315  }
316  void set_worksize_INTT(uint64_t ws) {
317  total_worksize_INTT_ = ws;
318  num_INTT_ = total_worksize_INTT_;
319  }
320  void set_worksize_KeySwitch(uint64_t ws) {
321  total_worksize_KeySwitch_ = ws;
322  num_KeySwitch_ = total_worksize_KeySwitch_;
323  }
324 
325 private:
    // get_worksize_int_*: return the smaller of the remaining-count and the
    // configured batch size for that operation kind.
326  uint64_t get_worksize_int_DyadicMultiply() const {
327  return ((num_DyadicMultiply_ > n_batch_dyadic_multiply_)
328  ? n_batch_dyadic_multiply_
329  : num_DyadicMultiply_);
330  }
331 
332  uint64_t get_worksize_int_NTT() const {
333  return ((num_NTT_ > n_batch_ntt_) ? n_batch_ntt_ : num_NTT_);
334  }
335 
336  uint64_t get_worksize_int_INTT() const {
337  return ((num_INTT_ > n_batch_intt_) ? n_batch_intt_ : num_INTT_);
338  }
339 
340  uint64_t get_worksize_int_KeySwitch() const {
341  return ((num_KeySwitch_ > n_batch_KeySwitch_) ? n_batch_KeySwitch_
342  : num_KeySwitch_);
343  }
344 
    // update_*_work_size: subtract ws from the remaining-count.
    // NOTE(review): the counters are unsigned and there is no guard here,
    // so ws larger than the remaining-count would wrap around.
345  void update_DyadicMultiply_work_size(uint64_t ws) {
346  num_DyadicMultiply_ -= ws;
347  }
348  void update_NTT_work_size(uint64_t ws) { num_NTT_ -= ws; }
349  void update_INTT_work_size(uint64_t ws) { num_INTT_ -= ws; }
350  void update_KeySwitch_work_size(uint64_t ws) { num_KeySwitch_ -= ws; }
351 
    // Synchronization primitives and the underlying storage. Which mutex
    // guards which member is decided in the out-of-line definitions.
352  std::mutex mu_;
353  std::mutex mu_size_;
354  std::condition_variable cond_;
355  std::deque<Object*> buffer_;
    // Fixed at construction (const members).
356  const uint64_t capacity_;
357  const uint64_t n_batch_dyadic_multiply_;
358  const uint64_t n_batch_ntt_;
359  const uint64_t n_batch_intt_;
360  const uint64_t n_batch_KeySwitch_;
361 
    // Per-kind pairs: the total set by set_worksize_*, and the count still
    // remaining after update_*_work_size calls.
362  uint64_t total_worksize_DyadicMultiply_;
363  uint64_t num_DyadicMultiply_;
364 
365  uint64_t total_worksize_NTT_;
366  uint64_t num_NTT_;
367 
368  uint64_t total_worksize_INTT_;
369  uint64_t num_INTT_;
370 
371  uint64_t total_worksize_KeySwitch_;
372  uint64_t num_KeySwitch_;
373 };
// FPGAObject: abstract base (two pure virtual functions) for the
// per-kernel FPGAObject_* structs, each of which groups a vector of Object*.
// NOTE(review): listing line 402 is not shown; the definition index lists
// `kernel_t type_` at fpga.h:402.
389 struct FPGAObject {
    // type defaults to kernel_t::NONE and fence to false.
390  FPGAObject(const cl_context& context, uint64_t n_batch,
391  kernel_t type = kernel_t::NONE, bool fence = false);
    // Virtual so derived objects can be destroyed through an FPGAObject*.
392  virtual ~FPGAObject() = default;
    // Implemented by each derived struct: one consumes a list of Object*,
    // the other writes into a caller-supplied uint64_t buffer.
393  virtual void fill_in_data(const std::vector<Object*>& objs) = 0;
394  virtual void fill_out_data(uint64_t* results) = 0;
395 
396  void recycle();
397 
    // Held by reference: the referenced cl_context must outlive this object.
398  const cl_context& context_;
399  int tag_;
400  uint64_t n_batch_;
401  uint64_t batch_size_;
403  bool fence_;
404 
405  std::vector<Object*> in_objs_;
406 
    // Shared by all FPGAObject instances (static) and atomic.
407  static std::atomic<int> g_tag_;
408 };
409 
// FPGAObject_NTT: FPGAObject implementation for NTT requests.
// NOTE(review): listing lines 436-439 are not shown; the definition index
// lists coeff_poly_in_svm_, root_of_unity_powers_in_svm_,
// precon_root_of_unity_powers_in_svm_ and coeff_modulus_in_svm_ there.
425 struct FPGAObject_NTT : public FPGAObject {
426  explicit FPGAObject_NTT(const cl_context& context, uint64_t coeff_count,
427  uint64_t batch_size);
    // User-declared destructor; defined outside this header.
428  ~FPGAObject_NTT();
429 
    // Copying is disabled.
430  FPGAObject_NTT(const FPGAObject_NTT&) = delete;
431  FPGAObject_NTT& operator=(const FPGAObject_NTT&) = delete;
432 
433  void fill_in_data(const std::vector<Object*>& objs) override;
    // Same signature as the base's fill_out_data(uint64_t* results); only
    // the parameter name differs.
434  void fill_out_data(uint64_t* coeff_poly) override;
435 
440  uint64_t n_;
441 };
442 
// FPGAObject_INTT: FPGAObject implementation for inverse-NTT requests.
// NOTE(review): listing lines 463 and 470-473 are not shown; the
// definition index lists coeff_poly_in_svm_,
// inv_root_of_unity_powers_in_svm_, precon_inv_root_of_unity_powers_in_svm_
// and coeff_modulus_in_svm_ at fpga.h:470-473.
460 struct FPGAObject_INTT : public FPGAObject {
461  explicit FPGAObject_INTT(const cl_context& context, uint64_t coeff_count,
462  uint64_t batch_size);
    // Copying is disabled.
464  FPGAObject_INTT(const FPGAObject_INTT&) = delete;
465  FPGAObject_INTT& operator=(const FPGAObject_INTT&) = delete;
466 
467  void fill_in_data(const std::vector<Object*>& objs) override;
    // Same signature as the base's fill_out_data(uint64_t* results); only
    // the parameter name differs.
468  void fill_out_data(uint64_t* coeff_poly) override;
469 
    // Pointers rather than plain values, unlike inv_n_/inv_n_w_ in
    // Object_INTT.
474  uint64_t* inv_n_in_svm_;
475  uint64_t* inv_n_w_in_svm_;
476  uint64_t n_;
477 };
478 
497  explicit FPGAObject_DyadicMultiply(const cl_context& context,
498  uint64_t coeff_size,
499  uint32_t modulus_size,
500  uint64_t batch_size);
504  delete;
505 
506  void fill_in_data(const std::vector<Object*>& objs) override;
507  void fill_out_data(uint64_t* results) override;
508 
509  uint64_t* operand1_in_svm_;
510  uint64_t* operand2_in_svm_;
512  uint64_t n_;
513  uint64_t n_moduli_;
516 };
517 
539  explicit FPGAObject_KeySwitch(const cl_context& context,
540  uint64_t batch_size);
541 
545  void fill_in_data(const std::vector<Object*>& objs) override;
546  void fill_out_data(uint64_t* results) override;
547 
548  uint64_t n_;
553  uint64_t* moduli_;
554  uint64_t** k_switch_keys_;
556  uint64_t* twiddle_factors_;
557  uint64_t* ms_output_;
558 
561 
562 private:
563  enum {
564  MAX_KEY_MODULUS_SIZE = 7,
565  MAX_KEY_COMPONENT_SIZE = 2,
566  MAX_COEFF_COUNT = 16384
567  };
568 };
569 
571  explicit KeySwitchMemKeys(cl_mem k1 = nullptr, cl_mem k2 = nullptr,
572  cl_mem k3 = nullptr);
574 
578 };
579 
// DEV_TYPE: device mode selector (the page's index describes it as
// "emulation mode, FPGA"). This is an unscoped enum, so NONE, EMU and FPGA
// are visible directly in the enclosing namespace; this NONE is distinct
// from the scoped kernel_t::NONE.
584 typedef enum { NONE = 0, EMU, FPGA } DEV_TYPE;
// Device: wraps one cl_device_id together with the OpenCL context, program,
// command queues, kernels and buffers declared below, and is connected to a
// shared Buffer. All member functions are defined outside this header.
// NOTE(review): this class uses std::string, but the include list shown at
// the top of this listing has no <string>; it presumably arrives
// transitively - confirm and include it directly.
603 class Device {
604 public:
    // exit_signal is taken by value as a shared_future; buffer and device
    // are taken by reference (see the reference members below).
605  Device(const cl_device_id& device, Buffer& buffer,
606  std::shared_future<bool> exit_signal, uint64_t coeff_size,
607  uint32_t modulus_size, uint64_t batch_size_dyadic_multiply,
608  uint64_t batch_size_ntt, uint64_t batch_size_intt,
609  uint64_t batch_size_KeySwitch, uint32_t debug);
610  ~Device();
    // Copying is disabled.
611  Device(const Device&) = delete;
612  Device& operator=(const Device&) = delete;
    // The only public operation besides construction/destruction.
613  void run();
614 
615 private:
    // Class-scope integer constant.
616  enum { CREDIT = 2 };
617 
618  void process_blocking_api();
619  bool process_input(int index);
620  bool process_output();
621 
    // One output-processing routine per operation kind.
622  bool process_output_dyadic_multiply();
623  bool process_output_NTT();
624  bool process_output_INTT();
625  bool process_output_KeySwitch();
626 
    // A generic entry point taking the base type, plus one routine per
    // concrete FPGAObject_* type.
627  void enqueue_input_data(FPGAObject* fpga_obj);
628  void enqueue_input_data_dyadic_multiply(
629  FPGAObject_DyadicMultiply* fpga_obj);
630  void enqueue_input_data_NTT(FPGAObject_NTT* fpga_obj);
631  void enqueue_input_data_INTT(FPGAObject_INTT* fpga_obj);
632  void enqueue_input_data_KeySwitch(FPGAObject_KeySwitch* fpga_obj);
633 
    // Returns this instance's id_.
634  int device_id() { return id_; }
635 
    // KeySwitch helpers.
636  void KeySwitch_load_twiddles(FPGAObject_KeySwitch* fpga_obj);
637  KeySwitchMemKeys* KeySwitch_check_keys(uint64_t** keys);
638  KeySwitchMemKeys* KeySwitch_load_keys(FPGAObject_KeySwitch* fpga_obj);
639  void build_modulus_meta(FPGAObject_KeySwitch* fpga_obj);
640  void build_invn_meta(FPGAObject_KeySwitch* fpga_obj);
641  void KeySwitch_read_output();
642 
643  uint64_t precompute_modulus_k(uint64_t modulus);
644 
645  kernel_t get_kernel_type();
646  std::string get_bitstream_name();
647 
    // Reference members: the referenced cl_device_id and Buffer must
    // outlive this Device.
648  const cl_device_id& device_;
649  Buffer& buffer_;
650  unsigned int credit_;
651  std::shared_future<bool> future_exit_;
652  int id_;
    // Static counterpart of id_, shared by all Device instances.
653  static int device_id_;
654  kernel_t kernel_type_;
655 
    // Raw FPGAObject pointers; ownership is not expressed in this header.
656  std::vector<FPGAObject*> fpgaObjects_;
657 
658  cl_context context_;
659  cl_program program_;
660 
661  // DYADIC_MULTIPLY section
662  cl_command_queue dyadic_multiply_input_queue_;
663  cl_command_queue dyadic_multiply_output_queue_;
664  cl_kernel dyadic_multiply_input_fifo_kernel_;
665  cl_kernel dyadic_multiply_output_fifo_nb_kernel_;
666 
667  uint64_t* dyadic_multiply_results_out_svm_;
668  int* dyadic_multiply_tag_out_svm_;
669  int* dyadic_multiply_results_out_valid_svm_;
670  //
671 
672  // NTT section
673  cl_command_queue ntt_load_queue_;
674  cl_command_queue ntt_store_queue_;
675  cl_kernel ntt_load_kernel_;
676  cl_kernel ntt_store_kernel_;
677 
678  uint64_t* NTT_coeff_poly_svm_;
679 
680  // INTT section
681  cl_command_queue intt_load_queue_;
682  cl_command_queue intt_store_queue_;
683  cl_kernel intt_load_kernel_;
684  cl_kernel intt_store_kernel_;
685 
686  uint64_t* INTT_coeff_poly_svm_;
687  //
688 
689  // KeySwitch section
690  cl_mem KeySwitch_mem_root_of_unity_powers_;
    // One queue and one kernel per KEYSWITCH_NUM_KERNELS entry; that name is
    // not defined in the visible lines (presumably part of the
    // KeySwitch_Kernels enum that the index places at fpga.h:91 - confirm).
691  cl_command_queue KeySwitch_queues_[KEYSWITCH_NUM_KERNELS];
692  cl_kernel KeySwitch_kernels_[KEYSWITCH_NUM_KERNELS];
693  bool KeySwitch_load_once_;
694  uint64_t* root_of_unity_powers_ptr_;
695  KeySwitch_modulus_t modulus_meta_;
696  KeySwitch_invn_t invn_;
697  uint64_t KeySwitch_id_;
    // Fixed-size event tables: 2 x 128 and 2 x 2.
698  cl_event KeySwitch_events_write_[2][128];
699  cl_event KeySwitch_events_enqueue_[2][2];
    // Keyed by the uint64_t** pointer value itself, not by the pointed-to
    // contents.
700  std::unordered_map<uint64_t**, KeySwitchMemKeys*> keys_map_;
701  std::unordered_map<uint64_t**, KeySwitchMemKeys*>::iterator keys_map_iter_;
702 
703  uint32_t debug_;
    // Static map from string to kernel_t; its contents are defined outside
    // this header.
704  static const std::unordered_map<std::string, kernel_t> kernels;
705 };
706 
// DevicePool: holds an OpenCL platform id, an array of cl_device_id, an
// array of Device pointers and a vector of std::thread. The constructor
// forwards the same sizing arguments that Device's constructor takes.
// Construction and teardown logic lives outside this header.
723 class DevicePool {
724 public:
    // exit_signal is taken as a non-const reference to a std::future, while
    // the member future_exit_ below is a std::shared_future.
725  DevicePool(int choice, Buffer& buffer, std::future<bool>& exit_signal,
726  uint64_t coeff_size, uint32_t modulus_size,
727  uint64_t batch_size_dyadic_multiply, uint64_t batch_size_ntt,
728  uint64_t batch_size_intt, uint64_t batch_size_KeySwitch,
729  uint32_t debug);
730  ~DevicePool();
731 
732 private:
    // Copying is disabled (deleted and also private).
733  DevicePool(const DevicePool& d) = delete;
734  DevicePool& operator=(const DevicePool& d) = delete;
735 
736  cl_platform_id platform_;
737  cl_uint device_count_;
    // Raw pointers; allocation and release are not visible in this header.
738  cl_device_id* cl_devices_;
739  Device** devices_;
740  std::shared_future<bool> future_exit_;
741 
742  std::vector<std::thread> runners_;
743 };
// Attach a device to this thread (wording taken from this page's index).
// Takes no arguments and returns nothing; defined outside this header.
748 void attach_fpga_pooling();
// Detach a device from this thread (wording taken from this page's index).
// Takes no arguments and returns nothing; defined outside this header.
753 void detach_fpga_pooling();
754 
755 } // namespace fpga
756 } // namespace hexl
757 } // namespace intel
758 
759 #endif
uint64_t n_moduli_
Definition: fpga.h:199
uint64_t n_
Definition: fpga.h:512
bool fence_
Definition: fpga.h:122
uint64_t barr_lo
Definition: fpga.h:33
bool fence_
Definition: fpga.h:403
Definition: fpga.h:584
uint64_t n_
Definition: fpga.h:197
virtual ~FPGAObject()=default
void set_worksize_KeySwitch(uint64_t ws)
Definition: fpga.h:320
Struct FPGAObject_NTT stores the NTT blob of objects to be transferred to the FPGA.
Definition: fpga.h:425
KeySwitchMemKeys(cl_mem k1=nullptr, cl_mem k2=nullptr, cl_mem k3=nullptr)
uint64_t len
Definition: fpga.h:32
const uint64_t * inv_root_of_unity_powers_
Definition: fpga.h:171
Object(kernel_t type=kernel_t::NONE, bool fence=false)
Definition: fpga.h:584
Parent Struct FPGAObject stores the blob of objects to be transferred to the FPGA. ...
Definition: fpga.h:389
uint64_t n_moduli_
Definition: fpga.h:513
uint64_t * modswitch_factors_
Definition: fpga.h:555
uint64_t * coeff_modulus_in_svm_
Definition: fpga.h:439
const uint64_t * operand1_
Definition: fpga.h:195
struct Object_DyadicMultiply Stores the parameters for the multiplication
Definition: fpga.h:188
Class Device.
Definition: fpga.h:603
FPGAObject_INTT(const cl_context &context, uint64_t coeff_count, uint64_t batch_size)
uint64_t decomp_modulus_size_
Definition: fpga.h:230
uint64_t * inv_n_in_svm_
Definition: fpga.h:474
void attach_fpga_pooling()
attach_fpga_pooling Attach a device to this thread
uint64_t * result_
Definition: fpga.h:227
uint64_t key_component_count_
Definition: fpga.h:552
Struct DyadmultKeys1_t.
Definition: fpga.h:55
virtual void fill_in_data(const std::vector< Object * > &objs)=0
uint64_t * coeff_poly_
Definition: fpga.h:170
void fill_in_data(const std::vector< Object * > &objs) override
Object_INTT(uint64_t *coeff_poly, const uint64_t *inv_root_of_unity_powers, const uint64_t *precon_inv_root_of_unity_powers, uint64_t coeff_modulus, uint64_t inv_n, uint64_t inv_n_w, uint64_t n, bool fence=false)
Struct Object.
Definition: fpga.h:113
const uint64_t * moduli_
Definition: fpga.h:234
Struct Buffer Structure containing information for the polynomial operations.
Definition: fpga.h:273
DEV_TYPE
enum DEV_TYPE Lists the available device mode: emulation mode, FPGA
Definition: fpga.h:584
static unsigned int g_wid_
Definition: fpga.h:123
uint64_t n_
Definition: fpga.h:229
FPGAObject_NTT & operator=(const FPGAObject_NTT &)=delete
uint64_t rns_modulus_size_
Definition: fpga.h:551
virtual ~Object()=default
uint64_t get_worksize_KeySwitch() const
Definition: fpga.h:304
const uint64_t * precon_root_of_unity_powers_
Definition: fpga.h:144
uint64_t n_
Definition: fpga.h:476
bool ready_
Definition: fpga.h:118
uint64_t * results_
Definition: fpga.h:194
cl_mem mem_KeySwitch_results_
Definition: fpga.h:560
DevicePool(int choice, Buffer &buffer, std::future< bool > &exit_signal, uint64_t coeff_size, uint32_t modulus_size, uint64_t batch_size_dyadic_multiply, uint64_t batch_size_ntt, uint64_t batch_size_intt, uint64_t batch_size_KeySwitch, uint32_t debug)
FPGAObject(const cl_context &context, uint64_t n_batch, kernel_t type=kernel_t::NONE, bool fence=false)
FPGAObject_KeySwitch & operator=(const FPGAObject_KeySwitch &)=delete
uint64_t * coeff_modulus_in_svm_
Definition: fpga.h:473
Struct KeySwitch_invn_t.
Definition: fpga.h:48
Device(const cl_device_id &device, Buffer &buffer, std::shared_future< bool > exit_signal, uint64_t coeff_size, uint32_t modulus_size, uint64_t batch_size_dyadic_multiply, uint64_t batch_size_ntt, uint64_t batch_size_intt, uint64_t batch_size_KeySwitch, uint32_t debug)
uint64_t * operand2_in_svm_
Definition: fpga.h:510
void set_worksize_INTT(uint64_t ws)
Definition: fpga.h:316
uint64_t * ms_output_
Definition: fpga.h:557
Struct FPGAObject_DyadicMultiply Stores the multiplication blob of objects to be transferred to the FP...
Definition: fpga.h:496
void fill_out_data(uint64_t *coeff_poly) override
uint64_t * operand1_in_svm_
Definition: fpga.h:509
FPGAObject_DyadicMultiply & operator=(const FPGAObject_DyadicMultiply &)=delete
uint64_t get_worksize_NTT() const
Definition: fpga.h:302
Struct moduli_info_t.
Definition: fpga.h:30
void fill_in_data(const std::vector< Object * > &objs) override
uint64_t coeff_modulus_
Definition: fpga.h:145
int tag_
Definition: fpga.h:399
uint64_t modulus
Definition: fpga.h:31
Object_DyadicMultiply(uint64_t *results, const uint64_t *operand1, const uint64_t *operand2, uint64_t n, const uint64_t *moduli, uint64_t n_moduli, bool fence=false)
cl_mem results_out_ddr_
Definition: fpga.h:515
uint64_t n_
Definition: fpga.h:548
void fill_in_data(const std::vector< Object * > &objs) override
moduli_info_t * moduli_info_
Definition: fpga.h:511
uint64_t n_
Definition: fpga.h:146
cl_mem k_switch_keys_2_
Definition: fpga.h:576
Struct FPGAObject_KeySwitch Stores the keyswitch blob of objects to be transferred to the FPGA...
Definition: fpga.h:538
uint64_t * inv_root_of_unity_powers_in_svm_
Definition: fpga.h:471
int id_
Definition: fpga.h:119
uint64_t coeff_modulus_
Definition: fpga.h:173
uint64_t get_worksize_INTT() const
Definition: fpga.h:303
uint64_t batch_size_
Definition: fpga.h:401
const uint64_t * modswitch_factors_
Definition: fpga.h:236
uint64_t inv_n_
Definition: fpga.h:174
uint64_t key_modulus_size_
Definition: fpga.h:550
kernel_t type_
Definition: fpga.h:402
uint64_t rns_modulus_size_
Definition: fpga.h:232
void set_worksize_DyadicMultiply(uint64_t ws)
Definition: fpga.h:308
uint64_t n_
Definition: fpga.h:440
Struct Object NTT Stores the Number Theoretic Transform parameters.
Definition: fpga.h:136
uint64_t key_component_count_
Definition: fpga.h:233
uint64_t n_batch_
Definition: fpga.h:400
uint64_t * root_of_unity_powers_in_svm_
Definition: fpga.h:437
const uint64_t * twiddle_factors_
Definition: fpga.h:237
uint64_t * coeff_poly_in_svm_
Definition: fpga.h:470
std::vector< Object * > pop()
Object_KeySwitch(uint64_t *result, const uint64_t *t_target_iter_ptr, uint64_t n, uint64_t decomp_modulus_size, uint64_t key_modulus_size, uint64_t rns_modulus_size, uint64_t key_component_count, const uint64_t *moduli, const uint64_t **k_switch_keys, const uint64_t *modswitch_factors, const uint64_t *twiddle_factors, bool fence=false)
uint64_t inv_n_w_
Definition: fpga.h:175
uint64_t * twiddle_factors_
Definition: fpga.h:556
cl_mem k_switch_keys_1_
Definition: fpga.h:575
static std::atomic< int > g_tag_
Definition: fpga.h:407
uint64_t * coeff_poly_in_svm_
Definition: fpga.h:436
uint64_t * moduli_
Definition: fpga.h:553
void fill_out_data(uint64_t *coeff_poly) override
const uint64_t ** k_switch_keys_
Definition: fpga.h:235
const uint64_t * operand2_
Definition: fpga.h:196
Object * front() const
uint64_t * precon_root_of_unity_powers_in_svm_
Definition: fpga.h:438
Object_NTT(uint64_t *coeff_poly, const uint64_t *root_of_unity_powers, const uint64_t *precon_root_of_unity_powers, uint64_t coeff_modulus, uint64_t n, bool fence=false)
cl_mem operands_in_ddr_
Definition: fpga.h:514
uint64_t ** k_switch_keys_
Definition: fpga.h:554
const cl_context & context_
Definition: fpga.h:398
void push(Object *obj)
Definition: fpga.h:584
uint64_t n_
Definition: fpga.h:176
uint64_t * coeff_poly_
Definition: fpga.h:142
cl_mem k_switch_keys_3_
Definition: fpga.h:577
void fill_out_data(uint64_t *results) override
FPGAObject_INTT & operator=(const FPGAObject_INTT &)=delete
cl_mem mem_t_target_iter_ptr_
Definition: fpga.h:559
const uint64_t * moduli_
Definition: fpga.h:198
uint64_t key_modulus_size_
Definition: fpga.h:231
kernel_t
Definition: fpga.h:97
kernel_t type_
Definition: fpga.h:121
Class DevicePool.
Definition: fpga.h:723
const uint64_t * precon_inv_root_of_unity_powers_
Definition: fpga.h:172
Buffer(uint64_t capacity, uint64_t n_batch_dyadic_multiply, uint64_t n_batch_ntt, uint64_t n_batch_intt, uint64_t n_batch_KeySwitch)
Definition: fpga.h:275
void detach_fpga_pooling()
detach_fpga_pooling Detach a device from this thread
FPGAObject_KeySwitch(const cl_context &context, uint64_t batch_size)
Struct Object INTT Stores the Inverse Number Theoretic Transform parameters.
Definition: fpga.h:163
FPGAObject_DyadicMultiply(const cl_context &context, uint64_t coeff_size, uint32_t modulus_size, uint64_t batch_size)
uint64_t * inv_n_w_in_svm_
Definition: fpga.h:475
void fill_out_data(uint64_t *results) override
Struct FPGAObject_INTT stores the INTT blob of objects to be transferred to the FPGA.
Definition: fpga.h:460
Object * back() const
void set_worksize_NTT(uint64_t ws)
Definition: fpga.h:312
virtual void fill_out_data(uint64_t *results)=0
const uint64_t * root_of_unity_powers_
Definition: fpga.h:143
void fill_in_data(const std::vector< Object * > &objs) override
uint64_t get_worksize_DyadicMultiply() const
Definition: fpga.h:299
__extension__ typedef unsigned __int128 fpga_uint128_t
Definition: fpga.h:23
struct Object_KeySwitch Stores the parameters for the keyswitch
Definition: fpga.h:218
KeySwitch_Kernels
Definition: fpga.h:91
const uint64_t * t_target_iter_ptr_
Definition: fpga.h:228
uint64_t decomp_modulus_size_
Definition: fpga.h:549
Struct KeySwitch_moduli_t.
Definition: fpga.h:40
uint64_t * precon_inv_root_of_unity_powers_in_svm_
Definition: fpga.h:472
Device & operator=(const Device &)=delete
std::vector< Object * > in_objs_
Definition: fpga.h:405
FPGAObject_NTT(const cl_context &context, uint64_t coeff_count, uint64_t batch_size)