8 #include <condition_variable>
14 #include <unordered_map>
17 #include "CL/opencl.h"
84 uint64_t NOT_USED : 40;
/// Expands to a mask with the low BITS bits set, e.g. BIT_MASK(4) == 0xF.
/// The argument is wrapped in parentheses so that compound expressions
/// (for example `a | b` or `c ? x : y`) are shifted as a whole instead of
/// being split by operator precedence.
/// BITS must be smaller than the width of `unsigned long`; shifting by the
/// full width or more is undefined behavior.
#define BIT_MASK(BITS) ((1UL << (BITS)) - 1)
/// NOTE(review): presumably the upper bound on the number of RNS moduli
/// handled at once - confirm against the users of this constant.
#define MAX_RNS_MODULUS_SIZE 7
138 const uint64_t* root_of_unity_powers,
139 const uint64_t* precon_root_of_unity_powers,
140 uint64_t coeff_modulus, uint64_t n,
bool fence =
false);
165 const uint64_t* inv_root_of_unity_powers,
166 const uint64_t* precon_inv_root_of_unity_powers,
167 uint64_t coeff_modulus, uint64_t inv_n,
168 uint64_t inv_n_w, uint64_t n,
bool fence =
false);
190 const uint64_t* operand2, uint64_t n,
191 const uint64_t* moduli, uint64_t n_moduli,
220 uint64_t* result,
const uint64_t* t_target_iter_ptr, uint64_t n,
221 uint64_t decomp_modulus_size, uint64_t key_modulus_size,
222 uint64_t rns_modulus_size, uint64_t key_component_count,
223 const uint64_t* moduli,
const uint64_t** k_switch_keys,
224 const uint64_t* modswitch_factors,
const uint64_t* twiddle_factors,
275 Buffer(uint64_t capacity, uint64_t n_batch_dyadic_multiply,
276 uint64_t n_batch_ntt, uint64_t n_batch_intt,
277 uint64_t n_batch_KeySwitch)
278 : capacity_(capacity),
279 n_batch_dyadic_multiply_(n_batch_dyadic_multiply),
280 n_batch_ntt_(n_batch_ntt),
281 n_batch_intt_(n_batch_intt),
282 n_batch_KeySwitch_(n_batch_KeySwitch),
283 total_worksize_DyadicMultiply_(1),
284 num_DyadicMultiply_(0),
285 total_worksize_NTT_(1),
287 total_worksize_INTT_(1),
289 total_worksize_KeySwitch_(1),
295 std::vector<Object*>
pop();
300 return total_worksize_DyadicMultiply_;
305 return total_worksize_KeySwitch_;
309 total_worksize_DyadicMultiply_ = ws;
310 num_DyadicMultiply_ = total_worksize_DyadicMultiply_;
313 total_worksize_NTT_ = ws;
314 num_NTT_ = total_worksize_NTT_;
317 total_worksize_INTT_ = ws;
318 num_INTT_ = total_worksize_INTT_;
321 total_worksize_KeySwitch_ = ws;
322 num_KeySwitch_ = total_worksize_KeySwitch_;
326 uint64_t get_worksize_int_DyadicMultiply()
const {
327 return ((num_DyadicMultiply_ > n_batch_dyadic_multiply_)
328 ? n_batch_dyadic_multiply_
329 : num_DyadicMultiply_);
332 uint64_t get_worksize_int_NTT()
const {
333 return ((num_NTT_ > n_batch_ntt_) ? n_batch_ntt_ : num_NTT_);
336 uint64_t get_worksize_int_INTT()
const {
337 return ((num_INTT_ > n_batch_intt_) ? n_batch_intt_ : num_INTT_);
340 uint64_t get_worksize_int_KeySwitch()
const {
341 return ((num_KeySwitch_ > n_batch_KeySwitch_) ? n_batch_KeySwitch_
345 void update_DyadicMultiply_work_size(uint64_t ws) {
346 num_DyadicMultiply_ -= ws;
348 void update_NTT_work_size(uint64_t ws) { num_NTT_ -= ws; }
349 void update_INTT_work_size(uint64_t ws) { num_INTT_ -= ws; }
350 void update_KeySwitch_work_size(uint64_t ws) { num_KeySwitch_ -= ws; }
354 std::condition_variable cond_;
355 std::deque<Object*> buffer_;
356 const uint64_t capacity_;
357 const uint64_t n_batch_dyadic_multiply_;
358 const uint64_t n_batch_ntt_;
359 const uint64_t n_batch_intt_;
360 const uint64_t n_batch_KeySwitch_;
362 uint64_t total_worksize_DyadicMultiply_;
363 uint64_t num_DyadicMultiply_;
365 uint64_t total_worksize_NTT_;
368 uint64_t total_worksize_INTT_;
371 uint64_t total_worksize_KeySwitch_;
372 uint64_t num_KeySwitch_;
390 FPGAObject(
const cl_context& context, uint64_t n_batch,
393 virtual void fill_in_data(
const std::vector<Object*>& objs) = 0;
426 explicit FPGAObject_NTT(
const cl_context& context, uint64_t coeff_count,
427 uint64_t batch_size);
433 void fill_in_data(
const std::vector<Object*>& objs)
override;
461 explicit FPGAObject_INTT(
const cl_context& context, uint64_t coeff_count,
462 uint64_t batch_size);
467 void fill_in_data(
const std::vector<Object*>& objs)
override;
499 uint32_t modulus_size,
500 uint64_t batch_size);
506 void fill_in_data(
const std::vector<Object*>& objs)
override;
540 uint64_t batch_size);
545 void fill_in_data(
const std::vector<Object*>& objs)
override;
564 MAX_KEY_MODULUS_SIZE = 7,
565 MAX_KEY_COMPONENT_SIZE = 2,
566 MAX_COEFF_COUNT = 16384
572 cl_mem k3 =
nullptr);
606 std::shared_future<bool> exit_signal, uint64_t coeff_size,
607 uint32_t modulus_size, uint64_t batch_size_dyadic_multiply,
608 uint64_t batch_size_ntt, uint64_t batch_size_intt,
609 uint64_t batch_size_KeySwitch, uint32_t debug);
618 void process_blocking_api();
619 bool process_input(
int index);
620 bool process_output();
622 bool process_output_dyadic_multiply();
623 bool process_output_NTT();
624 bool process_output_INTT();
625 bool process_output_KeySwitch();
627 void enqueue_input_data(
FPGAObject* fpga_obj);
628 void enqueue_input_data_dyadic_multiply(
634 int device_id() {
return id_; }
641 void KeySwitch_read_output();
643 uint64_t precompute_modulus_k(uint64_t modulus);
646 std::string get_bitstream_name();
648 const cl_device_id& device_;
650 unsigned int credit_;
651 std::shared_future<bool> future_exit_;
653 static int device_id_;
656 std::vector<FPGAObject*> fpgaObjects_;
662 cl_command_queue dyadic_multiply_input_queue_;
663 cl_command_queue dyadic_multiply_output_queue_;
664 cl_kernel dyadic_multiply_input_fifo_kernel_;
665 cl_kernel dyadic_multiply_output_fifo_nb_kernel_;
667 uint64_t* dyadic_multiply_results_out_svm_;
668 int* dyadic_multiply_tag_out_svm_;
669 int* dyadic_multiply_results_out_valid_svm_;
673 cl_command_queue ntt_load_queue_;
674 cl_command_queue ntt_store_queue_;
675 cl_kernel ntt_load_kernel_;
676 cl_kernel ntt_store_kernel_;
678 uint64_t* NTT_coeff_poly_svm_;
681 cl_command_queue intt_load_queue_;
682 cl_command_queue intt_store_queue_;
683 cl_kernel intt_load_kernel_;
684 cl_kernel intt_store_kernel_;
686 uint64_t* INTT_coeff_poly_svm_;
690 cl_mem KeySwitch_mem_root_of_unity_powers_;
693 bool KeySwitch_load_once_;
694 uint64_t* root_of_unity_powers_ptr_;
697 uint64_t KeySwitch_id_;
698 cl_event KeySwitch_events_write_[2][128];
699 cl_event KeySwitch_events_enqueue_[2][2];
700 std::unordered_map<uint64_t**, KeySwitchMemKeys*> keys_map_;
701 std::unordered_map<uint64_t**, KeySwitchMemKeys*>::iterator keys_map_iter_;
704 static const std::unordered_map<std::string, kernel_t> kernels;
726 uint64_t coeff_size, uint32_t modulus_size,
727 uint64_t batch_size_dyadic_multiply, uint64_t batch_size_ntt,
728 uint64_t batch_size_intt, uint64_t batch_size_KeySwitch,
736 cl_platform_id platform_;
737 cl_uint device_count_;
738 cl_device_id* cl_devices_;
740 std::shared_future<bool> future_exit_;
742 std::vector<std::thread> runners_;
uint64_t n_moduli_
Definition: fpga.h:199
uint64_t n_
Definition: fpga.h:512
bool fence_
Definition: fpga.h:122
uint64_t barr_lo
Definition: fpga.h:33
bool fence_
Definition: fpga.h:403
uint64_t n_
Definition: fpga.h:197
virtual ~FPGAObject()=default
void set_worksize_KeySwitch(uint64_t ws)
Definition: fpga.h:320
Struct FPGAObject_NTT stores the NTT blob of objects to be transferred to the FPGA.
Definition: fpga.h:425
KeySwitchMemKeys(cl_mem k1=nullptr, cl_mem k2=nullptr, cl_mem k3=nullptr)
uint64_t len
Definition: fpga.h:32
const uint64_t * inv_root_of_unity_powers_
Definition: fpga.h:171
Object(kernel_t type=kernel_t::NONE, bool fence=false)
Parent Struct FPGAObject stores the blob of objects to be transferred to the FPGA. ...
Definition: fpga.h:389
uint64_t n_moduli_
Definition: fpga.h:513
uint64_t * modswitch_factors_
Definition: fpga.h:555
uint64_t * coeff_modulus_in_svm_
Definition: fpga.h:439
const uint64_t * operand1_
Definition: fpga.h:195
struct Object_DyadicMultiply Stores the parameters for the multiplication
Definition: fpga.h:188
Class Device.
Definition: fpga.h:603
FPGAObject_INTT(const cl_context &context, uint64_t coeff_count, uint64_t batch_size)
uint64_t decomp_modulus_size_
Definition: fpga.h:230
uint64_t * inv_n_in_svm_
Definition: fpga.h:474
void attach_fpga_pooling()
attach_fpga_pooling Attach a device to this thread
uint64_t * result_
Definition: fpga.h:227
uint64_t key_component_count_
Definition: fpga.h:552
Struct DyadmultKeys1_t.
Definition: fpga.h:55
virtual void fill_in_data(const std::vector< Object * > &objs)=0
uint64_t * coeff_poly_
Definition: fpga.h:170
void fill_in_data(const std::vector< Object * > &objs) override
Object_INTT(uint64_t *coeff_poly, const uint64_t *inv_root_of_unity_powers, const uint64_t *precon_inv_root_of_unity_powers, uint64_t coeff_modulus, uint64_t inv_n, uint64_t inv_n_w, uint64_t n, bool fence=false)
Struct Object.
Definition: fpga.h:113
const uint64_t * moduli_
Definition: fpga.h:234
Struct Buffer Structure containing information for the polynomial operations.
Definition: fpga.h:273
DEV_TYPE
enum DEV_TYPE Lists the available device mode: emulation mode, FPGA
Definition: fpga.h:584
static unsigned int g_wid_
Definition: fpga.h:123
uint64_t n_
Definition: fpga.h:229
FPGAObject_NTT & operator=(const FPGAObject_NTT &)=delete
uint64_t rns_modulus_size_
Definition: fpga.h:551
virtual ~Object()=default
uint64_t get_worksize_KeySwitch() const
Definition: fpga.h:304
const uint64_t * precon_root_of_unity_powers_
Definition: fpga.h:144
uint64_t n_
Definition: fpga.h:476
bool ready_
Definition: fpga.h:118
uint64_t * results_
Definition: fpga.h:194
cl_mem mem_KeySwitch_results_
Definition: fpga.h:560
DevicePool(int choice, Buffer &buffer, std::future< bool > &exit_signal, uint64_t coeff_size, uint32_t modulus_size, uint64_t batch_size_dyadic_multiply, uint64_t batch_size_ntt, uint64_t batch_size_intt, uint64_t batch_size_KeySwitch, uint32_t debug)
FPGAObject(const cl_context &context, uint64_t n_batch, kernel_t type=kernel_t::NONE, bool fence=false)
FPGAObject_KeySwitch & operator=(const FPGAObject_KeySwitch &)=delete
uint64_t * coeff_modulus_in_svm_
Definition: fpga.h:473
Struct KeySwitch_invn_t.
Definition: fpga.h:48
Device(const cl_device_id &device, Buffer &buffer, std::shared_future< bool > exit_signal, uint64_t coeff_size, uint32_t modulus_size, uint64_t batch_size_dyadic_multiply, uint64_t batch_size_ntt, uint64_t batch_size_intt, uint64_t batch_size_KeySwitch, uint32_t debug)
uint64_t * operand2_in_svm_
Definition: fpga.h:510
void set_worksize_INTT(uint64_t ws)
Definition: fpga.h:316
uint64_t * ms_output_
Definition: fpga.h:557
Struct FPGAObject_DyadicMultiply Stores the multiplication blob of objects to be transferred to the FP...
Definition: fpga.h:496
void fill_out_data(uint64_t *coeff_poly) override
uint64_t * operand1_in_svm_
Definition: fpga.h:509
FPGAObject_DyadicMultiply & operator=(const FPGAObject_DyadicMultiply &)=delete
uint64_t get_worksize_NTT() const
Definition: fpga.h:302
Struct moduli_info_t.
Definition: fpga.h:30
void fill_in_data(const std::vector< Object * > &objs) override
uint64_t coeff_modulus_
Definition: fpga.h:145
int tag_
Definition: fpga.h:399
uint64_t modulus
Definition: fpga.h:31
Object_DyadicMultiply(uint64_t *results, const uint64_t *operand1, const uint64_t *operand2, uint64_t n, const uint64_t *moduli, uint64_t n_moduli, bool fence=false)
cl_mem results_out_ddr_
Definition: fpga.h:515
uint64_t n_
Definition: fpga.h:548
void fill_in_data(const std::vector< Object * > &objs) override
moduli_info_t * moduli_info_
Definition: fpga.h:511
uint64_t n_
Definition: fpga.h:146
cl_mem k_switch_keys_2_
Definition: fpga.h:576
Struct FPGAObject_KeySwitch Stores the keyswitch blob of objects to be transferred to the FPGA...
Definition: fpga.h:538
uint64_t * inv_root_of_unity_powers_in_svm_
Definition: fpga.h:471
int id_
Definition: fpga.h:119
uint64_t coeff_modulus_
Definition: fpga.h:173
uint64_t get_worksize_INTT() const
Definition: fpga.h:303
uint64_t batch_size_
Definition: fpga.h:401
const uint64_t * modswitch_factors_
Definition: fpga.h:236
uint64_t inv_n_
Definition: fpga.h:174
uint64_t key_modulus_size_
Definition: fpga.h:550
~FPGAObject_DyadicMultiply()
kernel_t type_
Definition: fpga.h:402
uint64_t rns_modulus_size_
Definition: fpga.h:232
void set_worksize_DyadicMultiply(uint64_t ws)
Definition: fpga.h:308
uint64_t n_
Definition: fpga.h:440
Struct Object NTT Stores the Number Theoretic Transform parameters.
Definition: fpga.h:136
uint64_t key_component_count_
Definition: fpga.h:233
uint64_t n_batch_
Definition: fpga.h:400
uint64_t * root_of_unity_powers_in_svm_
Definition: fpga.h:437
const uint64_t * twiddle_factors_
Definition: fpga.h:237
uint64_t * coeff_poly_in_svm_
Definition: fpga.h:470
std::vector< Object * > pop()
Object_KeySwitch(uint64_t *result, const uint64_t *t_target_iter_ptr, uint64_t n, uint64_t decomp_modulus_size, uint64_t key_modulus_size, uint64_t rns_modulus_size, uint64_t key_component_count, const uint64_t *moduli, const uint64_t **k_switch_keys, const uint64_t *modswitch_factors, const uint64_t *twiddle_factors, bool fence=false)
uint64_t inv_n_w_
Definition: fpga.h:175
uint64_t * twiddle_factors_
Definition: fpga.h:556
cl_mem k_switch_keys_1_
Definition: fpga.h:575
static std::atomic< int > g_tag_
Definition: fpga.h:407
uint64_t * coeff_poly_in_svm_
Definition: fpga.h:436
uint64_t * moduli_
Definition: fpga.h:553
void fill_out_data(uint64_t *coeff_poly) override
const uint64_t ** k_switch_keys_
Definition: fpga.h:235
const uint64_t * operand2_
Definition: fpga.h:196
uint64_t * precon_root_of_unity_powers_in_svm_
Definition: fpga.h:438
Object_NTT(uint64_t *coeff_poly, const uint64_t *root_of_unity_powers, const uint64_t *precon_root_of_unity_powers, uint64_t coeff_modulus, uint64_t n, bool fence=false)
cl_mem operands_in_ddr_
Definition: fpga.h:514
uint64_t ** k_switch_keys_
Definition: fpga.h:554
const cl_context & context_
Definition: fpga.h:398
uint64_t n_
Definition: fpga.h:176
uint64_t * coeff_poly_
Definition: fpga.h:142
cl_mem k_switch_keys_3_
Definition: fpga.h:577
void fill_out_data(uint64_t *results) override
FPGAObject_INTT & operator=(const FPGAObject_INTT &)=delete
cl_mem mem_t_target_iter_ptr_
Definition: fpga.h:559
const uint64_t * moduli_
Definition: fpga.h:198
uint64_t key_modulus_size_
Definition: fpga.h:231
kernel_t
Definition: fpga.h:97
kernel_t type_
Definition: fpga.h:121
Class DevicePool.
Definition: fpga.h:723
const uint64_t * precon_inv_root_of_unity_powers_
Definition: fpga.h:172
Buffer(uint64_t capacity, uint64_t n_batch_dyadic_multiply, uint64_t n_batch_ntt, uint64_t n_batch_intt, uint64_t n_batch_KeySwitch)
Definition: fpga.h:275
void detach_fpga_pooling()
detach_fpga_pooling Detach a device from this thread
FPGAObject_KeySwitch(const cl_context &context, uint64_t batch_size)
Struct Object INTT Stores the Inverse Number Theoretic Transform parameters.
Definition: fpga.h:163
FPGAObject_DyadicMultiply(const cl_context &context, uint64_t coeff_size, uint32_t modulus_size, uint64_t batch_size)
uint64_t * inv_n_w_in_svm_
Definition: fpga.h:475
void fill_out_data(uint64_t *results) override
Struct FPGAObject_INTT stores the INTT blob of objects to be transferred to the FPGA.
Definition: fpga.h:460
void set_worksize_NTT(uint64_t ws)
Definition: fpga.h:312
virtual void fill_out_data(uint64_t *results)=0
const uint64_t * root_of_unity_powers_
Definition: fpga.h:143
void fill_in_data(const std::vector< Object * > &objs) override
uint64_t get_worksize_DyadicMultiply() const
Definition: fpga.h:299
__extension__ typedef unsigned __int128 fpga_uint128_t
Definition: fpga.h:23
struct Object_KeySwitch Stores the parameters for the keyswitch
Definition: fpga.h:218
KeySwitch_Kernels
Definition: fpga.h:91
const uint64_t * t_target_iter_ptr_
Definition: fpga.h:228
uint64_t decomp_modulus_size_
Definition: fpga.h:549
Struct KeySwitch_moduli_t.
Definition: fpga.h:40
uint64_t * precon_inv_root_of_unity_powers_in_svm_
Definition: fpga.h:472
Device & operator=(const Device &)=delete
std::vector< Object * > in_objs_
Definition: fpga.h:405
FPGAObject_NTT(const cl_context &context, uint64_t coeff_count, uint64_t batch_size)