8 #include <condition_variable>
14 #include <unordered_map>
17 #include "CL/opencl.h"
66 const uint64_t* root_of_unity_powers,
67 const uint64_t* precon_root_of_unity_powers,
68 uint64_t coeff_modulus, uint64_t n);
93 const uint64_t* inv_root_of_unity_powers,
94 const uint64_t* precon_inv_root_of_unity_powers,
95 uint64_t coeff_modulus, uint64_t inv_n,
96 uint64_t inv_n_w, uint64_t n);
118 const uint64_t* operand2, uint64_t n,
119 const uint64_t* moduli, uint64_t n_moduli);
157 Buffer(uint64_t capacity, uint64_t n_batch_dyadic_multiply,
158 uint64_t n_batch_ntt, uint64_t n_batch_intt)
159 : capacity_(capacity),
160 n_batch_dyadic_multiply_(n_batch_dyadic_multiply),
161 n_batch_ntt_(n_batch_ntt),
162 n_batch_intt_(n_batch_intt),
163 total_worksize_DyadicMultiply_(1),
164 num_DyadicMultiply_(0),
165 total_worksize_NTT_(1),
167 total_worksize_INTT_(1),
172 std::vector<Object*>
pop();
177 return total_worksize_DyadicMultiply_;
183 total_worksize_DyadicMultiply_ = ws;
184 num_DyadicMultiply_ = total_worksize_DyadicMultiply_;
187 total_worksize_NTT_ = ws;
188 num_NTT_ = total_worksize_NTT_;
191 total_worksize_INTT_ = ws;
192 num_INTT_ = total_worksize_INTT_;
196 uint64_t get_worksize_int_DyadicMultiply()
const {
197 return ((num_DyadicMultiply_ > n_batch_dyadic_multiply_)
198 ? n_batch_dyadic_multiply_
199 : num_DyadicMultiply_);
202 uint64_t get_worksize_int_NTT()
const {
203 return ((num_NTT_ > n_batch_ntt_) ? n_batch_ntt_ : num_NTT_);
206 uint64_t get_worksize_int_INTT()
const {
207 return ((num_INTT_ > n_batch_intt_) ? n_batch_intt_ : num_INTT_);
210 void update_work_size(uint64_t ws) { num_DyadicMultiply_ -= ws; }
211 void update_DyadicMultiply_work_size(uint64_t ws) {
212 num_DyadicMultiply_ -= ws;
214 void update_NTT_work_size(uint64_t ws) { num_NTT_ -= ws; }
215 void update_INTT_work_size(uint64_t ws) { num_INTT_ -= ws; }
219 std::condition_variable cond_;
220 std::deque<Object*> buffer_;
221 const uint64_t capacity_;
222 const uint64_t n_batch_dyadic_multiply_;
223 const uint64_t n_batch_ntt_;
224 const uint64_t n_batch_intt_;
226 uint64_t total_worksize_DyadicMultiply_;
227 uint64_t num_DyadicMultiply_;
229 uint64_t total_worksize_NTT_;
232 uint64_t total_worksize_INTT_;
251 FPGAObject(
const cl_context& context, uint64_t n_batch);
253 virtual void fill_in_data(
const std::vector<Object*>& objs) = 0;
283 explicit FPGAObject_NTT(
const cl_context& context, uint64_t coeff_count,
284 uint64_t batch_size);
286 void fill_in_data(
const std::vector<Object*>& objs)
override;
314 explicit FPGAObject_INTT(
const cl_context& context, uint64_t coeff_count,
315 uint64_t batch_size);
317 void fill_in_data(
const std::vector<Object*>& objs)
override;
349 uint32_t modulus_size,
350 uint64_t batch_size);
352 void fill_in_data(
const std::vector<Object*>& objs)
override;
388 std::shared_future<bool> exit_signal, uint64_t coeff_size,
389 uint32_t modulus_size, uint64_t batch_size_dyadic_multiply,
390 uint64_t batch_size_ntt, uint64_t batch_size_intt, uint32_t debug);
399 void process_blocking_api();
400 bool process_input(
int index);
401 bool process_output();
403 bool process_output_dyadic_multiply();
404 bool process_output_NTT();
405 bool process_output_INTT();
407 void enqueue_input_data(
FPGAObject* fpga_obj);
408 void enqueue_input_data_dyadic_multiply(
413 int device_id() {
return id_; }
415 kernel_t get_kernel_type();
416 std::string get_bitstream_name();
418 const cl_device_id& device_;
420 unsigned int credit_;
421 std::shared_future<bool> future_exit_;
423 static int device_id_;
424 kernel_t kernel_type_;
426 std::vector<FPGAObject*> fpgaObjects_;
432 cl_command_queue dyadic_multiply_input_queue_;
433 cl_command_queue dyadic_multiply_output_queue_;
434 cl_kernel dyadic_multiply_input_fifo_kernel_;
435 cl_kernel dyadic_multiply_output_fifo_nb_kernel_;
437 uint64_t* dyadic_multiply_results_out_svm_;
438 int* dyadic_multiply_tag_out_svm_;
439 int* dyadic_multiply_results_out_valid_svm_;
443 cl_command_queue ntt_load_queue_;
444 cl_command_queue ntt_store_queue_;
445 cl_kernel ntt_load_kernel_;
446 cl_kernel ntt_store_kernel_;
448 uint64_t* NTT_coeff_poly_svm_;
451 cl_command_queue intt_INTT_queue_;
452 cl_command_queue intt_load_queue_;
453 cl_command_queue intt_store_queue_;
454 cl_kernel intt_INTT_kernel_;
455 cl_kernel intt_load_kernel_;
456 cl_kernel intt_store_kernel_;
458 uint64_t* INTT_coeff_poly_svm_;
463 static const std::unordered_map<std::string, kernel_t> kernels;
483 uint64_t coeff_size, uint32_t modulus_size,
484 uint64_t batch_size_dyadic_multiply, uint64_t batch_size_ntt,
485 uint64_t batch_size_intt, uint32_t debug);
492 cl_platform_id platform_;
493 cl_uint device_count_;
494 cl_device_id* cl_devices_;
496 std::shared_future<bool> future_exit_;
498 std::vector<std::thread> runners_;
uint64_t n_moduli_
Definition: fpga.h:126
uint64_t n_
Definition: fpga.h:358
uint64_t barr_lo
Definition: fpga.h:33
uint64_t n_
Definition: fpga.h:124
virtual ~FPGAObject()=default
Struct FPGAObject_NTT stores the NTT blob of objects to be transferred to the FPGA.
Definition: fpga.h:282
uint64_t len
Definition: fpga.h:32
const uint64_t * inv_root_of_unity_powers_
Definition: fpga.h:99
Object_DyadicMultiply(uint64_t *results, const uint64_t *operand1, const uint64_t *operand2, uint64_t n, const uint64_t *moduli, uint64_t n_moduli)
Parent Struct FPGAObject stores the blob of objects to be transferred to the FPGA. ...
Definition: fpga.h:250
uint64_t n_moduli_
Definition: fpga.h:359
uint64_t * coeff_modulus_in_svm_
Definition: fpga.h:292
const uint64_t * operand1_
Definition: fpga.h:122
struct Object_DyadicMultiply Stores the parameters for the multiplication
Definition: fpga.h:116
Class Device.
Definition: fpga.h:385
FPGAObject_INTT(const cl_context &context, uint64_t coeff_count, uint64_t batch_size)
FPGAObject(const cl_context &context, uint64_t n_batch)
uint64_t * inv_n_in_svm_
Definition: fpga.h:324
void attach_fpga_pooling()
attach_fpga_pooling Attach a device to this thread
virtual void fill_in_data(const std::vector< Object * > &objs)=0
uint64_t * coeff_poly_
Definition: fpga.h:98
void fill_in_data(const std::vector< Object * > &objs) override
Struct Object.
Definition: fpga.h:44
Struct Buffer Structure containing information for the polynomial operations.
Definition: fpga.h:155
DEV_TYPE
enum DEV_TYPE Lists the available device mode: CPU, emulation mode, FPGA
Definition: fpga.h:367
static unsigned int g_wid_
Definition: fpga.h:51
virtual ~Object()=default
const uint64_t * precon_root_of_unity_powers_
Definition: fpga.h:72
Device(const cl_device_id &device, Buffer &buffer, std::shared_future< bool > exit_signal, uint64_t coeff_size, uint32_t modulus_size, uint64_t batch_size_dyadic_multiply, uint64_t batch_size_ntt, uint64_t batch_size_intt, uint32_t debug)
uint64_t n_
Definition: fpga.h:326
bool ready_
Definition: fpga.h:49
uint64_t * results_
Definition: fpga.h:121
uint64_t * coeff_modulus_in_svm_
Definition: fpga.h:323
Object_INTT(uint64_t *coeff_poly, const uint64_t *inv_root_of_unity_powers, const uint64_t *precon_inv_root_of_unity_powers, uint64_t coeff_modulus, uint64_t inv_n, uint64_t inv_n_w, uint64_t n)
uint64_t * operand2_in_svm_
Definition: fpga.h:356
void set_worksize_INTT(uint64_t ws)
Definition: fpga.h:190
Struct FPGAObject_DyadicMultiply Stores the multiplication blob of objects to be transferred to the FP...
Definition: fpga.h:346
void fill_out_data(uint64_t *coeff_poly) override
uint64_t * operand1_in_svm_
Definition: fpga.h:355
uint64_t get_worksize_NTT() const
Definition: fpga.h:179
Struct moduli_info_t.
Definition: fpga.h:30
uint64_t coeff_modulus_
Definition: fpga.h:73
int tag_
Definition: fpga.h:259
uint64_t modulus
Definition: fpga.h:31
cl_mem results_out_ddr_
Definition: fpga.h:361
void fill_in_data(const std::vector< Object * > &objs) override
moduli_info_t * moduli_info_
Definition: fpga.h:357
uint64_t n_
Definition: fpga.h:74
uint64_t * inv_root_of_unity_powers_in_svm_
Definition: fpga.h:321
int id_
Definition: fpga.h:50
uint64_t coeff_modulus_
Definition: fpga.h:101
uint64_t get_worksize_INTT() const
Definition: fpga.h:180
uint64_t inv_n_
Definition: fpga.h:102
~FPGAObject_DyadicMultiply()
void set_worksize_DyadicMultiply(uint64_t ws)
Definition: fpga.h:182
uint64_t n_
Definition: fpga.h:293
Struct Object NTT Stores the Number Theoretic Transform parameters.
Definition: fpga.h:64
uint64_t n_batch_
Definition: fpga.h:260
uint64_t * root_of_unity_powers_in_svm_
Definition: fpga.h:290
uint64_t * coeff_poly_in_svm_
Definition: fpga.h:320
std::vector< Object * > pop()
uint64_t inv_n_w_
Definition: fpga.h:103
static std::atomic< int > g_tag_
Definition: fpga.h:264
uint64_t * coeff_poly_in_svm_
Definition: fpga.h:289
void fill_out_data(uint64_t *coeff_poly) override
const uint64_t * operand2_
Definition: fpga.h:123
Object_NTT(uint64_t *coeff_poly, const uint64_t *root_of_unity_powers, const uint64_t *precon_root_of_unity_powers, uint64_t coeff_modulus, uint64_t n)
uint64_t * precon_root_of_unity_powers_in_svm_
Definition: fpga.h:291
cl_mem operands_in_ddr_
Definition: fpga.h:360
const cl_context & context_
Definition: fpga.h:258
uint64_t n_
Definition: fpga.h:104
uint64_t * coeff_poly_
Definition: fpga.h:70
void fill_out_data(uint64_t *results) override
const uint64_t * moduli_
Definition: fpga.h:125
DevicePool(int choice, Buffer &buffer, std::future< bool > &exit_signal, uint64_t coeff_size, uint32_t modulus_size, uint64_t batch_size_dyadic_multiply, uint64_t batch_size_ntt, uint64_t batch_size_intt, uint32_t debug)
Class DevicePool.
Definition: fpga.h:480
const uint64_t * precon_inv_root_of_unity_powers_
Definition: fpga.h:100
void detach_fpga_pooling()
detach_fpga_pooling Detach a device from this thread
Struct Object INTT Stores the Inverse Number Theoretic Transform parameters.
Definition: fpga.h:91
FPGAObject_DyadicMultiply(const cl_context &context, uint64_t coeff_size, uint32_t modulus_size, uint64_t batch_size)
uint64_t * inv_n_w_in_svm_
Definition: fpga.h:325
Struct FPGAObject_INTT stores the INTT blob of objects to be transferred to the FPGA.
Definition: fpga.h:313
void set_worksize_NTT(uint64_t ws)
Definition: fpga.h:186
virtual void fill_out_data(uint64_t *results)=0
const uint64_t * root_of_unity_powers_
Definition: fpga.h:71
void fill_in_data(const std::vector< Object * > &objs) override
Buffer(uint64_t capacity, uint64_t n_batch_dyadic_multiply, uint64_t n_batch_ntt, uint64_t n_batch_intt)
Definition: fpga.h:157
uint64_t get_worksize_DyadicMultiply() const
Definition: fpga.h:176
__extension__ typedef unsigned __int128 fpga_uint128_t
Definition: fpga.h:23
uint64_t * precon_inv_root_of_unity_powers_in_svm_
Definition: fpga.h:322
std::vector< Object * > in_objs_
Definition: fpga.h:262
FPGAObject_NTT(const cl_context &context, uint64_t coeff_count, uint64_t batch_size)