YASK
Yet Another Stencil Kit: a software framework for creating HPC stencil code. Copyright 2014-2023 Intel Corporation.
|
Stencil solution as defined by the generated code from the YASK stencil compiler. More...
#include <yk_solution_api.hpp>
Public Types | |
typedef std::function< void(yk_solution &)> | hook_fn_t |
[Advanced] Callback type with yk_solution parameter. | |
typedef std::function< void(yk_solution &soln, idx_t first_step_index, idx_t last_step_index)> | hook_fn_2idx_t |
[Advanced] Callback type with yk_solution and step-index parameters. | |
Public Member Functions | |
virtual const std::string & | get_name () const =0 |
Get the name of the solution. | |
virtual const std::string & | get_description () const =0 |
Get the description (long name) of the solution. | |
virtual std::string | get_target () const =0 |
Get the target ISA. | |
virtual bool | is_offloaded () const =0 |
Get whether the stencil kernel will be offloaded to a device. | |
virtual int | get_element_bytes () const =0 |
Get the floating-point precision size. | |
virtual std::string | get_step_dim_name () const =0 |
Get the solution step dimension. | |
virtual int | get_num_domain_dims () const =0 |
Get the number of domain dimensions used in this solution. | |
virtual string_vec | get_domain_dim_names () const =0 |
Get all the domain dimension names. | |
virtual string_vec | get_misc_dim_names () const =0 |
Get all the miscellaneous dimension names. | |
virtual void | set_rank_domain_size (const std::string &dim, idx_t size)=0 |
Set the local-domain size in the specified dimension, i.e., the size of the part of the domain that is in this rank. | |
virtual void | set_rank_domain_size_vec (const idx_t_vec &vals)=0 |
Set the local-domain size in all domain dimensions. | |
virtual void | set_rank_domain_size_vec (const idx_t_init_list &vals)=0 |
Set the local-domain size in all domain dimensions. | |
virtual idx_t | get_rank_domain_size (const std::string &dim) const =0 |
Get the local-domain size in the specified dimension, i.e., the size in this rank. | |
virtual idx_t_vec | get_rank_domain_size_vec () const =0 |
Get the local-domain size in all domain dimensions. | |
virtual void | set_overall_domain_size (const std::string &dim, idx_t size)=0 |
Set the global-domain size in the specified dimension, i.e., the total size across all MPI ranks. | |
virtual void | set_overall_domain_size_vec (const idx_t_vec &vals)=0 |
Set the global-domain size in all domain dimensions. | |
virtual void | set_overall_domain_size_vec (const idx_t_init_list &vals)=0 |
Set the global-domain size in all domain dimensions. | |
virtual idx_t | get_overall_domain_size (const std::string &dim) const =0 |
Get the global-domain size in the specified dimension, i.e., the total size across all MPI ranks. | |
virtual idx_t_vec | get_overall_domain_size_vec () const =0 |
Get the global-domain size in all domain dimensions. | |
virtual void | set_block_size (const std::string &dim, idx_t size)=0 |
Set the block size in the given dimension. | |
virtual void | set_block_size_vec (const idx_t_vec &vals)=0 |
Set the block size in all domain dimensions. | |
virtual void | set_block_size_vec (const idx_t_init_list &vals)=0 |
Set the block size in all domain dimensions. | |
virtual idx_t | get_block_size (const std::string &dim) const =0 |
Get the block size. | |
virtual idx_t_vec | get_block_size_vec () const =0 |
Get the block size in all domain dimensions. | |
virtual void | set_num_ranks (const std::string &dim, idx_t num)=0 |
Set the number of MPI ranks in the given dimension. | |
virtual void | set_num_ranks_vec (const idx_t_vec &vals)=0 |
Set the number of MPI ranks in all domain dimensions. | |
virtual void | set_num_ranks_vec (const idx_t_init_list &vals)=0 |
Set the number of MPI ranks in all domain dimensions. | |
virtual idx_t | get_num_ranks (const std::string &dim) const =0 |
Get the number of MPI ranks in the given dimension. | |
virtual idx_t_vec | get_num_ranks_vec () const =0 |
Get the number of MPI ranks in all domain dimensions. | |
virtual void | set_rank_index (const std::string &dim, idx_t num)=0 |
Set the rank index in the specified dimension. | |
virtual void | set_rank_index_vec (const idx_t_vec &vals)=0 |
Set the rank index in all domain dimensions. | |
virtual void | set_rank_index_vec (const idx_t_init_list &vals)=0 |
Set the rank index in all domain dimensions. | |
virtual idx_t | get_rank_index (const std::string &dim) const =0 |
Get the rank index in the specified dimension. | |
virtual idx_t_vec | get_rank_index_vec () const =0 |
Get the rank index in all domain dimensions. | |
virtual int | get_num_outer_threads () const =0 |
Get the number of outer OpenMP threads. | |
virtual int | get_num_inner_threads () const =0 |
Get the number of inner (nested) OpenMP threads. | |
virtual std::string | apply_command_line_options (const std::string &args)=0 |
Set kernel options from a string. | |
virtual std::string | apply_command_line_options (int argc, char *argv[])=0 |
Set kernel options from standard C or C++ argc and argv parameters to main() . | |
virtual std::string | apply_command_line_options (const string_vec &args)=0 |
Set kernel options from a vector of strings. | |
virtual std::string | get_command_line_help ()=0 |
Return a help-string for the command-line options. | |
virtual std::string | get_command_line_values ()=0 |
Return a description of the current settings of the command-line options. | |
virtual int | get_num_vars () const =0 |
Get the number of vars in the solution. | |
virtual yk_var_ptr | get_var (const std::string &name)=0 |
Get the specified var. | |
virtual std::vector< yk_var_ptr > | get_vars ()=0 |
Get all the vars. | |
virtual void | prepare_solution ()=0 |
Prepare the solution for stencil application. | |
virtual idx_t | get_first_rank_domain_index (const std::string &dim) const =0 |
Get the first index of the sub-domain in this rank in the specified dimension. | |
virtual idx_t_vec | get_first_rank_domain_index_vec () const =0 |
Get the first index of the sub-domain in this rank in all domain dimensions. | |
virtual idx_t | get_last_rank_domain_index (const std::string &dim) const =0 |
Get the last index of the sub-domain in this rank in the specified dimension. | |
virtual idx_t_vec | get_last_rank_domain_index_vec () const =0 |
Get the last index of the sub-domain in this rank in all domain dimensions. | |
virtual void | run_solution (idx_t first_step_index, idx_t last_step_index)=0 |
Run the stencil solution for the specified steps. | |
virtual void | run_solution (idx_t step_index)=0 |
Run the stencil solution for the specified step. | |
virtual void | copy_vars_to_device () const =0 |
Update data on the device. | |
virtual void | copy_vars_from_device () const =0 |
Update data on the host. | |
virtual void | exchange_halos ()=0 |
Force a halo exchange now. | |
virtual void | end_solution ()=0 |
Finish using a solution. | |
virtual yk_stats_ptr | get_stats ()=0 |
Get performance statistics associated with preceding calls to run_solution(). | |
virtual void | clear_stats ()=0 |
Clear the internal stats. | |
virtual void | reset_auto_tuner (bool enable, bool verbose=false)=0 |
Start or stop the online auto-tuner on this rank. | |
virtual bool | is_auto_tuner_enabled () const =0 |
Determine whether the online auto-tuner is enabled on this rank. | |
virtual void | run_auto_tuner_now (bool verbose=true)=0 |
Run the offline auto-tuner immediately, not preserving variable data. | |
virtual void | set_min_pad_size (const std::string &dim, idx_t size)=0 |
[Advanced] Set the minimum amount of padding for all vars. | |
virtual idx_t | get_min_pad_size (const std::string &dim) const =0 |
[Advanced] Get the minimum requested amount of padding for all vars. | |
virtual yk_var_ptr | new_var (const std::string &name, const string_vec &dims)=0 |
[Advanced] Add a new var to the solution. | |
virtual yk_var_ptr | new_var (const std::string &name, const std::initializer_list< std::string > &dims)=0 |
[Advanced] Add a new var to the solution. | |
virtual yk_var_ptr | new_fixed_size_var (const std::string &name, const string_vec &dims, const idx_t_vec &dim_sizes)=0 |
[Advanced] Add a new var to the solution with a specified size. | |
virtual yk_var_ptr | new_fixed_size_var (const std::string &name, const std::initializer_list< std::string > &dims, const idx_t_init_list &dim_sizes)=0 |
[Advanced] Add a new var to the solution with a specified size. | |
virtual bool | set_default_numa_preferred (int numa_node)=0 |
[Advanced] Set the default preferred NUMA node on which to allocate data. | |
virtual int | get_default_numa_preferred () const =0 |
[Advanced] Get the default preferred NUMA node on which to allocate data. | |
virtual void | call_before_prepare_solution (hook_fn_t hook_fn)=0 |
[Advanced] Register a function to be called at the beginning of yk_solution::prepare_solution(). | |
virtual void | call_after_prepare_solution (hook_fn_t hook_fn)=0 |
[Advanced] Register a hook function to be called at the end of yk_solution::prepare_solution(). | |
virtual void | call_before_run_solution (hook_fn_2idx_t hook_fn)=0 |
[Advanced] Register a hook function to be called at the beginning of yk_solution::run_solution(). | |
virtual void | call_after_run_solution (hook_fn_2idx_t hook_fn)=0 |
[Advanced] Register a hook function to be called at the end of yk_solution::run_solution(). | |
virtual void | fuse_vars (yk_solution_ptr source)=0 |
[Advanced] Merge YASK variables with another solution. | |
virtual void | set_step_wrap (bool do_wrap)=0 |
[Advanced] Set whether invalid step indices alias to valid ones. | |
virtual bool | get_step_wrap () const =0 |
[Advanced] Get whether invalid step indices alias to valid ones. | |
virtual YASK_DEPRECATED void | set_debug_output (yask_output_ptr debug)=0 |
[Deprecated] Use yk_env::set_debug_output(). | |
YASK_DEPRECATED int | get_num_grids () const |
[Deprecated] Use get_num_vars(). | |
YASK_DEPRECATED yk_var_ptr | get_grid (const std::string &name) |
[Deprecated] Use get_var(). | |
YASK_DEPRECATED std::vector< yk_var_ptr > | get_grids () |
[Deprecated] Use get_vars(). | |
YASK_DEPRECATED yk_var_ptr | new_grid (const std::string &name, const string_vec &dims) |
[Deprecated] Use new_var(). | |
YASK_DEPRECATED yk_var_ptr | new_grid (const std::string &name, const std::initializer_list< std::string > &dims) |
[Deprecated] Use new_var(). | |
YASK_DEPRECATED yk_var_ptr | new_fixed_size_grid (const std::string &name, const string_vec &dims, const idx_t_vec &dim_sizes) |
[Deprecated] Use new_fixed_size_var(). | |
YASK_DEPRECATED yk_var_ptr | new_fixed_size_grid (const std::string &name, const std::initializer_list< std::string > &dims, const idx_t_vec &dim_sizes) |
[Deprecated] Use new_fixed_size_var(). | |
YASK_DEPRECATED void | fuse_grids (yk_solution_ptr source) |
[Deprecated] Use fuse_vars(). | |
Stencil solution as defined by the generated code from the YASK stencil compiler.
Objects of this type contain all the vars and equations that comprise a solution.
Created via yk_factory::new_solution().
|
pure virtual |
Get the name of the solution.
|
pure virtual |
Get the description (long name) of the solution.
|
pure virtual |
Get the target ISA.
|
pure virtual |
Get whether the stencil kernel will be offloaded to a device.
|
pure virtual |
Get the floating-point precision size.
|
pure virtual |
Get the solution step dimension.
|
pure virtual |
Get the number of domain dimensions used in this solution.
The domain dimensions are those over which the stencil is applied in each step. Does not include the step dimension or any miscellaneous dimensions.
|
pure virtual |
Get all the domain dimension names.
|
pure virtual |
Get all the miscellaneous dimension names.
|
pure virtual |
Set the local-domain size in the specified dimension, i.e., the size of the part of the domain that is in this rank.
The domain defines the number of elements that will be evaluated with the stencil(s). If MPI is not enabled, this is equivalent to the global-domain size. If MPI is enabled, this is the domain size for the current rank only, and the global-domain size is the sum of all local-domain sizes in each dimension. The local-domain size in each rank does not have to be the same, but all local-domains in the same column of ranks must have the same width, all local-domains in the same row must have the same height, and so forth, for each domain dimension. The local-domain size does not include the halo area or any padding. For best performance, set the local-domain size to a multiple of the number of elements in a vector in each dimension.
You should set either the local-domain size or the global-domain size in each dimension; the other should be set to zero (unspecified). The unspecified (zero) sizes will be calculated based on the specified ones when prepare_solution() is called.
See the "Detailed Description" for yk_var for more information on var sizes.
[in] | dim | Name of dimension to set. Must be one of the names from get_domain_dim_names(). |
[in] | size | Elements in the domain in this dim . |
|
pure virtual |
Set the local-domain size in all domain dimensions.
[in] | vals | Elements in all domain dims. |
|
pure virtual |
Set the local-domain size in all domain dimensions.
[in] | vals | Elements in all domain dims. |
|
pure virtual |
Get the local-domain size in the specified dimension, i.e., the size in this rank.
See documentation for set_rank_domain_size().
[in] | dim | Name of dimension to get. Must be one of the names from get_domain_dim_names(). |
|
pure virtual |
Get the local-domain size in all domain dimensions.
|
pure virtual |
Set the global-domain size in the specified dimension, i.e., the total size across all MPI ranks.
You should set either the local-domain size or the global-domain size in each dimension; the other should be set to zero (unspecified). The unspecified (zero) sizes will be calculated based on the specified ones when prepare_solution() is called.
See documentation for set_rank_domain_size(). See the "Detailed Description" for yk_var for more information on var sizes.
[in] | dim | Name of dimension to set. Must be one of the names from get_domain_dim_names(). |
[in] | size | Elements in the domain in this dim . |
|
pure virtual |
Set the global-domain size in all domain dimensions.
See set_overall_domain_size().
[in] | vals | Elements in all domain dims. |
|
pure virtual |
Set the global-domain size in all domain dimensions.
See set_overall_domain_size().
[in] | vals | Elements in all domain dims. |
|
pure virtual |
Get the global-domain size in the specified dimension, i.e., the total size across all MPI ranks.
The global-domain indices in the specified dimension will range from zero (0) to get_overall_domain_size() - 1, inclusive. Call get_first_rank_domain_index() and get_last_rank_domain_index() to find the subset of this domain in each rank.
[in] | dim | Name of dimension to get. Must be one of the names from get_domain_dim_names(). |
|
pure virtual |
Get the global-domain size in all domain dimensions.
See get_overall_domain_size().
|
pure virtual |
Set the block size in the given dimension.
This sets the approximate number of elements that are evaluated in each "block". This is a performance setting and should not affect the functional correctness or total number of elements evaluated. A block is typically the unit of work done by a top-level OpenMP thread. The actual number of elements evaluated in a block may be greater than the specified size due to rounding up to vector sizes. The number of elements in a block may also be smaller than the specified size when the block is at the edge of the domain.
Unless auto-tuning is disabled, the block size will be used as a starting point for an automated search for a higher-performing block size.
This and all other tile sizes (Mega-blocks, blocks, micro-blocks, etc.) can be set via apply_command_line_options(). Only block sizes have a dedicated API.
[in] | dim | Name of dimension to set. Must be one of the names from get_step_dim_name() or get_domain_dim_names(). |
[in] | size | Elements in a block in this dim . |
|
pure virtual |
Set the block size in all domain dimensions.
See set_block_size().
[in] | vals | Elements in all domain dims. |
|
pure virtual |
Set the block size in all domain dimensions.
See set_block_size().
[in] | vals | Elements in all domain dims. |
|
pure virtual |
Get the block size.
Returned value may be slightly larger than the value provided via set_block_size() due to rounding.
[in] | dim | Name of dimension to get. Must be one of the names from get_step_dim_name() or get_domain_dim_names(). |
|
pure virtual |
Get the block size in all domain dimensions.
See get_block_size().
|
pure virtual |
Set the number of MPI ranks in the given dimension.
If set_num_ranks() is set to a non-zero value in all dimensions, then the product of the number of ranks across all dimensions must equal the value returned by yk_env::get_num_ranks(). If the number of ranks is zero in one or more dimensions, those values will be set by a heuristic when prepare_solution() is called.
The current MPI rank will be assigned a unique location within the overall problem domain based on its MPI rank index. Or, you can set it explicitly via set_rank_index().
The same number of MPI ranks must be set via this API on each constituent MPI rank to ensure a consistent overall configuration. The number of ranks in each dimension must be properly set before calling yk_solution::prepare_solution(). There is no rank setting allowed in the solution-step dimension (usually "t") or in a misc dimension.
In fact, a practical definition of a domain dimension is one that is decomposable across MPI ranks. Specifically, a domain dimension does not have to correspond to a spatial dimension in the physical problem description.
yask_exception | if no legal values are possible given the specified (non-zero) values. |
[in] | dim | Name of dimension to set. Must be one of the names from get_domain_dim_names(). |
[in] | num | Number of ranks in dim . |
|
pure virtual |
Set the number of MPI ranks in all domain dimensions.
See set_num_ranks().
[in] | vals | Number of ranks in all domain dims. |
|
pure virtual |
Set the number of MPI ranks in all domain dimensions.
See set_num_ranks().
[in] | vals | Number of ranks in all domain dims. |
|
pure virtual |
Get the number of MPI ranks in the given dimension.
[in] | dim | Name of dimension to get. Must be one of the names from get_domain_dim_names(). |
|
pure virtual |
Get the number of MPI ranks in all domain dimensions.
See get_num_ranks().
|
pure virtual |
Set the rank index in the specified dimension.
The overall rank index in the specified dimension must range from zero (0) to get_num_ranks() - 1, inclusive. If you do not call set_rank_index(), a rank index will be assigned when prepare_solution() is called. You should either call set_rank_index() on all ranks or allow YASK to assign one on all ranks, i.e., do not mix-and-match.
Example using 6 MPI ranks in a 2-by-3 x, y domain:
MPI rank index = 0, x rank index = 0, y rank index = 0 | MPI rank index = 1, x rank index = 1, y rank index = 0 |
MPI rank index = 2, x rank index = 0, y rank index = 1 | MPI rank index = 3, x rank index = 1, y rank index = 1 |
MPI rank index = 4, x rank index = 0, y rank index = 2 | MPI rank index = 5, x rank index = 1, y rank index = 2 |
See yk_env::get_num_ranks() and yk_env::get_rank_index() for MPI rank index.
[in] | dim | Name of dimension to set. Must be one of the names from get_domain_dim_names(). |
[in] | num | Rank index in dim . |
|
pure virtual |
Set the rank index in all domain dimensions.
See set_rank_index().
[in] | vals | Index of this rank in all domain dims. |
|
pure virtual |
Set the rank index in all domain dimensions.
See set_rank_index().
[in] | vals | Index of this rank in all domain dims. |
|
pure virtual |
Get the rank index in the specified dimension.
The overall rank index in the specified dimension will range from zero (0) to get_num_ranks() - 1, inclusive.
[in] | dim | Name of dimension to get. Must be one of the names from get_domain_dim_names(). |
|
pure virtual |
Get the rank index in all domain dimensions.
See get_rank_index().
|
pure virtual |
Get the number of outer OpenMP threads.
max_threads
setting.
|
pure virtual |
Get the number of inner (nested) OpenMP threads.
inner_threads
and max_threads
settings.
|
pure virtual |
Set kernel options from a string.
Parses the string for options as if from a command-line. Example: "-bx 64 -inner_threads 4" sets the block-size in the x dimension to 64 and the number of nested OpenMP threads to 4. See the help message from the YASK kernel binary for documentation on the command-line options. Used to set less-common options not directly supported by the APIs above (set_block_size(), etc.).
Returns any parts of args that were not recognized by the parser as options. Thus, a non-empty returned string may be used to signal an error or interpreted by a custom application in another way.
[in] | args | String of arguments to parse. |
|
pure virtual |
Set kernel options from standard C or C++ argc and argv parameters to main().
Discards argv[0], which is the executable name. Then, parses the remaining argv values for options as described in apply_command_line_options() with a string argument.
Returns any parts of argv that were not recognized by the parser as options.
|
pure virtual |
Set kernel options from a vector of strings.
Parses args values for options as described in apply_command_line_options() with a string argument.
Returns any parts of args that were not recognized by the parser as options.
|
pure virtual |
Return a help-string for the command-line options.
|
pure virtual |
Return a description of the current settings of the command-line options.
If options have been modified from the originally-requested ones to legal ones, the updated ones will be shown. This occurs most frequently with tile-size options.
|
pure virtual |
Get the number of vars in the solution.
Vars may be pre-defined by the stencil compiler (e.g., via yc_solution::new_var()) or created explicitly via yk_solution::new_var() or yk_solution::new_fixed_size_var().
|
pure virtual |
Get the specified var.
This cannot be used to access scratch vars.
yask_exception | if named var does not exist. |
[in] | name | Name of the var. |
|
pure virtual |
Get all the vars.
|
pure virtual |
Prepare the solution for stencil application.
Calculates the position of each rank in the overall problem domain if not previously specified. Calculates the sizes of each rank if not previously specified. Allocates data in vars that do not already have storage allocated. Sets many other data structures needed for proper stencil application. Since this function initiates MPI communication, it must be called on all MPI ranks, and it will block until all ranks have completed. Must be called before applying any stencils.
|
pure virtual |
Get the first index of the sub-domain in this rank in the specified dimension.
This returns the first overall index at the beginning of the domain in this rank. Elements within the domain in this rank lie between the values returned by get_first_rank_domain_index() and get_last_rank_domain_index(), inclusive. If there is only one MPI rank, this is typically zero (0). If there is more than one MPI rank, the value depends on the rank's position within the overall problem domain.
[in] | dim | Name of dimension to get. Must be one of the names from get_domain_dim_names(). |
|
pure virtual |
Get the first index of the sub-domain in this rank in all domain dimensions.
See get_first_rank_domain_index().
|
pure virtual |
Get the last index of the sub-domain in this rank in the specified dimension.
This returns the last overall index within the domain in this rank (not one past the end). If there is only one MPI rank, this is typically one less than the value provided by set_rank_domain_size(). If there is more than one MPI rank, the value depends on the rank's position within the overall problem domain. See get_first_rank_domain_index() for more information.
[in] | dim | Name of dimension to get. Must be one of the names from get_domain_dim_names(). |
|
pure virtual |
Get the last index of the sub-domain in this rank in all domain dimensions.
See get_last_rank_domain_index().
|
pure virtual |
Run the stencil solution for the specified steps.
The stencil(s) in the solution are applied to the var data, setting the index variables as follows:
- The step index (e.g., t for "time") will be sequentially set to values from first_step_index to last_step_index, inclusive.
  - If the stencil equations are defined such that t+1 depends on t, then last_step_index should be greater than or equal to first_step_index (forward solution).
  - If the stencil equations are defined such that t-1 depends on t, then last_step_index should be less than or equal to first_step_index (reverse solution).
- If temporal tiling is used, the step index (e.g., t for "time") will be sequentially set to values from first_step_index to last_step_index, inclusive, within each area configured for temporal tiling.
This function should be called only after calling prepare_solution().
Since this function initiates MPI communication, it must be called on all MPI ranks, and it will block until all ranks have completed.
[in] | first_step_index | First index in the step dimension |
[in] | last_step_index | Last index in the step dimension |
|
pure virtual |
Run the stencil solution for the specified step.
This function is simply an alias for run_solution(step_index, step_index), i.e., the solution will be applied for exactly one step across the domain.
Typical C++ usage:
As written, the above loop is identical to
[in] | step_index | Index in the step dimension |
|
pure virtual |
Update data on the device.
Copies any YASK var data that has been modified on the host but not on the device from the host to the device.
This is done automatically as needed, so calling this function is only needed when you want to control when the copy is done.
If the kernel has been compiled for offloading using unified shared memory, calling this function will have no effect. Similarly, if the kernel has not been compiled for offloading, calling this function will have no effect.
|
pure virtual |
Update data on the host.
Copies any YASK var data that has been modified on the device but not on the host from the device to the host.
This is done automatically as needed, so calling this function is only needed when you want to control when the copy is done.
If the kernel has been compiled for offloading using unified shared memory, calling this function will have no effect. Similarly, if the kernel has not been compiled for offloading, calling this function will have no effect.
|
pure virtual |
Force a halo exchange now.
Can be used to exchange data between ranks before run_solution() is called.
|
pure virtual |
Finish using a solution.
Releases shared ownership of memory used by the vars. This will result in deallocating each memory block that is not referenced by another shared pointer.
|
pure virtual |
Get performance statistics associated with preceding calls to run_solution().
|
pure virtual |
Start or stop the online auto-tuner on this rank.
This function is used to apply the current best-known settings if the tuner is currently running, reset the state of the auto-tuner, and either restart its search (if enable==true) or stop it (if enable==false). This call must be made on each rank where the change is desired.
This mode of running the auto-tuner is called "online" or "in-situ" because changes are made to the tile sizes between calls to run_solution(). It will stop automatically when it converges. Call is_auto_tuner_enabled() to determine if it has converged.
[in] | enable | If true, start or restart the auto-tuner search on this rank. If false, stop the auto-tuner. |
[in] | verbose | If true, print progress information to the debug object set via set_debug_output(). |
|
pure virtual |
Determine whether the online auto-tuner is enabled on this rank.
The "online" or "in-situ" auto-tuner is disabled by default. It can be enabled by calling reset_auto_tuner(true). It will also become disabled after it has converged or after reset_auto_tuner(false) has been called. Auto-tuners run independently on each rank, so they will not generally finish at the same step across all ranks.
|
pure virtual |
Run the offline auto-tuner immediately, not preserving variable data.
This runs the auto-tuner in "offline" mode. (Under "online" operation, an auto-tuner is invoked during calls to run_solution(); see reset_auto_tuner() and is_auto_tuner_enabled() for more information on running in online mode.)
This function causes the stencil solution to be run immediately until the auto-tuner converges on all ranks. It is useful for benchmarking, where performance is to be timed for a given number of steps after the best settings are found. This function should be called only after calling prepare_solution(). This call must be made on each rank.
[in] | verbose | If true, print progress information to the debug object set via set_debug_output(). |
|
pure virtual |
[Advanced] Set the minimum amount of padding for all vars.
This sets the minimum number of elements in each var that is reserved outside of the rank domain in the given dimension. This padding area can be used for required halo areas. At least the specified number of elements will be added to both sides, i.e., both "before" and "after" the domain.
The actual padding size will be the largest of the following values, additionally rounded up based on the vector-folding dimensions, cache-line alignment, and/or extensions needed for wave-front tiles:
Setting the minimum pad size is useful when an application needs to copy data back and forth between YASK vars and legacy C-style arrays that include a certain halo size that may be larger than the halo calculated by the YASK compiler. For example, for a given stencil problem, one or more YASK variables might need a halo of width 2 in the x dimension, but only 1 in the y dimension due to the stencil radii in the respective dimensions. However, an application might have an existing C-style array with halo data of width 2 in both x and y dimensions. By calling set_min_pad_size("y", 2)
, all YASK vars will be created with padding widths of at least 2 in the y dimension, making it easier to copy data to and from the C-style arrays using yk_var::get_elements_in_slice() and yk_var::set_elements_in_slice().
The padding size cannot be changed after data storage has been allocated for a given var; attempted changes to the pad size for such vars will be ignored.
Use yk_var::set_left_min_pad_size() and yk_var::set_right_min_pad_size() for individual setting of each var. Call yk_var::get_left_pad_size() and yk_var::get_right_pad_size() to determine the actual padding sizes for a given var. See the "Detailed Description" for yk_var for more information on var sizes. Padding is only allowed in the domain dimensions.
[in] | dim | Name of dimension to set. Must be one of the names from get_domain_dim_names(). |
[in] | size | Elements in this dim applied to both sides of the domain. |
|
pure virtual |
[Advanced] Get the minimum requested amount of padding for all vars.
[in] | dim | Name of dimension to get. Must be one of the names from get_domain_dim_names(). |
|
pure virtual |
[Advanced] Add a new var to the solution.
This is typically not needed because vars used by the stencils are pre-defined by the solution itself via the stencil compiler. However, a var may be created explicitly via this function in order to use it for purposes other than by the pre-defined stencils within the current solution.
Vars created by this function will behave [mostly] like a pre-defined var. For example,
Some behaviors are different from pre-defined vars. For example,
dims
argument. Any dimension name that is not a step or domain dimension will become a misc dimension, whether or not it was defined via yc_node_factory::new_misc_index().
If you want a var that is not automatically resized based on the solution settings, use new_fixed_size_var() instead.
[in] | name | Name of the var; must be unique within the solution. |
[in] | dims | List of names of all dimensions. Names must be valid C++ identifiers and not repeated within this var. |
|
pure virtual |
[Advanced] Add a new var to the solution.
See documentation for the version of new_var() with a vector of dimension names as a parameter.
[in] | name | Name of the var; must be unique within the solution. |
[in] | dims | List of names of all dimensions. Names must be valid C++ identifiers and not repeated within this var. |
|
pure virtual |
[Advanced] Add a new var to the solution with a specified size.
This is typically not needed because vars used by the stencils are pre-defined by the solution itself via the stencil compiler. However, a var may be created explicitly via this function in order to use it for purposes other than by the pre-defined stencils within the current solution.
The following behaviors are different from both pre-defined vars and those created via new_var():
The following behaviors are the same as those of a pre-defined var and those created via new_var():
The following behaviors are different from a pre-defined var but the same as those created via new_var():
dims argument. Any dimension name that is not a step or domain dimension will become a misc dimension, whether or not it was defined via yc_node_factory::new_misc_index().
[in] | name | Name of the var; must be unique within the solution. |
[in] | dims | List of names of all dimensions. Names must be valid C++ identifiers and not repeated within this var. |
[in] | dim_sizes | Initial allocation in each dimension. Must be exactly one size for each dimension. |
|
pure virtual |
[Advanced] Add a new var to the solution with a specified size.
See documentation for the version of new_fixed_size_var() with a vector of dimension names as a parameter.
[in] | name | Name of the var; must be unique within the solution. |
[in] | dims | List of names of all dimensions. Names must be valid C++ identifiers and not repeated within this var. |
[in] | dim_sizes | Initial allocation in each dimension. Must be exactly one size for each dimension. |
|
pure virtual |
[Advanced] Set the default preferred NUMA node on which to allocate data.
This value is used when allocating vars and MPI buffers. The NUMA "preferred node allocation" policy is used, meaning that memory will be allocated in an alternative node if the preferred one doesn't have enough space available or is otherwise restricted. Instead of specifying a NUMA node, a special value may be used to specify another policy as listed. This setting may be overridden for any specific var.
Returns true if NUMA preference was set; false if NUMA preferences are not enabled.
[in] | numa_node | Preferred NUMA node for data allocation. Alternatively, use yask_numa_local for explicit local-node allocation, yask_numa_interleave for interleaving pages across all nodes, or yask_numa_none for no explicit NUMA policy. These constants are defined in the Variable Documentation section of yk_solution_api.hpp. |
|
pure virtual |
[Advanced] Get the default preferred NUMA node on which to allocate data.
|
pure virtual |
[Advanced] Register a hook function to be called at the beginning of yk_solution::prepare_solution().
A reference to the yk_solution is passed to the hook_fn.
If this method is called more than once, the hook functions will be called in the order registered.
[in] | hook_fn | callback function |
|
pure virtual |
[Advanced] Register a hook function to be called at the end of yk_solution::prepare_solution().
A reference to the yk_solution is passed to the hook_fn.
If this method is called more than once, the hook functions will be called in the order registered.
[in] | hook_fn | callback function |
|
pure virtual |
[Advanced] Register a hook function to be called at the beginning of yk_solution::run_solution().
A reference to the yk_solution and the first_step_index and last_step_index passed to run_solution() are passed to the hook_fn.
If this method is called more than once, the hook functions will be called in the order registered.
[in] | hook_fn | callback function |
|
pure virtual |
[Advanced] Register a hook function to be called at the end of yk_solution::run_solution().
A reference to the yk_solution and the first_step_index and last_step_index passed to run_solution() are passed to the hook_fn.
If this method is called more than once, the hook functions will be called in the order registered.
[in] | hook_fn | callback function |
|
pure virtual |
[Advanced] Merge YASK variables with another solution.
Calls yk_var::fuse_vars() for each pair of vars that have the same name in this solution and the source solution. All conditions listed in yk_var::fuse_vars() must hold for each pair.
[in] | source | Solution from which vars will be merged. |
|
pure virtual |
[Advanced] Set whether invalid step indices alias to valid ones.
[in] | do_wrap | Whether to allow any step index. |
|
pure virtual |
[Advanced] Get whether invalid step indices alias to valid ones.