Provided by: libfabric-dev_1.17.0-3ubuntu1_amd64 bug

NAME

       fi_atomic - Remote atomic functions

       fi_atomic / fi_atomicv / fi_atomicmsg / fi_inject_atomic
              Initiates an atomic operation to remote memory

       fi_fetch_atomic / fi_fetch_atomicv / fi_fetch_atomicmsg
              Initiates an atomic operation to remote memory, retrieving the initial value.

       fi_compare_atomic / fi_compare_atomicv / fi_compare_atomicmsg
              Initiates  an  atomic  compare-operation  to  remote memory, retrieving the initial
              value.

       fi_atomicvalid / fi_fetch_atomicvalid / fi_compare_atomicvalid / fi_query_atomic
              Indicates if a provider supports a specific atomic operation

SYNOPSIS

              #include <rdma/fi_atomic.h>

              ssize_t fi_atomic(struct fid_ep *ep, const void *buf,
                  size_t count, void *desc, fi_addr_t dest_addr,
                  uint64_t addr, uint64_t key,
                  enum fi_datatype datatype, enum fi_op op, void *context);

              ssize_t fi_atomicv(struct fid_ep *ep, const struct fi_ioc *iov,
                  void **desc, size_t count, fi_addr_t dest_addr,
                  uint64_t addr, uint64_t key,
                  enum fi_datatype datatype, enum fi_op op, void *context);

              ssize_t fi_atomicmsg(struct fid_ep *ep, const struct fi_msg_atomic *msg,
                  uint64_t flags);

              ssize_t fi_inject_atomic(struct fid_ep *ep, const void *buf,
                  size_t count, fi_addr_t dest_addr,
                  uint64_t addr, uint64_t key,
                  enum fi_datatype datatype, enum fi_op op);

              ssize_t fi_fetch_atomic(struct fid_ep *ep, const void *buf,
                  size_t count, void *desc, void *result, void *result_desc,
                  fi_addr_t dest_addr, uint64_t addr, uint64_t key,
                  enum fi_datatype datatype, enum fi_op op, void *context);

              ssize_t fi_fetch_atomicv(struct fid_ep *ep, const struct fi_ioc *iov,
                  void **desc, size_t count, struct fi_ioc *resultv,
                  void **result_desc, size_t result_count, fi_addr_t dest_addr,
                  uint64_t addr, uint64_t key, enum fi_datatype datatype,
                  enum fi_op op, void *context);

              ssize_t fi_fetch_atomicmsg(struct fid_ep *ep,
                  const struct fi_msg_atomic *msg, struct fi_ioc *resultv,
                  void **result_desc, size_t result_count, uint64_t flags);

              ssize_t fi_compare_atomic(struct fid_ep *ep, const void *buf,
                  size_t count, void *desc, const void *compare,
                  void *compare_desc, void *result, void *result_desc,
                  fi_addr_t dest_addr, uint64_t addr, uint64_t key,
                  enum fi_datatype datatype, enum fi_op op, void *context);

              ssize_t fi_compare_atomicv(struct fid_ep *ep, const struct fi_ioc *iov,
                  void **desc, size_t count, const struct fi_ioc *comparev,
                  void **compare_desc, size_t compare_count, struct fi_ioc *resultv,
                  void **result_desc, size_t result_count, fi_addr_t dest_addr,
                  uint64_t addr, uint64_t key, enum fi_datatype datatype,
                  enum fi_op op, void *context);

              ssize_t fi_compare_atomicmsg(struct fid_ep *ep,
                  const struct fi_msg_atomic *msg, const struct fi_ioc *comparev,
                  void **compare_desc, size_t compare_count,
                  struct fi_ioc *resultv, void **result_desc, size_t result_count,
                  uint64_t flags);

              int fi_atomicvalid(struct fid_ep *ep, enum fi_datatype datatype,
                  enum fi_op op, size_t *count);

              int fi_fetch_atomicvalid(struct fid_ep *ep, enum fi_datatype datatype,
                  enum fi_op op, size_t *count);

              int fi_compare_atomicvalid(struct fid_ep *ep, enum fi_datatype datatype,
                  enum fi_op op, size_t *count);

              int fi_query_atomic(struct fid_domain *domain,
                  enum fi_datatype datatype, enum fi_op op,
                  struct fi_atomic_attr *attr, uint64_t flags);

ARGUMENTS

       ep     Fabric endpoint on which to initiate atomic operation.

       buf    Local data buffer that specifies first operand of atomic operation

       iov / comparev / resultv
              Vectored data buffer(s).

       count / compare_count / result_count
              Count  of  vectored  data  entries.   The number of elements referenced, where each
              element is the indicated datatype.

       addr   Address of remote memory to access.

       key    Protection key associated with the remote memory.

       datatype
              Datatype associated with atomic operands

       op     Atomic operation to perform

       compare
              Local compare buffer, containing comparison data.

       result Local data buffer to store initial value of remote buffer

       desc / compare_desc / result_desc
              Data descriptor associated with the local data buffer, local  compare  buffer,  and
              local result buffer, respectively.  See fi_mr(3).

       dest_addr
              Destination  address  for  connectionless atomic operations.  Ignored for connected
              endpoints.

       msg    Message descriptor for atomic operations

       flags  Additional flags to apply for the atomic operation

       context
              User specified pointer to associate with the operation.  This parameter is  ignored
              if  the  operation  will  not  generate  a successful completion, unless an op flag
              specifies the context parameter be used for required input.

DESCRIPTION

       Atomic transfers are used to read and update data located in remote memory regions  in  an
       atomic  fashion.   Conceptually,  they are similar to local atomic operations of a similar
       nature (e.g. atomic increment, compare and swap, etc.).  Updates to  remote  data  involve
       one of several operations on the data, and act on specific types of data, as listed below.
       As such, atomic transfers have knowledge of the format of  the  data  being  accessed.   A
       single atomic function may operate across an array of data applying an atomic operation to
       each entry, but the atomicity of an operation is limited to a single datatype or entry.

   Atomic Data Types
       Atomic functions may operate on one of the  following  identified  data  types.   A  given
       atomic function may support any datatype, subject to provider implementation constraints.

       FI_INT8
              Signed 8-bit integer.

       FI_UINT8
              Unsigned 8-bit integer.

       FI_INT16
              Signed 16-bit integer.

       FI_UINT16
              Unsigned 16-bit integer.

       FI_INT32
              Signed 32-bit integer.

       FI_UINT32
              Unsigned 32-bit integer.

       FI_INT64
              Signed 64-bit integer.

       FI_UINT64
              Unsigned 64-bit integer.

       FI_INT128
              Signed 128-bit integer.

       FI_UINT128
              Unsigned 128-bit integer.

       FI_FLOAT
              A single-precision floating point value (IEEE 754).

       FI_DOUBLE
              A double-precision floating point value (IEEE 754).

       FI_FLOAT_COMPLEX
              An  ordered  pair  of  single-precision  floating point values (IEEE 754), with the
              first value representing the real portion  of  a  complex  number  and  the  second
              representing the imaginary portion.

       FI_DOUBLE_COMPLEX
              An  ordered  pair  of  double-precision  floating point values (IEEE 754), with the
              first value representing the real portion  of  a  complex  number  and  the  second
              representing the imaginary portion.

       FI_LONG_DOUBLE
              A double-extended precision floating point value (IEEE 754).  Note that the size of
              a long double and number of bits used for precision is compiler,  platform,  and/or
              provider specific.  Developers that use long double should ensure that libfabric is
              built using a long double format that is compatible  with  their  application,  and
              that  format  is supported by the provider.  The mechanism used for this validation
              is currently beyond the scope of the libfabric API.

       FI_LONG_DOUBLE_COMPLEX
              An ordered pair of double-extended precision floating point values (IEEE 754), with
              the  first  value  representing the real portion of a complex number and the second
              representing the imaginary portion.

   Atomic Operations
       The following atomic operations are defined.  An atomic operation  often  acts  against  a
       target  value  in  the  remote  memory  buffer  and  source value provided with the atomic
       function.  It may also carry source data to replace the target value in compare  and  swap
       operations.  A conceptual description of each operation is provided.

       FI_MIN Minimum

              if (buf[i] < addr[i])
                  addr[i] = buf[i]

       FI_MAX Maximum

              if (buf[i] > addr[i])
                  addr[i] = buf[i]

       FI_SUM Sum

              addr[i] = addr[i] + buf[i]

       FI_PROD
              Product

              addr[i] = addr[i] * buf[i]

       FI_LOR Logical OR

              addr[i] = (addr[i] || buf[i])

       FI_LAND
              Logical AND

              addr[i] = (addr[i] && buf[i])

       FI_BOR Bitwise OR

              addr[i] = addr[i] | buf[i]

       FI_BAND
              Bitwise AND

              addr[i] = addr[i] & buf[i]

       FI_LXOR
              Logical exclusive-OR (XOR)

              addr[i] = ((addr[i] && !buf[i]) || (!addr[i] && buf[i]))

       FI_BXOR
              Bitwise exclusive-OR (XOR)

              addr[i] = addr[i] ^ buf[i]

       FI_ATOMIC_READ
              Read data atomically

              result[i] = addr[i]

       FI_ATOMIC_WRITE
              Write data atomically

              addr[i] = buf[i]

       FI_CSWAP
              Compare values and if equal swap with data

              if (compare[i] == addr[i])
                  addr[i] = buf[i]

       FI_CSWAP_NE
              Compare values and if not equal swap with data

              if (compare[i] != addr[i])
                  addr[i] = buf[i]

       FI_CSWAP_LE
              Compare values and if less than or equal swap with data

              if (compare[i] <= addr[i])
                  addr[i] = buf[i]

       FI_CSWAP_LT
              Compare values and if less than swap with data

              if (compare[i] < addr[i])
                  addr[i] = buf[i]

       FI_CSWAP_GE
              Compare values and if greater than or equal swap with data

              if (compare[i] >= addr[i])
                  addr[i] = buf[i]

       FI_CSWAP_GT
              Compare values and if greater than swap with data

              if (compare[i] > addr[i])
                  addr[i] = buf[i]

       FI_MSWAP
              Swap masked bits with data

              addr[i] = (buf[i] & compare[i]) | (addr[i] & ~compare[i])

   Base Atomic Functions
       The  base  atomic  functions  – fi_atomic, fi_atomicv, fi_atomicmsg – are used to transmit
       data to a remote node, where the specified  atomic  operation  is  performed  against  the
       target  data.  The result of a base atomic function is stored at the remote memory region.
       The main difference between atomic functions is the number and type of parameters that
       they accept as input.  Otherwise, they perform the same general function.

       The  call  fi_atomic  transfers  the data contained in the user-specified data buffer to a
       remote node.  For connectionless endpoints, the destination endpoint is specified  through
       the  dest_addr  parameter.   Unless the endpoint has been configured differently, the data
       buffer passed into fi_atomic must not be touched by the application  until  the  fi_atomic
       call  completes  asynchronously.   The target buffer of a base atomic operation must allow
       for remote read and/or write access, as appropriate.

       The fi_atomicv call adds support for a scatter-gather list to fi_atomic.   The  fi_atomicv
       transfers  the  set of data buffers referenced by the iov parameter to the remote node for
       processing.

       The fi_inject_atomic call is an optimized  version  of  fi_atomic.   The  fi_inject_atomic
       function  behaves  as if the FI_INJECT transfer flag were set, and FI_COMPLETION were not.
       That is, the data buffer is  available  for  reuse  immediately  on  returning  from
       fi_inject_atomic,  and  no  completion  event  will  be  generated  for  this atomic.  The
       completion event will be suppressed even if the endpoint  has  not  been  configured  with
       FI_SELECTIVE_COMPLETION.   See the flags discussion below for more details.  The requested
       message size that can be used with fi_inject_atomic is limited by inject_size.

       The fi_atomicmsg call supports atomic functions over  both  connected  and  connectionless
       endpoints,  with  the  ability to control the atomic operation per call through the use of
       flags.  The fi_atomicmsg function takes a struct fi_msg_atomic as input.

              struct fi_msg_atomic {
                  const struct fi_ioc *msg_iov; /* local scatter-gather array */
                  void                **desc;   /* local access descriptors */
                  size_t              iov_count;/* # elements in ioc */
                  fi_addr_t           addr;     /* optional endpoint address */
                  const struct fi_rma_ioc *rma_iov; /* remote SGL */
                  size_t              rma_iov_count;/* # elements in remote SGL */
                  enum fi_datatype    datatype; /* operand datatype */
                  enum fi_op          op;       /* atomic operation */
                  void                *context; /* user-defined context */
                  uint64_t            data;     /* optional data */
              };

              struct fi_ioc {
                  void        *addr;    /* local address */
                  size_t      count;    /* # target operands */
              };

              struct fi_rma_ioc {
                  uint64_t    addr;     /* target address */
                  size_t      count;    /* # target operands */
                  uint64_t    key;      /* access key */
              };

       The following list of atomic operations are usable with base  atomic  operations:  FI_MIN,
       FI_MAX,   FI_SUM,  FI_PROD,  FI_LOR,  FI_LAND,  FI_BOR,  FI_BAND,  FI_LXOR,  FI_BXOR,  and
       FI_ATOMIC_WRITE.

   Fetch-Atomic Functions
       The fetch atomic functions – fi_fetch_atomic, fi_fetch_atomicv, and fi_fetch_atomicmsg –
       behave similarly to the equivalent base atomic functions.  The difference between the fetch
       and base atomic calls is that the fetch atomic routines return the initial value that was
       stored at the target to the user.  The initial value is read into the user provided result
       buffer.  The target buffer of fetch-atomic operations must  be  enabled  for  remote  read
       access.

       The  following  list of atomic operations are usable with fetch atomic operations: FI_MIN,
       FI_MAX,  FI_SUM,  FI_PROD,   FI_LOR,   FI_LAND,   FI_BOR,   FI_BAND,   FI_LXOR,   FI_BXOR,
       FI_ATOMIC_READ, and FI_ATOMIC_WRITE.

       For  FI_ATOMIC_READ  operations,  the  source  buffer  operand  (e.g.  fi_fetch_atomic buf
       parameter) is ignored and may be NULL.  The results are written into the result buffer.

   Compare-Atomic Functions
       The compare atomic functions – fi_compare_atomic, fi_compare_atomicv, and
       fi_compare_atomicmsg – are used for operations that require comparing the target data
       against a value before performing a swap operation.  The compare atomic functions support:
       FI_CSWAP, FI_CSWAP_NE, FI_CSWAP_LE, FI_CSWAP_LT, FI_CSWAP_GE, FI_CSWAP_GT, and FI_MSWAP.

   Atomic Valid Functions
       The    atomic    valid    functions    –    fi_atomicvalid,    fi_fetch_atomicvalid,   and
       fi_compare_atomicvalid – indicate which operations the local provider supports.  Needed
       operations  not supported by the provider must be emulated by the application.  Each valid
       call corresponds to a set of atomic functions.  fi_atomicvalid checks whether  a  provider
       supports   a   specific  base  atomic  operation  for  a  given  datatype  and  operation.
       fi_fetch_atomicvalid indicates if a provider supports a  specific  fetch-atomic  operation
       for  a  given  datatype  and  operation.   And fi_compare_atomicvalid checks if a provider
       supports a specified compare-atomic operation for a given datatype and operation.

       If an operation is supported, an atomic valid call will return 0, along with  a  count  of
       atomic data units that a single function call will operate on.

   Query Atomic Attributes
       The  fi_query_atomic call acts as an enhanced atomic valid operation (see the atomic valid
       function definitions above).  It is provided, in  part,  for  future  extensibility.   The
       query  operation reports which atomic operations are supported by the domain, for suitably
       configured endpoints.

       The behavior of fi_query_atomic is adjusted based on the flags parameter.  If flags is  0,
       then  the  operation  reports  the supported atomic attributes for base atomic operations,
       similar to fi_atomicvalid for endpoints.  If flags has the FI_FETCH_ATOMIC  bit  set,  the
       operation behaves similarly to fi_fetch_atomicvalid.  Similarly, the flag bit
       FI_COMPARE_ATOMIC results in query acting as fi_compare_atomicvalid.  The  FI_FETCH_ATOMIC
       and FI_COMPARE_ATOMIC bits may not both be set.

       If  the  FI_TAGGED bit is set, the provider will indicate if it supports atomic operations
       to tagged receive buffers.  The FI_TAGGED bit may be used by  itself,  or  in  conjunction
       with the FI_FETCH_ATOMIC and FI_COMPARE_ATOMIC flags.

       The output of fi_query_atomic is struct fi_atomic_attr:

              struct fi_atomic_attr {
                  size_t count;
                  size_t size;
              };

       The  count  attribute  field  is  as  defined  for the atomic valid calls.  The size field
       indicates the size in bytes of the atomic datatype.  The size field is useful for datatypes
       that may differ in size based on the platform or compiler, such as FI_LONG_DOUBLE.

   Completions
       Completed  atomic  operations  are  reported  to  the  initiator of the request through an
       associated completion queue or counter.  Any user  provided  context  specified  with  the
       request will be returned as part of any completion event written to a CQ.  See fi_cq(3) for
       completion event details.

       Any results returned to the initiator as part of an atomic  operation  will  be  available
       prior  to  a  completion  event  being generated.  This will be true even if the requested
       completion semantic provides a weaker guarantee.  That is, atomic  fetch  operations  have
       FI_DELIVERY_COMPLETE   semantics.    Completions  generated  for  other  types  of  atomic
       operations indicate that it is safe to re-use the source data buffers.

       Any updates to data at the target of an atomic operation will be visible  to  agents  (CPU
       processes,  NICs,  and  other  devices)  on  the target node prior to one of the following
       occurring.  If the atomic operation generates a completion event or updates  a  completion
       counter  at  the  target  endpoint,  the results will be available prior to the completion
       notification.  After processing a completion for the atomic, if the  initiator  submits  a
       transfer between the same endpoints that generates a completion at the target, the results
       will be available prior to the subsequent transfer’s event.  Or, if a fenced data transfer
       from  the  initiator  follows the atomic request, the results will be available prior to a
       completion at the target for the fenced transfer.

       The correctness of atomic operations on a target memory region  is  guaranteed  only  when
       performed  by  a single actor for a given window of time.  An actor is defined as a single
       libfabric domain (identified by the domain name, and not an open instance of that domain),
       a coherent CPU complex, or other device (e.g. GPU) capable of performing atomic operations
       on the target memory.  The results of  atomic  operations  performed  by  multiple  actors
       simultaneously  are  undefined.   For  example,  issuing  CPU based atomic operations to a
       target region concurrently being updated by NIC based atomics may leave the region’s  data
       in  an unknown state.  The results of a first actor’s atomic operations must be visible to
       a second actor prior to the second actor issuing its own atomics.

FLAGS

       The fi_atomicmsg, fi_fetch_atomicmsg, and fi_compare_atomicmsg calls  allow  the  user  to
       specify  flags which can change the default data transfer operation.  Flags specified with
       atomic message operations override most flags previously  configured  with  the  endpoint,
       except  where  noted (see fi_control).  The following list of flags are usable with atomic
       message calls.

       FI_COMPLETION
              Indicates that a completion entry should be generated for the specified  operation.
              The  endpoint must be bound to a completion queue with FI_SELECTIVE_COMPLETION that
              corresponds to the specified operation, or this flag is ignored.

       FI_MORE
              Indicates that the user has additional requests that  will  immediately  be  posted
              after  the  current  call  returns.   Use  of  this flag may improve performance by
              enabling the provider to optimize its access to the fabric hardware.

       FI_INJECT
              Indicates that the control of constant data buffers should be returned to the  user
              immediately   after   the   call   returns,   even  if  the  operation  is  handled
              asynchronously.  This may require that the underlying provider implementation  copy
              the  data  into  a  local  buffer  and  transfer out of that buffer.  Constant data
              buffers refer to any data buffer or iovec used by the atomic APIs that is marked
              as `const'.  Non-constant or output buffers are unaffected by this flag and may be
              accessed by the provider at any time until the operation has completed.  This flag
              can only be used with messages smaller than inject_size.

       FI_FENCE
              Applies  to  transmits.   Indicates that the requested operation, also known as the
              fenced operation, and any operation posted  after  the  fenced  operation  will  be
              deferred  until  all  previous  operations  targeting  the  same peer endpoint have
              completed.  Operations posted after the fencing will see and/or replace the results
              of any operations initiated prior to the fenced operation.

       The  ordering of operations starting at the posting of the fenced operation (inclusive) to
       the posting of a subsequent fenced operation (exclusive) is controlled by  the  endpoint’s
       ordering semantics.

       FI_TAGGED
              Specifies  that  the  target  of  the  atomic  operation is a tagged receive buffer
              instead of an RMA buffer.  When a tagged buffer is the target  memory  region,  the
              addr  parameter  is  used as a 0-based byte offset into the tagged buffer, with the
              key parameter specifying the tag.

RETURN VALUE

       Returns 0 on success.  On error,  a  negative  value  corresponding  to  fabric  errno  is
       returned.  Fabric errno values are defined in rdma/fi_errno.h.

ERRORS

       -FI_EAGAIN
              See fi_msg(3) for a detailed description of handling FI_EAGAIN.

       -FI_EOPNOTSUPP
              The requested atomic operation is not supported on this endpoint.

       -FI_EMSGSIZE
              The  number  of atomic operations in a single request exceeds that supported by the
              underlying provider.

NOTES

       Atomic operations operate on an array of values of a specific  data  type.   Atomicity  is
       only  guaranteed for each data type operation, not across the entire array.  The following
       pseudo-code   demonstrates   this   operation   for   64-bit   unsigned   atomic    write.
       ATOMIC_WRITE_U64  is  a  platform  dependent  macro  that  atomically writes 8 bytes to an
       aligned memory location.

              fi_atomic(ep, buf, count, NULL, dest_addr, addr, key,
                    FI_UINT64, FI_ATOMIC_WRITE, context)
              {
                  for (i = 0; i < count; i ++)
                      ATOMIC_WRITE_U64(((uint64_t *) addr)[i],
                               ((uint64_t *) buf)[i]);
              }

       The number of array elements to operate on is specified through a count  parameter.   This
       must  be  between  1  and  the  maximum  returned  through  the  relevant valid operation,
       inclusive.  The requested operation and data  type  must  also  be  valid  for  the  given
       provider.

       The ordering of atomic operations carried as part of different request messages is subject
       to the message and data ordering definitions assigned to the  transmitting  and  receiving
       endpoints.   Both  message  and  data  ordering  are required if the results of two atomic
       operations to the same memory buffers are to reflect the second operation  acting  on  the
       results   of  the  first.   See  fi_endpoint(3)  for  further  details  and  message  size
       restrictions.

SEE ALSO

       fi_getinfo(3), fi_endpoint(3), fi_domain(3), fi_cq(3), fi_rma(3)

AUTHORS

       OpenFabrics.