hippo

Search:
Group by:
Source   Edit  

Experimental Nim GPU 'macro macros' for generating GPU kernels and helpers to use them. these macros accept a kernel body and produce a macro for easily using the kernel.Nim Library to enable writing CUDA and HIP kernels in Nim All cuda and hip structures and functions are re-exported and can be used

  • There are 3 sets of function prefixes.
  • hippo* prefixed functions are friendly nim interfaces for either HIP or CUDA
    • This is the recommended way to use this library, as it is the most nim-like
    • These functions check for errors and raise them as exceptions
  • hip* prefixed functions are the raw HIP C++ functions
  • cuda* prefixed functions are the raw CUDA C functions

Types

BlockDim {.importcpp: "const __HIP_Coordinates<__HIP_BlockDim>",
           header: "hip/hip_runtime.h".} = object
  x* {.importc: "x".}: uint32_t ## < x
  y* {.importc: "y".}: uint32_t ## < y
  z* {.importc: "z".}: uint32_t ## < z
Source   Edit  
BlockIdx {.importcpp: "const __HIP_Coordinates<__HIP_BlockIdx>",
           header: "hip/hip_runtime.h".} = object
  x* {.importc: "x".}: uint32_t ## < x
  y* {.importc: "y".}: uint32_t ## < y
  z* {.importc: "z".}: uint32_t ## < z
Source   Edit  
ConstCString {.importc: "const char*".} = object
Source   Edit  
Dim3 {.importcpp: "dim3", header: "hip/hip_runtime.h", bycopy.} = object
  x* {.importc: "x".}: uint32_t ## < x
  y* {.importc: "y".}: uint32_t ## < y
  z* {.importc: "z".}: uint32_t ## < z
Source   Edit  
GridDim {.importcpp: "const __HIP_Coordinates<__HIP_GridDim>",
          header: "hip/hip_runtime.h".} = object
  x* {.importc: "x".}: uint32_t ## < x
  y* {.importc: "y".}: uint32_t ## < y
  z* {.importc: "z".}: uint32_t ## < z
Source   Edit  
hipDeviceProp_t {.importcpp: "hipDeviceProp_t", header: "hip/hip_runtime.h".} = object
  deviceOverlap*: cint
Source   Edit  
hipError_t {.importcpp: "hipError_t", header: "hip/hip_runtime.h".} = cint
Source   Edit  
hipEvent_t {.importcpp: "hipEvent_t", header: "hip/hip_runtime.h".} = pointer
Source   Edit  
hipMemcpyKind {.size: 4, header: "hip/hip_runtime.h", importcpp: "hipMemcpyKind".} = enum
  hipMemcpyHostToHost = 0,  ## < Host-to-Host Copy
  hipMemcpyHostToDevice = 1, ## < Host-to-Device Copy
  hipMemcpyDeviceToHost = 2, ## < Device-to-Host Copy
  hipMemcpyDeviceToDevice = 3, ## < Device-to-Device Copy
  hipMemcpyDefault = 4       ## < Runtime will automatically determine copy-kind based on virtual addresses.
Source   Edit  
hipStream_t {.importcpp: "hipStream_t", header: "hip/hip_runtime.h".} = pointer
Source   Edit  
size_t = uint64
Source   Edit  
ThreadIdx {.importcpp: "const __HIP_Coordinates<__HIP_ThreadIdx>",
            header: "hip/hip_runtime.h".} = object
  x* {.importc: "x".}: uint32_t ## < x
  y* {.importc: "y".}: uint32_t ## < y
  z* {.importc: "z".}: uint32_t ## < z
Source   Edit  
uint8_t = uint8
Source   Edit  
uint16_t = uint16
Source   Edit  
uint32_t = uint32
Source   Edit  
uint64_t = uint64
Source   Edit  

Lets

blockDim {.importc, inject, header: "hip/hip_runtime.h".}: BlockDim
Source   Edit  
blockIdx {.importc, inject, header: "hip/hip_runtime.h".}: BlockIdx
Source   Edit  
gridDim {.importc, inject, header: "hip/hip_runtime.h".}: GridDim
Source   Edit  
threadIdx {.importc, inject, header: "hip/hip_runtime.h".}: ThreadIdx
Source   Edit  

Consts

HippoEventDefault = 0'u32
Source   Edit  
HippoMemcpyDefault = hipMemcpyDefault
Source   Edit  
HippoMemcpyDeviceToDevice = hipMemcpyDeviceToDevice
Source   Edit  
HippoMemcpyDeviceToHost = hipMemcpyDeviceToHost
Source   Edit  
HippoMemcpyHostToDevice = hipMemcpyHostToDevice
Source   Edit  
HippoMemcpyHostToHost = hipMemcpyHostToHost
Source   Edit  
HippoPlatform {.strdefine.} = ""
Source   Edit  
HippoRuntime {.strdefine.} = "HIP"
Source   Edit  
HippoWarpSize = 32
Warp/wavefront size for the current backend. Source   Edit  
WarpSize {.intdefine.} = 32
AMD wavefront size. Defaults to 32 (RDNA 3+). Set -d:WarpSize=64 for GCN/CDNA GPUs (e.g. MI250, MI300) which use wave64. Source   Edit  

Procs

proc `$`(self: ConstCString): string {....raises: [], tags: [], forbids: [].}
Source   Edit  
proc `=destroy`(mem: var GpuMemory) {....raises: [], tags: [], forbids: [].}
Automatically free device memory when the object goes out of scope Source   Edit  
proc cos(x: cdouble): cdouble {.header: "hip/hip_runtime.h",
                                importcpp: "cos(@)", ...raises: [], tags: [],
                                forbids: [].}
Source   Edit  
proc cosf(x: cfloat): cfloat {.header: "hip/hip_runtime.h",
                               importcpp: "cosf(@)", ...raises: [], tags: [],
                               forbids: [].}
Source   Edit  
proc exp(x: cdouble): cdouble {.header: "hip/hip_runtime.h",
                                importcpp: "exp(@)", ...raises: [], tags: [],
                                forbids: [].}
Source   Edit  
proc expf(x: cfloat): cfloat {.header: "hip/hip_runtime.h",
                               importcpp: "expf(@)", ...raises: [], tags: [],
                               forbids: [].}
Source   Edit  
proc floatToHalf(f: cfloat): uint16 {.header: "hip/hip_fp16.h", importcpp: "__half_raw(__float2half(#)).x",
                                      ...raises: [], tags: [], forbids: [].}
Convert float32 to IEEE 754 half-precision (uint16) using HIP hardware intrinsic. Source   Edit  
proc halfToFloat(h: uint16): cfloat {.header: "hip/hip_fp16.h", importcpp: "[&]{ __half_raw r; r.x = (#); return __half2float(r); }()",
                                      ...raises: [], tags: [], forbids: [].}
Convert IEEE 754 half-precision (uint16) to float32 using HIP hardware intrinsic. Source   Edit  
proc handleError(err: hipError_t) {....raises: [Exception], tags: [], forbids: [].}
Source   Edit  
proc hipAtomicAdd(address: ptr int32; val: int32): int32 {.
    header: "hip/hip_runtime.h", importcpp: "atomicAdd(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipAtomicAdd(address: ptr uint32; val: uint32): uint32 {.
    header: "hip/hip_runtime.h", importcpp: "atomicAdd(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipAtomicAnd(address: ptr int32; val: int32): int32 {.
    header: "hip/hip_runtime.h", importcpp: "atomicAnd(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipAtomicAnd(address: ptr uint32; val: uint32): uint32 {.
    header: "hip/hip_runtime.h", importcpp: "atomicAnd(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipAtomicCAS(address: ptr int32; compare: int32; val: int32): int32 {.
    header: "hip/hip_runtime.h", importcpp: "atomicCAS(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipAtomicCAS(address: ptr uint32; compare: uint32; val: uint32): uint32 {.
    header: "hip/hip_runtime.h", importcpp: "atomicCAS(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipAtomicExch(address: ptr int32; val: int32): int32 {.
    header: "hip/hip_runtime.h", importcpp: "atomicExch(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipAtomicExch(address: ptr uint32; val: uint32): uint32 {.
    header: "hip/hip_runtime.h", importcpp: "atomicExch(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipAtomicMax(address: ptr int32; val: int32): int32 {.
    header: "hip/hip_runtime.h", importcpp: "atomicMax(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipAtomicMax(address: ptr uint32; val: uint32): uint32 {.
    header: "hip/hip_runtime.h", importcpp: "atomicMax(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipAtomicMin(address: ptr int32; val: int32): int32 {.
    header: "hip/hip_runtime.h", importcpp: "atomicMin(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipAtomicMin(address: ptr uint32; val: uint32): uint32 {.
    header: "hip/hip_runtime.h", importcpp: "atomicMin(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipAtomicOr(address: ptr int32; val: int32): int32 {.
    header: "hip/hip_runtime.h", importcpp: "atomicOr(@)", ...raises: [], tags: [],
    forbids: [].}
Source   Edit  
proc hipAtomicOr(address: ptr uint32; val: uint32): uint32 {.
    header: "hip/hip_runtime.h", importcpp: "atomicOr(@)", ...raises: [], tags: [],
    forbids: [].}
Source   Edit  
proc hipAtomicSub(address: ptr int32; val: int32): int32 {.
    header: "hip/hip_runtime.h", importcpp: "atomicSub(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipAtomicSub(address: ptr uint32; val: uint32): uint32 {.
    header: "hip/hip_runtime.h", importcpp: "atomicSub(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipAtomicXor(address: ptr int32; val: int32): int32 {.
    header: "hip/hip_runtime.h", importcpp: "atomicXor(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipAtomicXor(address: ptr uint32; val: uint32): uint32 {.
    header: "hip/hip_runtime.h", importcpp: "atomicXor(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipDeviceSynchronize(): hipError_t {.header: "hip/hip_runtime.h",
    importcpp: "hipDeviceSynchronize(@)", ...raises: [], tags: [], forbids: [].}
Source   Edit  
proc hipEventCreate(event: ptr hipEvent_t): hipError_t {.
    header: "hip/hip_runtime.h", importcpp: "hipEventCreate(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipEventCreateWithFlags(event: ptr hipEvent_t; flags: uint32_t): hipError_t {.
    header: "hip/hip_runtime.h", importcpp: "hipEventCreateWithFlags(@)",
    ...raises: [], tags: [], forbids: [].}
Source   Edit  
proc hipEventDestroy(event: hipEvent_t): hipError_t {.
    header: "hip/hip_runtime.h", importcpp: "hipEventDestroy(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipEventElapsedTime(ms: ptr cfloat; start: hipEvent_t; stop: hipEvent_t): hipError_t {.
    header: "hip/hip_runtime.h", importcpp: "hipEventElapsedTime(@)",
    ...raises: [], tags: [], forbids: [].}
Source   Edit  
proc hipEventQuery(event: hipEvent_t): hipError_t {.header: "hip/hip_runtime.h",
    importcpp: "hipEventQuery(@)", ...raises: [], tags: [], forbids: [].}
Source   Edit  
proc hipEventRecord(event: hipEvent_t; stream: hipStream_t = nil): hipError_t {.
    header: "hip/hip_runtime.h", importcpp: "hipEventRecord(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipEventSynchronize(event: hipEvent_t): hipError_t {.
    header: "hip/hip_runtime.h", importcpp: "hipEventSynchronize(@)",
    ...raises: [], tags: [], forbids: [].}
Source   Edit  
proc hipFree(ptr: pointer): hipError_t {.header: "hip/hip_runtime.h",
    importcpp: "hipFree(@)", ...raises: [], tags: [], forbids: [].}
Source   Edit  
proc hipGetDevice(device: ptr cint): hipError_t {.header: "hip/hip_runtime.h",
    importcpp: "hipGetDevice(@)", ...raises: [], tags: [], forbids: [].}
Source   Edit  
proc hipGetDeviceProperties(prop: ptr hipDeviceProp_t; device: cint): hipError_t {.
    header: "hip/hip_runtime.h", importcpp: "hipGetDeviceProperties(@)",
    ...raises: [], tags: [], forbids: [].}
Source   Edit  
proc hipGetErrorString(err: hipError_t): ConstCString {.
    header: "hip/hip_runtime.h", importcpp: "hipGetErrorString(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipGetLastError(): hipError_t {.header: "hip/hip_runtime.h",
                                     importcpp: "hipGetLastError()", ...raises: [],
                                     tags: [], forbids: [].}
Source   Edit  
proc hipHostAlloc(p: ptr pointer; size: csize_t; flags: uint32_t): hipError_t {.
    header: "hip/hip_runtime.h", importcpp: "hipHostAlloc(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipHostFree(p: pointer): hipError_t {.header: "hip/hip_runtime.h",
    importcpp: "hipHostFree(@)", ...raises: [], tags: [], forbids: [].}
Source   Edit  
proc hipLaunchKernel(function_address: pointer; numBlocks: Dim3;
                     dimBlocks: Dim3; args: ptr pointer): hipError_t {.
    importcpp: "hipLaunchKernel(@)", header: "hip/hip_runtime.h", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipLaunchKernel(function_address: pointer; numBlocks: Dim3;
                     dimBlocks: Dim3; args: ptr pointer;
                     sharedMemBytes: csize_t; stream: hipStream_t): cint {.
    importcpp: "hipLaunchKernel(@)", header: "hip/hip_runtime.h", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipLaunchKernelGGL(function_address: proc; numBlocks: Dim3;
                        dimBlocks: Dim3; sharedMemBytes: uint32_t;
                        stream: hipStream_t) {.
    importcpp: "hipLaunchKernelGGL(@)", header: "hip/hip_runtime.h", varargs,
    ...raises: [], tags: [], forbids: [].}
Source   Edit  
proc hipMalloc(ptr: ptr pointer; size: cint): hipError_t {.
    header: "hip/hip_runtime.h", importcpp: "hipMalloc(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipMemcpy(dst: pointer; src: pointer; size: cint; kind: hipMemcpyKind): hipError_t {.
    header: "hip/hip_runtime.h", importcpp: "hipMemcpy(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipMemcpyAsync(dst: pointer; src: pointer; sizeBytes: csize_t;
                    kind: hipMemcpyKind; stream: hipStream_t): hipError_t {.
    header: "hip/hip_runtime.h", importcpp: "hipMemcpyAsync(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipMemcpyToSymbol(symbol: pointer; src: pointer; sizeBytes: csize_t;
                       offset: csize_t = 0;
                       kind: hipMemcpyKind = hipMemcpyHostToDevice): hipError_t {.
    header: "hip/hip_runtime.h", importcpp: "hipMemcpyToSymbol(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hippoSyncthreads() {.importcpp: "__syncthreads()",
                          header: "hip/hip_runtime.h", ...raises: [], tags: [],
                          forbids: [].}
Source   Edit  
proc hipStreamCreate(stream: ptr hipStream_t): hipError_t {.
    header: "hip/hip_runtime.h", importcpp: "hipStreamCreate(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipStreamDestroy(stream: hipStream_t): hipError_t {.
    header: "hip/hip_runtime.h", importcpp: "hipStreamDestroy(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipStreamSynchronize(stream: hipStream_t): hipError_t {.
    header: "hip/hip_runtime.h", importcpp: "hipStreamSynchronize(@)",
    ...raises: [], tags: [], forbids: [].}
Source   Edit  
proc hipStreamWaitEvent(stream: hipStream_t; event: hipEvent_t;
                        flags: uint32_t = 0'u32): hipError_t {.
    header: "hip/hip_runtime.h", importcpp: "hipStreamWaitEvent(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipSymbol[T](sym: var T): pointer {.header: "hip/hip_runtime.h",
    importcpp: "HIP_SYMBOL(@)", ...raises: [], tags: [], forbids: [].}
Source   Edit  
proc hipSyncthreads() {.importcpp: "__syncthreads()",
                        header: "hip/hip_runtime.h", ...raises: [], tags: [],
                        forbids: [].}
Source   Edit  
proc log(x: cdouble): cdouble {.header: "hip/hip_runtime.h",
                                importcpp: "log(@)", ...raises: [], tags: [],
                                forbids: [].}
Source   Edit  
proc logf(x: cfloat): cfloat {.header: "hip/hip_runtime.h",
                               importcpp: "logf(@)", ...raises: [], tags: [],
                               forbids: [].}
Source   Edit  
proc newDim3(x: uint32_t = 1; y: uint32_t = 1; z: uint32_t = 1): Dim3 {.
    ...raises: [], tags: [], forbids: [].}
Source   Edit  
proc pow(base: cdouble; exp: cdouble): cdouble {.header: "hip/hip_runtime.h",
    importcpp: "pow(@)", ...raises: [], tags: [], forbids: [].}
Source   Edit  
proc powf(base: cfloat; exp: cfloat): cfloat {.header: "hip/hip_runtime.h",
    importcpp: "powf(@)", ...raises: [], tags: [], forbids: [].}
Source   Edit  
proc shfl(val: cfloat; srcLane: cint): cfloat {.header: "hip/hip_runtime.h",
    importcpp: "__shfl(@)", ...raises: [], tags: [], forbids: [].}
Warp shuffle: read float32 from srcLane (broadcast). Source   Edit  
proc shfl(val: cint; srcLane: cint): cint {.header: "hip/hip_runtime.h",
    importcpp: "__shfl(@)", ...raises: [], tags: [], forbids: [].}
Warp shuffle: read int32 from srcLane (broadcast). Source   Edit  
proc shflDown(val: cfloat; delta: cint): cfloat {.header: "hip/hip_runtime.h",
    importcpp: "__shfl_down(@)", ...raises: [], tags: [], forbids: [].}
Warp shuffle down for float32. Source   Edit  
proc shflDown(val: cint; delta: cint): cint {.header: "hip/hip_runtime.h",
    importcpp: "__shfl_down(@)", ...raises: [], tags: [], forbids: [].}
Warp shuffle down for int32. Source   Edit  
proc sin(x: cdouble): cdouble {.header: "hip/hip_runtime.h",
                                importcpp: "sin(@)", ...raises: [], tags: [],
                                forbids: [].}
Source   Edit  
proc sinf(x: cfloat): cfloat {.header: "hip/hip_runtime.h",
                               importcpp: "sinf(@)", ...raises: [], tags: [],
                               forbids: [].}
Source   Edit  
proc sqrt(x: cdouble): cdouble {.header: "hip/hip_runtime.h",
                                 importcpp: "sqrt(@)", ...raises: [], tags: [],
                                 forbids: [].}
Source   Edit  
proc sqrtf(x: cfloat): cfloat {.header: "hip/hip_runtime.h",
                                importcpp: "sqrtf(@)", ...raises: [], tags: [],
                                forbids: [].}
Source   Edit  

Converters

converter toConstCString(self: cstring): ConstCString {.
    importc: "(const char*)", noconv, nodecl, ...raises: [], tags: [], forbids: [].}
Source   Edit  
converter toCString(self: ConstCString): cstring {.importc: "(char*)", noconv,
    nodecl, ...raises: [], tags: [], forbids: [].}
Source   Edit  

Macros

macro generateForLoopMacro(name: static[string]; theType: typedesc;
                           body: untyped): untyped
Generate a GPU kernel and a host launcher proc using a custom name. Source   Edit  
macro hippoArgs(args: varargs[untyped]): untyped
Automatically convert varargs for use with CUDA/HIP. CUDA/HIP expects an array of arguments or pointers depending on platform. Source   Edit  
macro hippoConstant(v: untyped): untyped
Declared a variable as __constant__. Constants are read-only globals that are cached on-chip. constants are useful for data that is being read by all threads in a warp at the same time. if each thread in a warp accesses different addresses in constant memory, the accesses are serialized and this may cause a 16x slowdown. eg: const N {.hippoConstant.} = 1024 Source   Edit  
macro hippoDevice(fn: untyped): untyped
Declare fuctions for use on the __device__ (the gpu), to be called by either device or global functions. Source   Edit  
macro hippoGlobal(fn: untyped): untyped
Declare a function as __global__. global functions are called from the host and run on the device. Source   Edit  
macro hippoHost(fn: untyped): untyped
Explicitly declare a function as a __host__ function (cpu side). All functions default to host functions, so this is not required. Source   Edit  
macro hippoHostDevice(fn: untyped): untyped
Declare a function as both __host__ and __device__. This is useful for functions that are usable from either the host and the device. eg: proc add(a: int, b: int) {.hippoHostDevice.} = a + b Source   Edit  
macro hippoShared(v: untyped): untyped
Declared a variable as static shared memory __shared__. Shared memory is shared between threads in the same block. It is faster than global memory, but is limited in size. They are located on-chip. eg: var cache {.hippoShared.}: array[256, float] Source   Edit  

Templates

template hippoAtomicAdd(address: ptr [int32]; val: int32): int32
Atomically add and return the previous value. Source   Edit  
template hippoAtomicAdd(address: ptr [uint32]; val: uint32): uint32
Atomically add and return the previous value. Source   Edit  
template hippoAtomicAnd(address: ptr [int32]; val: int32): int32
Atomically AND and return the previous value. Source   Edit  
template hippoAtomicAnd(address: ptr [uint32]; val: uint32): uint32
Atomically AND and return the previous value. Source   Edit  
template hippoAtomicCAS(address: ptr [int32]; compare: int32; val: int32): int32
Atomically compare-and-swap and return the previous value. Source   Edit  
template hippoAtomicCAS(address: ptr [uint32]; compare: uint32; val: uint32): uint32
Atomically compare-and-swap and return the previous value. Source   Edit  
template hippoAtomicExch(address: ptr [int32]; val: int32): int32
Atomically exchange and return the previous value. Source   Edit  
template hippoAtomicExch(address: ptr [uint32]; val: uint32): uint32
Atomically exchange and return the previous value. Source   Edit  
template hippoAtomicMax(address: ptr [int32]; val: int32): int32
Atomically update with max and return the previous value. Source   Edit  
template hippoAtomicMax(address: ptr [uint32]; val: uint32): uint32
Atomically update with max and return the previous value. Source   Edit  
template hippoAtomicMin(address: ptr [int32]; val: int32): int32
Atomically update with min and return the previous value. Source   Edit  
template hippoAtomicMin(address: ptr [uint32]; val: uint32): uint32
Atomically update with min and return the previous value. Source   Edit  
template hippoAtomicOr(address: ptr [int32]; val: int32): int32
Atomically OR and return the previous value. Source   Edit  
template hippoAtomicOr(address: ptr [uint32]; val: uint32): uint32
Atomically OR and return the previous value. Source   Edit  
template hippoAtomicSub(address: ptr [int32]; val: int32): int32
Atomically subtract and return the previous value. Source   Edit  
template hippoAtomicSub(address: ptr [uint32]; val: uint32): uint32
Atomically subtract and return the previous value. Source   Edit  
template hippoAtomicXor(address: ptr [int32]; val: int32): int32
Atomically XOR and return the previous value. Source   Edit  
template hippoAtomicXor(address: ptr [uint32]; val: uint32): uint32
Atomically XOR and return the previous value. Source   Edit  
template hippoCos(x: cfloat): cfloat
Cosine function for single-precision float Source   Edit  
template hippoEventCreate(): HippoEvent
Create a timing event. Source   Edit  
template hippoEventCreateWithFlags(flags: uint32 = 0'u32): HippoEvent
Create a timing event with explicit runtime flags. Source   Edit  
template hippoEventDestroy(event: HippoEvent)
Destroy a timing event. Source   Edit  
template hippoEventElapsedTime(start: HippoEvent; stop: HippoEvent): float32
Get elapsed time between two events. Source   Edit  
template hippoEventQuery(event: HippoEvent): bool
Return true when the event has completed and false when it is still pending. On HIP-CPU, this behaves as a blocking synchronize because hipEventQuery is not declared. Source   Edit  
template hippoEventRecord(event: HippoEvent; stream: HippoStream = nil)
Record an event on a stream. Source   Edit  
template hippoEventSynchronize(event: HippoEvent)
Wait for an event to complete. Source   Edit  
template hippoExp(x: cdouble): cdouble
Exponential function (e^x) for double-precision float Source   Edit  
template hippoExp(x: cfloat): cfloat
Exponential function (e^x) for single-precision float Source   Edit  
template hippoFloatToHalf(f: cfloat): uint16
Convert float32 to IEEE 754 half-precision (uint16). Uses hardware intrinsic on HIP/CUDA, software fallback on SIMPLE. Source   Edit  
template hippoFree(p: pointer)
Free memory on the GPU Source   Edit  
template hippoGetDevice(): cint
Get the current device index. Source   Edit  
template hippoGetDeviceProperties(prop: var HippoDeviceProp; device: cint)
Get device properties for the given device. Source   Edit  
template hippoHalfToFloat(h: uint16): cfloat
Convert IEEE 754 half-precision (uint16) to float32. Uses hardware intrinsic on HIP/CUDA, software fallback on SIMPLE. Source   Edit  
template hippoHostAlloc(size: int): pointer
Allocate page-locked host memory Source   Edit  
template hippoHostFree(p: pointer)
Free page-locked host memory. Source   Edit  
template hippoLaunchKernel(kernel: untyped; ## The GPU kernel procedure to launch
                           gridDim: Dim3 = newDim3(1, 1, 1); ## default to a grid of 1 block
                           blockDim: Dim3 = newDim3(1, 1, 1); ## default to 1 thread per block
                           sharedMemBytes: uint32 = 0; ## dynamic shared memory amount to allocate
                           stream: HippoStream = nil; ## Which device stream to run under (defaults to null)
                           args: untyped)
Source   Edit  
template hippoLog(x: cdouble): cdouble
Natural logarithm for double-precision float Source   Edit  
template hippoLog(x: cfloat): cfloat
Natural logarithm for single-precision float Source   Edit  
template hippoMalloc(size: int): GpuRef
Allocate memory on the GPU and return a GpuRef object. GpuMemory is a wrapper around Gpu allocated pointers. It will automatically free the memory when it goes out of scope. Source   Edit  
template hippoMemcpy(dst: GpuRef; src: GpuRef; size: int; kind: HippoMemcpyKind)
device -> device memory copy Copy memory from src to dst. direction of device and host is determined by kind. Source   Edit  
template hippoMemcpy(dst: GpuRef; src: pointer; size: int; kind: HippoMemcpyKind)
device -> host memory copy Copy memory from src to dst. direction of device and host is determined by kind. Source   Edit  
template hippoMemcpy(dst: pointer; src: GpuRef; size: int; kind: HippoMemcpyKind)
host -> device memory copy Copy memory from src to dst. direction of device and host is determined by kind. Source   Edit  
template hippoMemcpy(dst: pointer; src: pointer; size: int;
                     kind: HippoMemcpyKind)
host -> host memory copy hippoMemcpy is broken out as 4 separate templates to make it easier to work with GpuRef objects Copy memory from src to dst. direction of device and host is determined by kind. Source   Edit  
template hippoMemcpyAsync(dst: pointer; src: pointer; size: int;
                          kind: HippoMemcpyKind; stream: HippoStream)
Asynchronous memory copy on a stream Source   Edit  
template hippoMemcpyToSymbol(symbol: untyped; src: pointer; size: int;
                             offset: int = 0;
                             kind: HippoMemcpyKind = HippoMemcpyHostToDevice)
Copy data from host to a device symbol (eg: __constant__ storage). Source   Edit  
template hippoPow(base: cdouble; exp: cdouble): cdouble
Power function (base^exp) for double-precision float Source   Edit  
template hippoPow(base: cfloat; exp: cfloat): cfloat
Power function (base^exp) for single-precision float Source   Edit  
template hippoShfl(val: cfloat; srcLane: int): cfloat
Warp shuffle: read float32 from srcLane (broadcast). Source   Edit  
template hippoShfl(val: cint; srcLane: int): cint
Warp shuffle: read int32 from srcLane (broadcast). Source   Edit  
template hippoShflDown(val: cfloat; delta: int): cfloat
Warp shuffle down for float32. Returns the value from the lane delta positions below the calling lane within the same warp. Source   Edit  
template hippoShflDown(val: cint; delta: int): cint
Warp shuffle down for int32. Source   Edit  
template hippoSin(x: cfloat): cfloat
Sine function for single-precision float Source   Edit  
template hippoSqrt(x: cfloat): cfloat
Square root function for single-precision float Source   Edit  
template hippoStreamCreate(): HippoStream
Create a new stream Source   Edit  
template hippoStreamDestroy(stream: HippoStream)
Destroy a stream Source   Edit  
template hippoStreamSynchronize(stream: HippoStream)
Synchronize a specific stream Source   Edit  
template hippoStreamWaitEvent(stream: HippoStream; event: pointer;
                              flags: uint32 = 0'u32)
Block stream work until an event is recorded and completed. Source   Edit  
template hippoSynchronize()
Synchronize the device Source   Edit