src/hippo

Search:
Group by:
Source   Edit  

Nim Library to enable writing CUDA and HIP kernels in Nim All cuda and hip structures and functions are re-exported and can be used

  • There are 3 sets of function prefixes.
  • hippo* prefixed functions are friendly nim interfaces for either HIP or CUDA
    • This is the recommended way to use this library, as it is the most nim-like
    • These functions check for errors and raise them as exceptions
  • hip* prefixed functions are the raw HIP C++ functions
  • cuda* prefixed functions are the raw CUDA C functions

Types

BlockDim {.importcpp: "const __HIP_Coordinates<__HIP_BlockDim>",
           header: "hip/hip_runtime.h".} = object
  x* {.importc: "x".}: uint32_t ## < x
  y* {.importc: "y".}: uint32_t ## < y
  z* {.importc: "z".}: uint32_t ## < z
Source   Edit  
BlockIdx {.importcpp: "const __HIP_Coordinates<__HIP_BlockIdx>",
           header: "hip/hip_runtime.h".} = object
  x* {.importc: "x".}: uint32_t ## < x
  y* {.importc: "y".}: uint32_t ## < y
  z* {.importc: "z".}: uint32_t ## < z
Source   Edit  
ConstCString {.importc: "const char*".} = object
Source   Edit  
Dim3 {.importcpp: "dim3", header: "hip/hip_runtime.h", bycopy.} = object
  x* {.importc: "x".}: uint32_t ## < x
  y* {.importc: "y".}: uint32_t ## < y
  z* {.importc: "z".}: uint32_t ## < z
Source   Edit  
GridDim {.importcpp: "const __HIP_Coordinates<__HIP_GridDim>",
          header: "hip/hip_runtime.h".} = object
  x* {.importc: "x".}: uint32_t ## < x
  y* {.importc: "y".}: uint32_t ## < y
  z* {.importc: "z".}: uint32_t ## < z
Source   Edit  
hipError_t {.importcpp: "hipError_t", header: "hip/hip_runtime.h".} = cint
Source   Edit  
hipMemcpyKind {.size: 4, header: "hip/hip_runtime.h", importcpp: "hipMemcpyKind".} = enum
  hipMemcpyHostToHost = 0,  ## < Host-to-Host Copy
  hipMemcpyHostToDevice = 1, ## < Host-to-Device Copy
  hipMemcpyDeviceToHost = 2, ## < Device-to-Host Copy
  hipMemcpyDeviceToDevice = 3, ## < Device-to-Device Copy
  hipMemcpyDefault = 4       ## < Runtime will automatically determine copy-kind based on virtual addresses.
Source   Edit  
hipStream_t = pointer
Source   Edit  
size_t = uint64
Source   Edit  
ThreadIdx {.importcpp: "const __HIP_Coordinates<__HIP_ThreadIdx>",
            header: "hip/hip_runtime.h".} = object
  x* {.importc: "x".}: uint32_t ## < x
  y* {.importc: "y".}: uint32_t ## < y
  z* {.importc: "z".}: uint32_t ## < z
Source   Edit  
uint8_t = uint8
Source   Edit  
uint16_t = uint16
Source   Edit  
uint32_t = uint32
Source   Edit  
uint64_t = uint64
Source   Edit  

Lets

blockDim {.importc, inject, header: "hip/hip_runtime.h".}: BlockDim
Source   Edit  
blockIdx {.importc, inject, header: "hip/hip_runtime.h".}: BlockIdx
Source   Edit  
gridDim {.importc, inject, header: "hip/hip_runtime.h".}: GridDim
Source   Edit  
threadIdx {.importc, inject, header: "hip/hip_runtime.h".}: ThreadIdx
Source   Edit  

Consts

HippoMemcpyDefault = hipMemcpyDefault
Source   Edit  
HippoMemcpyDeviceToDevice = hipMemcpyDeviceToDevice
Source   Edit  
HippoMemcpyDeviceToHost = hipMemcpyDeviceToHost
Source   Edit  
HippoMemcpyHostToDevice = hipMemcpyHostToDevice
Source   Edit  
HippoMemcpyHostToHost = hipMemcpyHostToHost
Source   Edit  
HippoRuntime {.strdefine.} = "HIP"
Source   Edit  

Procs

proc `$`(self: ConstCString): string {....raises: [], tags: [], forbids: [].}
Source   Edit  
proc `=destroy`(mem: var GpuMemory) {....raises: [], tags: [], forbids: [].}
Automatically free device memory when the object goes out of scope Source   Edit  
proc handleError(err: hipError_t) {....raises: [Exception], tags: [], forbids: [].}
Source   Edit  
proc hipDeviceSynchronize(): hipError_t {.header: "hip/hip_runtime.h",
    importcpp: "hipDeviceSynchronize(@)", ...raises: [], tags: [], forbids: [].}
Source   Edit  
proc hipFree(ptr: pointer): hipError_t {.header: "hip/hip_runtime.h",
    importcpp: "hipFree(@)", ...raises: [], tags: [], forbids: [].}
Source   Edit  
proc hipGetErrorString(err: hipError_t): ConstCString {.
    header: "hip/hip_runtime.h", importcpp: "hipGetErrorString(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipGetLastError(): hipError_t {.header: "hip/hip_runtime.h",
                                     importcpp: "hipGetLastError()", ...raises: [],
                                     tags: [], forbids: [].}
Source   Edit  
proc hipLaunchKernel(function_address: pointer; numBlocks: Dim3;
                     dimBlocks: Dim3; args: ptr pointer): hipError_t {.
    importcpp: "hipLaunchKernel(@)", header: "hip/hip_runtime.h", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipLaunchKernel(function_address: pointer; numBlocks: Dim3;
                     dimBlocks: Dim3; args: ptr pointer;
                     sharedMemBytes: csize_t; stream: hipStream_t): cint {.
    importcpp: "hipLaunchKernel(@)", header: "hip/hip_runtime.h", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipLaunchKernelGGL(function_address: proc; numBlocks: Dim3;
                        dimBlocks: Dim3; sharedMemBytes: uint32_t;
                        stream: hipStream_t) {.
    importcpp: "hipLaunchKernelGGL(@)", header: "hip/hip_runtime.h", varargs,
    ...raises: [], tags: [], forbids: [].}
Source   Edit  
proc hipMalloc(ptr: ptr pointer; size: cint): hipError_t {.
    header: "hip/hip_runtime.h", importcpp: "hipMalloc(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hipMemcpy(dst: pointer; src: pointer; size: cint; kind: hipMemcpyKind): hipError_t {.
    header: "hip/hip_runtime.h", importcpp: "hipMemcpy(@)", ...raises: [],
    tags: [], forbids: [].}
Source   Edit  
proc hippoSyncthreads() {.importcpp: "__syncthreads()",
                          header: "hip/hip_runtime.h", ...raises: [], tags: [],
                          forbids: [].}
Source   Edit  
proc hipSyncthreads() {.importcpp: "__syncthreads()",
                        header: "hip/hip_runtime.h", ...raises: [], tags: [],
                        forbids: [].}
Source   Edit  
proc newDim3(x: uint32_t = 1; y: uint32_t = 1; z: uint32_t = 1): Dim3 {.
    ...raises: [], tags: [], forbids: [].}
Source   Edit  

Converters

converter toConstCString(self: cstring): ConstCString {.
    importc: "(const char*)", noconv, nodecl, ...raises: [], tags: [], forbids: [].}
Source   Edit  
converter toCString(self: ConstCString): cstring {.importc: "(char*)", noconv,
    nodecl, ...raises: [], tags: [], forbids: [].}
Source   Edit  

Macros

macro hippoArgs(args: varargs[untyped]): untyped
Automatically convert varargs for use with CUDA/HIP. CUDA/HIP expects an array of arguments or pointers depending on platform. Source   Edit  
macro hippoConstant(v: untyped): untyped
Declared a variable as __constant__. Constants are read-only globals that are cached on-chip. constants are useful for data that is being read by all threads in a warp at the same time. if each thread in a warp accesses different addresses in constant memory, the accesses are serialized and this may cause a 16x slowdown. eg: const N {.hippoConstant.} = 1024 Source   Edit  
macro hippoDevice(fn: untyped): untyped
Declare fuctions for use on the __device__ (the gpu), to be called by either device or global functions. Source   Edit  
macro hippoGlobal(fn: untyped): untyped
Declare a function as __global__. global functions are called from the host and run on the device. Source   Edit  
macro hippoHost(fn: untyped): untyped
Explicitly declare a function as a __host__ function (cpu side). All functions default to host functions, so this is not required. Source   Edit  
macro hippoHostDevice(fn: untyped): untyped
Declare a function as both __host__ and __device__. This is useful for functions that are usable from either the host and the device. eg: proc add(a: int, b: int) {.hippoHostDevice.} = a + b Source   Edit  
macro hippoShared(v: untyped): untyped
Declared a variable as static shared memory __shared__. Shared memory is shared between threads in the same block. It is faster than global memory, but is limited in size. They are located on-chip. eg: var cache {.hippoShared.}: array[256, float] Source   Edit  

Templates

template hippoFree(p: pointer)
Free memory on the GPU Source   Edit  
template hippoLaunchKernel(kernel: proc; ## The GPU kernel procedure to launch
                           gridDim: Dim3 = newDim3(1, 1, 1); ## default to a grid of 1 block
                           blockDim: Dim3 = newDim3(1, 1, 1); ## default to 1 thread per block
                           sharedMemBytes: uint32 = 0; ## dynamic shared memory amount to allocate
                           stream: HippoStream = nil; ## Which device stream to run under (defaults to null)
                           args: untyped)
Source   Edit  
template hippoMalloc(size: int): GpuRef
Allocate memory on the GPU and return a GpuRef object. GpuMemory is a wrapper around Gpu allocated pointers. It will automatically free the memory when it goes out of scope. Source   Edit  
template hippoMemcpy(dst: GpuRef; src: GpuRef; size: int; kind: HippoMemcpyKind)
device -> device memory copy Copy memory from src to dst. direction of device and host is determined by kind. Source   Edit  
template hippoMemcpy(dst: GpuRef; src: pointer; size: int; kind: HippoMemcpyKind)
device -> host memory copy Copy memory from src to dst. direction of device and host is determined by kind. Source   Edit  
template hippoMemcpy(dst: pointer; src: GpuRef; size: int; kind: HippoMemcpyKind)
host -> device memory copy Copy memory from src to dst. direction of device and host is determined by kind. Source   Edit  
template hippoMemcpy(dst: pointer; src: pointer; size: int;
                     kind: HippoMemcpyKind)
host -> host memory copy hippoMemcpy is broken out as 4 separate templates to make it easier to work with GpuRef objects Copy memory from src to dst. direction of device and host is determined by kind. Source   Edit  
template hippoSynchronize()
Synchronize the device Source   Edit