Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add inline-c-cuda package for CUDA #147

Merged
merged 1 commit into from
Sep 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions inline-c-cuda/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
Copyright (c) 2015 FP Complete Corporation.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1 change: 1 addition & 0 deletions inline-c-cuda/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Small set of utilities to inline CUDA code. See tests for example.
2 changes: 2 additions & 0 deletions inline-c-cuda/Setup.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
import Distribution.Simple
main = defaultMain
50 changes: 50 additions & 0 deletions inline-c-cuda/inline-c-cuda.cabal
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
cabal-version: 2.2
name: inline-c-cuda
version: 0.1.0.0
synopsis: Lets you embed CUDA code into Haskell.
description: Utilities to inline CUDA code into Haskell using inline-c. See
tests for example on how to build.
license: MIT
license-file: LICENSE
author: Francesco Mazzoli
maintainer: f@mazzo.li
copyright: (c) 2015-2016 FP Complete Corporation, (c) 2017-2019 Francesco Mazzoli
category: FFI
tested-with: GHC == 9.2.2
build-type: Simple

source-repository head
type: git
location: https://github.com/fpco/inline-c

library
exposed-modules: Language.C.Inline.Cuda
build-depends: base >=4.7 && <5
, bytestring
, inline-c >= 0.9.0.0
, inline-c-cpp
, template-haskell
, text
, safe-exceptions
, containers
, process
hs-source-dirs: src
default-language: Haskell2010
ghc-options: -Wall
extra-libraries: cudart

test-suite tests
type: exitcode-stdio-1.0
hs-source-dirs: test
main-is: tests.hs
build-depends: base >=4 && <5
, bytestring
, inline-c
, inline-c-cpp
, inline-c-cuda
, safe-exceptions
, hspec
, containers
, template-haskell
, vector
default-language: Haskell2010
53 changes: 53 additions & 0 deletions inline-c-cuda/src/Language/C/Inline/Cuda.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE OverloadedStrings #-}

-- | Module exposing a 'Context' to inline CUDA code. We only have used
-- this for experiments, so use with caution. See the CUDA tests to see
-- how to build inline CUDA code.
module Language.C.Inline.Cuda
( module Language.C.Inline
, cudaCtx
, Cpp.cppTypePairs
, Cpp.using
, Cpp.AbstractCppExceptionPtr
) where

import qualified Language.Haskell.TH as TH
import qualified Language.Haskell.TH.Syntax as TH

import Language.C.Inline
import Language.C.Inline.Context
import qualified Language.C.Types as CT
import qualified Language.C.Inline.Cpp as Cpp

import qualified Data.Map as Map
import Control.Monad.IO.Class (liftIO)
import System.Exit (ExitCode(..))
import System.Process (readProcessWithExitCode)

compileCuda :: String -> TH.Q FilePath
compileCuda src = do
cuFile <- TH.addTempFile "cu"
oFile <- TH.addTempFile "o"
let (cmd,args) = ("nvcc", ["-c","-o",oFile, cuFile])
(code, stdout, stderr) <- liftIO $ do
writeFile cuFile src
readProcessWithExitCode cmd args ""
case code of
ExitFailure _ -> fail $ "Compile Command: " ++ (foldl (\a b -> a ++ " " ++ b) " " (cmd : args)) ++ "\n" ++ " Output: " ++ stdout ++ "\n" ++ " Error: " ++ stderr
ExitSuccess -> return oFile

-- | The equivalent of 'C.baseCtx' for CUDA. It specifies the @.cu@
-- file extension for the CUDA file, so that nvcc will decide to build CUDA
-- instead of C. See the @.cabal@ test target for an example on how to
-- build.
cudaCtx :: Context
cudaCtx = Cpp.cppCtx <> mempty
{ ctxForeignSrcLang = Just TH.RawObject
, ctxOutput = Just $ \s -> "extern \"C\" {\n" ++ s ++ "\n}"
, ctxEnableCpp = True
, ctxRawObjectCompile = Just compileCuda
, ctxTypesTable = Map.singleton (CT.TypeName "std::exception_ptr") [t|Cpp.AbstractCppExceptionPtr|]
}

133 changes: 133 additions & 0 deletions inline-c-cuda/test/tests.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PolyKinds #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeInType #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE TypeApplications #-}
{-# OPTIONS_GHC -Wno-deprecations #-}

import Control.Exception.Safe
import Control.Monad
import qualified Language.C.Inline.Context as CC
import qualified Language.C.Types as CT
import qualified Language.C.Inline.Cuda as C
import qualified Test.Hspec as Hspec
import Test.Hspec (shouldBe)
import Foreign.Ptr (Ptr)
import Data.Monoid
import Foreign.Marshal.Array
import Foreign.Marshal.Alloc
import Foreign.Storable


C.context $ C.cudaCtx

C.include "<iostream>"
C.include "<stdexcept>"

[C.emitBlock|
__global__ void
vectorAdd(const float *A, const float *B, float *C, int numElements)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;

if (i < numElements)
{
C[i] = A[i] + B[i];
}
}
|]

cudaAllocaArray :: forall b. Int -> (Ptr C.CFloat -> IO b) -> IO b
cudaAllocaArray size func = do
let csize = fromIntegral size
alloca $ \(ptr_d_A :: Ptr (Ptr C.CFloat)) -> do
[C.block| void {
cudaError_t err = cudaMalloc((void **)$(float** ptr_d_A), $(int csize) * sizeof(float));
if (err != cudaSuccess)
{
fprintf(stderr, "Failed to allocate device vector C (error code %s)!\n", cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
} |]
d_A <- peekElemOff ptr_d_A 0
ret <- func d_A
[C.block| void {
cudaError_t err = cudaFree($(float* d_A));
if (err != cudaSuccess)
{
fprintf(stderr, "Failed to free device vector A (error code %s)!\n", cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
} |]
return ret

cudaMemcpyHostToDevice :: Int -> Ptr C.CFloat -> Ptr C.CFloat -> IO ()
cudaMemcpyHostToDevice num host device = do
let cnum = fromIntegral num
[C.block| void {
cudaError_t err = cudaMemcpy($(float* device), $(float* host), $(int cnum) * sizeof(float), cudaMemcpyHostToDevice);
if (err != cudaSuccess)
{
fprintf(stderr, "Failed to copy vector from host to device (error code %s)!\n", cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
} |]

cudaMemcpyDeviceToHost :: Int -> Ptr C.CFloat -> Ptr C.CFloat -> IO ()
cudaMemcpyDeviceToHost num device host = do
let cnum = fromIntegral num
[C.block| void {
cudaError_t err = cudaMemcpy($(float* host), $(float* device), $(int cnum) * sizeof(float), cudaMemcpyDeviceToHost);
if (err != cudaSuccess)
{
fprintf(stderr, "Failed to copy vector C from device to host (error code %s)!\n", cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
} |]


main :: IO ()
main = Hspec.hspec $ do
Hspec.describe "Basic CUDA" $ do
Hspec.it "Add vectors on device" $ do
let numElements = 50000
cNumElements = fromIntegral numElements
allocaArray numElements $ \(h_A :: Ptr C.CFloat) -> do
allocaArray numElements $ \(h_B :: Ptr C.CFloat) -> do
allocaArray numElements $ \(h_C :: Ptr C.CFloat) -> do
cudaAllocaArray numElements $ \(d_A :: Ptr C.CFloat) -> do
cudaAllocaArray numElements $ \(d_B :: Ptr C.CFloat) -> do
cudaAllocaArray numElements $ \(d_C :: Ptr C.CFloat) -> do
[C.block| void {
for (int i = 0; i < $(int cNumElements); ++i)
{
$(float* h_A)[i] = rand()/(float)RAND_MAX;
$(float* h_B)[i] = rand()/(float)RAND_MAX;
}
} |]
cudaMemcpyHostToDevice numElements h_A d_A
cudaMemcpyHostToDevice numElements h_B d_B
[C.block| void {
int threadsPerBlock = 256;
int blocksPerGrid =($(int cNumElements) + threadsPerBlock - 1) / threadsPerBlock;
vectorAdd<<<blocksPerGrid, threadsPerBlock>>>($(float* d_A), $(float* d_B), $(float* d_C), $(int cNumElements));
} |]
cudaMemcpyDeviceToHost numElements d_C h_C
lA <- peekArray numElements h_A
lB <- peekArray numElements h_B
lC <- peekArray numElements h_C
all (< 1e-5) (map (\((a,b),c) -> abs(a + b - c)) (zip (zip lA lB) lC)) `shouldBe` True
1 change: 1 addition & 0 deletions inline-c/src/Language/C/Inline.hs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ module Language.C.Inline
, block
, include
, verbatim
, emitBlock

-- * 'Ptr' utils
, withPtr
Expand Down
5 changes: 5 additions & 0 deletions inline-c/src/Language/C/Inline/Context.hs
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,8 @@ data Context = Context
, ctxForeignSrcLang :: Maybe TH.ForeignSrcLang
-- ^ TH.LangC by default
, ctxEnableCpp :: Bool
-- ^ Compile source code to raw object.
, ctxRawObjectCompile :: Maybe (String -> TH.Q FilePath)
}


Expand All @@ -172,6 +174,7 @@ instance Semigroup Context where
, ctxOutput = ctxOutput ctx1 <|> ctxOutput ctx2
, ctxForeignSrcLang = ctxForeignSrcLang ctx1 <|> ctxForeignSrcLang ctx2
, ctxEnableCpp = ctxEnableCpp ctx1 || ctxEnableCpp ctx2
, ctxRawObjectCompile = ctxRawObjectCompile ctx1 <|> ctxRawObjectCompile ctx2
}
#endif

Expand All @@ -182,6 +185,7 @@ instance Monoid Context where
, ctxOutput = Nothing
, ctxForeignSrcLang = Nothing
, ctxEnableCpp = False
, ctxRawObjectCompile = Nothing
}

#if !MIN_VERSION_base(4,11,0)
Expand All @@ -191,6 +195,7 @@ instance Monoid Context where
, ctxOutput = ctxOutput ctx1 <|> ctxOutput ctx2
, ctxForeignSrcLang = ctxForeignSrcLang ctx1 <|> ctxForeignSrcLang ctx2
, ctxEnableCpp = ctxEnableCpp ctx1 || ctxEnableCpp ctx2
, ctxRawObjectCompile = ctxRawObjectCompile ctx1 <|> ctxRawObjectCompile ctx2
}
#endif

Expand Down
19 changes: 17 additions & 2 deletions inline-c/src/Language/C/Inline/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ module Language.C.Inline.Internal

-- ** Emitting C code
, emitVerbatim
, emitBlock

-- ** Inlining C code
-- $embedding
Expand Down Expand Up @@ -168,11 +169,16 @@ initialiseModuleState mbContext = do
Nothing -> fail "inline-c: ModuleState not present (initialiseModuleState)"
Just ms -> return ms
let lang = fromMaybe TH.LangC (ctxForeignSrcLang context)
addForeignSource =
#if MIN_VERSION_base(4,12,0)
TH.addForeignSource lang (concat (reverse (msFileChunks ms)))
TH.addForeignSource
#else
TH.addForeignFile lang (concat (reverse (msFileChunks ms)))
TH.addForeignFile
#endif
src = (concat (reverse (msFileChunks ms)))
case (lang, ctxRawObjectCompile context) of
(TH.RawObject, Just compile) -> compile src >>= TH.addForeignFilePath lang
(_, _) -> addForeignSource lang src
let moduleState = ModuleState
{ msContext = context
, msGeneratedNames = 0
Expand Down Expand Up @@ -234,6 +240,15 @@ emitVerbatim s = do
(ms{msFileChunks = chunk : msFileChunks ms}, ())
return []

-- | Simply appends some string of block to the module's C file. Use with care.
emitBlock :: TH.QuasiQuoter
emitBlock = TH.QuasiQuoter
{ TH.quoteExp = const $ fail "inline-c: quoteExp not implemented (quoteCode)"
, TH.quotePat = const $ fail "inline-c: quotePat not implemented (quoteCode)"
, TH.quoteType = const $ fail "inline-c: quoteType not implemented (quoteCode)"
, TH.quoteDec = emitVerbatim
}

------------------------------------------------------------------------
-- Inlining

Expand Down
Loading