diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index 96ebc9192c708e..4ee0d62cb49d9a 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -160,9 +160,9 @@ effectiveStdenv.mkDerivation ( }; postPatch = '' - substituteInPlace ./ggml-metal.m \ + substituteInPlace ./ggml/src/ggml-metal.m \ --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";" - substituteInPlace ./ggml-metal.m \ + substituteInPlace ./ggml/src/ggml-metal.m \ --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";" ''; @@ -244,7 +244,7 @@ effectiveStdenv.mkDerivation ( # if they haven't been added yet. postInstall = '' mkdir -p $out/include - cp $src/llama.h $out/include/ + cp $src/include/llama.h $out/include/ ''; # Define the shells here, but don't add in the inputsFrom to avoid recursion. diff --git a/.github/labeler.yml b/.github/labeler.yml index 72c7959cc6ffea..9c0397d16e1a03 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -2,25 +2,25 @@ Kompute: - changed-files: - any-glob-to-any-file: - - ggml/src/ggml-kompute.h + - ggml/include/ggml-kompute.h - ggml/src/ggml-kompute.cpp - README-kompute.md Apple Metal: - changed-files: - any-glob-to-any-file: - - ggml/src/ggml-metal.h + - ggml/include/ggml-metal.h - ggml/src/ggml-metal.cpp - README-metal.md SYCL: - changed-files: - any-glob-to-any-file: - - ggml/src/ggml-sycl.h + - ggml/include/ggml-sycl.h - ggml/src/ggml-sycl.cpp - README-sycl.md Nvidia GPU: - changed-files: - any-glob-to-any-file: - - ggml/src/ggml-cuda.h + - ggml/include/ggml-cuda.h - ggml/src/ggml-cuda/** Vulkan: - changed-files: diff --git a/Makefile b/Makefile index 3fae21b02b2128..64a6e6ff00e5ae 100644 --- a/Makefile +++ b/Makefile @@ -680,7 +680,7 @@ ggml/src/ggml-cuda/%.o: \ ggml/src/ggml-cuda.o: \ ggml/src/ggml-cuda.cu \ - ggml/src/ggml-cuda.h \ + ggml/include/ggml-cuda.h \ ggml/include/ggml.h \ ggml/include/ggml-backend.h \ ggml/src/ggml-backend-impl.h \ @@ -716,7 +716,7 @@ endif ggml/src/ggml-vulkan.o: \ ggml/src/ggml-vulkan.cpp \ - ggml/src/ggml-vulkan.h + ggml/include/ggml-vulkan.h $(CXX) $(CXXFLAGS) -c $< -o $@ endif # GGML_VULKAN @@ -764,7 +764,7 @@ endif # GGML_CUDA_NO_PEER_COPY ggml/src/ggml-cuda.o: \ ggml/src/ggml-cuda.cu \ - ggml/src/ggml-cuda.h \ + ggml/include/ggml-cuda.h \ ggml/include/ggml.h \ ggml/include/ggml-backend.h \ ggml/src/ggml-backend-impl.h \ @@ -796,7 +796,7 @@ endif # GGML_METAL ifdef GGML_METAL ggml/src/ggml-metal.o: \ ggml/src/ggml-metal.m \ - ggml/src/ggml-metal.h \ + ggml/include/ggml-metal.h \ ggml/include/ggml.h $(CC) $(CFLAGS) -c $< -o $@ @@ -957,7 +957,7 @@ ggml/src/ggml-quants.o: \ ggml/src/ggml-blas.o: \ ggml/src/ggml-blas.cpp \ - ggml/src/ggml-blas.h + ggml/include/ggml-blas.h $(CXX) $(CXXFLAGS) -c $< -o $@ ifndef GGML_NO_LLAMAFILE @@ -971,7 +971,7 @@ endif # GGML_NO_LLAMAFILE ifdef GGML_RPC ggml/src/ggml-rpc.o: \ ggml/src/ggml-rpc.cpp \ - ggml/src/ggml-rpc.h + ggml/include/ggml-rpc.h $(CXX) $(CXXFLAGS) -c $< -o $@ endif # GGML_RPC @@ -999,8 +999,8 @@ src/llama.o: \ src/llama.cpp \ src/unicode.h \ include/llama.h \ - ggml/src/ggml-cuda.h \ - ggml/src/ggml-metal.h \ + ggml/include/ggml-cuda.h \ + ggml/include/ggml-metal.h \ ggml/include/ggml.h \ ggml/include/ggml-alloc.h \ ggml/include/ggml-backend.h diff --git a/ggml/src/ggml-blas.h b/ggml/include/ggml-blas.h similarity index 100% rename from ggml/src/ggml-blas.h rename to ggml/include/ggml-blas.h diff --git a/ggml/src/ggml-cuda.h b/ggml/include/ggml-cuda.h similarity index 100% rename from ggml/src/ggml-cuda.h rename to ggml/include/ggml-cuda.h diff --git a/ggml/src/ggml-kompute.h b/ggml/include/ggml-kompute.h similarity index 100% rename from ggml/src/ggml-kompute.h rename to ggml/include/ggml-kompute.h diff --git a/ggml/src/ggml-metal.h b/ggml/include/ggml-metal.h similarity index 100% rename from ggml/src/ggml-metal.h rename to ggml/include/ggml-metal.h diff --git a/ggml/src/ggml-rpc.h b/ggml/include/ggml-rpc.h similarity index 100% rename from ggml/src/ggml-rpc.h rename to ggml/include/ggml-rpc.h diff --git a/ggml/src/ggml-sycl.h b/ggml/include/ggml-sycl.h similarity index 95% rename from ggml/src/ggml-sycl.h rename to ggml/include/ggml-sycl.h index 451938fc4151de..43ab1519cd05df 100644 --- a/ggml/src/ggml-sycl.h +++ b/ggml/include/ggml-sycl.h @@ -8,7 +8,9 @@ #include "ggml.h" #include "ggml-backend.h" -#include "ggml-sycl/presets.hpp" + +#define GGML_SYCL_NAME "SYCL" +#define GGML_SYCL_MAX_DEVICES 48 #ifdef __cplusplus extern "C" { diff --git a/ggml/src/ggml-vulkan.h b/ggml/include/ggml-vulkan.h similarity index 100% rename from ggml/src/ggml-vulkan.h rename to ggml/include/ggml-vulkan.h diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index a2f93fb88201b1..ba341d3749050b 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -47,7 +47,7 @@ if (GGML_METAL) find_library(METALKIT_FRAMEWORK MetalKit REQUIRED) message(STATUS "Metal framework found") - set(GGML_HEADERS_METAL ggml-metal.h) + set(GGML_HEADERS_METAL ../include/ggml-metal.h) set(GGML_SOURCES_METAL ggml-metal.m) list(APPEND GGML_CDEF_PUBLIC GGML_USE_METAL) @@ -225,7 +225,7 @@ if (GGML_BLAS) add_compile_definitions(GGML_BLAS_USE_MKL) endif() - set(GGML_HEADERS_BLAS ggml-blas.h) + set(GGML_HEADERS_BLAS ../include/ggml-blas.h) set(GGML_SOURCES_BLAS ggml-blas.cpp) set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${BLAS_LIBRARIES}) @@ -271,7 +271,7 @@ if (GGML_CUDA) enable_language(CUDA) file(GLOB GGML_HEADERS_CUDA "ggml-cuda/*.cuh") - list(APPEND GGML_HEADERS_CUDA "ggml-cuda.h") + list(APPEND GGML_HEADERS_CUDA "../include/ggml-cuda.h") file(GLOB GGML_SOURCES_CUDA "ggml-cuda/*.cu") list(APPEND GGML_SOURCES_CUDA "ggml-cuda.cu") @@ -396,7 +396,7 @@ if (GGML_HIPBLAS) message(STATUS "HIP and hipBLAS found") file(GLOB GGML_HEADERS_ROCM "ggml-cuda/*.cuh") - list(APPEND GGML_HEADERS_ROCM "ggml-cuda.h") + list(APPEND GGML_HEADERS_ROCM "../include/ggml-cuda.h") file(GLOB GGML_SOURCES_ROCM "ggml-cuda/*.cu") list(APPEND GGML_SOURCES_ROCM "ggml-cuda.cu") @@ -489,7 +489,7 @@ if (GGML_SYCL) endif() file(GLOB GGML_HEADERS_SYCL "ggml-sycl/*.hpp") - list(APPEND GGML_HEADERS_SYCL "ggml-sycl.h") + list(APPEND GGML_HEADERS_SYCL "../include/ggml-sycl.h") file(GLOB GGML_SOURCES_SYCL "ggml-sycl/*.cpp") list(APPEND GGML_SOURCES_SYCL "ggml-sycl.cpp") @@ -517,7 +517,7 @@ if (GGML_RPC) set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ws2_32) endif() - set(GGML_HEADERS_RPC ggml-rpc.h) + set(GGML_HEADERS_RPC ../include/ggml-rpc.h) set(GGML_SOURCES_RPC ggml-rpc.cpp) endif() @@ -527,7 +527,7 @@ if (GGML_VULKAN) if (Vulkan_FOUND) message(STATUS "Vulkan found") - set(GGML_HEADERS_VULKAN ggml-vulkan.h) + set(GGML_HEADERS_VULKAN ../include/ggml-vulkan.h) set(GGML_SOURCES_VULKAN ggml-vulkan.cpp) list(APPEND GGML_CDEF_PUBLIC GGML_USE_VULKAN) @@ -712,8 +712,8 @@ if (GGML_KOMPUTE) ) # Add the stamp to the main sources to ensure dependency tracking - set(GGML_SOURCES_KOMPUTE ggml-kompute.cpp ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp) - set(GGML_HEADERS_KOMPUTE ggml-kompute.h ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp) + set(GGML_SOURCES_KOMPUTE ggml-kompute.cpp ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp) + set(GGML_HEADERS_KOMPUTE ../include/ggml-kompute.h ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp) list(APPEND GGML_CDEF_PUBLIC GGML_USE_KOMPUTE) @@ -1155,8 +1155,9 @@ if (EMSCRIPTEN) endif() target_compile_definitions(ggml PUBLIC ${GGML_CDEF_PUBLIC}) -target_include_directories(ggml PUBLIC . ../include ${GGML_EXTRA_INCLUDES}) -target_compile_features (ggml PUBLIC c_std_11) # don't bump +target_include_directories(ggml PUBLIC ../include) +target_include_directories(ggml PRIVATE . ${GGML_EXTRA_INCLUDES}) +target_compile_features (ggml PRIVATE c_std_11) # don't bump target_link_libraries(ggml PRIVATE Threads::Threads ${GGML_EXTRA_LIBS}) diff --git a/ggml/src/ggml-sycl.cpp b/ggml/src/ggml-sycl.cpp index db045336f1edb3..4a668a2c34d3ea 100644 --- a/ggml/src/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl.cpp @@ -37,6 +37,7 @@ #include "ggml-backend-impl.h" #include "ggml-sycl/backend.hpp" +#include "ggml-sycl/presets.hpp" bool ggml_sycl_loaded(void); void ggml_sycl_free_data(struct ggml_tensor * tensor); diff --git a/ggml/src/ggml-sycl/common.hpp b/ggml/src/ggml-sycl/common.hpp index 414c37eed0d5da..e01f91633a4bff 100644 --- a/ggml/src/ggml-sycl/common.hpp +++ b/ggml/src/ggml-sycl/common.hpp @@ -17,6 +17,7 @@ #include #include "dpct/helper.hpp" +#include "ggml-sycl.h" #include "presets.hpp" #define GGML_COMMON_DECL_SYCL diff --git a/ggml/src/ggml-sycl/presets.hpp b/ggml/src/ggml-sycl/presets.hpp index 5e6b61813ab491..fe9d41770b76a4 100644 --- a/ggml/src/ggml-sycl/presets.hpp +++ b/ggml/src/ggml-sycl/presets.hpp @@ -15,8 +15,6 @@ #define GGML_SYCL_MAX_STREAMS 8 #define GGML_SYCL_MAX_BUFFERS 256 -#define GGML_SYCL_MAX_DEVICES 48 -#define GGML_SYCL_NAME "SYCL" #define WARP_SIZE 32 #define MATRIX_ROW_PADDING 512 // last row of quant. matrices is a multiple of this to avoid out-of-bounds memory accesses diff --git a/scripts/sync-ggml-am.sh b/scripts/sync-ggml-am.sh index 35a8f79c7257ad..35104e8e0cfe16 100755 --- a/scripts/sync-ggml-am.sh +++ b/scripts/sync-ggml-am.sh @@ -107,24 +107,24 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then # src/ggml-common.h -> ggml/src/ggml-common.h # src/ggml-cuda/* -> ggml/src/ggml-cuda/ # src/ggml-cuda.cu -> ggml/src/ggml-cuda.cu - # src/ggml-cuda.h -> ggml/src/ggml-cuda.h # src/ggml-impl.h -> ggml/src/ggml-impl.h # src/ggml-kompute.cpp -> ggml/src/ggml-kompute.cpp - # src/ggml-kompute.h -> ggml/src/ggml-kompute.h - # src/ggml-metal.h -> ggml/src/ggml-metal.h # src/ggml-metal.m -> ggml/src/ggml-metal.m # src/ggml-quants.c -> ggml/src/ggml-quants.c # src/ggml-quants.h -> ggml/src/ggml-quants.h # src/ggml-rpc.cpp -> ggml/src/ggml-rpc.cpp - # src/ggml-rpc.h -> ggml/src/ggml-rpc.h # src/ggml-sycl.cpp -> ggml/src/ggml-sycl.cpp - # src/ggml-sycl.h -> ggml/src/ggml-sycl.h # src/ggml-vulkan.cpp -> ggml/src/ggml-vulkan.cpp - # src/ggml-vulkan.h -> ggml/src/ggml-vulkan.h # # include/ggml/ggml.h -> ggml/include/ggml.h # include/ggml/ggml-alloc.h -> ggml/include/ggml-alloc.h # include/ggml/ggml-backend.h -> ggml/include/ggml-backend.h + # include/ggml-cuda.h -> ggml/include/ggml-cuda.h + # include/ggml-kompute.h -> ggml/include/ggml-kompute.h + # include/ggml-metal.h -> ggml/include/ggml-metal.h + # include/ggml-rpc.h -> ggml/include/ggml-rpc.h + # include/ggml-sycl.h -> ggml/include/ggml-sycl.h + # include/ggml-vulkan.h -> ggml/include/ggml-vulkan.h # # tests/test-opt.cpp -> tests/test-opt.cpp # tests/test-grad0.cpp -> tests/test-grad0.cpp @@ -146,20 +146,20 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then -e 's/src\/ggml-common\.h/ggml/src/ggml-common.h/g' \ -e 's/src\/ggml-cuda\//ggml-cuda\//g' \ -e 's/src\/ggml-cuda\.cu/ggml/src/ggml-cuda.cu/g' \ - -e 's/src\/ggml-cuda\.h/ggml/src/ggml-cuda.h/g' \ -e 's/src\/ggml-impl\.h/ggml/src/ggml-impl.h/g' \ -e 's/src\/ggml-kompute\.cpp/ggml/src/ggml-kompute.cpp/g' \ - -e 's/src\/ggml-kompute\.h/ggml/src/ggml-kompute.h/g' \ - -e 's/src\/ggml-metal\.h/ggml/src/ggml-metal.h/g' \ -e 's/src\/ggml-metal\.m/ggml/src/ggml-metal.m/g' \ -e 's/src\/ggml-quants\.c/ggml/src/ggml-quants.c/g' \ -e 's/src\/ggml-quants\.h/ggml/src/ggml-quants.h/g' \ -e 's/src\/ggml-rpc\.cpp/ggml/src/ggml-rpc.cpp/g' \ - -e 's/src\/ggml-rpc\.h/ggml/src/ggml-rpc.h/g' \ -e 's/src\/ggml-sycl\.cpp/ggml/src/ggml-sycl.cpp/g' \ - -e 's/src\/ggml-sycl\.h/ggml/src/ggml-sycl.h/g' \ -e 's/src\/ggml-vulkan\.cpp/ggml/src/ggml-vulkan.cpp/g' \ - -e 's/src\/ggml-vulkan\.h/ggml/src/ggml-vulkan.h/g' \ + -e 's/include\/ggml-cuda\.h/ggml/include/ggml-cuda.h/g' \ + -e 's/include\/ggml-kompute\.h/ggml/include/ggml-kompute.h/g' \ + -e 's/include\/ggml-metal\.h/ggml/include/ggml-metal.h/g' \ + -e 's/include\/ggml-rpc\.h/ggml/include/ggml-rpc.h/g' \ + -e 's/include\/ggml-sycl\.h/ggml/include/ggml-sycl.h/g' \ + -e 's/include\/ggml-vulkan\.h/ggml/include/ggml-vulkan.h/g' \ -e 's/include\/ggml\/ggml\.h/ggml/include/ggml.h/g' \ -e 's/include\/ggml\/ggml-alloc\.h/ggml/include/ggml-alloc.h/g' \ -e 's/include\/ggml\/ggml-backend\.h/ggml/include/ggml-backend.h/g' \ diff --git a/scripts/sync-ggml.sh b/scripts/sync-ggml.sh index b48771044225cb..927554de11106c 100755 --- a/scripts/sync-ggml.sh +++ b/scripts/sync-ggml.sh @@ -11,25 +11,25 @@ cp -rpv ../ggml/src/ggml-backend.c ./ggml/src/ggml-backend.c cp -rpv ../ggml/src/ggml-common.h ./ggml/src/ggml-common.h cp -rpv ../ggml/src/ggml-cuda/* ./ggml/src/ggml-cuda/ cp -rpv ../ggml/src/ggml-cuda.cu ./ggml/src/ggml-cuda.cu -cp -rpv ../ggml/src/ggml-cuda.h ./ggml/src/ggml-cuda.h cp -rpv ../ggml/src/ggml-impl.h ./ggml/src/ggml-impl.h cp -rpv ../ggml/src/ggml-kompute.cpp ./ggml/src/ggml-kompute.cpp -cp -rpv ../ggml/src/ggml-kompute.h ./ggml/src/ggml-kompute.h -cp -rpv ../ggml/src/ggml-metal.h ./ggml/src/ggml-metal.h cp -rpv ../ggml/src/ggml-metal.m ./ggml/src/ggml-metal.m cp -rpv ../ggml/src/ggml-metal.metal ./ggml/src/ggml-metal.metal cp -rpv ../ggml/src/ggml-quants.c ./ggml/src/ggml-quants.c cp -rpv ../ggml/src/ggml-quants.h ./ggml/src/ggml-quants.h cp -rpv ../ggml/src/ggml-rpc.cpp ./ggml/src/ggml-rpc.cpp -cp -rpv ../ggml/src/ggml-rpc.h ./ggml/src/ggml-rpc.h cp -rpv ../ggml/src/ggml-sycl.cpp ./ggml/src/ggml-sycl.cpp -cp -rpv ../ggml/src/ggml-sycl.h ./ggml/src/ggml-sycl.h cp -rpv ../ggml/src/ggml-vulkan.cpp ./ggml/src/ggml-vulkan.cpp -cp -rpv ../ggml/src/ggml-vulkan.h ./ggml/src/ggml-vulkan.h cp -rpv ../ggml/include/ggml.h ./ggml/include/ggml.h cp -rpv ../ggml/include/ggml-alloc.h ./ggml/include/ggml-alloc.h cp -rpv ../ggml/include/ggml-backend.h ./ggml/include/ggml-backend.h +cp -rpv ../ggml/include/ggml-cuda.h ./ggml/include/ggml-cuda.h +cp -rpv ../ggml/include/ggml-kompute.h ./ggml/include/ggml-kompute.h +cp -rpv ../ggml/include/ggml-metal.h ./ggml/include/ggml-metal.h +cp -rpv ../ggml/include/ggml-rpc.h ./ggml/include/ggml-rpc.h +cp -rpv ../ggml/include/ggml-sycl.h ./ggml/include/ggml-sycl.h +cp -rpv ../ggml/include/ggml-vulkan.h ./ggml/include/ggml-vulkan.h cp -rpv ../ggml/tests/test-opt.cpp ./tests/test-opt.cpp cp -rpv ../ggml/tests/test-grad0.cpp ./tests/test-grad0.cpp diff --git a/spm-headers/ggml-metal.h b/spm-headers/ggml-metal.h index e0f107b124cf30..aefad5fa04ced7 120000 --- a/spm-headers/ggml-metal.h +++ b/spm-headers/ggml-metal.h @@ -1 +1 @@ -../ggml/src/ggml-metal.h \ No newline at end of file +../ggml/include/ggml-metal.h \ No newline at end of file diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 1ed74e543dd9f9..f74c0db475e2e2 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -1,7 +1,6 @@ #include #include #include -#include #include #include