antirez · Lourdle · Nov 3, 2024 · Nov 4, 2024
diff --git a/.gitignore b/.gitignore
@@ -1 +1,6 @@
 gguf-tools
+out
+build
+.vs
+.vscode
+CMakeSettings.json
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -0,0 +1,4 @@
+cmake_minimum_required(VERSION 3.5)
+project(gguf-tools C) # C-only sources: do not require a C++ compiler at configure time.
+set(SRC_LIST gguf-tools.c gguflib.c sds.c fp16.c)
+add_executable(gguf-tools ${SRC_LIST})
diff --git a/Makefile b/Makefile
diff --git a/gguf-tools.c b/gguf-tools.c
@@ -11,6 +11,26 @@
 #include "sds.h"
 #include "fp16.h"
 
+#ifdef _WIN32
+#include <Windows.h>
+
+/* perror() replacement for Windows: prints errno's message when errno is set,
+ * otherwise the system message for GetLastError(). */
+static void win_perror(const char* s) {
+    DWORD err = GetLastError(); /* Captured once so the fallback below reports the same code. */
+    if (errno != 0) { perror(s); return; } /* CRT perror(): the perror macro is defined after this function. */
+    char* msg = NULL; /* Stays NULL if FormatMessageA() cannot produce a message. */
+    DWORD len = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
+        NULL, err, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
+        (LPSTR)&msg, 0, NULL);
+    /* FormatMessageA() returns 0 on failure: never hand an unset pointer to "%s". */
+    if (len == 0 || msg == NULL) fprintf(stderr, "%s: Windows error %lu\n", s, (unsigned long)err);
+    else fprintf(stderr, "%s: %s", s, msg);
+    LocalFree(msg); /* Documented to ignore a NULL handle. */
+}
+
+#define perror win_perror
+#endif
+
 /* Global options that can could be used for all the subcommands. */
 struct {
     int verbose;        // --verbose option
@@ -19,7 +39,7 @@ struct {
 /* ========================== Utility functions  ============================ */
 
 /* Glob-style pattern matching. Return 1 on match, 0 otherwise. */
-int strmatch(const char *pattern, int patternLen,
+static int strmatch(const char *pattern, int patternLen,
              const char *string, int stringLen, int nocase)
 {
     while(patternLen && stringLen) {
@@ -141,7 +161,7 @@ int strmatch(const char *pattern, int patternLen,
 
 /* ========================== 'show' subcommand ============================= */
 
-void gguf_tools_show(const char *filename) {
+static void gguf_tools_show(const char *filename) {
     gguf_ctx *ctx = gguf_open(filename);
     if (ctx == NULL) {
         perror(filename);
@@ -190,7 +210,7 @@ void gguf_tools_show(const char *filename) {
 /* Read a Mixtral MoE model and creates a new non-MoE GGUF file based
  * on the weights of the experts with IDs in the array of 'experts_id'.
  * The array must contain 32 integers, one for each layer. */
-void gguf_tools_split_mixtral(int *experts_id, const char *mixtral_filename, const char *output_filename) {
+static void gguf_tools_split_mixtral(int *experts_id, const char *mixtral_filename, const char *output_filename) {
     gguf_ctx *mixtral = gguf_open(mixtral_filename);
     if (mixtral == NULL) {
         perror(mixtral_filename);
@@ -331,7 +351,7 @@ void gguf_tools_split_mixtral(int *experts_id, const char *mixtral_filename, con
 
 /* ====================== 'inspect-weights' subcommand ====================== */
 
-void gguf_tools_inspect_weights(const char *filename, const char *tname, uint64_t count) {
+static void gguf_tools_inspect_weights(const char *filename, const char *tname, uint64_t count) {
     gguf_ctx *ctx = gguf_open(filename);
     if (ctx == NULL) {
         perror(filename);
@@ -421,7 +441,7 @@ void gguf_tools_inspect_weights(const char *filename, const char *tname, uint64_
  *
  * Returns 1 on success, 0 if one or both the provided tensors can't be
  * dequantized. */
-int tensors_avg_diff(gguf_tensor *t1, gguf_tensor *t2, double *diff) {
+static int tensors_avg_diff(gguf_tensor *t1, gguf_tensor *t2, double *diff) {
     float *weights1 = gguf_tensor_to_float(t1);
     float *weights2 = gguf_tensor_to_float(t2);
     if (weights1 == NULL || weights2 == NULL) {
@@ -453,7 +473,7 @@ int tensors_avg_diff(gguf_tensor *t1, gguf_tensor *t2, double *diff) {
     return 1;
 }
 
-void gguf_tools_compare(const char *file1, const char *file2) {
+static void gguf_tools_compare(const char *file1, const char *file2) {
     gguf_ctx *ctx1 = gguf_open(file1);
     if (ctx1 == NULL) {
         perror(file1);
@@ -498,7 +518,7 @@ void gguf_tools_compare(const char *file1, const char *file2) {
 
 /* ======================= Main and CLI options parsing ===================== */
 
-void gguf_tools_usage(const char *progname) {
+static void gguf_tools_usage(const char *progname) {
     printf("Usage: %s <subcommand> [arguments...] [options...]\n"
 "Subcommands:\n"
 "  show <filename> -- show GGUF model keys and tensors.\n"

diff --git a/gguflib.c b/gguflib.c
@@ -1,15 +1,23 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
+#ifndef _WIN32
 #include <sys/mman.h>
-#include <fcntl.h>
+#include <unistd.h>
 #include <sys/stat.h>
+#endif
+#include <fcntl.h>
 #include <errno.h>
-#include <unistd.h>
 #include <string.h>
 #include <assert.h>
 #include <inttypes.h>
 
+#ifdef _WIN32
+#include <Windows.h>

+typedef SSIZE_T ssize_t; /* Signed pointer-sized integer, so negative error returns stay representable. */
+#endif
+
 #include "gguflib.h"
 #include "fp16.h"
 #include "bf16.h"
@@ -108,18 +116,34 @@ uint64_t gguf_value_len(uint32_t type, union gguf_value *val) {
 }
 
 /* =============================== GGUF file API ============================ */
-
 /* Open a GGUF file and return a parsing context. */
-gguf_ctx *gguf_open(const char *filename) {
-    int fd = open(filename,O_RDWR|O_APPEND);
+gguf_ctx*gguf_open(const char *filename) {
+#ifdef _WIN32
+    HANDLE fd = CreateFileA(filename, GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+    if (fd == INVALID_HANDLE_VALUE) return NULL;
+#else
+    int fd = open(filename, O_RDWR | O_APPEND);
     if (fd == -1) return NULL;
+#endif
 
     /* Mapping successful. We can create our context object. */
-    gguf_ctx *ctx = calloc(1, sizeof(*ctx));
+    gguf_ctx*ctx = calloc(1, sizeof(*ctx));
     if (!ctx) return NULL;
     ctx->fd = fd;
     ctx->alignment = 32; // Default alignment of GGUF files.
     ctx->data_off = 0;   // Set later.
+
+#ifdef _WIN32
+    /* We must create file mapping object under Windows. */
+    HANDLE mapping = CreateFileMappingA(fd, NULL, PAGE_READWRITE, 0, 0, NULL);
+    if (mapping == NULL) {
+        CloseHandle(fd);
+        free(ctx);
+        return 0;
+    }
+    ctx->mapping = mapping;
+#endif
+
     if (gguf_remap(ctx) == 0) {
         gguf_close(ctx);
         return NULL;
@@ -146,6 +170,7 @@ void gguf_rewind(gguf_ctx *ctx) {
  *
  * Return 1 on success, 0 on error. */
 int gguf_remap(gguf_ctx *ctx) {
+#ifndef _WIN32
     struct stat sb;
 
     /* Unmap if the file was already memory mapped. */
@@ -159,23 +184,50 @@ int gguf_remap(gguf_ctx *ctx) {
 
     /* Minimal sanity check... */
     if (sb.st_size < (signed)sizeof(struct gguf_header) ||
-        memcmp(mapped,"GGUF",4) != 0)
+        memcmp(mapped, "GGUF", 4) != 0)
     {
         errno = EINVAL;
         return 0;
     }
+    ctx->size = sb.st_size;
+#else
+    if (ctx->data) UnmapViewOfFile(ctx->data);
+
+    /* Get the size of the file. */
+    LARGE_INTEGER size;
+    if (!GetFileSizeEx(ctx->fd, &size)) return 0;
+
+    /* Map the file by the handle to the file mapping object. */
+    LPVOID mapped = MapViewOfFile(ctx->mapping, FILE_MAP_ALL_ACCESS, 0, 0, size.QuadPart);
+    if (mapped == NULL) return 0;
+
+    if (size.QuadPart < (signed)sizeof(struct gguf_header) ||
+        memcmp(mapped, "GGUF", 4) != 0)
+    {
+        errno = EINVAL;
+        return 0;
+    }
+    ctx->size = size.QuadPart;
+#endif
+
     ctx->data = mapped;
     ctx->header = mapped;
-    ctx->size = sb.st_size;
     return 1;
 }
 
 /* Cleanup needed after gguf_open() and gguf_create(), to terminate the context
  * and cleanup resources. */
 void gguf_close(gguf_ctx *ctx) {
     if (ctx == NULL) return;
+#ifndef _WIN32
     if (ctx->data) munmap(ctx->data,ctx->size);
     close(ctx->fd);
+#else
+    if (ctx->data) UnmapViewOfFile(ctx->data);
+    /* Don't forget to close the handle to the file mapping object to destroy this kernel object. */
+    CloseHandle(ctx->mapping);
+    CloseHandle(ctx->fd);
+#endif
     free(ctx);
 }
 
@@ -222,7 +274,7 @@ uint64_t gguf_get_alignment_padding(uint64_t alignment, uint64_t offset) {
  * all the key-values are consumed, in the context of the first call of
  * gguf_get_tensor(): this way we will be able to return tensor offsets
  * as absolute positions and pointers to the mmapped file. */
-void gguf_set_data_offset(gguf_ctx *ctx) {
+static void gguf_set_data_offset(gguf_ctx *ctx) {
     assert(ctx->left_kv == 0 && ctx->left_tensors == ctx->header->tensor_count);
 
     uint64_t offset = ctx->off;
@@ -373,7 +425,7 @@ struct gguf_print_options {
  * may be NULL if no options are provided.
  *
  * The function is designed to be used as a callback of gguf_do_with_value(). */
-void gguf_print_value_callback(void *privdata, uint32_t type, union gguf_value *val, uint64_t in_array, uint64_t array_len) {
+static void gguf_print_value_callback(void *privdata, uint32_t type, union gguf_value *val, uint64_t in_array, uint64_t array_len) {
     struct gguf_print_options *po = privdata;
     if (po && po->max_array_items && in_array > po->max_array_items) {
         if (in_array-1 == po->max_array_items)
@@ -525,20 +577,20 @@ int gguf_append_tensor_data(gguf_ctx *ctx, void *tensor, uint64_t tensor_size) {
 typedef void (*store_float_callback)(void *dst, uint64_t idx, float f);
 
 /* Callback used to store F16 when dequantizing. */
-void gguf_store_f16_callback(void *dst, uint64_t idx, float f) {
+static void gguf_store_f16_callback(void *dst, uint64_t idx, float f) {
     uint16_t *f16 = dst;
     f16[idx] = to_half(f);
 }
 
 /* Callback used to store BF16 when dequantizing. */
-void gguf_store_bf16_callback(void *dst, uint64_t idx, float f) {
+static void gguf_store_bf16_callback(void *dst, uint64_t idx, float f) {
     uint16_t *f16 = dst;
     f16[idx] = to_brain(f);
 }
 
 /* Q8_0 blocks dequantization to floats.
  * 'dst' is supposed to have enough space for 'count' weights. */
-void gguf_q8_0_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) {
+static void gguf_q8_0_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) {
     float *f = dst;
     struct gguf_tensor_type_features *tf =
         gguf_get_tensor_type_features(GGUF_TYPE_Q8_0);
@@ -565,7 +617,7 @@ void gguf_q8_0_to_float(void *weights_data, void *dst, uint64_t count, store_flo
 
 /* Q4_K blocks dequantization to floats.
  * 'y' is supposed to have enough space for 'count' weights. */
-void gguf_q4_k_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) {
+static void gguf_q4_k_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) {
     float *f = dst;
     uint8_t *block = weights_data;
     uint64_t i = 0; // i-th weight to dequantize.
@@ -655,7 +707,7 @@ void gguf_q4_k_to_float(void *weights_data, void *dst, uint64_t count, store_flo
 
 /* Q6_K blocks dequantization to floats.
  * 'y' is supposed to have enough space for 'count' weights. */
-void gguf_q6_k_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) {
+static void gguf_q6_k_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) {
     float *f = dst;
     uint8_t *block = weights_data;
     uint64_t i = 0; // i-th weight to dequantize.
@@ -735,7 +787,7 @@ void gguf_q6_k_to_float(void *weights_data, void *dst, uint64_t count, store_flo
 
 /* Q2_K blocks dequantization to floats.
  * 'y' is supposed to have enough space for 'count' weights. */
-void gguf_q2_k_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) {
+static void gguf_q2_k_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) {
     float *f = dst;
     uint8_t *block = weights_data;
     uint64_t i = 0; // i-th weight to dequantize.
@@ -800,7 +852,7 @@ void gguf_q2_k_to_float(void *weights_data, void *dst, uint64_t count, store_flo
 
 /* Q4_0 blocks dequantization to floats.
  * 'dst' is supposed to have enough space for 'count' weights. */
-void gguf_q4_0_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) {
+static void gguf_q4_0_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) {
     float *f = dst;
     struct gguf_tensor_type_features *tf =
         gguf_get_tensor_type_features(GGUF_TYPE_Q4_0);
@@ -841,7 +893,7 @@ void gguf_q4_0_to_float(void *weights_data, void *dst, uint64_t count, store_flo
 
 /* Q4_1 blocks dequantization to floats.
  * 'dst' is supposed to have enough space for 'count' weights. */
-void gguf_q4_1_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) {
+static void gguf_q4_1_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) {
     float *f = dst;
     struct gguf_tensor_type_features *tf =
         gguf_get_tensor_type_features(GGUF_TYPE_Q4_1);

diff --git a/gguflib.h b/gguflib.h
@@ -10,6 +10,7 @@
 
 #include <stdint.h>
 
+
 /* ============================ Enums and structures ======================== */
 
 /* Flags that can be used in different functions with the same meaning. */
@@ -110,13 +111,22 @@ union gguf_value {
     double float64;
     uint8_t boolval;
     struct gguf_string string;
+
+#ifdef _MSC_VER
+#pragma pack(push, 1)
+#define __attribute__(x)
+#endif
     struct {
         // Any value type is valid, including arrays.
         uint32_t type;
         // Number of elements, not bytes
         uint64_t len;
         // The array of values follow...
     } __attribute__((packed)) array;
+#ifdef _MSC_VER
+#pragma pack(pop)
+#undef __attribute__
+#endif
 };
 
 // Header
@@ -159,7 +169,12 @@ typedef struct {
 
 /* The context you get after opening a GGUF file with gguf_init(). */
 typedef struct {
+#ifdef _WIN32
+	void* fd;
+    void* mapping;
+#else
     int fd;
+#endif
     uint8_t *data;  // Memory mapped data.
     uint64_t size;  // Total file size.
     struct gguf_header *header;     // GUFF file header info.