Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial support for Windows and Visual Studio and replace Makefile with CMake #17

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,6 @@
gguf-tools
out
build
.vs
.vscode
CMakeSettings.json
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Build script for the gguf-tools command line utility.
cmake_minimum_required(VERSION 3.5)

# Declare C as the only language: without an explicit language list,
# project() enables both C and CXX, so configuration would also demand a
# C++ compiler that this pure C project never uses.
project(gguf-tools C)

# Every translation unit that is linked into the executable.
set(SRC_LIST gguf-tools.c gguflib.c sds.c fp16.c)
add_executable(gguf-tools ${SRC_LIST})
9 changes: 0 additions & 9 deletions Makefile

This file was deleted.

34 changes: 27 additions & 7 deletions gguf-tools.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,26 @@
#include "sds.h"
#include "fp16.h"

#ifdef _WIN32
#include <Windows.h>

/* Windows replacement for perror().
 *
 * If errno is set, the C runtime knows about the failure and the standard
 * perror() is used. Otherwise the failure is assumed to come from a Win32
 * API call, and the text for GetLastError() is printed to stderr in the
 * same "<s>: <message>" layout.
 *
 * 's' is the prefix to print, typically the file name that failed. */
static void win_perror(const char *s) {
    /* Read the Win32 error code before calling anything else: a failing
     * FormatMessageA() below would overwrite it, and the fallback message
     * needs the original value. */
    unsigned long code = GetLastError();

    if (errno != 0) {
        perror(s);
        return;
    }

    /* With FORMAT_MESSAGE_ALLOCATE_BUFFER the function allocates the
     * buffer and stores its address in 'msg'. It returns 0 on failure, in
     * which case 'msg' must not be used, so start from NULL and check. */
    char *msg = NULL;
    unsigned long len = FormatMessageA(
        FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM |
        FORMAT_MESSAGE_IGNORE_INSERTS,
        NULL, code, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
        (LPSTR)&msg, 0, NULL);
    if (len == 0 || msg == NULL) {
        /* No text available for this code: report the raw number. */
        fprintf(stderr, "%s: Windows error %lu\n", s, code);
        return;
    }

    /* No newline is appended here, matching the original output; system
     * message text normally already ends with a line break. */
    fprintf(stderr, "%s: %s", s, msg);
    LocalFree(msg);
}

/* From here on, every perror() call in this file goes through the wrapper.
 * The macro is defined after the function so that the perror() call inside
 * win_perror() still reaches the C runtime version. */
#define perror win_perror
#endif

/* Global options that can be used for all the subcommands. */
struct {
int verbose; // --verbose option
Expand All @@ -19,7 +39,7 @@ struct {
/* ========================== Utility functions ============================ */

/* Glob-style pattern matching. Return 1 on match, 0 otherwise. */
int strmatch(const char *pattern, int patternLen,
static int strmatch(const char *pattern, int patternLen,
const char *string, int stringLen, int nocase)
{
while(patternLen && stringLen) {
Expand Down Expand Up @@ -141,7 +161,7 @@ int strmatch(const char *pattern, int patternLen,

/* ========================== 'show' subcommand ============================= */

void gguf_tools_show(const char *filename) {
static void gguf_tools_show(const char *filename) {
gguf_ctx *ctx = gguf_open(filename);
if (ctx == NULL) {
perror(filename);
Expand Down Expand Up @@ -190,7 +210,7 @@ void gguf_tools_show(const char *filename) {
/* Read a Mixtral MoE model and create a new non-MoE GGUF file based
* on the weights of the experts with IDs in the array of 'experts_id'.
* The array must contain 32 integers, one for each layer. */
void gguf_tools_split_mixtral(int *experts_id, const char *mixtral_filename, const char *output_filename) {
static void gguf_tools_split_mixtral(int *experts_id, const char *mixtral_filename, const char *output_filename) {
gguf_ctx *mixtral = gguf_open(mixtral_filename);
if (mixtral == NULL) {
perror(mixtral_filename);
Expand Down Expand Up @@ -331,7 +351,7 @@ void gguf_tools_split_mixtral(int *experts_id, const char *mixtral_filename, con

/* ====================== 'inspect-weights' subcommand ====================== */

void gguf_tools_inspect_weights(const char *filename, const char *tname, uint64_t count) {
static void gguf_tools_inspect_weights(const char *filename, const char *tname, uint64_t count) {
gguf_ctx *ctx = gguf_open(filename);
if (ctx == NULL) {
perror(filename);
Expand Down Expand Up @@ -421,7 +441,7 @@ void gguf_tools_inspect_weights(const char *filename, const char *tname, uint64_
*
* Returns 1 on success, 0 if one or both the provided tensors can't be
* dequantized. */
int tensors_avg_diff(gguf_tensor *t1, gguf_tensor *t2, double *diff) {
static int tensors_avg_diff(gguf_tensor *t1, gguf_tensor *t2, double *diff) {
float *weights1 = gguf_tensor_to_float(t1);
float *weights2 = gguf_tensor_to_float(t2);
if (weights1 == NULL || weights2 == NULL) {
Expand Down Expand Up @@ -453,7 +473,7 @@ int tensors_avg_diff(gguf_tensor *t1, gguf_tensor *t2, double *diff) {
return 1;
}

void gguf_tools_compare(const char *file1, const char *file2) {
static void gguf_tools_compare(const char *file1, const char *file2) {
gguf_ctx *ctx1 = gguf_open(file1);
if (ctx1 == NULL) {
perror(file1);
Expand Down Expand Up @@ -498,7 +518,7 @@ void gguf_tools_compare(const char *file1, const char *file2) {

/* ======================= Main and CLI options parsing ===================== */

void gguf_tools_usage(const char *progname) {
static void gguf_tools_usage(const char *progname) {
printf("Usage: %s <subcommand> [arguments...] [options...]\n"
"Subcommands:\n"
" show <filename> -- show GGUF model keys and tensors.\n"
Expand Down
88 changes: 70 additions & 18 deletions gguflib.c
Original file line number Diff line number Diff line change
@@ -1,15 +1,23 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#ifndef _WIN32
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
#endif
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <assert.h>
#include <inttypes.h>

#ifdef _WIN32
#include <Windows.h>

/* The Microsoft C runtime headers do not provide the POSIX ssize_t type.
 * It has to be a signed, pointer-sized integer so that negative error
 * returns and "< 0" checks behave as on POSIX; SSIZE_T from the Windows
 * headers is exactly that (UINT_PTR is unsigned and would break them). */
typedef SSIZE_T ssize_t;
#endif

#include "gguflib.h"
#include "fp16.h"
#include "bf16.h"
Expand Down Expand Up @@ -108,18 +116,34 @@ uint64_t gguf_value_len(uint32_t type, union gguf_value *val) {
}

/* =============================== GGUF file API ============================ */

/* Open a GGUF file and return a parsing context. */
gguf_ctx *gguf_open(const char *filename) {
int fd = open(filename,O_RDWR|O_APPEND);
gguf_ctx*gguf_open(const char *filename) {
#ifdef _WIN32
HANDLE fd = CreateFileA(filename, GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
if (fd == INVALID_HANDLE_VALUE) return NULL;
#else
int fd = open(filename, O_RDWR | O_APPEND);
if (fd == -1) return NULL;
#endif

/* Mapping successful. We can create our context object. */
gguf_ctx *ctx = calloc(1, sizeof(*ctx));
gguf_ctx*ctx = calloc(1, sizeof(*ctx));
if (!ctx) return NULL;
ctx->fd = fd;
ctx->alignment = 32; // Default alignment of GGUF files.
ctx->data_off = 0; // Set later.

#ifdef _WIN32
/* We must create file mapping object under Windows. */
HANDLE mapping = CreateFileMappingA(fd, NULL, PAGE_READWRITE, 0, 0, NULL);
if (mapping == NULL) {
CloseHandle(fd);
free(ctx);
return 0;
}
ctx->mapping = mapping;
#endif

if (gguf_remap(ctx) == 0) {
gguf_close(ctx);
return NULL;
Expand All @@ -146,6 +170,7 @@ void gguf_rewind(gguf_ctx *ctx) {
*
* Return 1 on success, 0 on error. */
int gguf_remap(gguf_ctx *ctx) {
#ifndef _WIN32
struct stat sb;

/* Unmap if the file was already memory mapped. */
Expand All @@ -159,23 +184,50 @@ int gguf_remap(gguf_ctx *ctx) {

/* Minimal sanity check... */
if (sb.st_size < (signed)sizeof(struct gguf_header) ||
memcmp(mapped,"GGUF",4) != 0)
memcmp(mapped, "GGUF", 4) != 0)
{
errno = EINVAL;
return 0;
}
ctx->size = sb.st_size;
#else
if (ctx->data) UnmapViewOfFile(ctx->data);

/* Get the size of the file. */
LARGE_INTEGER size;
if (!GetFileSizeEx(ctx->fd, &size)) return 0;

/* Map the file by the handle to the file mapping object. */
LPVOID mapped = MapViewOfFile(ctx->mapping, FILE_MAP_ALL_ACCESS, 0, 0, size.QuadPart);
if (mapped == NULL) return 0;

if (size.QuadPart < (signed)sizeof(struct gguf_header) ||
memcmp(mapped, "GGUF", 4) != 0)
{
errno = EINVAL;
return 0;
}
ctx->size = size.QuadPart;
#endif

ctx->data = mapped;
ctx->header = mapped;
ctx->size = sb.st_size;
return 1;
}

/* Tear down a context returned by gguf_open() or gguf_create(): unmap the
 * file data if it is mapped, close the underlying file, and release the
 * context itself. Passing NULL is allowed and does nothing. */
void gguf_close(gguf_ctx *ctx) {
    if (!ctx) return;

#ifdef _WIN32
    if (ctx->data != NULL) UnmapViewOfFile(ctx->data);
    /* The file mapping object has its own handle, separate from the file
     * handle: close it as well so the kernel object is destroyed. */
    CloseHandle(ctx->mapping);
    CloseHandle(ctx->fd);
#else
    if (ctx->data != NULL) munmap(ctx->data, ctx->size);
    close(ctx->fd);
#endif

    free(ctx);
}

Expand Down Expand Up @@ -222,7 +274,7 @@ uint64_t gguf_get_alignment_padding(uint64_t alignment, uint64_t offset) {
* all the key-values are consumed, in the context of the first call of
* gguf_get_tensor(): this way we will be able to return tensor offsets
* as absolute positions and pointers to the mmapped file. */
void gguf_set_data_offset(gguf_ctx *ctx) {
static void gguf_set_data_offset(gguf_ctx *ctx) {
assert(ctx->left_kv == 0 && ctx->left_tensors == ctx->header->tensor_count);

uint64_t offset = ctx->off;
Expand Down Expand Up @@ -373,7 +425,7 @@ struct gguf_print_options {
* may be NULL if no options are provided.
*
* The function is designed to be used as a callback of gguf_do_with_value(). */
void gguf_print_value_callback(void *privdata, uint32_t type, union gguf_value *val, uint64_t in_array, uint64_t array_len) {
static void gguf_print_value_callback(void *privdata, uint32_t type, union gguf_value *val, uint64_t in_array, uint64_t array_len) {
struct gguf_print_options *po = privdata;
if (po && po->max_array_items && in_array > po->max_array_items) {
if (in_array-1 == po->max_array_items)
Expand Down Expand Up @@ -525,20 +577,20 @@ int gguf_append_tensor_data(gguf_ctx *ctx, void *tensor, uint64_t tensor_size) {
typedef void (*store_float_callback)(void *dst, uint64_t idx, float f);

/* Callback used to store F16 when dequantizing. */
void gguf_store_f16_callback(void *dst, uint64_t idx, float f) {
static void gguf_store_f16_callback(void *dst, uint64_t idx, float f) {
    /* 'dst' is treated as an array of 16-bit slots; slot 'idx' receives
     * the result of to_half(f). */
    ((uint16_t *)dst)[idx] = to_half(f);
}

/* Callback used to store BF16 when dequantizing. */
void gguf_store_bf16_callback(void *dst, uint64_t idx, float f) {
static void gguf_store_bf16_callback(void *dst, uint64_t idx, float f) {
    /* 'dst' is treated as an array of 16-bit slots; slot 'idx' receives
     * the result of to_brain(f). */
    ((uint16_t *)dst)[idx] = to_brain(f);
}

/* Q8_0 blocks dequantization to floats.
* 'dst' is supposed to have enough space for 'count' weights. */
void gguf_q8_0_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) {
static void gguf_q8_0_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) {
float *f = dst;
struct gguf_tensor_type_features *tf =
gguf_get_tensor_type_features(GGUF_TYPE_Q8_0);
Expand All @@ -565,7 +617,7 @@ void gguf_q8_0_to_float(void *weights_data, void *dst, uint64_t count, store_flo

/* Q4_K blocks dequantization to floats.
* 'y' is supposed to have enough space for 'count' weights. */
void gguf_q4_k_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) {
static void gguf_q4_k_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) {
float *f = dst;
uint8_t *block = weights_data;
uint64_t i = 0; // i-th weight to dequantize.
Expand Down Expand Up @@ -655,7 +707,7 @@ void gguf_q4_k_to_float(void *weights_data, void *dst, uint64_t count, store_flo

/* Q6_K blocks dequantization to floats.
* 'dst' is supposed to have enough space for 'count' weights. */
void gguf_q6_k_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) {
static void gguf_q6_k_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) {
float *f = dst;
uint8_t *block = weights_data;
uint64_t i = 0; // i-th weight to dequantize.
Expand Down Expand Up @@ -735,7 +787,7 @@ void gguf_q6_k_to_float(void *weights_data, void *dst, uint64_t count, store_flo

/* Q2_K blocks dequantization to floats.
* 'dst' is supposed to have enough space for 'count' weights. */
void gguf_q2_k_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) {
static void gguf_q2_k_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) {
float *f = dst;
uint8_t *block = weights_data;
uint64_t i = 0; // i-th weight to dequantize.
Expand Down Expand Up @@ -800,7 +852,7 @@ void gguf_q2_k_to_float(void *weights_data, void *dst, uint64_t count, store_flo

/* Q4_0 blocks dequantization to floats.
* 'dst' is supposed to have enough space for 'count' weights. */
void gguf_q4_0_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) {
static void gguf_q4_0_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) {
float *f = dst;
struct gguf_tensor_type_features *tf =
gguf_get_tensor_type_features(GGUF_TYPE_Q4_0);
Expand Down Expand Up @@ -841,7 +893,7 @@ void gguf_q4_0_to_float(void *weights_data, void *dst, uint64_t count, store_flo

/* Q4_1 blocks dequantization to floats.
* 'dst' is supposed to have enough space for 'count' weights. */
void gguf_q4_1_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) {
static void gguf_q4_1_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) {
float *f = dst;
struct gguf_tensor_type_features *tf =
gguf_get_tensor_type_features(GGUF_TYPE_Q4_1);
Expand Down
15 changes: 15 additions & 0 deletions gguflib.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include <stdint.h>


/* ============================ Enums and structures ======================== */

/* Flags that can be used in different functions with the same meaning. */
Expand Down Expand Up @@ -110,13 +111,22 @@ union gguf_value {
double float64;
uint8_t boolval;
struct gguf_string string;

#ifdef _MSC_VER
#pragma pack(push, 1)
#define __attribute__(x)
#endif
struct {
// Any value type is valid, including arrays.
uint32_t type;
// Number of elements, not bytes
uint64_t len;
// The array of values follow...
} __attribute__((packed)) array;
#ifdef _MSC_VER
#pragma pack(pop)
#undef __attribute__
#endif
};

// Header
Expand Down Expand Up @@ -159,7 +169,12 @@ typedef struct {

/* The context you get after opening a GGUF file with gguf_open(). */
typedef struct {
#ifdef _WIN32
void* fd;
void* mapping;
#else
int fd;
#endif
uint8_t *data; // Memory mapped data.
uint64_t size; // Total file size.
struct gguf_header *header; // GGUF file header info.
Expand Down
Loading