Update torch package to 2.3.0.dev20240311+cu121
countzero committed Mar 12, 2024
1 parent 24304bf · commit 55f3821
Showing 5 changed files with 77 additions and 61 deletions.
109 changes: 59 additions & 50 deletions CHANGELOG.md
@@ -4,116 +4,125 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

+## [1.13.0] - 2024-03-12
+
+### Added
+- [Server] Add -port option
+- [Build] Add list of installed python packages
+
+### Changed
+- [Build] Update "torch" package to 2.3.0.dev20240311+cu121
+
 ## [1.12.0] - 2024-03-01

 ### Added
-- Add fallback for empty head_count_kv values
-- Add fallback if model details could not be read by gguf-dump.py
+- [Server] Add fallback for empty head_count_kv values
+- [Server] Add fallback if model details could not be read by gguf-dump.py

 ## [1.11.0] - 2024-02-20

 ### Added
-- Add filename of the model path as an alias
-- Add support for self extending the context window (SelfExtend)
+- [Server] Add filename of the model path as an alias
+- [Server] Add support for self extending the context window (SelfExtend)

 ## [1.10.0] - 2024-02-19

 ### Added
-- Add automatic calculation of numberOfGPULayers option
-- Add formatted output of computed memory details
+- [Server] Add automatic calculation of numberOfGPULayers option
+- [Server] Add formatted output of computed memory details

 ### Fixed
-- Fix numberOfGPULayers option override
+- [Server] Fix numberOfGPULayers option override

 ## [1.9.0] - 2024-02-11

 ### Added
-- Add contextSize option
-- Add numberOfGPULayers option
+- [Server] Add contextSize option
+- [Server] Add numberOfGPULayers option

 ## [1.8.0] - 2024-01-31

 ### Added
-- Add parallel option
-- Add support for executing the server example script from any directory
+- [Server] Add parallel option
+- [Server] Add support for executing the server example script from any directory

 ## [1.7.0] - 2024-01-29

 ### Added
-- Add listing available models if model path is missing
-- Add KV cache placeholder
-- Add polling for server before starting the browser
-- Add maximum of 10 parallel job executions
+- [Server] Add listing available models if model path is missing
+- [Server] Add KV cache placeholder
+- [Server] Add polling for server before starting the browser
+- [Server] Add maximum of 10 parallel job executions

 ## [1.6.0] - 2024-01-25

 ### Added
-- Add automatic NVIDIA GPU detection in the build context
+- [Build] Add automatic NVIDIA GPU detection in the build context

 ### Changed
-- Replace all server examples with one generic server.ps1 script
-- Update OpenBLAS to v0.3.26
+- [Server] Replace all server examples with one generic server.ps1 script
+- [Build] Update OpenBLAS to v0.3.26

 ### Fixed
-- Fix python requirements installation
+- [Build] Fix python requirements installation

 ## [1.5.0] - 2023-09-28

 ### Added
-- Add Falcon 180B convert script
-- Add additional convert requirements for Falcon models
-- Add example for Falcon 40B model
-- Add example for FashionGPT 70B model
-- Add example for Llama 2 7B model
-- Add example for Llama 2 13B model
-- Add example for Upstage Llama 2 70B
-- Add example for Phind CodeLlama 34B model
-- Add example for Phind CodeLlama 34B model with 16k context
-- Add example for Phind CodeLlama 34B model with 32k context
-- Add example for WizardCoder 15B model
-- Add example for Mistral 7B model
-- Add prompt to chat with Llama 2
+- [Build] Add Falcon 180B convert script
+- [Build] Add additional convert requirements for Falcon models
+- [Server] Add example for Falcon 40B model
+- [Server] Add example for FashionGPT 70B model
+- [Server] Add example for Llama 2 7B model
+- [Server] Add example for Llama 2 13B model
+- [Server] Add example for Upstage Llama 2 70B
+- [Server] Add example for Phind CodeLlama 34B model
+- [Server] Add example for Phind CodeLlama 34B model with 16k context
+- [Server] Add example for Phind CodeLlama 34B model with 32k context
+- [Server] Add example for WizardCoder 15B model
+- [Server] Add example for Mistral 7B model
+- [Prompt] Add prompt to chat with Llama 2

 ## [1.4.0] - 2023-09-01

 ### Added
-- Add german language prompt
-- Add JSON grammar with floating point numbers support
-- Add RoPE parameter to documentation
-- Add JSON response to documentation
-- Add version parameter to documentation
-- Add prompt cache to documentation
-- Add enabling of Hardware Accelerated GPU Scheduling to documentation
+- [Prompt] Add german language prompt
+- [Grammar] Add JSON grammar with floating point numbers support
+- [Documentation] Add RoPE parameter to documentation
+- [Documentation] Add JSON response to documentation
+- [Documentation] Add version parameter to documentation
+- [Documentation] Add prompt cache to documentation
+- [Documentation] Add enabling of Hardware Accelerated GPU Scheduling to documentation

 ### Fixed
-- Fix python requirements installation
+- [Build] Fix python requirements installation

 ## [1.3.0] - 2023-07-13

 ### Added
-- Add optional version parameter
-- Add console output and execution duration
+- [Build] Add optional version parameter
+- [Build] Add console output and execution duration

 ### Changed
-- Default llama.cpp version to latest release tag
+- [Build] Default llama.cpp version to latest release tag

 ## [1.2.0] - 2023-07-06

 ### Added
-- Add server example to the build
-- Add documentation on how to use the webinterface
+- [Build] Add server example to the build
+- [Build] Add documentation on how to use the webinterface

 ### Fixed
-- Fix automatic update of the submodules
+- [Build] Fix automatic update of the submodules

 ## [1.1.0] - 2023-07-03

 ### Added
-- Add dataset "wikitext-2-raw-v1"
-- Add documentation on how to measure model perplexity
+- [Build] Add dataset "wikitext-2-raw-v1"
+- [Build] Add documentation on how to measure model perplexity

 ## [1.0.0] - 2023-06-28

 ### Added
-- OpenBLAS workaround for Windows
-- Rebuild script
+- [Build] OpenBLAS workaround for Windows
+- [Build] Rebuild script
15 changes: 11 additions & 4 deletions examples/server.ps1
@@ -32,7 +32,7 @@ Specifies the models context length it was trained on.
 .\server.ps1 -model "C:\models\openchat-3.5-0106.Q5_K_M.gguf" -contextSize 4096 -numberOfGPULayers 10
 .EXAMPLE
-.\examples\server.ps1 -model ".\vendor\llama.cpp\models\openchat-3.5-0106.Q5_K_M.gguf"
+.\server.ps1 -model "C:\models\openchat-3.5-0106.Q5_K_M.gguf" -port 8081
 #>

 Param (
@@ -62,6 +62,12 @@ Param (
     [Int]
     $numberOfGPULayers=-1,

+    [Parameter(
+        HelpMessage="The server port."
+    )]
+    [Int]
+    $port=8080,
+
     [Parameter(
         HelpMessage="Specifies the models context length it was trained on."
     )]
@@ -225,13 +231,13 @@ if ($contextSize -gt $modelContextLength) {
     $groupAttentionWidth = $modelContextLength / 2
 }

-Write-Host "Waiting for server to start Chrome in incognito mode at http://127.0.0.1:8080..." -ForegroundColor "Yellow"
+Write-Host "Waiting for server to start Chrome in incognito mode at http://127.0.0.1:${port}..." -ForegroundColor "Yellow"

 Get-Job -Name 'BrowserJob' -ErrorAction SilentlyContinue | Remove-Job -Force -ErrorAction SilentlyContinue
 Start-Job -Name 'BrowserJob' -ScriptBlock {
     do { Start-Sleep -Milliseconds 1000 }
-    while((curl.exe -s -o /dev/null -I -w '%{http_code}' 'http://127.0.0.1:8080') -ne 200)
-    Start-Process 'chrome' -ArgumentList '--incognito --new-window http://127.0.0.1:8080'
+    while((curl.exe -s -o /dev/null -I -w '%{http_code}' "http://127.0.0.1:${port}") -ne 200)
+    Start-Process 'chrome' -ArgumentList "--incognito --new-window http://127.0.0.1:${port}"
 } | Format-List -Property Id, Name, State, Command | Out-String | ForEach-Object { $_.Trim("`r","`n") }

 Write-Host "Starting llama.cpp server with custom options..." -ForegroundColor "Yellow"
@@ -247,6 +253,7 @@ Write-Host "Starting llama.cpp server with custom options..." -ForegroundColor "Yellow"

 Invoke-Expression "${llamaCppPath}\build\bin\Release\server ``
     --log-disable ``
+    --port '${port}' ``
     --model '${model}' ``
     --alias '${alias}' ``
     --ctx-size '${contextSize}' ``
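For orientation, a minimal usage sketch of the new option (the model path below is the placeholder from the script's own examples): the value of -port is passed through to the llama.cpp server's --port flag and reused for the browser health check.

# Illustrative invocation only: serve on port 8081 instead of the default 8080;
# Chrome is opened at http://127.0.0.1:8081 once the health check returns HTTP 200.
.\examples\server.ps1 -model ".\vendor\llama.cpp\models\openchat-3.5-0106.Q5_K_M.gguf" -port 8081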
8 changes: 5 additions & 3 deletions rebuild_llama.cpp.ps1
@@ -168,13 +168,15 @@ Set-Location -Path "../"

 conda activate llama.cpp

-# We are making sure to always use the latest version of the "gguf" package.
+# We are installing the latest version of the dependencies.
 pip install --ignore-installed -r ./requirements.txt

 Set-Location -Path "../../"

-# We want to install specific versions of some packages to avoid unexpected behaviour.
-pip install -r ./requirements.txt
+# We are enforcing specific versions on some packages.
+pip install --force-reinstall -r ./requirements.txt

+conda list
+
 $stopwatch.Stop()
 $durationInSeconds = [Math]::Floor([Decimal]($stopwatch.Elapsed.TotalSeconds))
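A possible sanity check after the reinstall, not part of this commit, is to confirm that the pinned CUDA build of torch is the one that actually gets imported:

# Hypothetical verification step: print the torch version, its CUDA toolkit version
# and whether a GPU is visible; the version should start with 2.3.0.dev20240311.
python -c "import torch; print(torch.__version__, torch.version.cuda, torch.cuda.is_available())"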
4 changes: 1 addition & 3 deletions requirements.txt
@@ -1,6 +1,4 @@
-# We are using a specific version of the "torch"
-# package which supports a specific CUDA version.
 --extra-index-url https://download.pytorch.org/whl/nightly/cu121
-torch==2.3.0.dev20240110+cu121

 transformers==4.36.2
+torch==2.3.0.dev20240311+cu121
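For reference, the same pin can be reproduced outside of requirements.txt with a single pip call; this is an illustrative one-liner, not part of the commit:

# Installs the pinned nightly torch wheel directly from the CUDA 12.1 nightly index.
pip install --extra-index-url https://download.pytorch.org/whl/nightly/cu121 "torch==2.3.0.dev20240311+cu121"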
2 changes: 1 addition & 1 deletion vendor/llama.cpp
Submodule llama.cpp updated 107 files
