adjusted layer estimation
LostRuins committed Jul 24, 2024
1 parent b7fc8e6 commit e28c42d
Showing 6 changed files with 9 additions and 7 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/kcpp-build-release-win-cuda.yaml
@@ -25,7 +25,7 @@ jobs:
mkdir build
cd build
cmake .. -DLLAMA_CUBLAS=ON -DCMAKE_SYSTEM_VERSION="10.0.19041.0"
-cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
+cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1))
- name: Save artifact
uses: actions/upload-artifact@v3
2 changes: 1 addition & 1 deletion .github/workflows/kcpp-build-release-win-full-cu12.yaml
@@ -49,7 +49,7 @@ jobs:
mkdir build
cd build
cmake .. -DLLAMA_CUBLAS=ON -DCMAKE_SYSTEM_VERSION="10.0.19041.0"
-cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
+cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1))
mv bin/Release/koboldcpp_cublas.dll ../koboldcpp_cublas.dll
cd ..
2 changes: 1 addition & 1 deletion .github/workflows/kcpp-build-release-win-full.yaml
@@ -49,7 +49,7 @@ jobs:
mkdir build
cd build
cmake .. -DLLAMA_CUBLAS=ON -DCMAKE_SYSTEM_VERSION="10.0.19041.0"
-cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
+cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1))
mv bin/Release/koboldcpp_cublas.dll ../koboldcpp_cublas.dll
cd ..
2 changes: 1 addition & 1 deletion .github/workflows/kcpp-build-release-win-oldcpu-full.yaml
@@ -49,7 +49,7 @@ jobs:
mkdir build
cd build
cmake .. -DLLAMA_CUBLAS=ON -DLLAMA_AVX2=OFF -DCMAKE_SYSTEM_VERSION="10.0.19041.0"
-cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
+cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1))
mv bin/Release/koboldcpp_cublas.dll ../koboldcpp_cublas.dll
cd ..
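The four workflow hunks above all make the same change: the CUDA CI builds now run with one parallel job fewer than the runner's logical processor count, presumably to keep a core free for the runner itself. A minimal sketch of the same calculation (illustrative Python, not part of the workflows):

```python
import os

# Mirror the -j $((${env:NUMBER_OF_PROCESSORS} - 1)) change above: build with one
# job fewer than the logical CPU count, but never drop below a single job.
jobs = max(1, (os.cpu_count() or 1) - 1)
print(f"cmake --build . --config Release -j {jobs}")
```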
6 changes: 4 additions & 2 deletions klite.embd
@@ -13080,8 +13080,9 @@ Current version indicated by LITEVER below.
if(found == 0)
{
gentxt = gentxt.slice(st2.length);
+found = gentxt.indexOf(st2);
}
-else if (found != -1) //if found, truncate to it
+if (found != -1) //if found, truncate to it
{
splitresponse = gentxt.split(st2);
gentxt = splitresponse[0];
@@ -13094,8 +13095,9 @@ Current version indicated by LITEVER below.
if(found == 0)
{
gentxt = gentxt.slice(et2.length);
+found = gentxt.indexOf(et2);
}
-else if (found != -1) //if found, truncate to it
+if (found != -1) //if found, truncate to it
{
splitresponse = gentxt.split(et2);
gentxt = splitresponse[0];
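Both klite.embd hunks adjust the same stop-sequence handling: once a stop token found at the very start of the generated text is stripped off, the code now searches again so that a later occurrence can still truncate the output, instead of being skipped by the old else-if. A minimal sketch of the adjusted behaviour (illustrative Python, not the actual JavaScript above):

```python
def truncate_at_stop(gentxt: str, stop: str) -> str:
    """Sketch of the adjusted klite.embd logic; illustrative only."""
    found = gentxt.find(stop)
    if found == 0:
        # Stop token leads the text: strip it, then look again (the added line).
        gentxt = gentxt[len(stop):]
        found = gentxt.find(stop)
    if found != -1:
        # Truncate at the (possibly re-discovered) stop token.
        gentxt = gentxt.split(stop)[0]
    return gentxt

# Previously the re-check was missing, so "###reply###ignored" kept its tail;
# with this commit, truncate_at_stop("###reply###ignored", "###") yields "reply".
```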
2 changes: 1 addition & 1 deletion koboldcpp.py
@@ -612,7 +612,7 @@ def autoset_gpu_layers(filepath,ctxsize,gpumem): #shitty algo to determine how m
headcount = ggufmeta[1]
headkvlen = (ggufmeta[2] if ggufmeta[2] > 0 else 128)
ratio = mem/(fsize*csmul*1.5)
-computemem = layers*4*headkvlen*cs*4*1.25 # For now the first 4 is the hardcoded result for a blasbatchsize of 512. Ideally we automatically calculate blasbatchsize / 4 but I couldn't easily grab the value yet - Henk
+computemem = layers*4*headkvlen*cs*4*1.35 # For now the first 4 is the hardcoded result for a blasbatchsize of 512. Ideally we automatically calculate blasbatchsize / 4 but I couldn't easily grab the value yet - Henk
contextmem = layers*headcount*headkvlen*cs*4
reservedmem = 1.5*1024*1024*1024 # Users often don't have their GPU's VRAM worth of memory, we assume 500MB to avoid driver swapping + 500MB for the OS + 500MB for background apps / browser - Henk
if headcount > 0:
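For context, here is a worked sketch of the two memory terms visible in this hunk of autoset_gpu_layers, using the 1.35 compute multiplier the commit raises from 1.25. Every input value below (layer count, head count, head dimension, context size) is an assumption chosen for illustration, and the layer-selection logic that consumes these terms lies outside the visible diff:

```python
# Illustrative numbers only; the real values come from the GGUF metadata and user settings.
layers = 32         # assumed model layer count
headcount = 32      # assumed KV head count (ggufmeta[1])
headkvlen = 128     # assumed head dimension (ggufmeta[2], defaulting to 128 as above)
cs = 4096           # assumed context size

# Per the hunk: the first 4 is the hardcoded result for a blasbatchsize of 512,
# and 1.35 is the multiplier adjusted by this commit.
computemem = layers * 4 * headkvlen * cs * 4 * 1.35
contextmem = layers * headcount * headkvlen * cs * 4
reservedmem = 1.5 * 1024 * 1024 * 1024  # headroom for OS, driver and background apps

print(f"compute ~{computemem / 2**30:.2f} GiB, "
      f"context ~{contextmem / 2**30:.2f} GiB, "
      f"reserved {reservedmem / 2**30:.2f} GiB")
# With these assumed inputs: compute ~0.34 GiB, context ~2.00 GiB, reserved 1.50 GiB.
```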
