Bump to version 2.0.0

knaw-huc · Apr 3, 2024 · 38deceb · 38deceb
1 parent 071116a
commit 38deceb
Show file tree

Hide file tree

Showing 6 changed files with 27 additions and 6 deletions.
diff --git a/laypa b/laypa
diff --git a/loghi-htr b/loghi-htr
diff --git a/loghi-tooling b/loghi-tooling
diff --git a/scripts/create-train-data.sh b/scripts/create-train-data.sh
@@ -41,6 +41,8 @@ fi
 inputdir=$(realpath $1/)
 outputdir=$(realpath $2/)
 
+mkdir -p $outputdir
+
 # Prepare file lists
 filelist=$outputdir/training_all.txt
 filelisttrain=$outputdir/training_all_train.txt
@@ -107,6 +109,6 @@ done
 
 # Create training and validation file lists
 echo "Splitting data into training and validation sets..."
-shuf $filelist | split -l $(( $(wc -l < $filelist) * $trainsplit / 100 ))
+shuf $filelist | split -l $(( $(wc -l <$filelist) * $trainsplit / 100 ))
 mv xab $filelistval
 mv xaa $filelisttrain
diff --git a/scripts/htr-train-pipeline.sh b/scripts/htr-train-pipeline.sh
@@ -21,7 +21,7 @@ channels=1
 GPU=0
 
 # Dataset and training configuration
-listdir=/data/ovdr/gt-split/
+listdir=PATH_TO_LISTDIR
 trainlist=$listdir/val.txt
 validationlist=$listdir/val_150hs.txt
 
@@ -41,6 +41,9 @@ learning_rate=0.0003
 # DO NOT MODIFY BELOW THIS LINE
 # ------------------------------
 
+#DO NOT REMOVE THIS LINE, IT IS USED FOR AUTOMATIC TESTING
+#PLACEHOLDER#
+
 DOCKERLOGHIHTR=loghi/docker.htr:$VERSION
 
 tmpdir=$(mktemp -d)

diff --git a/scripts/inference-pipeline.sh b/scripts/inference-pipeline.sh
@@ -20,7 +20,7 @@ LAYPAREGIONMODELWEIGHTS=INSERT_FULLPATH_TO_PTH_HERE
 
 # Set to 1 if you want to enable, 0 otherwise, select just one
 HTRLOGHI=1
-HTRLOGHIMODEL=INSERT_FULL_PATH_TO_HTR_MODEL_HERE
+HTRLOGHIMODEL=INSERT_FULL_PATH_TO_LOGHI_HTR_MODEL_HERE
 
 # Set this to 1 for recalculating reading order, line clustering and cleaning.
 # WARNING this will remove regions found by Laypa
@@ -184,6 +184,22 @@ if [[ $HTRLOGHI -eq 1 ]]; then
     echo "Running HTR"
     LOGHIDIR="$(dirname "${HTRLOGHIMODEL}")"
 
+    echo docker run $DOCKERGPUPARAMS -u $(id -u ${USER}):$(id -g ${USER}) --rm -m 32000m --shm-size 10240m -ti \
+        -v /tmp:/tmp \
+        -v $tmpdir:$tmpdir \
+        -v $LOGHIDIR:$LOGHIDIR \
+        $DOCKERLOGHIHTR \
+            bash -c "LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4 python3 /src/loghi-htr/src/main.py \
+            --model $HTRLOGHIMODEL  \
+            --batch_size 64 \
+            --use_mask \
+            --inference_list $tmpdir/lines.txt \
+            --results_file $tmpdir/results.txt \
+            --gpu $GPU \
+            --output $tmpdir/output/ \
+            --beam_width $BEAMWIDTH " | tee -a $tmpdir/log.txt
+
+
     docker run $DOCKERGPUPARAMS -u $(id -u ${USER}):$(id -g ${USER}) --rm -m 32000m --shm-size 10240m -ti \
         -v /tmp:/tmp \
         -v $tmpdir:$tmpdir \