Skip to content

Commit

Permalink
Bump to version 2.0.0
Browse files: browse the repository at this point in the history
  • Loading branch information
rvankoert committed Apr 3, 2024
1 parent 071116a commit 38deceb
Show file tree
Hide file tree
Showing 6 changed files with 27 additions and 6 deletions.
2 changes: 1 addition & 1 deletion loghi-htr
Submodule loghi-htr updated 85 files
+24 −0 .github/workflows/pylint.yml
+23 −9 .github/workflows/run_tests.yml
+174 −94 README.md
+82 −0 configs/default.json
+59 −0 configs/finetuning.json
+28 −0 configs/inference.json
+26 −0 configs/testing.json
+57 −0 configs/training.json
+27 −0 configs/validation.json
+1 −1 environment.yml
+13 −12 requirements.txt
+0 −182 src/analyze.py
+35 −36 src/api/app.py
+105 −27 src/api/app_utils.py
+190 −0 src/api/batch_decoder.py
+207 −286 src/api/batch_predictor.py
+0 −120 src/api/gunicorn_app.py
+330 −198 src/api/image_preparator.py
+73 −9 src/api/routes.py
+222 −0 src/api/simple_security.py
+3 −3 src/api/start_local_app.sh
+17 −0 src/api/start_local_app_with_security.sh
+0 −202 src/arg_parser.py
+0 −21 src/config.py
+0 −0 src/data/__init__.py
+623 −0 src/data/augment_layers.py
+285 −0 src/data/augmentation.py
+145 −0 src/data/data_handling.py
+183 −0 src/data/loader.py
+480 −0 src/data/manager.py
+0 −188 src/data_generator.py
+0 −292 src/data_loader.py
+0 −95 src/inference.py
+0 −57 src/loghi_custom_callback.py
+160 −941 src/main.py
+0 −344 src/model.py
+0 −0 src/model/__init__.py
+14 −9 src/model/conversion.py
+176 −0 src/model/custom_callback.py
+9 −23 src/model/custom_layers.py
+104 −0 src/model/losses.py
+288 −0 src/model/management.py
+103 −0 src/model/metrics.py
+253 −0 src/model/optimization.py
+164 −0 src/model/replacing.py
+41 −41 src/model/vgsl_model_generator.py
+0 −0 src/modes/__init__.py
+81 −0 src/modes/inference.py
+220 −0 src/modes/test.py
+133 −0 src/modes/training.py
+234 −0 src/modes/validation.py
+0 −91 src/sample_processor.py
+0 −0 src/setup/__init__.py
+381 −0 src/setup/arg_parser.py
+347 −0 src/setup/config.py
+183 −0 src/setup/environment.py
+0 −231 src/utils.py
+0 −0 src/utils/__init__.py
+260 −0 src/utils/calculate.py
+123 −0 src/utils/decoding.py
+197 −0 src/utils/print.py
+242 −0 src/utils/text.py
+115 −0 src/utils/wbs.py
+193 −0 src/visualize/PdfMaker.py
+0 −0 src/visualize/__init__.py
+66 −0 src/visualize/main.py
+41 −0 src/visualize/vis_arg_parser.py
+143 −0 src/visualize/vis_utils.py
+317 −0 src/visualize/visualize_filters_activations.py
+ src/visualize/visualize_plots/sample_image.jpg
+ src/visualize/visualize_plots/sample_image2.png
+503 −0 src/visualize/visualize_timestep_predictions.py
+0 −231 src/visualize_filter_result.py
+0 −165 src/visualize_network.py
+0 −209 src/visualize_network_saliency.py
+0 −131 src/visualize_siamese_network_dense.py
+258 −0 tests/test_data_augments.py
+0 −140 tests/test_datagenerator.py
+101 −308 tests/test_dataloader.py
+256 −0 tests/test_datamanager.py
+290 −0 tests/test_lr_schedule.py
+6 −7 tests/test_model_creation.py
+14 −11 tests/test_model_replacement.py
+2 −2 tests/test_model_to_vgsl.py
+28 −32 tests/test_tokenizer.py
4 changes: 3 additions & 1 deletion scripts/create-train-data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ fi
inputdir=$(realpath $1/)
outputdir=$(realpath $2/)

mkdir -p $outputdir

# Prepare file lists
filelist=$outputdir/training_all.txt
filelisttrain=$outputdir/training_all_train.txt
Expand Down Expand Up @@ -107,6 +109,6 @@ done

# Create training and validation file lists
echo "Splitting data into training and validation sets..."
shuf $filelist | split -l $(( $(wc -l < $filelist) * $trainsplit / 100 ))
shuf $filelist | split -l $(( $(wc -l <$filelist) * $trainsplit / 100 ))
mv xab $filelistval
mv xaa $filelisttrain
5 changes: 4 additions & 1 deletion scripts/htr-train-pipeline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ channels=1
GPU=0

# Dataset and training configuration
listdir=/data/ovdr/gt-split/
listdir=PATH_TO_LISTDIR
trainlist=$listdir/val.txt
validationlist=$listdir/val_150hs.txt

Expand All @@ -41,6 +41,9 @@ learning_rate=0.0003
# DO NOT MODIFY BELOW THIS LINE
# ------------------------------

#DO NOT REMOVE THIS LINE, IT IS USED FOR AUTOMATIC TESTING
#PLACEHOLDER#

DOCKERLOGHIHTR=loghi/docker.htr:$VERSION

tmpdir=$(mktemp -d)
Expand Down
18 changes: 17 additions & 1 deletion scripts/inference-pipeline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ LAYPAREGIONMODELWEIGHTS=INSERT_FULLPATH_TO_PTH_HERE

# Set to 1 if you want to enable, 0 otherwise, select just one
HTRLOGHI=1
HTRLOGHIMODEL=INSERT_FULL_PATH_TO_HTR_MODEL_HERE
HTRLOGHIMODEL=INSERT_FULL_PATH_TO_LOGHI_HTR_MODEL_HERE

# Set this to 1 for recalculating reading order, line clustering and cleaning.
# WARNING this will remove regions found by Laypa
Expand Down Expand Up @@ -184,6 +184,22 @@ if [[ $HTRLOGHI -eq 1 ]]; then
echo "Running HTR"
LOGHIDIR="$(dirname "${HTRLOGHIMODEL}")"

echo docker run $DOCKERGPUPARAMS -u $(id -u ${USER}):$(id -g ${USER}) --rm -m 32000m --shm-size 10240m -ti \
-v /tmp:/tmp \
-v $tmpdir:$tmpdir \
-v $LOGHIDIR:$LOGHIDIR \
$DOCKERLOGHIHTR \
bash -c "LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4 python3 /src/loghi-htr/src/main.py \
--model $HTRLOGHIMODEL \
--batch_size 64 \
--use_mask \
--inference_list $tmpdir/lines.txt \
--results_file $tmpdir/results.txt \
--gpu $GPU \
--output $tmpdir/output/ \
--beam_width $BEAMWIDTH " | tee -a $tmpdir/log.txt


docker run $DOCKERGPUPARAMS -u $(id -u ${USER}):$(id -g ${USER}) --rm -m 32000m --shm-size 10240m -ti \
-v /tmp:/tmp \
-v $tmpdir:$tmpdir \
Expand Down

0 comments on commit 38deceb

Please sign in to comment.