export
SHELL = /bin/bash
PYTHON ?= python
PIP ?= pip
LOG_LEVEL = INFO
PYTHONIOENCODING=utf8
TESTDIR = $(CURDIR)/tests
PYTEST_ARGS = --continue-on-collection-errors
VERSION = $(shell cat VERSION)
DOCKER_COMPOSE = docker compose
SPHINX_APIDOC =
BUILD_ORDER = ocrd_utils ocrd_models ocrd_modelfactory ocrd_validators ocrd_network ocrd
reverse = $(if $(wordlist 2,2,$(1)),$(call reverse,$(wordlist 2,$(words $(1)),$(1))) $(firstword $(1)),$(1))
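# For illustration (derived from the definition above): the call recurses on
# the tail and appends the head, so
#   $(call reverse,ocrd_utils ocrd_models ocrd)  ->  ocrd ocrd_models ocrd_utils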
# BEGIN-EVAL makefile-parser --make-help Makefile
help:
	@echo ""
	@echo "  Targets"
	@echo ""
	@echo "    deps-cuda                 Dependencies for deployment with GPU support via Conda"
	@echo "    deps-ubuntu               Dependencies for deployment in an Ubuntu/Debian Linux"
	@echo "    deps-test                 Install test python deps via pip"
	@echo "    build                     (Re)build source and binary distributions of the packages"
	@echo "    install                   (Re)install the packages"
	@echo "    install-dev               Install with pip install -e"
	@echo "    uninstall                 Uninstall the packages"
	@echo "    generate-page             Regenerate python code from PAGE XSD"
	@echo "    spec                      Copy JSON Schema, OpenAPI from OCR-D/spec"
	@echo "    assets                    Set up test assets"
	@echo "    test                      Run all unit tests"
	@echo "    docs                      Build documentation"
	@echo "    docs-clean                Clean docs"
	@echo "    docs-coverage             Calculate docstring coverage"
	@echo "    docker                    Build docker image"
	@echo "    docker-cuda               Build docker image for GPU / CUDA"
	@echo "    pypi                      Build wheel and source dist and upload them via twine"
	@echo ""
	@echo "  ocrd network tests"
	@echo ""
	@echo "    network-module-test       Run all ocrd_network module tests"
	@echo "    network-integration-test  Run all ocrd_network integration tests (docker and docker compose required)"
	@echo ""
	@echo "  Variables"
	@echo ""
	@echo "    DOCKER_TAG         Docker target image tag. Default: '$(DOCKER_TAG)'."
	@echo "    DOCKER_BASE_IMAGE  Docker source image tag. Default: '$(DOCKER_BASE_IMAGE)'."
	@echo "    DOCKER_ARGS        Additional arguments to docker build. Default: '$(DOCKER_ARGS)'"
	@echo "    PIP_INSTALL        pip install command. Default: $(PIP_INSTALL)"
	@echo "    PYTEST_ARGS        arguments for pytest. Default: $(PYTEST_ARGS)"
# END-EVAL
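# Illustrative invocations (the values shown are placeholders, not defaults):
#   make install PYTHON=python3 PIP=pip3
#   make test PYTEST_ARGS="-x -k mets"
#   make docker DOCKER_ARGS="--no-cache"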
# pip install command. Default: $(PIP_INSTALL)
PIP_INSTALL ?= $(PIP) install
PIP_INSTALL_CONFIG_OPTION ?=
.PHONY: deps-cuda deps-ubuntu deps-test
deps-cuda: CONDA_EXE ?= /usr/local/bin/conda
deps-cuda: export CONDA_PREFIX ?= /conda
deps-cuda: PYTHON_PREFIX != $(PYTHON) -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])'
deps-cuda:
	curl --retry 6 -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba
	mv bin/micromamba $(CONDA_EXE)
	# Install Conda system-wide (for interactive / login shells)
	echo 'export MAMBA_EXE=$(CONDA_EXE) MAMBA_ROOT_PREFIX=$(CONDA_PREFIX) CONDA_PREFIX=$(CONDA_PREFIX) PATH=$(CONDA_PREFIX)/bin:$$PATH' >> /etc/profile.d/98-conda.sh
	# workaround for tf-keras#62
	echo 'export XLA_FLAGS=--xla_gpu_cuda_data_dir=$(CONDA_PREFIX)/' >> /etc/profile.d/98-conda.sh
	mkdir -p $(CONDA_PREFIX)/lib $(CONDA_PREFIX)/include
	echo $(CONDA_PREFIX)/lib >> /etc/ld.so.conf.d/conda.conf
# Get CUDA toolkit, including compiler and libraries with dev;
# however, the Nvidia channels do not provide (recent) cudnn (needed for Torch, TF etc.):
#	MAMBA_ROOT_PREFIX=$(CONDA_PREFIX) \
#	conda install -c nvidia/label/cuda-11.8.0 cuda && conda clean -a
#
# The conda-forge channel has cudnn and cudatoolkit but no cudatoolkit-dev anymore (and we need both!),
# so let's combine nvidia and conda-forge (will be same lib versions, no waste of space),
# but omit cuda-cudart-dev and cuda-libraries-dev (as these will be pulled by pip for torch anyway):
	MAMBA_ROOT_PREFIX=$(CONDA_PREFIX) \
	conda install -c nvidia/label/cuda-11.8.0 \
	              cuda-nvcc \
	              cuda-cccl \
	&& conda clean -a \
	&& find $(CONDA_PREFIX) -name "*_static.a" -delete
#	conda install -c conda-forge \
#	              cudatoolkit=11.8.0 \
#	              cudnn=8.8.* && \
#	conda clean -a && \
#	find $(CONDA_PREFIX) -name "*_static.a" -delete
#
# Since Torch will pull in the CUDA libraries (as Python pkgs) anyway,
# let's jump the shark and pull these via NGC index directly,
# but then share them with the rest of the system so native compilation/linking
# works, too:
	shopt -s nullglob; \
	$(PIP) install nvidia-pyindex \
	&& $(PIP) install nvidia-cudnn-cu11~=8.7 \
	               nvidia-cublas-cu11~=11.11 \
	               nvidia-cusparse-cu11~=11.7 \
	               nvidia-cusolver-cu11~=11.4 \
	               nvidia-curand-cu11~=10.3 \
	               nvidia-cufft-cu11~=10.9 \
	               nvidia-cuda-runtime-cu11~=11.8 \
	               nvidia-cuda-cupti-cu11~=11.8 \
	               nvidia-cuda-nvrtc-cu11 \
	&& for pkg in cudnn cublas cusparse cusolver curand cufft cuda_runtime cuda_cupti cuda_nvrtc; do \
	       for lib in $(PYTHON_PREFIX)/nvidia/$$pkg/lib/lib*.so.*; do \
	           base=`basename $$lib`; \
	           ln -s $$lib $(CONDA_PREFIX)/lib/$$base.so; \
	           ln -s $$lib $(CONDA_PREFIX)/lib/$${base%.so.*}.so; \
	       done \
	    && for inc in $(PYTHON_PREFIX)/nvidia/$$pkg/include/*; do \
	           base=`basename $$inc`; case $$base in __*) continue; esac; \
	           ln -s $$inc $(CONDA_PREFIX)/include/; \
	       done \
	   done \
	&& ldconfig
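# For illustration (actual file names depend on the wheel contents): a library
# shipped as $(PYTHON_PREFIX)/nvidia/cublas/lib/libcublas.so.11 gets symlinked
# into $(CONDA_PREFIX)/lib and registered via ldconfig, so native compilation
# and linking can use the pip-provided CUDA libraries, too.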
# gputil/nvidia-smi would be nice, too – but that drags in Python as a conda dependency...
# Workaround for missing prebuilt versions of TF<2 for Python==3.8
# todo: find another solution for 3.9, 3.10 etc
# https://docs.nvidia.com/deeplearning/frameworks/tensorflow-wheel-release-notes/tf-wheel-rel.html
# Nvidia has them, but under a different name, so let's rewrite that:
# (hold at nv22.11, because newer releases require CUDA 12, which is not supported by TF2 (at py38),
# and therefore not in our ocrd/core-cuda base image yet)
# However, at that time no Numpy 1.24 was known, which breaks TF1
# (which is why later nv versions hold it at <1.24 automatically -
# see https://github.com/NVIDIA/tensorflow/blob/r1.15.5%2Bnv22.11/tensorflow/tools/pip_package/setup.py)
deps-tf1:
	if $(PYTHON) -c 'import sys; print("%u.%u" % (sys.version_info.major, sys.version_info.minor))' | fgrep 3.8 && \
	   ! $(PIP) show -q tensorflow-gpu; then \
	   $(PIP) install nvidia-pyindex && \
	   pushd $$(mktemp -d) && \
	   $(PIP) download --no-deps nvidia-tensorflow==1.15.5+nv22.11 && \
	   for name in nvidia_tensorflow-*.whl; do name=$${name%.whl}; done && \
	   $(PYTHON) -m wheel unpack $$name.whl && \
	   for name in nvidia_tensorflow-*/; do name=$${name%/}; done && \
	   newname=$${name/nvidia_tensorflow/tensorflow_gpu} && \
	   sed -i s/nvidia_tensorflow/tensorflow_gpu/g $$name/$$name.dist-info/METADATA && \
	   sed -i s/nvidia_tensorflow/tensorflow_gpu/g $$name/$$name.dist-info/RECORD && \
	   sed -i s/nvidia_tensorflow/tensorflow_gpu/g $$name/tensorflow_core/tools/pip_package/setup.py && \
	   pushd $$name && for path in $$name*; do mv $$path $${path/$$name/$$newname}; done && popd && \
	   $(PYTHON) -m wheel pack $$name && \
	   $(PIP) install $$newname*.whl && popd && rm -fr $$OLDPWD; \
	   $(PIP) install "numpy<1.24"; \
	else \
	   $(PIP) install "tensorflow-gpu<2.0"; \
	fi
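# In effect, the renaming above turns the downloaded wheel (version as pinned above)
#   nvidia_tensorflow-1.15.5+nv22.11-*.whl
# into
#   tensorflow_gpu-1.15.5+nv22.11-*.whl
# so that dependents requiring "tensorflow-gpu" are satisfied by Nvidia's build.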
deps-tf2:
	if $(PYTHON) -c 'import sys; print("%u.%u" % (sys.version_info.major, sys.version_info.minor))' | fgrep 3.8; then \
	   $(PIP) install tensorflow; \
	else \
	   $(PIP) install "tensorflow[and-cuda]"; \
	fi
deps-torch:
	$(PIP) install -i https://download.pytorch.org/whl/cu118 torchvision==0.16.2+cu118 torch==2.1.2+cu118
# Dependencies for deployment in an Ubuntu/Debian Linux
deps-ubuntu:
	apt-get install -y python3 imagemagick libgeos-dev libxml2-dev libxslt-dev libssl-dev
# Install test python deps via pip
deps-test:
	$(PIP) install -U pip
	$(PIP) install -r requirements_test.txt
.PHONY: build install install-dev uninstall
build:
	$(PIP) install build
	$(PYTHON) -m build .
# or use -n (--no-isolation)?
# (Re)install the tool
install: #build
	# not strictly necessary but a precaution against outdated python build tools, https://github.com/OCR-D/core/pull/1166
	$(PIP) install -U pip wheel
	$(PIP_INSTALL) . $(PIP_INSTALL_CONFIG_OPTION)
	@# workaround for shapely#1598
	$(PIP) config set global.no-binary shapely
# Install with pip install -e
install-dev: PIP_INSTALL = $(PIP) install -e
install-dev: PIP_INSTALL_CONFIG_OPTION = --config-settings editable_mode=strict
install-dev: uninstall
	$(MAKE) install
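# i.e. `make install-dev` reruns the install recipe as
#   pip install -e . --config-settings editable_mode=strict
# (strict mode only maps files present at install time, so newly added files
# need a reinstall before they become importable).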
# Uninstall the tool
uninstall:
	$(PIP) uninstall --yes ocrd
# Regenerate python code from PAGE XSD
generate-page: GDS_PAGE = src/ocrd_models/ocrd_page_generateds.py
generate-page: GDS_PAGE_USER = src/ocrd_page_user_methods.py
generate-page: repo/assets
	generateDS \
		-f \
		--root-element='PcGts' \
		-o $(GDS_PAGE) \
		--silence \
		--export "write etree" \
		--disable-generatedssuper-lookup \
		--user-methods=$(GDS_PAGE_USER) \
		src/ocrd_validators/page.xsd
	# hack to prevent #451: enum keys will be strings
	sed -i 's/(Enum):$$/(str, Enum):/' $(GDS_PAGE)
	# hack to ensure output has pc: prefix
	@#sed -i "s/namespaceprefix_=''/namespaceprefix_='pc:'/" $(GDS_PAGE)
	sed -i 's/_nsprefix_ = None/_nsprefix_ = "pc"/' $(GDS_PAGE)
	# hack to ensure child nodes also have pc: prefix...
	sed -i 's/.*_nsprefix_ = child_.prefix$$//' $(GDS_PAGE)
	# replace the need for six since we target python 3.6+
	sed -i 's/from six.moves/from itertools/' $(GDS_PAGE)
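# For example (class name illustrative), the first sed rewrites a generated
#   class ReadingDirectionSimpleType(Enum):
# to
#   class ReadingDirectionSimpleType(str, Enum):
# so that enum members compare and serialize like plain strings.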
#
# Repos
#
.PHONY: repos always-update
repos: repo/assets repo/spec

# Update OCR-D/assets and OCR-D/spec, respectively
repo/assets repo/spec: always-update
	git submodule sync --recursive $@
	if git submodule status --recursive $@ | grep -qv '^ '; then \
		git submodule update --init --recursive $@ && \
		touch $@; \
	fi
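# Note: `git submodule status` prefixes each line with ' ' when in sync,
# '-' when uninitialized, and '+' when the checked-out commit differs,
# so `grep -qv '^ '` succeeds exactly when the submodule needs an update.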
#
# Spec
#
.PHONY: spec
# Copy JSON Schema, OpenAPI from OCR-D/spec
spec: repo/spec
	cp repo/spec/ocrd_tool.schema.yml ocrd_validators/ocrd_validators/ocrd_tool.schema.yml
	cp repo/spec/bagit-profile.yml ocrd_validators/ocrd_validators/bagit-profile.yml
#
# Assets
#
# Set up test assets
assets: repo/assets
	rm -rf $(TESTDIR)/assets
	mkdir -p $(TESTDIR)/assets
	cp -r repo/assets/data/* $(TESTDIR)/assets
#
# Tests
#
.PHONY: test
# Run all unit tests
test: assets
	$(PYTHON) \
		-m pytest $(PYTEST_ARGS) --durations=10 \
		--ignore-glob="$(TESTDIR)/**/*bench*.py" \
		--ignore-glob="$(TESTDIR)/network/*.py" \
		$(TESTDIR)
	$(MAKE) test-logging

test-logging: assets
	# copy the default logging config to a temporary directory and run the logging tests from there
	tempdir=$$(mktemp -d); \
	cp src/ocrd_utils/ocrd_logging.conf $$tempdir; \
	cd $$tempdir; \
	$(PYTHON) -m pytest --continue-on-collection-errors -k "TestLogging or TestDecorators" $(TESTDIR); \
	rm -r $$tempdir/ocrd_logging.conf $$tempdir/ocrd.log $$tempdir/.benchmarks; \
	rm -rf $$tempdir/.coverage; \
	rmdir $$tempdir
network-module-test: assets
	$(PYTHON) \
		-m pytest $(PYTEST_ARGS) -k 'test_modules_' -s -v --durations=10 \
		--ignore-glob="$(TESTDIR)/network/test_integration_*.py" \
		$(TESTDIR)/network
INTEGRATION_TEST_IN_DOCKER = docker exec core_test
network-integration-test:
	$(DOCKER_COMPOSE) --file tests/network/docker-compose.yml up -d
	-$(INTEGRATION_TEST_IN_DOCKER) pytest -k 'test_integration_' -v --ignore-glob="tests/network/*ocrd_all*.py"
	$(DOCKER_COMPOSE) --file tests/network/docker-compose.yml down --remove-orphans

network-integration-test-cicd:
	$(DOCKER_COMPOSE) --file tests/network/docker-compose.yml up -d
	$(INTEGRATION_TEST_IN_DOCKER) pytest -k 'test_integration_' -v --ignore-glob="tests/network/*ocrd_all*.py"
	$(DOCKER_COMPOSE) --file tests/network/docker-compose.yml down --remove-orphans
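# Note the leading '-' in network-integration-test: make ignores a pytest
# failure there so the compose stack is still torn down; the -cicd variant
# omits it so that a failing test also fails the CI job.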
benchmark:
	$(PYTHON) -m pytest $(TESTDIR)/model/test_ocrd_mets_bench.py

benchmark-extreme:
	$(PYTHON) -m pytest $(TESTDIR)/model/*bench*.py

test-profile:
	$(PYTHON) -m cProfile -o profile $$(which pytest)
	$(PYTHON) analyze_profile.py

coverage: assets
	coverage erase
	$(MAKE) test PYTHON="coverage run --omit='*generate*'"
	coverage report
	coverage html
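# Note: `coverage` reuses the test target by overriding PYTHON, so each
#   $(PYTHON) -m pytest ...
# invocation effectively becomes
#   coverage run --omit='*generate*' -m pytest ...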
#
# Documentation
#
.PHONY: docs
# Build documentation
docs:
	for mod in $(BUILD_ORDER);do sphinx-apidoc -f -M -e \
		-o docs/api/$$mod src/$$mod \
		'src/ocrd_models/ocrd_page_generateds.py' \
	;done
	cd docs ; $(MAKE) html

docs-push: gh-pages docs
	cp -r docs/build/html/* gh-pages
	cd gh-pages; git add . && git commit -m "Updated docs $$(date)" && git push

# Clean docs
docs-clean:
	cd gh-pages ; rm -rf *
	cd docs ; rm -rf _build api/ocrd api/ocrd_*

# Calculate docstring coverage
docs-coverage:
	for mod in $(BUILD_ORDER);do docstr-coverage src/$$mod -e '.*(ocrd_page_generateds|/ocrd/cli/).*';done
	for mod in $(BUILD_ORDER);do echo "# $$mod"; docstr-coverage -v1 src/$$mod -e '.*(ocrd_page_generateds|/ocrd/cli/).*'|sed 's/^/\t/';done

gh-pages:
	git clone --branch gh-pages https://github.com/OCR-D/core gh-pages
#
# Clean up
#
pyclean:
	rm -rf ./build
	rm -rf ./dist
	rm -rf htmlcov
	rm -rf .benchmarks
	rm -rf **/*.egg-info
	rm -f **/*.pyc
	-find . -name '__pycache__' -exec rm -rf '{}' \;
	rm -rf .pytest_cache
#
# Docker
#
.PHONY: docker docker-cuda
# Additional arguments to docker build. Default: '$(DOCKER_ARGS)'
DOCKER_ARGS ?=
DOCKER_BASE_TAG ?= ocrd
DOCKER_BUILD ?= docker build --progress=plain
# Build docker image
docker: DOCKER_BASE_IMAGE = ubuntu:20.04
docker: DOCKER_TAG = $(DOCKER_BASE_TAG:%=%/core)
docker: DOCKER_FILE = Dockerfile
# Build extended sets for maximal layer sharing
docker-cuda: DOCKER_BASE_IMAGE = $(DOCKER_BASE_TAG)/core
docker-cuda: DOCKER_TAG = $(DOCKER_BASE_TAG:%=%/core-cuda)
docker-cuda: DOCKER_FILE = Dockerfile.cuda
docker-cuda: docker
docker-cuda-tf1: DOCKER_BASE_IMAGE = $(DOCKER_BASE_TAG)/core-cuda
docker-cuda-tf1: DOCKER_TAG = $(DOCKER_BASE_TAG:%=%/core-cuda-tf1)
docker-cuda-tf1: DOCKER_FILE = Dockerfile.cuda-tf1
docker-cuda-tf1: docker-cuda
docker-cuda-tf2: DOCKER_BASE_IMAGE = $(DOCKER_BASE_TAG)/core-cuda
docker-cuda-tf2: DOCKER_TAG = $(DOCKER_BASE_TAG:%=%/core-cuda-tf2)
docker-cuda-tf2: DOCKER_FILE = Dockerfile.cuda-tf2
docker-cuda-tf2: docker-cuda
docker-cuda-torch: DOCKER_BASE_IMAGE = $(DOCKER_BASE_TAG)/core-cuda
docker-cuda-torch: DOCKER_TAG = $(DOCKER_BASE_TAG:%=%/core-cuda-torch)
docker-cuda-torch: DOCKER_FILE = Dockerfile.cuda-torch
docker-cuda-torch: docker-cuda
docker docker-cuda docker-cuda-tf1 docker-cuda-tf2 docker-cuda-torch:
	$(DOCKER_BUILD) -f $(DOCKER_FILE) $(DOCKER_TAG:%=-t %) --target ocrd_core_base --build-arg BASE_IMAGE=$(lastword $(DOCKER_BASE_IMAGE)) $(DOCKER_ARGS) .
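# Illustration of the chaining (with the default DOCKER_BASE_TAG=ocrd):
#   make docker           ->  ocrd/core           (FROM ubuntu:20.04)
#   make docker-cuda      ->  ocrd/core-cuda      (FROM ocrd/core)
#   make docker-cuda-tf2  ->  ocrd/core-cuda-tf2  (FROM ocrd/core-cuda)
# Each target depends on the previous one, so the shared rule above runs once
# per image with its target-specific DOCKER_FILE/DOCKER_TAG/DOCKER_BASE_IMAGE.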
# Build wheel and source dist and upload them via twine
pypi: build
	twine upload --verbose dist/ocrd-$(VERSION)*{tar.gz,whl}

pypi-workaround: build-workaround
	for dist in $(BUILD_ORDER);do twine upload dist/$$dist-$(VERSION)*{tar.gz,whl};done

# Only in place until v3 so we don't break existing installations
build-workaround: pyclean
	cp pyproject.toml pyproject.toml.BAK
	cp src/ocrd_utils/constants.py src/ocrd_utils/constants.py.BAK
	cp src/ocrd/cli/__init__.py src/ocrd/cli/__init__.py.BAK
	for dist in $(BUILD_ORDER);do \
		sed "s,^name =.*,name = \"$$dist\"," pyproject.toml.BAK > pyproject.toml; \
		sed "s,dist_version('ocrd'),dist_version('$$dist')," src/ocrd_utils/constants.py.BAK > src/ocrd_utils/constants.py; \
		sed "s,package_name='ocrd',package_name='$$dist'," src/ocrd/cli/__init__.py.BAK > src/ocrd/cli/__init__.py; \
		$(MAKE) build; \
	done
	rm pyproject.toml.BAK
	rm src/ocrd_utils/constants.py.BAK
	rm src/ocrd/cli/__init__.py.BAK
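# After build-workaround, dist/ holds one sdist and one wheel per name in
# BUILD_ORDER (ocrd_utils-$(VERSION)*, ..., ocrd-$(VERSION)*), all built from
# the same source tree under a different distribution name.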
# test that the aliased packages work in isolation and combined
test-workaround: build-workaround
	$(MAKE) uninstall-workaround
	for dist in $(BUILD_ORDER);do \
		$(PIP) install dist/$$dist-*.whl ;\
		ocrd --version ;\
		$(MAKE) test ;\
		$(PIP) uninstall --yes $$dist ;\
	done
	for dist in $(BUILD_ORDER);do \
		$(PIP) install dist/$$dist-*.whl ;\
	done
	ocrd --version ;\
	$(MAKE) test ;\
	for dist in $(BUILD_ORDER);do $(PIP) uninstall --yes $$dist;done

uninstall-workaround:
	for dist in $(BUILD_ORDER);do $(PIP) uninstall --yes $$dist;done