From 5e037cd52b586974c7b8bec688119c76b5a562bc Mon Sep 17 00:00:00 2001 From: Rory McNicholl Date: Tue, 12 Dec 2023 10:19:38 +0000 Subject: [PATCH] Clamby (#496) * app user to run clamav as daemon * use clamby gem in preference to clamav gem * make sure clamav-daemon (clamd) is running on start up * switch interface for clamav from clamav gem to clamby (as per Hyc) * The Hyc::VirusScanner Module * tidy for rubocop * run clamd with sidekiq --- Dockerfile | 9 ++++ Gemfile | 2 + Gemfile.lock | 2 + app/jobs/attach_files_to_work_job.rb | 69 ++++++++++++++++++++++++++++ app/lib/hyc/virus_scanner.rb | 8 ++++ bin/worker | 2 +- config/initializers/clamav.rb | 20 +++++++- docker-compose.production.yml | 2 +- docker-compose.yml | 6 ++- 9 files changed, 115 insertions(+), 5 deletions(-) create mode 100644 app/jobs/attach_files_to_work_job.rb create mode 100644 app/lib/hyc/virus_scanner.rb diff --git a/Dockerfile b/Dockerfile index b9b3a828a..df56291a5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,6 +6,8 @@ USER root RUN apk --no-cache upgrade && \ apk --no-cache add \ bash \ + clamav \ + clamav-daemon \ cmake \ exiftool \ ffmpeg \ @@ -35,6 +37,11 @@ RUN apk --no-cache upgrade && \ # cargo install rbspy && \ echo "******** Packages Installed *********" +RUN sed -i 's/User clamav/User app/g' /etc/clamav/clamd.conf +RUN mkdir -p /var/run/clamav && chown -R app:app /var/run/clamav +RUN mkdir -p /var/log/clamav && chown -R app:app /var/log/clamav +RUN chown -R app:app /var/lib/clamav + RUN wget https://github.com/ImageMagick/ImageMagick/archive/refs/tags/7.1.0-57.tar.gz \ && tar xf 7.1.0-57.tar.gz \ && apk --no-cache add \ @@ -92,11 +99,13 @@ RUN sh -l -c " \ sed -i '/require .enumerator./d' /usr/local/bundle/gems/csl-1.6.0/lib/csl.rb" COPY --chown=1001:101 $APP_PATH/bin/db-migrate-seed.sh /app/samvera/ COPY --chown=1001:101 $APP_PATH /app/samvera/hyrax-webapp +RUN ln -sf /app/samvera/branding /app/samvera/hyrax-webapp/public/branding ARG HYKU_BULKRAX_ENABLED="true" RUN 
RAILS_ENV=production SECRET_KEY_BASE=`bin/rake secret` DB_ADAPTER=nulldb DATABASE_URL='postgresql://fake' bundle exec rake assets:precompile && yarn install CMD ./bin/web FROM hyku-web as hyku-worker +RUN freshclam ENV MALLOC_ARENA_MAX=2 CMD ./bin/worker diff --git a/Gemfile b/Gemfile index 9f09afdd0..478741808 100644 --- a/Gemfile +++ b/Gemfile @@ -153,3 +153,5 @@ gem 'tether-rails' gem 'validate_url' gem 'hyrax-v2_graph_indexer', "~> 0.5", git: 'https://github.com/scientist-softserv/hyrax-v2_graph_indexer.git', ref: '53b0a2d28868af25d306bc361634439c008892ac' gem 'iiif_print', git: 'https://github.com/scientist-softserv/iiif_print.git' +#clamby (for which interface has been back-ported from Hydra 2.0) +gem 'clamby' diff --git a/Gemfile.lock b/Gemfile.lock index c95f5bddc..b19193151 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -364,6 +364,7 @@ GEM citeproc-ruby (1.1.14) citeproc (~> 1.0, >= 1.0.9) csl (~> 1.6) + clamby (1.6.10) clipboard-rails (1.7.1) cocoon (1.2.15) code_analyzer (0.5.2) @@ -1268,6 +1269,7 @@ DEPENDENCIES capybara capybara-screenshot (~> 1.0) carrierwave-aws (~> 1.3) + clamby cocoon codemirror-rails coffee-rails (~> 4.2) diff --git a/app/jobs/attach_files_to_work_job.rb b/app/jobs/attach_files_to_work_job.rb new file mode 100644 index 000000000..099cd4e9d --- /dev/null +++ b/app/jobs/attach_files_to_work_job.rb @@ -0,0 +1,69 @@ +# [BL-override] add virus checking to job +# Converts UploadedFiles into FileSets and attaches them to works. 
+class AttachFilesToWorkJob < Hyrax::ApplicationJob + queue_as Hyrax.config.ingest_queue_name + + # @param [ActiveFedora::Base] work - the work object + # @param [Array] uploaded_files - an array of files to attach + def perform(work, uploaded_files, **work_attributes) + validate_files!(uploaded_files) + depositor = proxy_or_depositor(work) + user = User.find_by_user_key(depositor) + work_permissions = work.permissions.map(&:to_hash) + metadata = visibility_attributes(work_attributes) + uploaded_files.each do |uploaded_file| + # [BL-override] check all files for viruses + STDERR.puts "######################################################" + STDERR.puts " Uploaded_file before virus_check! #{uploaded_file}" + STDERR.puts "######################################################" + virus_check!(uploaded_file) + next if uploaded_file.file_set_uri.present? + STDERR.puts " New file is not infected :) #{uploaded_file}" + actor = Hyrax::Actors::FileSetActor.new(FileSet.create, user) + uploaded_file.update(file_set_uri: actor.file_set.uri) + actor.file_set.permissions_attributes = work_permissions + actor.create_metadata(metadata) + actor.create_content(uploaded_file) + actor.attach_to_work(work) + end + # [BL-override] Log viruses + #rescue VirusDetectedError => error + #Rails.logger.error "Virus encountered while processing work #{work.id}.\n" "\t#{error.message}" + end + + # [BL-override] Add virus detection error class + class VirusDetectedError < RuntimeError; end + + private + + # The attributes used for visibility - sent as initial params to created FileSets. + def visibility_attributes(attributes) + attributes.slice(:visibility, :visibility_during_lease, + :visibility_after_lease, :lease_expiration_date, + :embargo_release_date, :visibility_during_embargo, + :visibility_after_embargo) + end + + def validate_files!(uploaded_files) + uploaded_files.each do |uploaded_file| + next if uploaded_file.is_a? 
Hyrax::UploadedFile + raise ArgumentError, "Hyrax::UploadedFile required, but #{uploaded_file.class} received: #{uploaded_file.inspect}" + end + end + + ## + # A work with files attached by a proxy user will set the depositor as the intended user + # that the proxy was depositing on behalf of. See tickets #2764, #2902. + def proxy_or_depositor(work) + work.on_behalf_of.presence || work.depositor + end + + # [hyc-override] add virus checking method + def virus_check!(uploaded_file) + rails_root = Rails.root.to_s + return unless Hyc::VirusScanner.infected?("#{rails_root}/public#{uploaded_file.file}") + carrierwave_file = uploaded_file.file.file + carrierwave_file.delete + raise(VirusDetectedError, carrierwave_file.filename) + end +end diff --git a/app/lib/hyc/virus_scanner.rb b/app/lib/hyc/virus_scanner.rb new file mode 100644 index 000000000..23ed34686 --- /dev/null +++ b/app/lib/hyc/virus_scanner.rb @@ -0,0 +1,8 @@ +# switching from clamav gem to clamby gem +module Hyc + class VirusScanner < Hydra::Works::VirusScanner + def infected? + Clamby.virus?(file) + end + end +end diff --git a/bin/worker b/bin/worker index b7605486b..2d33ecb79 100755 --- a/bin/worker +++ b/bin/worker @@ -9,4 +9,4 @@ else puts 'DATABASE_URL not set, no pool change needed' end -exec "echo $DATABASE_URL && bundle exec sidekiq" +exec "echo $DATABASE_URL && clamd && bundle exec sidekiq" diff --git a/config/initializers/clamav.rb b/config/initializers/clamav.rb index 7cd26d6a2..43813b6a7 100644 --- a/config/initializers/clamav.rb +++ b/config/initializers/clamav.rb @@ -1 +1,19 @@ -ClamAV.instance.loaddb if defined? 
ClamAV +# Pre Hyrax 3.5 we will override use of clamav gem and use clamby +# Inspired by https://github.com/UNC-Libraries/hy-c/commit/57c84bd0fdfb9ee8b00cc70194971c3fe9fea265#diff-8b7db4d5cc4b8f6dc8feb7030baa2478 +Hydra::Works.default_system_virus_scanner = Hyc::VirusScanner + +Clamby.configure({ + :check => false, + :daemonize => true, + :config_file => nil, + :error_clamscan_missing => true, + :error_clamscan_client_error => false, + :error_file_missing => true, + :error_file_virus => false, + :fdpass => true, + :stream => false, + :output_level => 'medium', # one of 'off', 'low', 'medium', 'high' + :executable_path_clamscan => 'clamscan', + :executable_path_clamdscan => 'clamdscan', + :executable_path_freshclam => 'freshclam', + }) diff --git a/docker-compose.production.yml b/docker-compose.production.yml index 49ea47332..3ec7ab3b8 100644 --- a/docker-compose.production.yml +++ b/docker-compose.production.yml @@ -126,7 +126,7 @@ services: target: hyku-worker args: - HYKU_BULKRAX_ENABLED=true - command: bundle exec sidekiq + command: clamd && bundle exec sidekiq depends_on: - check_volumes - db diff --git a/docker-compose.yml b/docker-compose.yml index 9221cb824..00d49c6de 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -77,6 +77,8 @@ services: && runuser -u solr -- solr-foreground" expose: - 8983 +# ports: +# - 8983:8983 volumes: - solr:/var/solr networks: @@ -140,7 +142,7 @@ services: ## With the following line, uncommented during active development, we'll ## run bundle then boot the web-server. ## - # command: sh -l -c "bundle && bundle exec puma -v -b tcp://0.0.0.0:3000" + command: sh -l -c "bundle && bundle exec puma -v -b tcp://0.0.0.0:3000" ## ## Similar to the above, except we will bundle and then tell the container ## to wait. You'll then need to bash into the web container and start the @@ -182,7 +184,7 @@ services: ## ## With the following line, uncommented during active development, we'll ## run bundle then run sidekiq. 
- # command: sh -l -c "bundle && bundle exec sidekiq" + command: sh -l -c "clamd && bundle && bundle exec sidekiq" ## ## Similar to the above, except we will bundle and then tell the container ## to wait. You'll then need to bash into the worker container and start