From 6b238f8d745a9a6e3e0e2bb26d2c9533c1451d59 Mon Sep 17 00:00:00 2001 From: seitenbau-govdata Date: Wed, 15 Dec 2021 16:02:56 +0100 Subject: [PATCH 1/2] Add option keep-actual to clearsource_history command --- ckanext/harvest/cli.py | 9 +- ckanext/harvest/commands/harvester.py | 11 +- ckanext/harvest/logic/action/update.py | 62 ++++++-- ckanext/harvest/tests/test_action.py | 210 +++++++++++++++++++++++-- ckanext/harvest/utils.py | 12 +- 5 files changed, 276 insertions(+), 28 deletions(-) diff --git a/ckanext/harvest/cli.py b/ckanext/harvest/cli.py index d8486fe9b..5de36551b 100644 --- a/ckanext/harvest/cli.py +++ b/ckanext/harvest/cli.py @@ -108,8 +108,13 @@ def clear(ctx, id): @source.command() @click.argument(u"id", metavar=u"SOURCE_ID_OR_NAME", required=False) +@click.option( + "-k", + "--keep-actual", + default=False +) @click.pass_context -def clear_history(ctx, id): +def clear_history(ctx, id, keep_actual): """If no source id is given the history for all harvest sources (maximum is 1000) will be cleared. @@ -122,7 +127,7 @@ def clear_history(ctx, id): flask_app = ctx.meta["flask_app"] with flask_app.test_request_context(): - result = utils.clear_harvest_source_history(id) + result = utils.clear_harvest_source_history(id, bool(keep_actual)) click.secho(result, fg="green") diff --git a/ckanext/harvest/commands/harvester.py b/ckanext/harvest/commands/harvester.py index ed4bbc104..49b35e94a 100644 --- a/ckanext/harvest/commands/harvester.py +++ b/ckanext/harvest/commands/harvester.py @@ -190,6 +190,14 @@ def __init__(self, name): will be aborted. 
You can use comma as a separator to provide multiple source_id's""", ) + self.parser.add_option( + "-k", + "--keep-actual", + dest="keep_actual", + default=False, + help="Do not delete relevant harvest objects", + ) + def command(self): self._load_config() @@ -316,11 +324,12 @@ def create_harvest_source(self): print(result) def clear_harvest_source_history(self): + keep_actual = bool(self.options.keep_actual) source_id = None if len(self.args) >= 2: source_id = six.text_type(self.args[1]) - print(utils.clear_harvest_source_history(source_id)) + print(utils.clear_harvest_source_history(source_id, keep_actual)) def show_harvest_source(self): diff --git a/ckanext/harvest/logic/action/update.py b/ckanext/harvest/logic/action/update.py index 33fde11e5..d2d967845 100644 --- a/ckanext/harvest/logic/action/update.py +++ b/ckanext/harvest/logic/action/update.py @@ -322,6 +322,8 @@ def harvest_sources_job_history_clear(context, data_dict): ''' check_access('harvest_sources_clear', context, data_dict) + keep_actual = data_dict.get('keep_actual', False) + job_history_clear_results = [] # We assume that the maximum of 1000 (hard limit) rows should be enough result = logic.get_action('package_search')(context, {'fq': '+dataset_type:harvest', 'rows': 1000}) @@ -329,7 +331,8 @@ def harvest_sources_job_history_clear(context, data_dict): if harvest_packages: for data_dict in harvest_packages: try: - clear_result = get_action('harvest_source_job_history_clear')(context, {'id': data_dict['id']}) + clear_result = get_action('harvest_source_job_history_clear')( + context, {'id': data_dict['id'], 'keep_actual': keep_actual}) job_history_clear_results.append(clear_result) except NotFound: # Ignoring not existent harvest sources because of a possibly corrupt search index @@ -352,6 +355,7 @@ def harvest_source_job_history_clear(context, data_dict): check_access('harvest_source_clear', context, data_dict) harvest_source_id = data_dict.get('id', None) + keep_actual = 
data_dict.get('keep_actual', False) source = HarvestSource.get(harvest_source_id) if not source: @@ -362,17 +366,51 @@ def harvest_source_job_history_clear(context, data_dict): model = context['model'] - sql = '''begin; - delete from harvest_object_error where harvest_object_id - in (select id from harvest_object where harvest_source_id = '{harvest_source_id}'); - delete from harvest_object_extra where harvest_object_id - in (select id from harvest_object where harvest_source_id = '{harvest_source_id}'); - delete from harvest_object where harvest_source_id = '{harvest_source_id}'; - delete from harvest_gather_error where harvest_job_id - in (select id from harvest_job where source_id = '{harvest_source_id}'); - delete from harvest_job where source_id = '{harvest_source_id}'; - commit; - '''.format(harvest_source_id=harvest_source_id) + if keep_actual: + sql = '''BEGIN; + DELETE FROM harvest_object_error WHERE harvest_object_id + IN (SELECT id FROM harvest_object AS obj WHERE harvest_source_id = '{harvest_source_id}' + AND current != true + AND (NOT EXISTS (SELECT id FROM harvest_job WHERE id = obj.harvest_job_id + AND status = 'Running')) + AND (NOT EXISTS (SELECT id FROM harvest_object WHERE harvest_job_id = obj.harvest_job_id + AND current = true)) + ); + DELETE FROM harvest_object_extra WHERE harvest_object_id + IN (SELECT id FROM harvest_object AS obj WHERE harvest_source_id = '{harvest_source_id}' + AND current != true + AND (NOT EXISTS (SELECT id FROM harvest_job WHERE id = obj.harvest_job_id + AND status = 'Running')) + AND (NOT EXISTS (SELECT id FROM harvest_object WHERE harvest_job_id = obj.harvest_job_id + AND current = true)) + ); + DELETE FROM harvest_object AS obj WHERE harvest_source_id = '{harvest_source_id}' + AND current != true + AND (NOT EXISTS (SELECT id FROM harvest_job WHERE id = obj.harvest_job_id + AND status = 'Running')) + AND (NOT EXISTS (SELECT id FROM harvest_object WHERE harvest_job_id = obj.harvest_job_id + AND current = true)); + 
DELETE FROM harvest_gather_error WHERE harvest_job_id + IN (SELECT id FROM harvest_job AS job WHERE source_id = '{harvest_source_id}' + AND job.status != 'Running' + AND NOT EXISTS (SELECT id FROM harvest_object WHERE harvest_job_id = job.id)); + DELETE FROM harvest_job AS job WHERE source_id = '{harvest_source_id}' + AND job.status != 'Running' + AND NOT EXISTS (SELECT id FROM harvest_object WHERE harvest_job_id = job.id); + COMMIT; + '''.format(harvest_source_id=harvest_source_id) + else: + sql = '''BEGIN; + DELETE FROM harvest_object_error WHERE harvest_object_id + IN (SELECT id FROM harvest_object WHERE harvest_source_id = '{harvest_source_id}'); + DELETE FROM harvest_object_extra WHERE harvest_object_id + IN (SELECT id FROM harvest_object WHERE harvest_source_id = '{harvest_source_id}'); + DELETE FROM harvest_object WHERE harvest_source_id = '{harvest_source_id}'; + DELETE FROM harvest_gather_error WHERE harvest_job_id + IN (SELECT id FROM harvest_job WHERE source_id = '{harvest_source_id}'); + DELETE FROM harvest_job WHERE source_id = '{harvest_source_id}'; + COMMIT; + '''.format(harvest_source_id=harvest_source_id) model.Session.execute(sql) diff --git a/ckanext/harvest/tests/test_action.py b/ckanext/harvest/tests/test_action.py index c2640a58c..fa3019a21 100644 --- a/ckanext/harvest/tests/test_action.py +++ b/ckanext/harvest/tests/test_action.py @@ -258,6 +258,15 @@ def test_harvest_source_job_history_clear(self): object_ = factories.HarvestObjectObj(job=job, source=source, package_id=dataset['id']) + data_dict = SOURCE_DICT.copy() + data_dict['name'] = 'another-source' + data_dict['url'] = 'http://another-url' + source2 = factories.HarvestSourceObj(**data_dict) + job2 = factories.HarvestJobObj(source=source2) + dataset2 = ckan_factories.Dataset() + object_2_ = factories.HarvestObjectObj(job=job2, source=source2, + package_id=dataset2['id']) + # execute context = {'session': model.Session, 'ignore_auth': True, 'user': ''} @@ -266,13 +275,19 @@ def 
test_harvest_source_job_history_clear(self): # verify assert result == {'id': source.id} - source = harvest_model.HarvestSource.get(source.id) - assert source + assert harvest_model.HarvestSource.get(source.id) assert harvest_model.HarvestJob.get(job.id) is None assert harvest_model.HarvestObject.get(object_.id) is None dataset_from_db = model.Package.get(dataset['id']) - assert dataset_from_db, 'is None' + assert dataset_from_db assert dataset_from_db.id == dataset['id'] + # source2 and related objects are untouched + assert harvest_model.HarvestSource.get(source2.id) + assert harvest_model.HarvestJob.get(job2.id) + assert harvest_model.HarvestObject.get(object_2_.id) + dataset_from_db_2 = model.Package.get(dataset2['id']) + assert dataset_from_db_2 + assert dataset_from_db_2.id == dataset2['id'] def test_harvest_sources_job_history_clear(self): # prepare @@ -300,21 +315,198 @@ def test_harvest_sources_job_history_clear(self): # verify assert sorted(result, key=lambda item: item['id']) == sorted( [{'id': source_1.id}, {'id': source_2.id}], key=lambda item: item['id']) - source_1 = harvest_model.HarvestSource.get(source_1.id) - assert source_1 + assert harvest_model.HarvestSource.get(source_1.id) assert harvest_model.HarvestJob.get(job_1.id) is None assert harvest_model.HarvestObject.get(object_1_.id) is None dataset_from_db_1 = model.Package.get(dataset_1['id']) - assert dataset_from_db_1, 'is None' + assert dataset_from_db_1 assert dataset_from_db_1.id == dataset_1['id'] - source_2 = harvest_model.HarvestSource.get(source_1.id) - assert source_2 + assert harvest_model.HarvestSource.get(source_2.id) assert harvest_model.HarvestJob.get(job_2.id) is None assert harvest_model.HarvestObject.get(object_2_.id) is None dataset_from_db_2 = model.Package.get(dataset_2['id']) - assert dataset_from_db_2, 'is None' + assert dataset_from_db_2 assert dataset_from_db_2.id == dataset_2['id'] + def test_harvest_sources_job_history_clear_keep_actual(self): + # prepare + data_dict = 
SOURCE_DICT.copy() + source_1 = factories.HarvestSourceObj(**data_dict) + data_dict['name'] = 'another-source' + data_dict['url'] = 'http://another-url' + source_2 = factories.HarvestSourceObj(**data_dict) + + job_1 = factories.HarvestJobObj(source=source_1) + dataset_1 = ckan_factories.Dataset() + object_1_ = factories.HarvestObjectObj(job=job_1, source=source_1, + package_id=dataset_1['id']) + + job_2 = factories.HarvestJobObj(source=source_2) + # creating harvest_object with empty package_id + object_2_ = factories.HarvestObjectObj(job=job_2, source=source_2, + package_id=None) + + setattr(object_1_, 'report_status', 'added') + setattr(object_1_, 'current', True) + model.Session.commit() + + # execute + context = {'model': model, 'session': model.Session, + 'ignore_auth': True, 'user': ''} + result = get_action('harvest_sources_job_history_clear')( + context, {'keep_actual': True}) + + # verify + assert sorted(result, key=lambda item: item['id']) == sorted( + [{'id': source_1.id}, {'id': source_2.id}], key=lambda item: item['id']) + + # dataset, related source, object and job still persist! 
+ assert harvest_model.HarvestSource.get(source_1.id) + assert harvest_model.HarvestJob.get(job_1.id) + assert harvest_model.HarvestObject.get(object_1_.id) + dataset_from_db_1 = model.Package.get(dataset_1['id']) + assert dataset_from_db_1 + assert dataset_from_db_1.id == dataset_1['id'] + + # second source persist, but job and object was deleted + assert harvest_model.HarvestSource.get(source_2.id) + assert not harvest_model.HarvestJob.get(job_2.id) + assert not harvest_model.HarvestObject.get(object_2_.id) + + def test_harvest_source_job_history_clear_keep_actual(self): + # prepare + source = factories.HarvestSourceObj(**SOURCE_DICT.copy()) + job = factories.HarvestJobObj(source=source) + dataset = ckan_factories.Dataset() + object_ = factories.HarvestObjectObj(job=job, source=source, + package_id=dataset['id']) + + data_dict = SOURCE_DICT.copy() + data_dict['name'] = 'another-source' + data_dict['url'] = 'http://another-url' + source2 = factories.HarvestSourceObj(**data_dict) + job2 = factories.HarvestJobObj(source=source2) + dataset2 = ckan_factories.Dataset() + object_2_ = factories.HarvestObjectObj(job=job2, source=source2, + package_id=dataset2['id']) + + setattr(object_, 'report_status', 'added') + setattr(object_, 'current', True) + model.Session.commit() + + # execute + context = {'model': model, 'session': model.Session, + 'ignore_auth': True, 'user': ''} + result = get_action('harvest_source_job_history_clear')( + context, {'id': source.id, 'keep_actual': True}) + + # verify + assert result == {'id': source.id} + assert harvest_model.HarvestSource.get(source.id) + assert harvest_model.HarvestJob.get(job.id) + assert harvest_model.HarvestObject.get(object_.id) + dataset_from_db = model.Package.get(dataset['id']) + assert dataset_from_db + assert dataset_from_db.id == dataset['id'] + # source2 and related objects are untouched + assert harvest_model.HarvestSource.get(source2.id) + assert harvest_model.HarvestJob.get(job2.id) + assert 
harvest_model.HarvestObject.get(object_2_.id) + dataset_from_db_2 = model.Package.get(dataset2['id']) + assert dataset_from_db_2 + assert dataset_from_db_2.id == dataset2['id'] + + def test_harvest_source_job_history_clear_keep_actual_finished_jobs(self): + # prepare + source = factories.HarvestSourceObj(**SOURCE_DICT.copy()) + job = factories.HarvestJobObj(source=source) + setattr(job, 'status', 'Finished') + setattr(job, 'finished', datetime.datetime.utcnow()-datetime.timedelta(days=2)) + + dataset = ckan_factories.Dataset() + object_ = factories.HarvestObjectObj(job=job, source=source, + package_id=dataset['id']) + + job2 = factories.HarvestJobObj(source=source) + setattr(job2, 'finished', datetime.datetime.utcnow()-datetime.timedelta(days=1)) + setattr(job2, 'status', 'Finished') + dataset2 = ckan_factories.Dataset() + object_2_ = factories.HarvestObjectObj(job=job2, source=source, + package_id=dataset2['id']) + setattr(object_2_, 'current', True) + model.Session.commit() + + # execute + context = {'model': model, 'session': model.Session, + 'ignore_auth': True, 'user': ''} + result = get_action('harvest_source_job_history_clear')( + context, {'id': source.id, 'keep_actual': True}) + + # verify + assert result == {'id': source.id} + assert harvest_model.HarvestSource.get(source.id) + assert not harvest_model.HarvestJob.get(job.id) + assert not harvest_model.HarvestObject.get(object_.id) + dataset_from_db = model.Package.get(dataset['id']) + assert dataset_from_db + assert dataset_from_db.id == dataset['id'] + # job2 and related objects are untouched + assert harvest_model.HarvestJob.get(job2.id) + assert harvest_model.HarvestObject.get(object_2_.id) + dataset_from_db_2 = model.Package.get(dataset2['id']) + assert dataset_from_db_2 + assert dataset_from_db_2.id == dataset2['id'] + + def test_harvest_source_job_history_clear_keep_actual_running_job(self): + # Both jobs contain current objects + # prepare + source = factories.HarvestSourceObj(**SOURCE_DICT.copy()) 
+ job1 = factories.HarvestJobObj(source=source) + setattr(job1, 'status', 'Finished') + setattr(job1, 'finished', datetime.datetime.utcnow()-datetime.timedelta(days=1)) + + dataset1 = ckan_factories.Dataset() + dataset2 = ckan_factories.Dataset() + + object_1_ = factories.HarvestObjectObj(job=job1, source=source, + package_id=dataset1['id']) + setattr(object_1_, 'current', False) + object_2_ = factories.HarvestObjectObj(job=job1, source=source, + package_id=dataset2['id']) + setattr(object_2_, 'current', True) + + job2 = factories.HarvestJobObj(source=source) + setattr(job2, 'status', 'Running') + object_3_ = factories.HarvestObjectObj(job=job2, source=source, + package_id=dataset1['id']) + setattr(object_3_, 'current', True) + object_4_ = factories.HarvestObjectObj(job=job2, source=source, + package_id=dataset2['id']) + setattr(object_4_, 'current', False) + model.Session.commit() + + # execute + context = {'model': model, 'session': model.Session, + 'ignore_auth': True, 'user': ''} + result = get_action('harvest_source_job_history_clear')( + context, {'id': source.id, 'keep_actual': True}) + + # verify that both jobs still exists + assert result == {'id': source.id} + assert harvest_model.HarvestSource.get(source.id) + assert harvest_model.HarvestJob.get(job1.id) + assert harvest_model.HarvestObject.get(object_1_.id) + assert harvest_model.HarvestObject.get(object_2_.id) + dataset_from_db = model.Package.get(dataset1['id']) + assert dataset_from_db + assert dataset_from_db.id == dataset1['id'] + assert harvest_model.HarvestJob.get(job2.id) + assert harvest_model.HarvestObject.get(object_3_.id) + assert harvest_model.HarvestObject.get(object_4_.id) + dataset_from_db_2 = model.Package.get(dataset2['id']) + assert dataset_from_db_2 + assert dataset_from_db_2.id == dataset2['id'] + def test_harvest_abort_failed_jobs_without_failed_jobs(self): # prepare data_dict = SOURCE_DICT.copy() diff --git a/ckanext/harvest/utils.py b/ckanext/harvest/utils.py index 
785505d31..d2e88f64b 100644 --- a/ckanext/harvest/utils.py +++ b/ckanext/harvest/utils.py @@ -206,7 +206,7 @@ def clear_harvest_source(source_id_or_name): tk.get_action("harvest_source_clear")(context, {"id": source["id"]}) -def clear_harvest_source_history(source_id): +def clear_harvest_source_history(source_id, keep_actual): context = { "model": model, @@ -215,15 +215,19 @@ def clear_harvest_source_history(source_id): } if source_id is not None: tk.get_action("harvest_source_job_history_clear")(context, { - "id": source_id + "id": source_id, + "keep_actual": keep_actual }) return "Cleared job history of harvest source: {0}".format(source_id) else: # Purge queues, because we clean all harvest jobs and # objects in the database. - purge_queues() + if not keep_actual: + purge_queues() cleared_sources_dicts = tk.get_action( - "harvest_sources_job_history_clear")(context, {}) + "harvest_sources_job_history_clear")(context, { + "keep_actual": keep_actual + }) return "Cleared job history for all harvest sources: {0} source(s)".format( len(cleared_sources_dicts)) From d2b7340509db9b0989d129034443328090816e6e Mon Sep 17 00:00:00 2001 From: seitenbau-govdata Date: Wed, 22 Dec 2021 10:39:18 +0100 Subject: [PATCH 2/2] Rename keep-actual to keep-current and updated documentation --- README.rst | 4 +++- ckanext/harvest/cli.py | 6 +++--- ckanext/harvest/commands/harvester.py | 12 +++++++----- ckanext/harvest/logic/action/update.py | 8 ++++---- ckanext/harvest/tests/test_action.py | 16 ++++++++-------- ckanext/harvest/utils.py | 8 ++++---- 6 files changed, 29 insertions(+), 25 deletions(-) diff --git a/README.rst b/README.rst index 6264fc98d..091446407 100644 --- a/README.rst +++ b/README.rst @@ -242,7 +242,7 @@ The following operations can be run from the command line as described underneat - clears all datasets, jobs and objects related to a harvest source, but keeps the source itself - harvester clearsource-history [{source-id}] + harvester clearsource-history [{source-id}] 
[-k] - If no source id is given the history for all harvest sources (maximum is 1000) will be cleared. Clears all jobs and objects related to a harvest source, but keeps the source @@ -250,6 +250,8 @@ The following operations can be run from the command line as described underneat If a source id is given, it only clears the history of the harvest source with the given source id. + Use the -k option to keep running jobs and jobs that still have current harvest objects, together with their objects. + harvester sources [all] - lists harvest sources If 'all' is defined, it also shows the Inactive sources diff --git a/ckanext/harvest/cli.py b/ckanext/harvest/cli.py index 5de36551b..20a750447 100644 --- a/ckanext/harvest/cli.py +++ b/ckanext/harvest/cli.py @@ -110,11 +110,11 @@ def clear(ctx, id): @click.argument(u"id", metavar=u"SOURCE_ID_OR_NAME", required=False) @click.option( "-k", - "--keep-actual", + "--keep-current", default=False ) @click.pass_context -def clear_history(ctx, id, keep_actual): +def clear_history(ctx, id, keep_current): """If no source id is given the history for all harvest sources (maximum is 1000) will be cleared. @@ -127,7 +127,7 @@ def clear_history(ctx, id, keep_actual): flask_app = ctx.meta["flask_app"] with flask_app.test_request_context(): - result = utils.clear_harvest_source_history(id, bool(keep_actual)) + result = utils.clear_harvest_source_history(id, bool(keep_current)) click.secho(result, fg="green") diff --git a/ckanext/harvest/commands/harvester.py b/ckanext/harvest/commands/harvester.py index 49b35e94a..57f1390e8 100644 --- a/ckanext/harvest/commands/harvester.py +++ b/ckanext/harvest/commands/harvester.py @@ -34,12 +34,14 @@ class Harvester(CkanCommand): - clears all datasets, jobs and objects related to a harvest source, but keeps the source itself - harvester clearsource_history [{source-id}] + harvester clearsource_history [{source-id}] [-k] - If no source id is given the history for all harvest sources (maximum is 1000) will be cleared. 
Clears all jobs and objects related to a harvest source, but keeps the source itself. The datasets imported from the harvest source will NOT be deleted!!! If a source id is given, it only clears the history of the harvest source with the given source id. + To keep the currently active jobs use the -k option. + harvester sources [all] - lists harvest sources If 'all' is defined, it also shows the Inactive sources @@ -192,8 +194,8 @@ def __init__(self, name): self.parser.add_option( "-k", - "--keep-actual", - dest="keep_actual", + "--keep-current", + dest="keep_current", default=False, help="Do not delete relevant harvest objects", ) @@ -324,12 +326,12 @@ def create_harvest_source(self): print(result) def clear_harvest_source_history(self): - keep_actual = bool(self.options.keep_actual) + keep_current = bool(self.options.keep_current) source_id = None if len(self.args) >= 2: source_id = six.text_type(self.args[1]) - print(utils.clear_harvest_source_history(source_id, keep_actual)) + print(utils.clear_harvest_source_history(source_id, keep_current)) def show_harvest_source(self): diff --git a/ckanext/harvest/logic/action/update.py b/ckanext/harvest/logic/action/update.py index d2d967845..d492d6850 100644 --- a/ckanext/harvest/logic/action/update.py +++ b/ckanext/harvest/logic/action/update.py @@ -322,7 +322,7 @@ def harvest_sources_job_history_clear(context, data_dict): ''' check_access('harvest_sources_clear', context, data_dict) - keep_actual = data_dict.get('keep_actual', False) + keep_current = data_dict.get('keep_current', False) job_history_clear_results = [] # We assume that the maximum of 1000 (hard limit) rows should be enough @@ -332,7 +332,7 @@ def harvest_sources_job_history_clear(context, data_dict): for data_dict in harvest_packages: try: clear_result = get_action('harvest_source_job_history_clear')( - context, {'id': data_dict['id'], 'keep_actual': keep_actual}) + context, {'id': data_dict['id'], 'keep_current': keep_current}) 
job_history_clear_results.append(clear_result) except NotFound: # Ignoring not existent harvest sources because of a possibly corrupt search index @@ -355,7 +355,7 @@ def harvest_source_job_history_clear(context, data_dict): check_access('harvest_source_clear', context, data_dict) harvest_source_id = data_dict.get('id', None) - keep_actual = data_dict.get('keep_actual', False) + keep_current = data_dict.get('keep_current', False) source = HarvestSource.get(harvest_source_id) if not source: @@ -366,7 +366,7 @@ def harvest_source_job_history_clear(context, data_dict): model = context['model'] - if keep_actual: + if keep_current: sql = '''BEGIN; DELETE FROM harvest_object_error WHERE harvest_object_id IN (SELECT id FROM harvest_object AS obj WHERE harvest_source_id = '{harvest_source_id}' diff --git a/ckanext/harvest/tests/test_action.py b/ckanext/harvest/tests/test_action.py index fa3019a21..2c5b2b61c 100644 --- a/ckanext/harvest/tests/test_action.py +++ b/ckanext/harvest/tests/test_action.py @@ -328,7 +328,7 @@ def test_harvest_sources_job_history_clear(self): assert dataset_from_db_2 assert dataset_from_db_2.id == dataset_2['id'] - def test_harvest_sources_job_history_clear_keep_actual(self): + def test_harvest_sources_job_history_clear_keep_current(self): # prepare data_dict = SOURCE_DICT.copy() source_1 = factories.HarvestSourceObj(**data_dict) @@ -354,7 +354,7 @@ def test_harvest_sources_job_history_clear_keep_actual(self): context = {'model': model, 'session': model.Session, 'ignore_auth': True, 'user': ''} result = get_action('harvest_sources_job_history_clear')( - context, {'keep_actual': True}) + context, {'keep_current': True}) # verify assert sorted(result, key=lambda item: item['id']) == sorted( @@ -373,7 +373,7 @@ def test_harvest_sources_job_history_clear_keep_actual(self): assert not harvest_model.HarvestJob.get(job_2.id) assert not harvest_model.HarvestObject.get(object_2_.id) - def test_harvest_source_job_history_clear_keep_actual(self): + def 
test_harvest_source_job_history_clear_keep_current(self): # prepare source = factories.HarvestSourceObj(**SOURCE_DICT.copy()) job = factories.HarvestJobObj(source=source) @@ -398,7 +398,7 @@ def test_harvest_source_job_history_clear_keep_actual(self): context = {'model': model, 'session': model.Session, 'ignore_auth': True, 'user': ''} result = get_action('harvest_source_job_history_clear')( - context, {'id': source.id, 'keep_actual': True}) + context, {'id': source.id, 'keep_current': True}) # verify assert result == {'id': source.id} @@ -416,7 +416,7 @@ def test_harvest_source_job_history_clear_keep_actual(self): assert dataset_from_db_2 assert dataset_from_db_2.id == dataset2['id'] - def test_harvest_source_job_history_clear_keep_actual_finished_jobs(self): + def test_harvest_source_job_history_clear_keep_current_finished_jobs(self): # prepare source = factories.HarvestSourceObj(**SOURCE_DICT.copy()) job = factories.HarvestJobObj(source=source) @@ -440,7 +440,7 @@ def test_harvest_source_job_history_clear_keep_actual_finished_jobs(self): context = {'model': model, 'session': model.Session, 'ignore_auth': True, 'user': ''} result = get_action('harvest_source_job_history_clear')( - context, {'id': source.id, 'keep_actual': True}) + context, {'id': source.id, 'keep_current': True}) # verify assert result == {'id': source.id} @@ -457,7 +457,7 @@ def test_harvest_source_job_history_clear_keep_actual_finished_jobs(self): assert dataset_from_db_2 assert dataset_from_db_2.id == dataset2['id'] - def test_harvest_source_job_history_clear_keep_actual_running_job(self): + def test_harvest_source_job_history_clear_keep_current_running_job(self): # Both jobs contain current objects # prepare source = factories.HarvestSourceObj(**SOURCE_DICT.copy()) @@ -489,7 +489,7 @@ def test_harvest_source_job_history_clear_keep_actual_running_job(self): context = {'model': model, 'session': model.Session, 'ignore_auth': True, 'user': ''} result = 
get_action('harvest_source_job_history_clear')( - context, {'id': source.id, 'keep_actual': True}) + context, {'id': source.id, 'keep_current': True}) # verify that both jobs still exists assert result == {'id': source.id} diff --git a/ckanext/harvest/utils.py b/ckanext/harvest/utils.py index d2e88f64b..7acaaea59 100644 --- a/ckanext/harvest/utils.py +++ b/ckanext/harvest/utils.py @@ -206,7 +206,7 @@ def clear_harvest_source(source_id_or_name): tk.get_action("harvest_source_clear")(context, {"id": source["id"]}) -def clear_harvest_source_history(source_id, keep_actual): +def clear_harvest_source_history(source_id, keep_current): context = { "model": model, @@ -216,17 +216,17 @@ def clear_harvest_source_history(source_id, keep_actual): if source_id is not None: tk.get_action("harvest_source_job_history_clear")(context, { "id": source_id, - "keep_actual": keep_actual + "keep_current": keep_current }) return "Cleared job history of harvest source: {0}".format(source_id) else: # Purge queues, because we clean all harvest jobs and # objects in the database. - if not keep_actual: + if not keep_current: purge_queues() cleared_sources_dicts = tk.get_action( "harvest_sources_job_history_clear")(context, { - "keep_actual": keep_actual + "keep_current": keep_current }) return "Cleared job history for all harvest sources: {0} source(s)".format( len(cleared_sources_dicts))