Skip to content

Commit

Permalink
Merge pull request #68 from IBM/develop
Browse files Browse the repository at this point in the history
* Changed top level execption catching from catching only our own ValueErrors to any execptions - preventing a whole abort of SPPMon if something unexpected happens. This will reduce the need of urgent hotfixes like this one. 
* Changed typings from critical components to support better linting

- Hotfixes SPPMon storages request to fail due `free` or `total` beeing none.
- Changes empty result severity of REST-Requests from error to info
  • Loading branch information
NielsKorschinsky authored Jul 11, 2021
2 parents 1402f0b + 8a31b8f commit 652c339
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 38 deletions.
72 changes: 40 additions & 32 deletions python/sppmon.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
02/10/2021 version 0.13.1 Fixes to partial send(influx), including influxdb version into stats
03/29/2021 version 0.13.2 Fixes to typing, reducing error messages and tracking code for NaN bug
06/07/2021 version 0.13.3 Hotfixing version endpoint for SPP 10.1.8.1
06/09/2021 version 0.13.4 Hotfixing storage execption, chaning top-level execption handling to reduce the need of further hotfixes
"""
from __future__ import annotations
import functools
Expand All @@ -66,7 +67,7 @@
from subprocess import CalledProcessError
import sys
import time
from typing import Any, Dict, List, NoReturn, Union
from typing import Any, Dict, List, NoReturn, Union, Optional

from influx.influx_client import InfluxClient
from sppConnection.api_queries import ApiQueries
Expand All @@ -82,7 +83,7 @@
from utils.spp_utils import SppUtils

# Version:
VERSION = "0.13.3 (2021/06/07)"
VERSION = "0.13.4 (2021/06/09)"

# ----------------------------------------------------------------------------
# command line parameter parsing
Expand Down Expand Up @@ -145,7 +146,8 @@
LOGGER_NAME = 'sppmon'
LOGGER = logging.getLogger(LOGGER_NAME)

ERROR_CODE_CMD_LINE = 2
ERROR_CODE_START_ERROR = 3
ERROR_CODE_CMD_ARGS = 2
ERROR_CODE = 1


Expand Down Expand Up @@ -261,13 +263,13 @@ class SppMon:
"""Configured spp log rentation time, logs get deleted after this time."""

# set later in each method, here to avoid missing attribute
influx_client: InfluxClient = None
rest_client: RestClient = None
api_queries: ApiQueries = None
system_methods: SystemMethods = None
job_methods: JobMethods = None
protection_methods: ProtectionMethods = None
ssh_methods: SshMethods = None
influx_client: Optional[InfluxClient] = None
rest_client: Optional[RestClient] = None
api_queries: Optional[ApiQueries] = None
system_methods: Optional[SystemMethods] = None
job_methods: Optional[JobMethods] = None
protection_methods: Optional[ProtectionMethods] = None
ssh_methods: Optional[SshMethods] = None

def __init__(self):
self.log_path: str = ""
Expand All @@ -280,7 +282,7 @@ def __init__(self):
LOGGER.info("Starting SPPMon")
if(not self.check_pid_file()):
ExceptionUtils.error_message("Another instance of sppmon with the same args is running")
self.exit(ERROR_CODE_CMD_LINE)
self.exit(ERROR_CODE_START_ERROR)

# everything is option, otherwise its a typo.
if(len(ARGS) > 0):
Expand All @@ -296,12 +298,12 @@ def __init__(self):

if(not OPTIONS.confFileJSON):
ExceptionUtils.error_message("missing config file, aborting")
self.exit(error_code=ERROR_CODE_CMD_LINE)
self.exit(error_code=ERROR_CODE_CMD_ARGS)
try:
self.config_file = SppUtils.read_conf_file(config_file_path=OPTIONS.confFileJSON)
except ValueError as error:
ExceptionUtils.exception_info(error=error, extra_message="Error when trying to read Config file, unable to read")
self.exit(error_code=ERROR_CODE_CMD_LINE)
self.exit(error_code=ERROR_CODE_START_ERROR)

LOGGER.info("Setting up configurations")
self.setup_args()
Expand Down Expand Up @@ -422,7 +424,7 @@ def set_critial_configs(self, config_file: Dict[str, Any]) -> None:
"""
if(not config_file):
ExceptionUtils.error_message("missing or empty config file, aborting")
self.exit(error_code=ERROR_CODE_CMD_LINE)
self.exit(error_code=ERROR_CODE_START_ERROR)
try:
# critical components only
self.influx_client = InfluxClient(config_file)
Expand All @@ -446,7 +448,10 @@ def set_optional_configs(self, config_file: Dict[str, Any]) -> None:

if(not config_file):
ExceptionUtils.error_message("missing or empty config file, aborting.")
self.exit(error_code=ERROR_CODE_CMD_LINE)
self.exit(error_code=ERROR_CODE_START_ERROR)
if(not self.influx_client):
ExceptionUtils.error_message("Influx client is somehow missing. aborting")
self.exit(error_code=ERROR_CODE)

# ############################ REST-API #####################################
try:
Expand Down Expand Up @@ -571,7 +576,7 @@ def setup_args(self) -> None:
if((OPTIONS.create_dashboard or bool(OPTIONS.dashboard_folder_path)) and not
(OPTIONS.create_dashboard and bool(OPTIONS.dashboard_folder_path))):
ExceptionUtils.error_message("> Using --create_dashboard without associated folder path. Aborting.")
self.exit(ERROR_CODE_CMD_LINE)
self.exit(ERROR_CODE_CMD_ARGS)

# incremental setup, higher executes all below
all_args: bool = OPTIONS.all
Expand Down Expand Up @@ -677,12 +682,15 @@ def exit(self, error_code: int = False) -> NoReturn:

# error with the command line arguments
# dont store runtime here
if(error_code == ERROR_CODE_CMD_LINE):
if(error_code == ERROR_CODE_CMD_ARGS):
prog_args = []
prog_args.append(sys.argv[0])
prog_args.append("--help")
os.execv(sys.executable, ['python'] + prog_args)
sys.exit(ERROR_CODE_CMD_LINE) # unreachable?
sys.exit(ERROR_CODE_CMD_ARGS) # unreachable?
if(error_code == ERROR_CODE_START_ERROR):
ExceptionUtils.error_message("Error when starting SPPMon. Please review the errors above")
sys.exit(ERROR_CODE_START_ERROR)

script_end_time = SppUtils.get_actual_time_sec()
LOGGER.debug("Script end time: %d", script_end_time)
Expand Down Expand Up @@ -725,7 +733,7 @@ def main(self):
try:
self.system_methods.sites()
self.influx_client.flush_insert_buffer()
except ValueError as error:
except Exception as error:
ExceptionUtils.exception_info(
error=error,
extra_message="Top-level-error when requesting sites, skipping them all")
Expand All @@ -734,7 +742,7 @@ def main(self):
try:
self.system_methods.cpuram()
self.influx_client.flush_insert_buffer()
except ValueError as error:
except Exception as error:
ExceptionUtils.exception_info(
error=error,
extra_message="Top-level-error when collecting cpu stats, skipping them all")
Expand All @@ -743,7 +751,7 @@ def main(self):
try:
self.system_methods.sppcatalog()
self.influx_client.flush_insert_buffer()
except ValueError as error:
except Exception as error:
ExceptionUtils.exception_info(
error=error,
extra_message="Top-level-error when collecting file system stats, skipping them all")
Expand All @@ -754,7 +762,7 @@ def main(self):
try:
self.job_methods.get_all_jobs()
self.influx_client.flush_insert_buffer()
except ValueError as error:
except Exception as error:
ExceptionUtils.exception_info(
error=error,
extra_message="Top-level-error when requesting jobs, skipping them all")
Expand All @@ -764,7 +772,7 @@ def main(self):
try:
self.job_methods.job_logs()
self.influx_client.flush_insert_buffer()
except ValueError as error:
except Exception as error:
ExceptionUtils.exception_info(
error=error,
extra_message="Top-level-error when requesting job logs, skipping them all")
Expand All @@ -776,7 +784,7 @@ def main(self):
try:
self.ssh_methods.ssh()
self.influx_client.flush_insert_buffer()
except ValueError as error:
except Exception as error:
ExceptionUtils.exception_info(
error=error,
extra_message="Top-level-error when excecuting ssh commands, skipping them all")
Expand All @@ -786,7 +794,7 @@ def main(self):
try:
self.protection_methods.store_vms()
self.influx_client.flush_insert_buffer()
except ValueError as error:
except Exception as error:
ExceptionUtils.exception_info(
error=error,
extra_message="Top-level-error when requesting all VMs, skipping them all")
Expand All @@ -797,7 +805,7 @@ def main(self):
self.protection_methods.vms_per_sla()
self.protection_methods.sla_dumps()
self.influx_client.flush_insert_buffer()
except ValueError as error:
except Exception as error:
ExceptionUtils.exception_info(
error=error,
extra_message="Top-level-error when requesting and computing VMs per sla, skipping them all")
Expand All @@ -807,7 +815,7 @@ def main(self):
try:
self.protection_methods.create_inventory_summary()
self.influx_client.flush_insert_buffer()
except ValueError as error:
except Exception as error:
ExceptionUtils.exception_info(
error=error,
extra_message="Top-level-error when creating inventory summary, skipping them all")
Expand All @@ -816,7 +824,7 @@ def main(self):
try:
self.protection_methods.vadps()
self.influx_client.flush_insert_buffer()
except ValueError as error:
except Exception as error:
ExceptionUtils.exception_info(
error=error,
extra_message="Top-level-error when requesting vadps, skipping them all")
Expand All @@ -825,7 +833,7 @@ def main(self):
try:
self.protection_methods.storages()
self.influx_client.flush_insert_buffer()
except ValueError as error:
except Exception as error:
ExceptionUtils.exception_info(
error=error,
extra_message="Top-level-error when collecting storages, skipping them all")
Expand All @@ -835,7 +843,7 @@ def main(self):
if(OPTIONS.copy_database):
try:
self.influx_client.copy_database(OPTIONS.copy_database)
except ValueError as error:
except Exception as error:
ExceptionUtils.exception_info(
error=error,
extra_message="Top-level-error when coping database.")
Expand All @@ -845,7 +853,7 @@ def main(self):
if(OPTIONS.test):
try:
OtherMethods.test_connection(self.influx_client, self.rest_client, self.config_file)
except ValueError as error:
except Exception as error:
ExceptionUtils.exception_info(
error=error,
extra_message="Top-level-error when testing connection.")
Expand All @@ -859,7 +867,7 @@ def main(self):
OtherMethods.create_dashboard(
dashboard_folder_path=OPTIONS.dashboard_folder_path,
database_name=self.influx_client.database.name)
except ValueError as error:
except Exception as error:
ExceptionUtils.exception_info(
error=error,
extra_message="Top-level-error when creating dashboard")
Expand Down
6 changes: 4 additions & 2 deletions python/sppmonMethods/other.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from sppmonMethods.ssh import SshMethods
from utils.methods_utils import MethodUtils
from influx.influx_client import InfluxClient
from typing import Dict, Any, List
from typing import Dict, Any, List, Optional
import logging
import os
import re
Expand All @@ -22,7 +22,7 @@
class OtherMethods:

@staticmethod
def test_connection(influx_client: InfluxClient, rest_client: RestClient, config_file: Dict[str, Any]):
def test_connection(influx_client: InfluxClient, rest_client: Optional[RestClient], config_file: Dict[str, Any]):
if(not config_file):
raise ValueError("SPPmon does not work without a config file")

Expand All @@ -49,6 +49,8 @@ def test_connection(influx_client: InfluxClient, rest_client: RestClient, config

LOGGER.info("> Testing REST-API of SPP.")
try:
if(not rest_client):
raise ValueError("Rest-client is setup. Unavailable to test it.")
rest_client.login()
(version_nr, build_nr) = rest_client.get_spp_version_build()
LOGGER.info(f">> Sucessfully connected to SPP V{version_nr}, build {build_nr}.")
Expand Down
6 changes: 4 additions & 2 deletions python/sppmonMethods/protection.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,10 @@ def storages(self) -> None:
# get calulated extra info
for row in result:
row['siteName'] = self.__system_methods.site_name_by_id(row['site'])
if('free' in row and 'total' in row
and row['free'] > 0 and row['total'] > 0):
if('free' in row and row['free'] != None and
'total' in row and row['total'] != None and
row['total'] > 0):

row['used'] = row['total'] - row['free']
row['pct_free'] = row['free'] / row['total'] * 100
row['pct_used'] = row['used'] / row['total'] * 100
Expand Down
4 changes: 2 additions & 2 deletions python/utils/methods_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,10 @@ def query_something(

LOGGER.info("> getting %s", name)

# request all Sites from SPP
# request information from SPP via the api_queries.py file
elem_list = source_func()
if(not elem_list):
ExceptionUtils.error_message(f">> No {name} are found")
LOGGER.info(f"WARNING: No {name} are returned when requesting from server")

if(rename_tuples):
for elem in elem_list:
Expand Down

0 comments on commit 652c339

Please sign in to comment.