From 161776349f1e0a885eb66ce8f47861d7202159ac Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Fri, 20 Sep 2024 14:12:32 -0700 Subject: [PATCH] move setupslice after cgroupsv2 check, remove unit file for log collector and remove fiirewall daemon-reload (#3223) * move daemon reload * test fix * UT test * firewall daemon-reload * address comments * address comments (cherry picked from commit 47e969a87b7c740e2e4dd057608bf2dd2e77dacd) --- azurelinuxagent/ga/cgroupconfigurator.py | 30 ++++++-------------- azurelinuxagent/ga/persist_firewall_rules.py | 10 +------ tests/ga/test_cgroupconfigurator.py | 18 ++++-------- tests/ga/test_persist_firewall_rules.py | 11 ------- tests/lib/mock_command.py | 10 +++++-- tests/lib/mock_environment.py | 5 ++++ 6 files changed, 29 insertions(+), 55 deletions(-) diff --git a/azurelinuxagent/ga/cgroupconfigurator.py b/azurelinuxagent/ga/cgroupconfigurator.py index efb00d481..22634bb64 100644 --- a/azurelinuxagent/ga/cgroupconfigurator.py +++ b/azurelinuxagent/ga/cgroupconfigurator.py @@ -68,16 +68,6 @@ LOGCOLLECTOR_SLICE = "azure-walinuxagent-logcollector.slice" # More info on resource limits properties in systemd here: # https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/resource_management_guide/sec-modifying_control_groups -_LOGCOLLECTOR_SLICE_CONTENTS_FMT = """ -[Unit] -Description=Slice for Azure VM Agent Periodic Log Collector -DefaultDependencies=no -Before=slices.target -[Slice] -CPUAccounting=yes -CPUQuota={cpu_quota} -MemoryAccounting=yes -""" LOGCOLLECTOR_CPU_QUOTA_FOR_V1_AND_V2 = "5%" LOGCOLLECTOR_MEMORY_THROTTLE_LIMIT_FOR_V2 = "170M" LOGCOLLECTOR_MAX_THROTTLED_EVENTS_FOR_V2 = 10 @@ -181,6 +171,11 @@ def initialize(self): log_cgroup_warning("Unable to determine which cgroup version to use: {0}".format(ustr(e)), send_event=True) return + # TODO: Move this and systemd system check to cgroups_supported logic above + if self.using_cgroup_v2(): + log_cgroup_info("Agent and extensions resource monitoring is not currently supported on cgroup v2") + return + # We check the agent unit 'Slice' property before setting up azure.slice. This check is done first # because the agent's Slice unit property will be 'azure.slice' if the slice drop-in file exists, even # though systemd has not moved the agent to azure.slice yet. Systemd will only move the agent to @@ -192,18 +187,12 @@ def initialize(self): return # Notes about slice setup: - # 1. On first agent update (for machines where daemon version did not already create azure.slice), the + # On first agent update (for machines where daemon version did not already create azure.slice), the # agent creates azure.slice and the agent unit Slice drop-in file, but systemd does not move the agent # unit to azure.slice until service restart. It is ok to enable cgroup usage in this case if agent is # running in system.slice. - # 2. We setup the slices before v2 check. Cgroup v2 usage is disabled for agent and extensions, but - # can be enabled for log collector in waagent.conf. The log collector slice should be created in case - # v2 usage is enabled for log collector. - self.__setup_azure_slice() - if self.using_cgroup_v2(): - log_cgroup_info("Agent and extensions resource monitoring is not currently supported on cgroup v2") - return + self.__setup_azure_slice() # Log mount points/root paths for cgroup controllers self._cgroups_api.log_root_paths() @@ -295,9 +284,8 @@ def __setup_azure_slice(): if not os.path.exists(vmextensions_slice): files_to_create.append((vmextensions_slice, _VMEXTENSIONS_SLICE_CONTENTS)) - # Update log collector slice contents - slice_contents = _LOGCOLLECTOR_SLICE_CONTENTS_FMT.format(cpu_quota=LOGCOLLECTOR_CPU_QUOTA_FOR_V1_AND_V2) - files_to_create.append((logcollector_slice, slice_contents)) + # New agent will setup limits for scope instead slice, so removing existing logcollector slice. + CGroupConfigurator._Impl.__cleanup_unit_file(logcollector_slice) if fileutil.findre_in_file(agent_unit_file, r"Slice=") is not None: CGroupConfigurator._Impl.__cleanup_unit_file(agent_drop_in_file_slice) diff --git a/azurelinuxagent/ga/persist_firewall_rules.py b/azurelinuxagent/ga/persist_firewall_rules.py index a20e2874a..e7c8373ec 100644 --- a/azurelinuxagent/ga/persist_firewall_rules.py +++ b/azurelinuxagent/ga/persist_firewall_rules.py @@ -199,8 +199,7 @@ def _setup_network_setup_service(self): # Create unit file with default values self.__set_service_unit_file() - # Reload systemd configurations when we setup the service for the first time to avoid systemctl warnings - self.__reload_systemd_conf() + # After modifying the service, systemctl may issue a warning when checking the service, and daemon-reload should not be used to clear the warning, since it can affect other services logger.info("Successfully added and enabled the {0}".format(self._network_setup_service_name)) def __setup_binary_file(self): @@ -297,13 +296,6 @@ def __log_network_setup_service_logs(self): message=msg, log_event=False) - def __reload_systemd_conf(self): - try: - logger.info("Executing systemctl daemon-reload for setting up {0}".format(self._network_setup_service_name)) - shellutil.run_command(["systemctl", "daemon-reload"]) - except Exception as exception: - logger.warn("Unable to reload systemctl configurations: {0}".format(ustr(exception))) - def __get_unit_file_version(self): if not os.path.exists(self.get_service_file_path()): raise OSError("{0} not found".format(self.get_service_file_path())) diff --git a/tests/ga/test_cgroupconfigurator.py b/tests/ga/test_cgroupconfigurator.py index 9af0d88d7..1ea7d9325 100644 --- a/tests/ga/test_cgroupconfigurator.py +++ b/tests/ga/test_cgroupconfigurator.py @@ -221,26 +221,20 @@ def test_initialize_should_create_unit_files_when_the_agent_service_file_is_not_ self.assertTrue(os.path.exists(agent_drop_in_file_cpu_accounting), "{0} was not created".format(agent_drop_in_file_cpu_accounting)) self.assertTrue(os.path.exists(agent_drop_in_file_memory_accounting), "{0} was not created".format(agent_drop_in_file_memory_accounting)) - def test_initialize_should_update_logcollector_memorylimit(self): + def test_initialize_should_clear_logcollector_slice(self): with self._get_cgroup_configurator(initialize=False) as configurator: log_collector_unit_file = configurator.mocks.get_mapped_path(UnitFilePaths.logcollector) - original_memory_limit = "MemoryLimit=30M" - # The mock creates the slice unit file with memory limit + # The mock creates the slice unit file configurator.mocks.add_data_file(os.path.join(data_dir, 'init', "azure-walinuxagent-logcollector.slice"), UnitFilePaths.logcollector) - if not os.path.exists(log_collector_unit_file): - raise Exception("{0} should have been created during test setup".format(log_collector_unit_file)) - if not fileutil.findre_in_file(log_collector_unit_file, original_memory_limit): - raise Exception("MemoryLimit was not set correctly. Expected: {0}. Got:\n{1}".format( - original_memory_limit, fileutil.read_file(log_collector_unit_file))) + + self.assertTrue(os.path.exists(log_collector_unit_file), "{0} was not created".format(log_collector_unit_file)) configurator.initialize() - # initialize() should update the unit file to remove the memory limit - self.assertFalse(fileutil.findre_in_file(log_collector_unit_file, original_memory_limit), - "Log collector slice unit file was not updated correctly. Expected no memory limit. Got:\n{0}".format( - fileutil.read_file(log_collector_unit_file))) + # initialize() should remove the unit file + self.assertFalse(os.path.exists(log_collector_unit_file), "{0} should not have been created".format(log_collector_unit_file)) def test_setup_extension_slice_should_create_unit_files(self): with self._get_cgroup_configurator() as configurator: diff --git a/tests/ga/test_persist_firewall_rules.py b/tests/ga/test_persist_firewall_rules.py index adcf43b75..7754f1efb 100644 --- a/tests/ga/test_persist_firewall_rules.py +++ b/tests/ga/test_persist_firewall_rules.py @@ -127,13 +127,6 @@ def __assert_systemctl_called(self, cmd="enable", validate_command_called=True): else: self.assertNotIn(systemctl_command, self.__executed_commands, "Systemctl command {0} found".format(cmd)) - def __assert_systemctl_reloaded(self, validate_command_called=True): - systemctl_reload = ["systemctl", "daemon-reload"] - if validate_command_called: - self.assertIn(systemctl_reload, self.__executed_commands, "Systemctl config not reloaded") - else: - self.assertNotIn(systemctl_reload, self.__executed_commands, "Systemctl config reloaded") - def __assert_firewall_cmd_running_called(self, validate_command_called=True): cmd = PersistFirewallRulesHandler._FIREWALLD_RUNNING_CMD if validate_command_called: @@ -144,7 +137,6 @@ def __assert_firewall_cmd_running_called(self, validate_command_called=True): def __assert_network_service_setup_properly(self): self.__assert_systemctl_called(cmd="is-enabled", validate_command_called=True) self.__assert_systemctl_called(cmd="enable", validate_command_called=True) - self.__assert_systemctl_reloaded() self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.PassThrough, validate_command_called=False) self.assertTrue(os.path.exists(self._network_service_unit_file), "Service unit file should be there") self.assertTrue(os.path.exists(self._binary_file), "Binary file should be there") @@ -200,7 +192,6 @@ def __setup_and_assert_network_service_setup_scenario(self, handler, mock_popen= self.__assert_systemctl_called(cmd="is-enabled", validate_command_called=True) self.__assert_systemctl_called(cmd="enable", validate_command_called=True) - self.__assert_systemctl_reloaded(validate_command_called=True) self.__assert_firewall_cmd_running_called(validate_command_called=True) self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.QueryPassThrough, validate_command_called=False) self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.RemovePassThrough, validate_command_called=False) @@ -234,7 +225,6 @@ def test_it_should_skip_setup_if_agent_network_setup_service_already_enabled_and self.__assert_systemctl_called(cmd="is-enabled", validate_command_called=True) self.__assert_systemctl_called(cmd="enable", validate_command_called=False) - self.__assert_systemctl_reloaded(validate_command_called=False) self.__assert_firewall_cmd_running_called(validate_command_called=True) self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.QueryPassThrough, validate_command_called=False) self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.RemovePassThrough, validate_command_called=False) @@ -396,7 +386,6 @@ def test_it_should_delete_custom_service_files_if_firewalld_enabled(self): self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.PassThrough, validate_command_called=True) self.__assert_systemctl_called(cmd="is-enabled", validate_command_called=False) self.__assert_systemctl_called(cmd="enable", validate_command_called=False) - self.__assert_systemctl_reloaded(validate_command_called=False) self.assertFalse(os.path.exists(handler.get_service_file_path()), "Service unit file found") self.assertFalse(os.path.exists(os.path.join(conf.get_lib_dir(), handler.BINARY_FILE_NAME)), "Binary file found") diff --git a/tests/lib/mock_command.py b/tests/lib/mock_command.py index e181d26d9..83509c3d3 100755 --- a/tests/lib/mock_command.py +++ b/tests/lib/mock_command.py @@ -2,12 +2,18 @@ import os import sys -if len(sys.argv) != 4: +if len(sys.argv) < 4: sys.stderr.write("usage: {0} ".format(os.path.basename(__file__))) # W0632: Possible unbalanced tuple unpacking with sequence: left side has 3 label(s), right side has 0 value(s) (unbalanced-tuple-unpacking) # Disabled: Unpacking is balanced: there is a check for the length on line 5 -stdout, return_value, stderr = sys.argv[1:] # pylint: disable=W0632 + +# This script will be used for mocking cgroups commands in test, when popen called this script will be executed instead of actual commands +# We pass stdout, return_value, stderr of the mocked command output as arguments to this script and this script will print them to stdout, stderr and exit with the return value +# So that popen gets the output of the mocked command. Ideally we should get 4 arguments in sys.argv, first one is the script name, next 3 are the actual command output +# But somehow when we run the tests from pycharm, it adds extra arguments next to the script name, so we need to handle that when reading the arguments +# ex: /home/nag/Documents/repos/WALinuxAgent/tests/lib/mock_command.py /snap/pycharm-professional/412/plugins/python-ce/helpers/py... +BLKID +ELFUTILS +KMOD -IDN2 +IDN -PCRE2 default-hierarchy=hybrid\n 0 +stdout, return_value, stderr = sys.argv[-3:] # pylint: disable=W0632 if stdout != '': sys.stdout.write(stdout) diff --git a/tests/lib/mock_environment.py b/tests/lib/mock_environment.py index 8f5682cf8..5b7209358 100644 --- a/tests/lib/mock_environment.py +++ b/tests/lib/mock_environment.py @@ -76,12 +76,14 @@ def __init__(self, tmp_dir, commands=None, paths=None, files=None, data_files=No self._original_popen = subprocess.Popen self._original_mkdir = fileutil.mkdir self._original_path_exists = os.path.exists + self._original_os_remove = os.remove self._original_open = open self.patchers = [ patch_builtin("open", side_effect=self._mock_open), patch("subprocess.Popen", side_effect=self._mock_popen), patch("os.path.exists", side_effect=self._mock_path_exists), + patch("os.remove", side_effect=self._mock_os_remove), patch("azurelinuxagent.common.utils.fileutil.mkdir", side_effect=self._mock_mkdir) ] @@ -166,3 +168,6 @@ def _mock_open(self, path, *args, **kwargs): def _mock_path_exists(self, path): return self._original_path_exists(self.get_mapped_path(path)) + def _mock_os_remove(self, path): + return self._original_os_remove(self.get_mapped_path(path)) +