From 161776349f1e0a885eb66ce8f47861d7202159ac Mon Sep 17 00:00:00 2001
From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com>
Date: Fri, 20 Sep 2024 14:12:32 -0700
Subject: [PATCH] move setupslice after cgroupsv2 check, remove unit file for
 log collector and remove fiirewall daemon-reload (#3223)

* move daemon reload

* test fix

* UT test

* firewall daemon-reload

* address comments

* address comments

(cherry picked from commit 47e969a87b7c740e2e4dd057608bf2dd2e77dacd)
---
 azurelinuxagent/ga/cgroupconfigurator.py     | 30 ++++++--------------
 azurelinuxagent/ga/persist_firewall_rules.py | 10 +------
 tests/ga/test_cgroupconfigurator.py          | 18 ++++--------
 tests/ga/test_persist_firewall_rules.py      | 11 -------
 tests/lib/mock_command.py                    | 10 +++++--
 tests/lib/mock_environment.py                |  5 ++++
 6 files changed, 29 insertions(+), 55 deletions(-)

diff --git a/azurelinuxagent/ga/cgroupconfigurator.py b/azurelinuxagent/ga/cgroupconfigurator.py
index efb00d481..22634bb64 100644
--- a/azurelinuxagent/ga/cgroupconfigurator.py
+++ b/azurelinuxagent/ga/cgroupconfigurator.py
@@ -68,16 +68,6 @@
 LOGCOLLECTOR_SLICE = "azure-walinuxagent-logcollector.slice"
 # More info on resource limits properties in systemd here:
 # https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/resource_management_guide/sec-modifying_control_groups
-_LOGCOLLECTOR_SLICE_CONTENTS_FMT = """
-[Unit]
-Description=Slice for Azure VM Agent Periodic Log Collector
-DefaultDependencies=no
-Before=slices.target
-[Slice]
-CPUAccounting=yes
-CPUQuota={cpu_quota}
-MemoryAccounting=yes
-"""
 LOGCOLLECTOR_CPU_QUOTA_FOR_V1_AND_V2 = "5%"
 LOGCOLLECTOR_MEMORY_THROTTLE_LIMIT_FOR_V2 = "170M"
 LOGCOLLECTOR_MAX_THROTTLED_EVENTS_FOR_V2 = 10
@@ -181,6 +171,11 @@ def initialize(self):
                     log_cgroup_warning("Unable to determine which cgroup version to use: {0}".format(ustr(e)), send_event=True)
                     return
 
+                # TODO: Move this and systemd system check to cgroups_supported logic above
+                if self.using_cgroup_v2():
+                    log_cgroup_info("Agent and extensions resource monitoring is not currently supported on cgroup v2")
+                    return
+
                 # We check the agent unit 'Slice' property before setting up azure.slice. This check is done first
                 # because the agent's Slice unit property will be 'azure.slice' if the slice drop-in file exists, even
                 # though systemd has not moved the agent to azure.slice yet. Systemd will only move the agent to
@@ -192,18 +187,12 @@ def initialize(self):
                     return
 
                 # Notes about slice setup:
-                #   1. On first agent update (for machines where daemon version did not already create azure.slice), the
+                #   On first agent update (for machines where daemon version did not already create azure.slice), the
                 #   agent creates azure.slice and the agent unit Slice drop-in file, but systemd does not move the agent
                 #   unit to azure.slice until service restart. It is ok to enable cgroup usage in this case if agent is
                 #   running in system.slice.
-                #   2. We setup the slices before v2 check. Cgroup v2 usage is disabled for agent and extensions, but
-                #   can be enabled for log collector in waagent.conf. The log collector slice should be created in case
-                #   v2 usage is enabled for log collector.
-                self.__setup_azure_slice()
 
-                if self.using_cgroup_v2():
-                    log_cgroup_info("Agent and extensions resource monitoring is not currently supported on cgroup v2")
-                    return
+                self.__setup_azure_slice()
 
                 # Log mount points/root paths for cgroup controllers
                 self._cgroups_api.log_root_paths()
@@ -295,9 +284,8 @@ def __setup_azure_slice():
             if not os.path.exists(vmextensions_slice):
                 files_to_create.append((vmextensions_slice, _VMEXTENSIONS_SLICE_CONTENTS))
 
-            # Update log collector slice contents
-            slice_contents = _LOGCOLLECTOR_SLICE_CONTENTS_FMT.format(cpu_quota=LOGCOLLECTOR_CPU_QUOTA_FOR_V1_AND_V2)
-            files_to_create.append((logcollector_slice, slice_contents))
+            # New agent will setup limits for scope instead slice, so removing existing logcollector slice.
+            CGroupConfigurator._Impl.__cleanup_unit_file(logcollector_slice)
 
             if fileutil.findre_in_file(agent_unit_file, r"Slice=") is not None:
                 CGroupConfigurator._Impl.__cleanup_unit_file(agent_drop_in_file_slice)
diff --git a/azurelinuxagent/ga/persist_firewall_rules.py b/azurelinuxagent/ga/persist_firewall_rules.py
index a20e2874a..e7c8373ec 100644
--- a/azurelinuxagent/ga/persist_firewall_rules.py
+++ b/azurelinuxagent/ga/persist_firewall_rules.py
@@ -199,8 +199,7 @@ def _setup_network_setup_service(self):
 
             # Create unit file with default values
             self.__set_service_unit_file()
-            # Reload systemd configurations when we setup the service for the first time to avoid systemctl warnings
-            self.__reload_systemd_conf()
+            # After modifying the service, systemctl may issue a warning when checking the service, and daemon-reload should not be used to clear the warning, since it can affect other services
             logger.info("Successfully added and enabled the {0}".format(self._network_setup_service_name))
 
     def __setup_binary_file(self):
@@ -297,13 +296,6 @@ def __log_network_setup_service_logs(self):
             message=msg,
             log_event=False)
 
-    def __reload_systemd_conf(self):
-        try:
-            logger.info("Executing systemctl daemon-reload for setting up {0}".format(self._network_setup_service_name))
-            shellutil.run_command(["systemctl", "daemon-reload"])
-        except Exception as exception:
-            logger.warn("Unable to reload systemctl configurations: {0}".format(ustr(exception)))
-
     def __get_unit_file_version(self):
         if not os.path.exists(self.get_service_file_path()):
             raise OSError("{0} not found".format(self.get_service_file_path()))
diff --git a/tests/ga/test_cgroupconfigurator.py b/tests/ga/test_cgroupconfigurator.py
index 9af0d88d7..1ea7d9325 100644
--- a/tests/ga/test_cgroupconfigurator.py
+++ b/tests/ga/test_cgroupconfigurator.py
@@ -221,26 +221,20 @@ def test_initialize_should_create_unit_files_when_the_agent_service_file_is_not_
             self.assertTrue(os.path.exists(agent_drop_in_file_cpu_accounting), "{0} was not created".format(agent_drop_in_file_cpu_accounting))
             self.assertTrue(os.path.exists(agent_drop_in_file_memory_accounting), "{0} was not created".format(agent_drop_in_file_memory_accounting))
 
-    def test_initialize_should_update_logcollector_memorylimit(self):
+    def test_initialize_should_clear_logcollector_slice(self):
         with self._get_cgroup_configurator(initialize=False) as configurator:
             log_collector_unit_file = configurator.mocks.get_mapped_path(UnitFilePaths.logcollector)
-            original_memory_limit = "MemoryLimit=30M"
 
-            # The mock creates the slice unit file with memory limit
+            # The mock creates the slice unit file
             configurator.mocks.add_data_file(os.path.join(data_dir, 'init', "azure-walinuxagent-logcollector.slice"),
                                              UnitFilePaths.logcollector)
-            if not os.path.exists(log_collector_unit_file):
-                raise Exception("{0} should have been created during test setup".format(log_collector_unit_file))
-            if not fileutil.findre_in_file(log_collector_unit_file, original_memory_limit):
-                raise Exception("MemoryLimit was not set correctly. Expected: {0}. Got:\n{1}".format(
-                    original_memory_limit, fileutil.read_file(log_collector_unit_file)))
+
+            self.assertTrue(os.path.exists(log_collector_unit_file), "{0} was not created".format(log_collector_unit_file))
 
             configurator.initialize()
 
-            # initialize() should update the unit file to remove the memory limit
-            self.assertFalse(fileutil.findre_in_file(log_collector_unit_file, original_memory_limit),
-                             "Log collector slice unit file was not updated correctly. Expected no memory limit. Got:\n{0}".format(
-                                 fileutil.read_file(log_collector_unit_file)))
+            # initialize() should remove the unit file
+            self.assertFalse(os.path.exists(log_collector_unit_file), "{0} should not have been created".format(log_collector_unit_file))
 
     def test_setup_extension_slice_should_create_unit_files(self):
         with self._get_cgroup_configurator() as configurator:
diff --git a/tests/ga/test_persist_firewall_rules.py b/tests/ga/test_persist_firewall_rules.py
index adcf43b75..7754f1efb 100644
--- a/tests/ga/test_persist_firewall_rules.py
+++ b/tests/ga/test_persist_firewall_rules.py
@@ -127,13 +127,6 @@ def __assert_systemctl_called(self, cmd="enable", validate_command_called=True):
         else:
             self.assertNotIn(systemctl_command, self.__executed_commands, "Systemctl command {0} found".format(cmd))
 
-    def __assert_systemctl_reloaded(self, validate_command_called=True):
-        systemctl_reload = ["systemctl", "daemon-reload"]
-        if validate_command_called:
-            self.assertIn(systemctl_reload, self.__executed_commands, "Systemctl config not reloaded")
-        else:
-            self.assertNotIn(systemctl_reload, self.__executed_commands, "Systemctl config reloaded")
-
     def __assert_firewall_cmd_running_called(self, validate_command_called=True):
         cmd = PersistFirewallRulesHandler._FIREWALLD_RUNNING_CMD
         if validate_command_called:
@@ -144,7 +137,6 @@ def __assert_firewall_cmd_running_called(self, validate_command_called=True):
     def __assert_network_service_setup_properly(self):
         self.__assert_systemctl_called(cmd="is-enabled", validate_command_called=True)
         self.__assert_systemctl_called(cmd="enable", validate_command_called=True)
-        self.__assert_systemctl_reloaded()
         self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.PassThrough, validate_command_called=False)
         self.assertTrue(os.path.exists(self._network_service_unit_file), "Service unit file should be there")
         self.assertTrue(os.path.exists(self._binary_file), "Binary file should be there")
@@ -200,7 +192,6 @@ def __setup_and_assert_network_service_setup_scenario(self, handler, mock_popen=
 
         self.__assert_systemctl_called(cmd="is-enabled", validate_command_called=True)
         self.__assert_systemctl_called(cmd="enable", validate_command_called=True)
-        self.__assert_systemctl_reloaded(validate_command_called=True)
         self.__assert_firewall_cmd_running_called(validate_command_called=True)
         self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.QueryPassThrough, validate_command_called=False)
         self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.RemovePassThrough, validate_command_called=False)
@@ -234,7 +225,6 @@ def test_it_should_skip_setup_if_agent_network_setup_service_already_enabled_and
 
             self.__assert_systemctl_called(cmd="is-enabled", validate_command_called=True)
             self.__assert_systemctl_called(cmd="enable", validate_command_called=False)
-            self.__assert_systemctl_reloaded(validate_command_called=False)
             self.__assert_firewall_cmd_running_called(validate_command_called=True)
             self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.QueryPassThrough, validate_command_called=False)
             self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.RemovePassThrough, validate_command_called=False)
@@ -396,7 +386,6 @@ def test_it_should_delete_custom_service_files_if_firewalld_enabled(self):
             self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.PassThrough, validate_command_called=True)
             self.__assert_systemctl_called(cmd="is-enabled", validate_command_called=False)
             self.__assert_systemctl_called(cmd="enable", validate_command_called=False)
-            self.__assert_systemctl_reloaded(validate_command_called=False)
             self.assertFalse(os.path.exists(handler.get_service_file_path()), "Service unit file found")
             self.assertFalse(os.path.exists(os.path.join(conf.get_lib_dir(), handler.BINARY_FILE_NAME)), "Binary file found")
 
diff --git a/tests/lib/mock_command.py b/tests/lib/mock_command.py
index e181d26d9..83509c3d3 100755
--- a/tests/lib/mock_command.py
+++ b/tests/lib/mock_command.py
@@ -2,12 +2,18 @@
 import os
 import sys
 
-if len(sys.argv) != 4:
+if len(sys.argv) < 4:
     sys.stderr.write("usage: {0} <stdout> <return_value> <stderr>".format(os.path.basename(__file__)))
 
 # W0632: Possible unbalanced tuple unpacking with sequence: left side has 3 label(s), right side has 0 value(s) (unbalanced-tuple-unpacking)
 # Disabled: Unpacking is balanced: there is a check for the length on line 5
-stdout, return_value, stderr = sys.argv[1:]  # pylint: disable=W0632
+
+# This script will be used for mocking cgroups commands in test, when popen called this script will be executed instead of actual commands
+# We pass stdout, return_value, stderr of the mocked command output as arguments to this script and this script will print them to stdout, stderr and exit with the return value
+# So that popen gets the output of the mocked command. Ideally we should get 4 arguments in sys.argv, first one is the script name, next 3 are the actual command output
+# But somehow when we run the tests from pycharm, it adds extra arguments next to the script name, so we need to handle that when reading the arguments
+# ex: /home/nag/Documents/repos/WALinuxAgent/tests/lib/mock_command.py /snap/pycharm-professional/412/plugins/python-ce/helpers/py... +BLKID +ELFUTILS +KMOD -IDN2 +IDN -PCRE2 default-hierarchy=hybrid\n 0
+stdout, return_value, stderr = sys.argv[-3:]  # pylint: disable=W0632
 
 if stdout != '':
     sys.stdout.write(stdout)
diff --git a/tests/lib/mock_environment.py b/tests/lib/mock_environment.py
index 8f5682cf8..5b7209358 100644
--- a/tests/lib/mock_environment.py
+++ b/tests/lib/mock_environment.py
@@ -76,12 +76,14 @@ def __init__(self, tmp_dir, commands=None, paths=None, files=None, data_files=No
         self._original_popen = subprocess.Popen
         self._original_mkdir = fileutil.mkdir
         self._original_path_exists = os.path.exists
+        self._original_os_remove = os.remove
         self._original_open = open
 
         self.patchers = [
             patch_builtin("open", side_effect=self._mock_open),
             patch("subprocess.Popen", side_effect=self._mock_popen),
             patch("os.path.exists", side_effect=self._mock_path_exists),
+            patch("os.remove", side_effect=self._mock_os_remove),
             patch("azurelinuxagent.common.utils.fileutil.mkdir", side_effect=self._mock_mkdir)
         ]
 
@@ -166,3 +168,6 @@ def _mock_open(self, path, *args, **kwargs):
     def _mock_path_exists(self, path):
         return self._original_path_exists(self.get_mapped_path(path))
 
+    def _mock_os_remove(self, path):
+        return self._original_os_remove(self.get_mapped_path(path))
+