From 9390ce579f2dd2307371eefb5c0f46b1bb17f013 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Tue, 11 Apr 2023 10:26:51 -0700 Subject: [PATCH 001/240] Add support for Azure Clouds (#2795) * Add support for Azure Clouds --------- Co-authored-by: narrieta --- test-requirements.txt | 1 + .../orchestrator/lib/agent_test_loader.py | 15 ++- .../orchestrator/lib/agent_test_suite.py | 7 +- .../lib/agent_test_suite_combinator.py | 61 ++++++++++--- tests_e2e/orchestrator/runbook.yml | 23 +++-- .../sample_runbooks/existing_vm.yml | 22 ++++- tests_e2e/pipeline/pipeline-cleanup.yml | 91 ++++++++++--------- tests_e2e/pipeline/pipeline.yml | 26 ++++-- tests_e2e/test_suites/images.yml | 60 +++++++++--- tests_e2e/tests/lib/agent_test_context.py | 6 +- tests_e2e/tests/lib/azure_clouds.py | 24 +++++ tests_e2e/tests/lib/identifiers.py | 5 +- tests_e2e/tests/lib/virtual_machine.py | 16 +++- tests_e2e/tests/lib/vm_extension.py | 10 +- 14 files changed, 261 insertions(+), 106 deletions(-) create mode 100644 tests_e2e/tests/lib/azure_clouds.py diff --git a/test-requirements.txt b/test-requirements.txt index 3c54ab9974..6b8a78bd03 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -19,3 +19,4 @@ azure-core azure-identity azure-mgmt-compute>=22.1.0 azure-mgmt-resource>=15.0.0 +msrestazure diff --git a/tests_e2e/orchestrator/lib/agent_test_loader.py b/tests_e2e/orchestrator/lib/agent_test_loader.py index a0f0bfaaf1..f1a2dfc9d2 100644 --- a/tests_e2e/orchestrator/lib/agent_test_loader.py +++ b/tests_e2e/orchestrator/lib/agent_test_loader.py @@ -48,7 +48,7 @@ class VmImageInfo(object): # The URN of the image (publisher, offer, version separated by spaces) urn: str # Indicates that the image is available only on those locations. If empty, the image should be available in all locations - locations: List[str] + locations: Dict[str, List[str]] # Indicates that the image is available only for those VM sizes. 
If empty, the image should be available for all VM sizes vm_sizes: List[str] @@ -109,8 +109,9 @@ def _validate(self): if suite.location != '': for suite_image in suite.images: for image in self.images[suite_image]: - if len(image.locations) > 0: - if suite.location not in image.locations: + # If the image has a location restriction, validate that it is available on the location the suite must run on + if image.locations: + if not any(suite.location in l for l in image.locations.values()): raise Exception(f"Test suite {suite.name} must be executed in {suite.location}, but <{image.urn}> is not available in that location") @staticmethod @@ -223,14 +224,18 @@ def _load_images() -> Dict[str, List[VmImageInfo]]: i = VmImageInfo() if isinstance(description, str): i.urn = description - i.locations = [] + i.locations = {} i.vm_sizes = [] else: if "urn" not in description: raise Exception(f"Image {name} is missing the 'urn' property: {description}") i.urn = description["urn"] - i.locations = description["locations"] if "locations" in description else [] + i.locations = description["locations"] if "locations" in description else {} i.vm_sizes = description["vm_sizes"] if "vm_sizes" in description else [] + for cloud in i.locations.keys(): + if cloud not in ["AzureCloud", "AzureChinaCloud", "AzureUSGovernment"]: + raise Exception(f"Invalid cloud {cloud} for image {name} in images.yml") + images[name] = [i] # now load the image-sets, mapping them to the images that we just computed diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index 0c95daf60f..847ace7560 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -40,8 +40,7 @@ ) from lisa.environment import EnvironmentStatus # pylint: disable=E0401 from lisa.messages import TestStatus, TestResultMessage # pylint: disable=E0401 -from lisa.sut_orchestrator import AZURE # pylint: disable=E0401 -from 
lisa.sut_orchestrator.azure.common import get_node_context, AzureNodeSchema # pylint: disable=E0401 +from lisa.sut_orchestrator.azure.common import get_node_context # pylint: disable=E0401 import makepkg from azurelinuxagent.common.version import AGENT_VERSION @@ -133,11 +132,11 @@ def __init__(self, metadata: TestSuiteMetadata) -> None: def _initialize(self, node: Node, variables: Dict[str, Any], lisa_working_path: str, lisa_log_path: str, lisa_log: Logger): connection_info = node.connection_info node_context = get_node_context(node) - runbook = node.capability.get_extended_runbook(AzureNodeSchema, AZURE) self.__context = self._Context( vm=VmIdentifier( - location=runbook.location, + cloud=self._get_required_parameter(variables, "c_cloud"), + location=self._get_required_parameter(variables, "c_location"), subscription=node.features._platform.subscription_id, resource_group=node_context.resource_group_name, name=node_context.vm_name), diff --git a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py index 28fca0fad6..423e542904 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py @@ -98,9 +98,21 @@ def _next(self) -> Optional[Dict[str, Any]]: return result _DEFAULT_LOCATIONS = { - "china": "china north 2", - "government": "usgovarizona", - "public": "westus2" + "AzureCloud": "westus2", + "AzureChinaCloud": "chinanorth2", + "AzureUSGovernment": "usgovarizona", + } + + _MARKETPLACE_IMAGE_INFORMATION_LOCATIONS = { + "AzureCloud": "", # empty indicates the default location used by LISA + "AzureChinaCloud": "chinanorth2", + "AzureUSGovernment": "usgovarizona", + } + + _SHARED_RESOURCE_GROUP_LOCATIONS = { + "AzureCloud": "", # empty indicates the default location used by LISA + "AzureChinaCloud": "chinanorth2", + "AzureUSGovernment": "usgovarizona", } def create_environment_for_existing_vm(self) -> List[Dict[str, Any]]: @@ 
-178,15 +190,23 @@ def create_environment_list(self) -> List[Dict[str, Any]]: raise Exception(f"Invalid URN: {image.urn}") name = f"{match.group('offer')}-{match.group('sku')}" - # If the runbook specified a location, use it. Then try the suite location, if any. Otherwise, check if the image specifies - # a list of locations and use any of them. If no location is specified so far, use the default. + location: str = None + # If the runbook specified a location, use it. if self.runbook.location != "": location = self.runbook.location + # Then try the suite location, if any. elif suite_info.location != '': location = suite_info.location - elif len(image.locations) > 0: - location = image.locations[0] - else: + # If the image has a location restriction, use any location where it is available. + # However, if it is not available on any location, skip the image. + elif image.locations: + image_locations = image.locations.get(self.runbook.cloud) + if image_locations is not None: + if len(image_locations) == 0: + continue + location = image_locations[0] + # If no location has been selected, use the default. + if location is None: location = AgentTestSuitesCombinator._DEFAULT_LOCATIONS[self.runbook.cloud] # If the runbook specified a VM size, use it. Else if the image specifies a list of VM sizes, use any of them. 
Otherwise, @@ -202,11 +222,14 @@ def create_environment_list(self) -> List[Dict[str, Any]]: # create an environment for exclusive use by this suite environment_list.append({ "c_marketplace_image": marketplace_image, + "c_cloud": self.runbook.cloud, "c_location": location, "c_vm_size": vm_size, "c_vhd": vhd, "c_test_suites": [suite_info], - "c_env_name": f"{name}-{suite_info.name}" + "c_env_name": f"{name}-{suite_info.name}", + "c_marketplace_image_information_location": self._MARKETPLACE_IMAGE_INFORMATION_LOCATIONS[self.runbook.cloud], + "c_shared_resource_group_location": self._SHARED_RESOURCE_GROUP_LOCATIONS[self.runbook.cloud] }) else: # add this suite to the shared environments @@ -216,27 +239,35 @@ def create_environment_list(self) -> List[Dict[str, Any]]: else: shared_environments[key] = { "c_marketplace_image": marketplace_image, + "c_cloud": self.runbook.cloud, "c_location": location, "c_vm_size": vm_size, "c_vhd": vhd, "c_test_suites": [suite_info], - "c_env_name": key + "c_env_name": key, + "c_marketplace_image_information_location": self._MARKETPLACE_IMAGE_INFORMATION_LOCATIONS[self.runbook.cloud], + "c_shared_resource_group_location": self._SHARED_RESOURCE_GROUP_LOCATIONS[self.runbook.cloud] } environment_list.extend(shared_environments.values()) + if len(environment_list) == 0: + raise Exception("No VM images were found to execute the test suites.") + log: logging.Logger = logging.getLogger("lisa") - log.info("******** Environments *****") - for e in environment_list: - log.info( - "{ c_marketplace_image: '%s', c_location: '%s', c_vm_size: '%s', c_vhd: '%s', c_test_suites: '%s', c_env_name: '%s' }", - e['c_marketplace_image'], e['c_location'], e['c_vm_size'], e['c_vhd'], [s.name for s in e['c_test_suites']], e['c_env_name']) + log.info("") + log.info("******** Agent Test Environments *****") + for environment in environment_list: + test_suites = [s.name for s in environment['c_test_suites']] + log.info("Settings for %s:\n%s\n", 
environment['c_env_name'], '\n'.join([f"\t{name}: {value if name != 'c_test_suites' else test_suites}" for name, value in environment.items()])) log.info("***************************") + log.info("") return environment_list _URN = re.compile(r"(?P[^\s:]+)[\s:](?P[^\s:]+)[\s:](?P[^\s:]+)[\s:](?P[^\s:]+)") + @staticmethod def _is_urn(urn: str) -> bool: # URNs can be given as ' ' or ':::' diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index 8075725eb0..8180be732c 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -51,7 +51,7 @@ variable: - name: test_suites value: "agent_bvt" - name: cloud - value: "public" + value: "AzureCloud" - name: image value: "" - name: location @@ -64,21 +64,26 @@ variable: # prefixed with "c_" to distinguish them from the rest of the variables, whose value can be set from # the command line. # - # c_marketplace_image, c_vm_size, c_location, and c_vhd are handled by LISA and define - # the set of test VMs that need to be created, while c_test_suites and c_env_name are parameters - # for the AgentTestSuite; the former defines the test suites that must be executed on each - # of those test VMs and the latter is the name of the environment, which is used for logging - # purposes (NOTE: the AgentTestSuite also uses c_vhd). + # Most of these variables are handled by LISA and are used to define the set of test VMs that need to be + # created. The variables marked with 'is_case_visible' are also referenced by the AgentTestSuite. 
# - name: c_env_name value: "" is_case_visible: true - name: c_marketplace_image value: "" + - name: c_marketplace_image_information_location + value: "" + - name: c_shared_resource_group_location + value: "" - name: c_vm_size value: "" + - name: c_cloud + value: "" + is_case_visible: true - name: c_location value: "" + is_case_visible: true - name: c_vhd value: "" is_case_visible: true @@ -107,6 +112,12 @@ platform: keep_environment: $(keep_environment) azure: deploy: True +# +# TODO: Enable these parameters once LISA supports all Azure clouds +# +# cloud: $(cloud) +# marketplace_image_information_location: $(c_marketplace_image_information_location) +# shared_resource_group_location: $(c_shared_resource_group_location) subscription_id: $(subscription_id) wait_delete: false requirement: diff --git a/tests_e2e/orchestrator/sample_runbooks/existing_vm.yml b/tests_e2e/orchestrator/sample_runbooks/existing_vm.yml index 2a5109f41f..bbfcf8a7f9 100644 --- a/tests_e2e/orchestrator/sample_runbooks/existing_vm.yml +++ b/tests_e2e/orchestrator/sample_runbooks/existing_vm.yml @@ -32,7 +32,7 @@ variable: # These variables identify the existing VM, and the user for SSH connections # - name: cloud - value: "public" + value: "AzureCloud" - name: subscription_id value: "" - name: resource_group_name @@ -80,18 +80,24 @@ variable: # prefixed with "c_" to distinguish them from the rest of the variables, whose value can be set from # the command line. # - # c_marketplace_image, c_vm_size, c_location, and c_vhd are handled by LISA and define - # the set of test VMs that need to be created, while c_test_suites is a parameter - # for the AgentTestSuite and defines the test suites that must be executed on each - # of those test VMs (the AgentTestSuite also uses c_vhd) + # Most of these variables are handled by LISA and are used to define the set of test VMs that need to be + # created. The variables marked with 'is_case_visible' are also referenced by the AgentTestSuite. 
# - name: c_env_name value: "" is_case_visible: true - name: c_vm_name value: "" + - name: c_marketplace_image_information_location + value: "" + - name: c_shared_resource_group_location + value: "" + - name: c_cloud + value: "" + is_case_visible: true - name: c_location value: "" + is_case_visible: true - name: c_test_suites value: [] is_case_visible: true @@ -114,6 +120,12 @@ platform: admin_username: $(user) admin_private_key_file: $(identity_file) azure: +# +# TODO: Enable these parameters once LISA supports all Azure clouds +# +# cloud: $(cloud) +# marketplace_image_information_location: $(c_marketplace_image_information_location) +# shared_resource_group_location: $(c_shared_resource_group_location) resource_group_name: $(resource_group_name) deploy: false subscription_id: $(subscription_id) diff --git a/tests_e2e/pipeline/pipeline-cleanup.yml b/tests_e2e/pipeline/pipeline-cleanup.yml index ba880a4f4f..b82ad53eea 100644 --- a/tests_e2e/pipeline/pipeline-cleanup.yml +++ b/tests_e2e/pipeline/pipeline-cleanup.yml @@ -1,58 +1,59 @@ # # Pipeline for cleaning up any remaining Resource Groups generated by the Azure.WALinuxAgent pipeline. 
# -# Deletes any resource groups that are more than a day old and contain string "lisa-WALinuxAgent-" +# Deletes any resource groups that are older than 'older_than' and match the 'name_pattern' regular expression # -schedules: - - cron: "0 */12 * * *" # Run twice a day (every 12 hours) - displayName: cleanup build - branches: - include: - - develop - always: true -trigger: - - develop +parameters: + - name: name_pattern + displayName: Regular expression to match the name of the resource groups to delete + type: string + default: lisa-WALinuxAgent-.* -pr: none + - name: older_than + displayName: Delete resources older than (use the syntax of the "date -d" command) + type: string + default: 1 day ago + + - name: service_connections + type: object + default: + - azuremanagement +# +# TODO: Enable these services connections once we create test pipelines for all Azure clouds +# +# - azuremanagement.china +# - azuremanagement.government pool: vmImage: ubuntu-latest -variables: - - name: azureConnection - value: 'azuremanagement' - - name: rgPrefix - value: 'lisa-WALinuxAgent-' - steps: + - ${{ each service_connection in parameters.service_connections }}: + - task: AzureCLI@2 + inputs: + azureSubscription: ${{ service_connection }} + scriptType: 'bash' + scriptLocation: 'inlineScript' + inlineScript: | + set -euxo pipefail - - task: AzureKeyVault@2 - displayName: "Fetch secrets from KV" - inputs: - azureSubscription: '$(azureConnection)' - KeyVaultName: 'dcrV2SPs' - SecretsFilter: '*' - RunAsPreJob: true + # + # We use the REST API to list the resource groups because we need the createdTime and that + # property is not available via the az-cli commands. 
+ # + subscription_id=$(az account list --all --query "[?isDefault].id" -o tsv) + + date=$(date --utc +%Y-%m-%d'T'%H:%M:%S.%N'Z' -d "${{ parameters.older_than }}") - - task: AzureCLI@2 - inputs: - azureSubscription: '$(azureConnection)' - scriptType: 'bash' - scriptLocation: 'inlineScript' - inlineScript: | - set -euxo pipefail - date=`date --utc +%Y-%m-%d'T'%H:%M:%S.%N'Z' -d "1 day ago"` - - # Using the Azure REST GET resourceGroups API call as we can add the createdTime to the results. - # This feature is not available via the az-cli commands directly so we have to use the Azure REST APIs - - az rest --method GET \ - --url "https://management.azure.com/subscriptions/$(SUBSCRIPTION-ID)/resourcegroups" \ - --url-parameters api-version=2021-04-01 \$expand=createdTime \ - --output json \ - --query value \ - | jq --arg date "$date" '.[] | select (.createdTime < $date).name' \ - | grep "$(rgPrefix)" \ - | xargs -l -t -r az group delete --no-wait -y -n \ - || echo "No resource groups found to delete" + rest_endpoint=$(az cloud show --query "endpoints.resourceManager" -o tsv) + + az rest --method GET \ + --url "${rest_endpoint}/subscriptions/${subscription_id}/resourcegroups" \ + --url-parameters api-version=2021-04-01 \$expand=createdTime \ + --output json \ + --query value \ + | jq --arg date "$date" '.[] | select (.createdTime < $date).name' \ + | grep '${{ parameters.name_pattern }}' \ + | xargs -l -t -r az group delete --no-wait -y -n \ + || echo "No resource groups found to delete" diff --git a/tests_e2e/pipeline/pipeline.yml b/tests_e2e/pipeline/pipeline.yml index 1de5416340..b0f9ebaa18 100644 --- a/tests_e2e/pipeline/pipeline.yml +++ b/tests_e2e/pipeline/pipeline.yml @@ -50,11 +50,6 @@ parameters: - failed - no -trigger: - - develop - -pr: none - pool: vmImage: ubuntu-latest @@ -69,9 +64,24 @@ jobs: addToPath: true architecture: 'x64' - # Extract the Azure cloud from the "connection_info" variable and store it in the "cloud" variable. 
- # The cloud name is used as a suffix of the value for "connection_info" and comes after the last '-'. - - bash: echo "##vso[task.setvariable variable=cloud]$(echo $CONNECTION_INFO | sed 's/^.*-//')" + # Extract the Azure cloud from the "connection_info" variable. Its value includes one of + # 'public', 'china', or 'government' as a suffix (the suffix comes after the last '-'). + - bash: | + case $(echo $CONNECTION_INFO | sed 's/^.*-//') in + public) + echo "##vso[task.setvariable variable=cloud]AzureCloud" + ;; + china) + echo "##vso[task.setvariable variable=cloud]AzureChinaCloud" + ;; + government) + echo "##vso[task.setvariable variable=cloud]AzureUSGovernment" + ;; + *) + echo "Invalid CONNECTION_INFO: $CONNECTION_INFO" >&2 + exit 1 + ;; + esac displayName: "Set Cloud type" - task: DownloadSecureFile@1 diff --git a/tests_e2e/test_suites/images.yml b/tests_e2e/test_suites/images.yml index 253f8a1389..6fef5314dd 100644 --- a/tests_e2e/test_suites/images.yml +++ b/tests_e2e/test_suites/images.yml @@ -47,7 +47,7 @@ image-sets: # mariner_2_arm64: # urn: "microsoftcblmariner cbl-mariner cbl-mariner-2-arm64 latest" # locations: -# - "eastus" +# - AzureCloud: ["eastus"] # vm_sizes: # - "Standard_D2pls_v5" # @@ -55,38 +55,76 @@ image-sets: # two properties can be used to specify that the image is available only in # some locations, or that it can be used only on some VM sizes. # +# The 'locations' property consists of 3 items, one for each cloud (AzureCloud, +# AzureUSGovernment and AzureChinaCloud). For each of these items: +# +# - If the item is not present, the image is available in all locations for that cloud. +# - If the value is a list of locations, the image is available only in those locations +# - If the value is an empty list, the image is not available in that cloud. 
+# # URNs follow the format ' ' or # ':::' # images: - alma_9: "almalinux almalinux 9-gen2 latest" + alma_9: + urn: "almalinux almalinux 9-gen2 latest" + locations: + AzureChinaCloud: [] centos_610: "OpenLogic CentOS 6.10 latest" centos_79: "OpenLogic CentOS 7_9 latest" debian_8: "credativ Debian 8 latest" debian_9: "credativ Debian 9 latest" debian_10: "Debian debian-10 10 latest" debian_11: "Debian debian-11 11 latest" - debian_11_arm64: "Debian debian-11 11-backports-arm64 latest" - flatcar: "kinvolk flatcar-container-linux-free stable latest" + debian_11_arm64: + urn: "Debian debian-11 11-backports-arm64 latest" + locations: + AzureUSGovernment: [] + flatcar: + urn: "kinvolk flatcar-container-linux-free stable latest" + locations: + AzureChinaCloud: [] + AzureUSGovernment: [] flatcar_arm64: urn: "kinvolk flatcar-container-linux-corevm stable latest" + locations: + AzureChinaCloud: [] vm_sizes: - "Standard_D2pls_v5" - mariner_1: "microsoftcblmariner cbl-mariner cbl-mariner-1 latest" + mariner_1: + urn: "microsoftcblmariner cbl-mariner cbl-mariner-1 latest" + locations: + AzureChinaCloud: [] mariner_2: "microsoftcblmariner cbl-mariner cbl-mariner-2 latest" mariner_2_arm64: urn: "microsoftcblmariner cbl-mariner cbl-mariner-2-arm64 latest" locations: - - "eastus" + AzureCloud: ["eastus"] vm_sizes: - "Standard_D2pls_v5" - rocky_9: "erockyenterprisesoftwarefoundationinc1653071250513 rockylinux-9 rockylinux-9 latest" + rocky_9: + urn: "erockyenterprisesoftwarefoundationinc1653071250513 rockylinux-9 rockylinux-9 latest" + locations: + AzureChinaCloud: [] + AzureUSGovernment: [] suse_12: "SUSE sles-12-sp5-basic gen1 latest" suse_15: "SUSE sles-15-sp2-basic gen2 latest" - rhel_79: "RedHat RHEL 7_9 latest" - rhel_82: "RedHat RHEL 8.2 latest" - rhel_90: "RedHat RHEL 9_0 latest" - rhel_90_arm64: "RedHat rhel-arm64 9_0-arm64 latest" + rhel_79: + urn: "RedHat RHEL 7_9 latest" + locations: + AzureChinaCloud: [] + rhel_82: + urn: "RedHat RHEL 8.2 latest" + locations: + 
AzureChinaCloud: [] + rhel_90: + urn: "RedHat RHEL 9_0 latest" + locations: + AzureChinaCloud: [] + rhel_90_arm64: + urn: "RedHat rhel-arm64 9_0-arm64 latest" + locations: + AzureChinaCloud: [] ubuntu_1604: "Canonical UbuntuServer 16.04-LTS latest" ubuntu_1804: "Canonical UbuntuServer 18.04-LTS latest" ubuntu_2004: "Canonical 0001-com-ubuntu-server-focal 20_04-lts latest" diff --git a/tests_e2e/tests/lib/agent_test_context.py b/tests_e2e/tests/lib/agent_test_context.py index ca9fc64ad3..28d663f8ae 100644 --- a/tests_e2e/tests/lib/agent_test_context.py +++ b/tests_e2e/tests/lib/agent_test_context.py @@ -127,6 +127,7 @@ def from_args(): Creates an AgentTestContext from the command line arguments. """ parser = argparse.ArgumentParser() + parser.add_argument('-c', '--cloud', dest="cloud", required=False, choices=['AzureCloud', 'AzureChinaCloud', 'AzureUSGovernment'], default="AzureCloud") parser.add_argument('-g', '--group', required=True) parser.add_argument('-l', '--location', required=True) parser.add_argument('-s', '--subscription', required=True) @@ -138,7 +139,7 @@ def from_args(): parser.add_argument('-a', '--ip-address', dest="ip_address", required=False) # Use the vm name as default parser.add_argument('-u', '--username', required=False, default=os.getenv("USER")) - parser.add_argument('-k', '--private-key-file', dest="private_key_file", required=False, default=Path.home()/".ssh"/"id_rsa") + parser.add_argument('-k', '--private-key-file', dest="private_key_file", required=False, default=str(Path.home()/".ssh"/"id_rsa")) parser.add_argument('-p', '--ssh-port', dest="ssh_port", required=False, default=AgentTestContext.Connection.DEFAULT_SSH_PORT) args = parser.parse_args() @@ -149,12 +150,13 @@ def from_args(): return AgentTestContext( vm=VmIdentifier( + cloud=args.cloud, location=args.location, subscription=args.subscription, resource_group=args.group, name=args.vm), paths=AgentTestContext.Paths( - working_directory=working_directory, + 
working_directory=Path(working_directory), remote_working_directory=Path(args.remote_working_directory), test_source_directory=Path(args.test_source_directory)), connection=AgentTestContext.Connection( diff --git a/tests_e2e/tests/lib/azure_clouds.py b/tests_e2e/tests/lib/azure_clouds.py new file mode 100644 index 0000000000..2e1f5674e0 --- /dev/null +++ b/tests_e2e/tests/lib/azure_clouds.py @@ -0,0 +1,24 @@ +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from typing import Dict +from msrestazure.azure_cloud import Cloud, AZURE_PUBLIC_CLOUD, AZURE_CHINA_CLOUD, AZURE_US_GOV_CLOUD + +AZURE_CLOUDS: Dict[str, Cloud] = { + "AzureCloud": AZURE_PUBLIC_CLOUD, + "AzureChinaCloud": AZURE_CHINA_CLOUD, + "AzureUSGovernment": AZURE_US_GOV_CLOUD +} diff --git a/tests_e2e/tests/lib/identifiers.py b/tests_e2e/tests/lib/identifiers.py index 48794140b3..398ffd61cb 100644 --- a/tests_e2e/tests/lib/identifiers.py +++ b/tests_e2e/tests/lib/identifiers.py @@ -17,10 +17,11 @@ class VmIdentifier(object): - def __init__(self, location, subscription, resource_group, name): + def __init__(self, cloud: str, location: str, subscription: str, resource_group: str, name: str): """ Represents the information that identifies a VM to the ARM APIs """ + self.cloud: str = cloud self.location = location self.subscription: str = subscription self.resource_group: str = resource_group @@ -31,7 +32,7 @@ def __str__(self): class VmExtensionIdentifier(object): - def __init__(self, publisher, ext_type, version): + def __init__(self, publisher: str, ext_type: str, version: str): """ Represents the information that identifies an extension to the ARM APIs diff --git a/tests_e2e/tests/lib/virtual_machine.py b/tests_e2e/tests/lib/virtual_machine.py index 032a7e0f54..79b86a6f3b 100644 --- a/tests_e2e/tests/lib/virtual_machine.py +++ b/tests_e2e/tests/lib/virtual_machine.py @@ -28,7 +28,9 @@ from azure.mgmt.compute import ComputeManagementClient from azure.mgmt.compute.models import VirtualMachineExtension, VirtualMachineScaleSetExtension, VirtualMachineInstanceView, VirtualMachineScaleSetInstanceView from azure.mgmt.resource import ResourceManagementClient +from msrestazure.azure_cloud import Cloud +from tests_e2e.tests.lib.azure_clouds import AZURE_CLOUDS from tests_e2e.tests.lib.identifiers import VmIdentifier from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.retry import execute_with_retry @@ -43,8 +45,18 @@ class 
VirtualMachineBaseClass(ABC): def __init__(self, vm: VmIdentifier): super().__init__() self._identifier: VmIdentifier = vm - self._compute_client = ComputeManagementClient(credential=DefaultAzureCredential(), subscription_id=vm.subscription) - self._resource_client = ResourceManagementClient(credential=DefaultAzureCredential(), subscription_id=vm.subscription) + cloud: Cloud = AZURE_CLOUDS[vm.cloud] + credential: DefaultAzureCredential = DefaultAzureCredential(authority=cloud.endpoints.active_directory) + self._compute_client = ComputeManagementClient( + credential=credential, + subscription_id=vm.subscription, + base_url=cloud.endpoints.resource_manager, + credential_scopes=[cloud.endpoints.resource_manager + "/.default"]) + self._resource_client = ResourceManagementClient( + credential=credential, + subscription_id=vm.subscription, + base_url=cloud.endpoints.resource_manager, + credential_scopes=[cloud.endpoints.resource_manager + "/.default"]) @abstractmethod def get_instance_view(self) -> Any: # Returns VirtualMachineInstanceView or VirtualMachineScaleSetInstanceView diff --git a/tests_e2e/tests/lib/vm_extension.py b/tests_e2e/tests/lib/vm_extension.py index eab676e75a..abefcc723f 100644 --- a/tests_e2e/tests/lib/vm_extension.py +++ b/tests_e2e/tests/lib/vm_extension.py @@ -30,7 +30,9 @@ from azure.mgmt.compute import ComputeManagementClient from azure.mgmt.compute.models import VirtualMachineExtension, VirtualMachineScaleSetExtension, VirtualMachineExtensionInstanceView from azure.identity import DefaultAzureCredential +from msrestazure.azure_cloud import Cloud +from tests_e2e.tests.lib.azure_clouds import AZURE_CLOUDS from tests_e2e.tests.lib.identifiers import VmIdentifier, VmExtensionIdentifier from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.retry import execute_with_retry @@ -51,7 +53,13 @@ def __init__(self, vm: VmIdentifier, extension: VmExtensionIdentifier, resource_ self._vm: VmIdentifier = vm self._identifier = extension 
self._resource_name = resource_name - self._compute_client: ComputeManagementClient = ComputeManagementClient(credential=DefaultAzureCredential(), subscription_id=vm.subscription) + cloud: Cloud = AZURE_CLOUDS[vm.cloud] + credential: DefaultAzureCredential = DefaultAzureCredential(authority=cloud.endpoints.active_directory) + self._compute_client: ComputeManagementClient = ComputeManagementClient( + credential=credential, + subscription_id=vm.subscription, + base_url=cloud.endpoints.resource_manager, + credential_scopes=[cloud.endpoints.resource_manager + "/.default"]) def enable( self, From 7de613305a9ee8f2ef7f35fa916a72b8622bcf51 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Fri, 14 Apr 2023 16:20:39 -0700 Subject: [PATCH 002/240] Check certificates only if certificates are included in goal state and update test-requirements to remove codecov (#2803) * Update version to dummy 1.0.0.0' * Revert version change * Only check certificats if goal state includes certs * Fix code coverage deprecated issue * Move condition to function call --- .github/workflows/ci_pr.yml | 2 +- azurelinuxagent/common/protocol/goal_state.py | 4 ++-- test-requirements.txt | 1 - 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci_pr.yml b/.github/workflows/ci_pr.yml index e5592688c6..4e8b299671 100644 --- a/.github/workflows/ci_pr.yml +++ b/.github/workflows/ci_pr.yml @@ -123,6 +123,6 @@ jobs: - name: Upload Coverage if: matrix.python-version == 3.9 - uses: codecov/codecov-action@v2 + uses: codecov/codecov-action@v3 with: file: ./coverage.xml \ No newline at end of file diff --git a/azurelinuxagent/common/protocol/goal_state.py b/azurelinuxagent/common/protocol/goal_state.py index 6b2a0c2cf8..0980ca9d02 100644 --- a/azurelinuxagent/common/protocol/goal_state.py +++ b/azurelinuxagent/common/protocol/goal_state.py @@ -290,12 +290,12 @@ def _update(self, force_update): # Track goal state comes after that, the 
extensions will need the new certificate. The Agent needs to refresh the goal state in that # case, to ensure it fetches the new certificate. # - if self._extensions_goal_state.source == GoalStateSource.FastTrack: + if self._extensions_goal_state.source == GoalStateSource.FastTrack and self._goal_state_properties & GoalStateProperties.Certificates: self._check_certificates() def _check_certificates(self): # Re-download certificates in case they have been removed from disk since last download - if self._goal_state_properties & GoalStateProperties.Certificates and self._certs_uri is not None: + if self._certs_uri is not None: self._download_certificates(self._certs_uri) # Check that certificates needed by extensions are in goal state certs.summary for extension in self.extensions_goal_state.extensions: diff --git a/test-requirements.txt b/test-requirements.txt index 6b8a78bd03..3576621706 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,4 +1,3 @@ -codecov coverage mock==2.0.0; python_version == '2.6' mock==3.0.5; python_version >= '2.7' and python_version <= '3.5' From cb566561a7b4750fc94afd2295ea4b3d8d975dbe Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Tue, 18 Apr 2023 13:17:36 -0700 Subject: [PATCH 003/240] Add tests for no outbound connectivity (#2804) * Add tests for no outbound connectivity --------- Co-authored-by: narrieta --- .../orchestrator/lib/agent_test_loader.py | 63 +++++++++--- .../orchestrator/lib/agent_test_suite.py | 55 ++++++----- .../lib/agent_test_suite_combinator.py | 51 +++++----- .../lib/update_arm_template_hook.py | 67 +++++++++++++ tests_e2e/orchestrator/runbook.yml | 7 ++ .../sample_runbooks/existing_vm.yml | 11 +-- tests_e2e/pipeline/pipeline.yml | 2 +- .../test_suites/no_outbound_connections.yml | 21 ++++ tests_e2e/tests/bvts/run_command.py | 5 +- tests_e2e/tests/bvts/vm_access.py | 2 +- tests_e2e/tests/lib/agent_test_context.py | 11 +++ tests_e2e/tests/lib/retry.py | 2 +- tests_e2e/tests/lib/shell.py | 2 +- 
tests_e2e/tests/lib/ssh_client.py | 2 +- .../check_fallback_to_hgap.py | 51 ++++++++++ .../check_no_outbound_connections.py | 59 ++++++++++++ .../no_outbound_connections/nsg_template.py | 95 +++++++++++++++++++ 17 files changed, 432 insertions(+), 74 deletions(-) create mode 100644 tests_e2e/orchestrator/lib/update_arm_template_hook.py create mode 100644 tests_e2e/test_suites/no_outbound_connections.yml create mode 100755 tests_e2e/tests/no_outbound_connections/check_fallback_to_hgap.py create mode 100755 tests_e2e/tests/no_outbound_connections/check_no_outbound_connections.py create mode 100755 tests_e2e/tests/no_outbound_connections/nsg_template.py diff --git a/tests_e2e/orchestrator/lib/agent_test_loader.py b/tests_e2e/orchestrator/lib/agent_test_loader.py index f1a2dfc9d2..201d398131 100644 --- a/tests_e2e/orchestrator/lib/agent_test_loader.py +++ b/tests_e2e/orchestrator/lib/agent_test_loader.py @@ -25,6 +25,23 @@ from tests_e2e.tests.lib.agent_test import AgentTest +class TestInfo(object): + """ + Description of a test + """ + # The class that implements the test + test_class: Type[AgentTest] + # If True, an error in the test blocks the execution of the test suite (defaults to False) + blocks_suite: bool + + @property + def name(self) -> str: + return self.test_class.__name__ + + def __str__(self): + return self.name + + class TestSuiteInfo(object): """ Description of a test suite @@ -32,13 +49,15 @@ class TestSuiteInfo(object): # The name of the test suite name: str # The tests that comprise the suite - tests: List[Type[AgentTest]] + tests: List[TestInfo] # Images or image sets (as defined in images.yml) on which the suite must run. 
images: List[str] # The location (region) on which the suite must run; if empty, the suite can run on any location location: str # Whether this suite must run on its own test VM owns_vm: bool + # Customization for the ARM template used when creating the test VM + template: str def __str__(self): return self.name @@ -139,7 +158,7 @@ def _load_test_suite(description_file: Path) -> TestSuiteInfo: """ Loads the description of a TestSuite from its YAML file. - A test suite has 5 properties: name, tests, images, location, and owns-vm. For example: + A test suite has 5 properties: name, tests, images, location, and owns_vm. For example: name: "AgentBvt" tests: @@ -148,18 +167,22 @@ def _load_test_suite(description_file: Path) -> TestSuiteInfo: - "bvts/vm_access.py" images: "endorsed" location: "eastuseaup" - owns-vm: true + owns_vm: true * name - A string used to identify the test suite - * tests - A list of the tests in the suite. Each test is specified by the path for its source code relative to - WALinuxAgent/tests_e2e/tests. + * tests - A list of the tests in the suite. Each test can be specified by a string (the path for its source code relative to + WALinuxAgent/tests_e2e/tests), or a dictionary with two items: + * source: the path for its source code relative to WALinuxAgent/tests_e2e/tests + * blocks_suite: [Optional; boolean] If True, a failure on the test will stop execution of the test suite (i.e. the + rest of the tests in the suite will not be executed). By default, a failure on a test does not stop execution of + the test suite. * images - A string, or a list of strings, specifying the images on which the test suite must be executed. Each value can be the name of a single image (e.g."ubuntu_2004"), or the name of an image set (e.g. "endorsed"). The names for images and image sets are defined in WALinuxAgent/tests_e2e/tests_suites/images.yml. * location - [Optional; string] If given, the test suite must be executed on that location. 
If not specified, or set to an empty string, the test suite will be executed in the default location. This is useful for test suites that exercise a feature that is enabled only in certain regions. - * owns-vm - [Optional; boolean] By default all suites in a test run are executed on the same test VMs; if this + * owns_vm - [Optional; boolean] By default all suites in a test run are executed on the same test VMs; if this value is set to True, new test VMs will be created and will be used exclusively for this test suite. This is useful for suites that modify the test VMs in such a way that the setup may cause problems in other test suites (for example, some tests targeted to the HGAP block internet access in order to @@ -176,9 +199,15 @@ def _load_test_suite(description_file: Path) -> TestSuiteInfo: test_suite_info.name = test_suite["name"] test_suite_info.tests = [] - source_files = [AgentTestLoader._SOURCE_CODE_ROOT/"tests"/t for t in test_suite["tests"]] - for f in source_files: - test_suite_info.tests.extend(AgentTestLoader._load_test_classes(f)) + for test in test_suite["tests"]: + test_info = TestInfo() + if isinstance(test, str): + test_info.test_class = AgentTestLoader._load_test_class(test) + test_info.blocks_suite = False + else: + test_info.test_class = AgentTestLoader._load_test_class(test["source"]) + test_info.blocks_suite = test.get("blocks_suite", False) + test_suite_info.tests.append(test_info) images = test_suite["images"] if isinstance(images, str): @@ -190,20 +219,26 @@ def _load_test_suite(description_file: Path) -> TestSuiteInfo: if test_suite_info.location is None: test_suite_info.location = "" - test_suite_info.owns_vm = "owns-vm" in test_suite and test_suite["owns-vm"] + test_suite_info.owns_vm = "owns_vm" in test_suite and test_suite["owns_vm"] + + test_suite_info.template = test_suite.get("template", "") return test_suite_info @staticmethod - def _load_test_classes(source_file: Path) -> List[Type[AgentTest]]: + def 
_load_test_class(relative_path: str) -> Type[AgentTest]: """ - Takes a 'source_file', which must be a Python module, and returns a list of all the classes derived from AgentTest. + Loads an AgentTest from its source code file, which is given as a path relative to WALinuxAgent/tests_e2e/tests. """ - spec = importlib.util.spec_from_file_location(f"tests_e2e.tests.{source_file.name}", str(source_file)) + full_path: Path = AgentTestLoader._SOURCE_CODE_ROOT/"tests"/relative_path + spec = importlib.util.spec_from_file_location(f"tests_e2e.tests.{relative_path.replace('/', '.').replace('.py', '')}", str(full_path)) module = importlib.util.module_from_spec(spec) spec.loader.exec_module(module) # return all the classes in the module that are subclasses of AgentTest but are not AgentTest itself. - return [v for v in module.__dict__.values() if isinstance(v, type) and issubclass(v, AgentTest) and v != AgentTest] + matches = [v for v in module.__dict__.values() if isinstance(v, type) and issubclass(v, AgentTest) and v != AgentTest] + if len(matches) != 1: + raise Exception(f"Error in {full_path} (each test file must contain exactly one class derived from AgentTest)") + return matches[0] @staticmethod def _load_images() -> Dict[str, List[VmImageInfo]]: diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index 847ace7560..373ad826a4 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -459,6 +459,8 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool: with _set_thread_name(suite_full_name): # The thread name is added to the LISA log log_path: Path = self.context.log_path/f"{suite_full_name}.log" with set_current_thread_log(log_path): + suite_success: bool = True + try: log.info("") log.info("**************************************** %s ****************************************", suite_name) @@ -467,54 +469,54 @@ def _execute_test_suite(self, suite: 
TestSuiteInfo) -> bool: summary: List[str] = [] for test in suite.tests: - test_name = test.__name__ - test_full_name = f"{suite_name}-{test_name}" + test_full_name = f"{suite_name}-{test.name}" test_start_time: datetime.datetime = datetime.datetime.now() - log.info("******** Executing %s", test_name) + log.info("******** Executing %s", test.name) self.context.lisa_log.info("Executing test %s", test_full_name) - try: + test_success: bool = True - test(self.context).run() + try: + test.test_class(self.context).run() - summary.append(f"[Passed] {test_name}") - log.info("******** [Passed] %s", test_name) + summary.append(f"[Passed] {test.name}") + log.info("******** [Passed] %s", test.name) self.context.lisa_log.info("[Passed] %s", test_full_name) self._report_test_result( suite_full_name, - test_name, + test.name, TestStatus.PASSED, test_start_time) except TestSkipped as e: - summary.append(f"[Skipped] {test_name}") - log.info("******** [Skipped] %s: %s", test_name, e) + summary.append(f"[Skipped] {test.name}") + log.info("******** [Skipped] %s: %s", test.name, e) self.context.lisa_log.info("******** [Skipped] %s", test_full_name) self._report_test_result( suite_full_name, - test_name, + test.name, TestStatus.SKIPPED, test_start_time, message=str(e)) except AssertionError as e: - success = False - summary.append(f"[Failed] {test_name}") - log.error("******** [Failed] %s: %s", test_name, e) + test_success = False + summary.append(f"[Failed] {test.name}") + log.error("******** [Failed] %s: %s", test.name, e) self.context.lisa_log.error("******** [Failed] %s", test_full_name) self._report_test_result( suite_full_name, - test_name, + test.name, TestStatus.FAILED, test_start_time, message=str(e)) except: # pylint: disable=bare-except - success = False - summary.append(f"[Error] {test_name}") - log.exception("UNHANDLED EXCEPTION IN %s", test_name) + test_success = False + summary.append(f"[Error] {test.name}") + log.exception("UNHANDLED EXCEPTION IN %s", test.name) 
self.context.lisa_log.exception("UNHANDLED EXCEPTION IN %s", test_full_name) self._report_test_result( suite_full_name, - test_name, + test.name, TestStatus.FAILED, test_start_time, message="Unhandled exception.", @@ -522,14 +524,21 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool: log.info("") - log.info("********* [Test Results]") + suite_success = suite_success and test_success + + if not test_success and test.blocks_suite: + log.warning("%s failed and blocks the suite. Stopping suite execution.", test.name) + break + + log.info("") + log.info("******** [Test Results]") log.info("") for r in summary: log.info("\t%s", r) log.info("") except: # pylint: disable=bare-except - success = False + suite_success = False self._report_test_result( suite_full_name, suite_name, @@ -538,7 +547,7 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool: message=f"Unhandled exception while executing test suite {suite_name}.", add_exception_stack_trace=True) finally: - if not success: + if not suite_success: self._mark_log_as_failed() return success @@ -562,7 +571,7 @@ def _check_agent_log(self) -> bool: # E1133: Non-iterable value self.context.test_suites is used in an iterating context (not-an-iterable) for suite in self.context.test_suites: # pylint: disable=E1133 for test in suite.tests: - ignore_error_rules.extend(test(self.context).get_ignore_error_rules()) + ignore_error_rules.extend(test.test_class(self.context).get_ignore_error_rules()) if len(ignore_error_rules) > 0: new = [] diff --git a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py index 423e542904..839f39613d 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py @@ -218,36 +218,44 @@ def create_environment_list(self) -> List[Dict[str, Any]]: else: vm_size = "" - if suite_info.owns_vm: - # create an environment for exclusive use by this suite - 
environment_list.append({ + # Note: Disabling "W0640: Cell variable 'foo' defined in loop (cell-var-from-loop)". This is a false positive, the closure is OK + # to use, since create_environment() is called within the same iteration of the loop. + # pylint: disable=W0640 + def create_environment(env_name: str) -> Dict[str, Any]: + tags = {} + if suite_info.template != '': + tags["templates"] = suite_info.template + return { "c_marketplace_image": marketplace_image, "c_cloud": self.runbook.cloud, "c_location": location, "c_vm_size": vm_size, "c_vhd": vhd, "c_test_suites": [suite_info], - "c_env_name": f"{name}-{suite_info.name}", + "c_env_name": env_name, "c_marketplace_image_information_location": self._MARKETPLACE_IMAGE_INFORMATION_LOCATIONS[self.runbook.cloud], - "c_shared_resource_group_location": self._SHARED_RESOURCE_GROUP_LOCATIONS[self.runbook.cloud] - }) + "c_shared_resource_group_location": self._SHARED_RESOURCE_GROUP_LOCATIONS[self.runbook.cloud], + "c_vm_tags": tags + } + # pylint: enable=W0640 + + if suite_info.owns_vm: + # create an environment for exclusive use by this suite + environment_list.append(create_environment(f"{name}-{suite_info.name}")) else: # add this suite to the shared environments key: str = f"{name}-{location}" - if key in shared_environments: - shared_environments[key]["c_test_suites"].append(suite_info) + environment = shared_environments.get(key) + if environment is not None: + environment["c_test_suites"].append(suite_info) + if suite_info.template != '': + vm_tags = environment["c_vm_tags"] + if "templates" in vm_tags: + vm_tags["templates"] += ", " + suite_info.template + else: + vm_tags["templates"] = suite_info.template else: - shared_environments[key] = { - "c_marketplace_image": marketplace_image, - "c_cloud": self.runbook.cloud, - "c_location": location, - "c_vm_size": vm_size, - "c_vhd": vhd, - "c_test_suites": [suite_info], - "c_env_name": key, - "c_marketplace_image_information_location": 
self._MARKETPLACE_IMAGE_INFORMATION_LOCATIONS[self.runbook.cloud], - "c_shared_resource_group_location": self._SHARED_RESOURCE_GROUP_LOCATIONS[self.runbook.cloud] - } + shared_environments[key] = create_environment(key) environment_list.extend(shared_environments.values()) @@ -256,18 +264,17 @@ def create_environment_list(self) -> List[Dict[str, Any]]: log: logging.Logger = logging.getLogger("lisa") log.info("") - log.info("******** Agent Test Environments *****") + log.info("******** Waagent: Test Environments *****") + log.info("") for environment in environment_list: test_suites = [s.name for s in environment['c_test_suites']] log.info("Settings for %s:\n%s\n", environment['c_env_name'], '\n'.join([f"\t{name}: {value if name != 'c_test_suites' else test_suites}" for name, value in environment.items()])) - log.info("***************************") log.info("") return environment_list _URN = re.compile(r"(?P[^\s:]+)[\s:](?P[^\s:]+)[\s:](?P[^\s:]+)[\s:](?P[^\s:]+)") - @staticmethod def _is_urn(urn: str) -> bool: # URNs can be given as ' ' or ':::' diff --git a/tests_e2e/orchestrator/lib/update_arm_template_hook.py b/tests_e2e/orchestrator/lib/update_arm_template_hook.py new file mode 100644 index 0000000000..c1c94f5522 --- /dev/null +++ b/tests_e2e/orchestrator/lib/update_arm_template_hook.py @@ -0,0 +1,67 @@ +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import importlib +import logging +from pathlib import Path +from typing import Any, Callable + +# Disable those warnings, since 'lisa' is an external, non-standard, dependency +# E0401: Unable to import 'lisa.*' (import-error) +# pylint: disable=E0401 +from lisa.environment import Environment +from lisa.util import hookimpl, plugin_manager +from lisa.sut_orchestrator.azure.platform_ import AzurePlatformSchema +# pylint: enable=E0401 + +import tests_e2e + + +class UpdateArmTemplateHook: + """ + This hook allows to customize the ARM template used to create the test VMs (see wiki for details). + """ + @hookimpl + def azure_update_arm_template(self, template: Any, environment: Environment) -> None: + azure_runbook: AzurePlatformSchema = environment.platform.runbook.get_extended_runbook(AzurePlatformSchema) + vm_tags = azure_runbook.vm_tags + templates = vm_tags.get("templates") + if templates is not None: + log: logging.Logger = logging.getLogger("lisa") + log.info("******** Waagent: Applying custom templates '%s' to environment '%s'", templates, environment.name) + + for t in templates.split(","): + update_arm_template = self._get_update_arm_template(t) + update_arm_template(template) + + _SOURCE_CODE_ROOT: Path = Path(tests_e2e.__path__[0]) + + @staticmethod + def _get_update_arm_template(template_path: str) -> Callable: + source_file: Path = UpdateArmTemplateHook._SOURCE_CODE_ROOT/"tests"/template_path + + spec = importlib.util.spec_from_file_location(f"tests_e2e.tests.templates.{source_file.name}", str(source_file)) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + + matches = [v for v in module.__dict__.values() if callable(v) and v.__name__ == "update_arm_template"] + if len(matches) != 1: + raise Exception(f"Could not find update_arm_template in {source_file}") + return matches[0] + + +plugin_manager.register(UpdateArmTemplateHook()) diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml 
index 8180be732c..15c39fff6f 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -67,6 +67,10 @@ variable: # Most of these variables are handled by LISA and are used to define the set of test VMs that need to be # created. The variables marked with 'is_case_visible' are also referenced by the AgentTestSuite. # + # 'c_vm_tags' is a special case: it is used by the azure_update_arm_template hook. This hook does not + # have access to the runbook variables, so instead we use a dummy VM tag named "template" to pass the + # name of the custom ARM template that the hook needs to use (see wiki for more details). + # - name: c_env_name value: "" is_case_visible: true @@ -90,6 +94,8 @@ variable: - name: c_test_suites value: [] is_case_visible: true + - name: c_vm_tags + value: {} # # Set these variables to use an SSH proxy when executing the runbook @@ -120,6 +126,7 @@ platform: # shared_resource_group_location: $(c_shared_resource_group_location) subscription_id: $(subscription_id) wait_delete: false + vm_tags: $(c_vm_tags) requirement: core_count: min: 2 diff --git a/tests_e2e/orchestrator/sample_runbooks/existing_vm.yml b/tests_e2e/orchestrator/sample_runbooks/existing_vm.yml index bbfcf8a7f9..f1e480311e 100644 --- a/tests_e2e/orchestrator/sample_runbooks/existing_vm.yml +++ b/tests_e2e/orchestrator/sample_runbooks/existing_vm.yml @@ -76,12 +76,8 @@ variable: is_case_visible: true # - # The values for these variables are generated by the AgentTestSuitesCombinator combinator. They are - # prefixed with "c_" to distinguish them from the rest of the variables, whose value can be set from - # the command line. - # - # Most of these variables are handled by LISA and are used to define the set of test VMs that need to be - # created. The variables marked with 'is_case_visible' are also referenced by the AgentTestSuite. + # The values for these variables are generated by the AgentTestSuitesCombinator. 
See + # tests_e2e/orchestrator/runbook.yml for details. # - name: c_env_name value: "" @@ -101,6 +97,8 @@ variable: - name: c_test_suites value: [] is_case_visible: true + - name: c_vm_tags + value: {} # # Set these variables to use an SSH proxy when executing the runbook @@ -129,6 +127,7 @@ platform: resource_group_name: $(resource_group_name) deploy: false subscription_id: $(subscription_id) + vm_tags: $(c_vm_tags) requirement: azure: name: $(c_vm_name) diff --git a/tests_e2e/pipeline/pipeline.yml b/tests_e2e/pipeline/pipeline.yml index b0f9ebaa18..255947859f 100644 --- a/tests_e2e/pipeline/pipeline.yml +++ b/tests_e2e/pipeline/pipeline.yml @@ -9,7 +9,7 @@ parameters: - name: test_suites displayName: Test Suites type: string - default: agent_bvt + default: agent_bvt, no_outbound_connections # NOTES: # * 'image', 'location' and 'vm_size' override any values in the test suites/images definition diff --git a/tests_e2e/test_suites/no_outbound_connections.yml b/tests_e2e/test_suites/no_outbound_connections.yml new file mode 100644 index 0000000000..23e3ef1ec2 --- /dev/null +++ b/tests_e2e/test_suites/no_outbound_connections.yml @@ -0,0 +1,21 @@ +# +# This suite is used to test the scenario where outbound connections are blocked on the VM. In this case, +# the agent should fallback to the HostGAPlugin to request any downloads. +# +# The suite uses a custom ARM template to create a VM with a Network Security Group that blocks all outbound +# connections. The first test in the suite verifies that the setup of the NSG was successful, then the rest +# of the tests exercise different extension operations. The last test in the suite checks the agent log +# to verify it did fallback to the HostGAPlugin to execute the extensions. +# +name: "NoOutboundConnections" +tests: + - source: "no_outbound_connections/check_no_outbound_connections.py" + blocks_suite: true # If the NSG is not setup correctly, there is no point in executing the rest of the tests. 
+ - "bvts/extension_operations.py" + - "bvts/run_command.py" + - "bvts/vm_access.py" + - "no_outbound_connections/check_fallback_to_hgap.py" +images: + - "ubuntu_2004" +template: "no_outbound_connections/nsg_template.py" +owns_vm: true diff --git a/tests_e2e/tests/bvts/run_command.py b/tests_e2e/tests/bvts/run_command.py index 188c12d3fa..5dc548583b 100755 --- a/tests_e2e/tests/bvts/run_command.py +++ b/tests_e2e/tests/bvts/run_command.py @@ -45,10 +45,7 @@ def __init__(self, extension: VmExtension, get_settings: Callable[[str], Dict[st self.get_settings = get_settings def run(self): - ssh_client = SshClient( - ip_address=self._context.vm_ip_address, - username=self._context.username, - private_key_file=self._context.private_key_file) + ssh_client: SshClient = self._context.create_ssh_client() test_cases = [ RunCommandBvt.TestCase( diff --git a/tests_e2e/tests/bvts/vm_access.py b/tests_e2e/tests/bvts/vm_access.py index 1af0f99e16..1db3780480 100755 --- a/tests_e2e/tests/bvts/vm_access.py +++ b/tests_e2e/tests/bvts/vm_access.py @@ -38,7 +38,7 @@ class VmAccessBvt(AgentTest): def run(self): - ssh: SshClient = SshClient(ip_address=self._context.vm_ip_address, username=self._context.username, private_key_file=self._context.private_key_file) + ssh: SshClient = self._context.create_ssh_client() if "-flatcar" in ssh.run_command("uname -a"): raise TestSkipped("Currently VMAccess is not supported on Flatcar") diff --git a/tests_e2e/tests/lib/agent_test_context.py b/tests_e2e/tests/lib/agent_test_context.py index 28d663f8ae..e791542894 100644 --- a/tests_e2e/tests/lib/agent_test_context.py +++ b/tests_e2e/tests/lib/agent_test_context.py @@ -21,6 +21,7 @@ import tests_e2e from tests_e2e.tests.lib.identifiers import VmIdentifier +from tests_e2e.tests.lib.ssh_client import SshClient class AgentTestContext: @@ -121,6 +122,16 @@ def ssh_port(self) -> int: """ return self._connection._ssh_port + def create_ssh_client(self) -> SshClient: + """ + Convenience method to create an SSH 
client using the connection info from the context. + """ + return SshClient( + ip_address=self.vm_ip_address, + username=self.username, + private_key_file=self.private_key_file, + port=self.ssh_port) + @staticmethod def from_args(): """ diff --git a/tests_e2e/tests/lib/retry.py b/tests_e2e/tests/lib/retry.py index bbd327cda3..e399efdda5 100644 --- a/tests_e2e/tests/lib/retry.py +++ b/tests_e2e/tests/lib/retry.py @@ -55,5 +55,5 @@ def retry_ssh_run(operation: Callable[[], Any]) -> Any: # Instance of 'Exception' has no 'exit_code' member (no-member) - Disabled: e is actually an CommandError if e.exit_code != 255 or attempts == 0: # pylint: disable=no-member raise - log.warning("The operation failed with %s, retrying in 30 secs.", e) + log.warning("The operation failed, retrying in 30 secs.\n%s", e) time.sleep(30) diff --git a/tests_e2e/tests/lib/shell.py b/tests_e2e/tests/lib/shell.py index a5288439a6..af5b30b80a 100644 --- a/tests_e2e/tests/lib/shell.py +++ b/tests_e2e/tests/lib/shell.py @@ -38,7 +38,7 @@ def __str__(self): def run_command(command: Any, shell=False) -> str: """ This function is a thin wrapper around Popen/communicate in the subprocess module. It executes the given command - and returns its stdout. If the command returns a non-zero exit code, the function raises a RunCommandException. + and returns its stdout. If the command returns a non-zero exit code, the function raises a CommandError. Similarly to Popen, the 'command' can be a string or a list of strings, and 'shell' indicates whether to execute the command through the shell. 
diff --git a/tests_e2e/tests/lib/ssh_client.py b/tests_e2e/tests/lib/ssh_client.py index a6e1ab9fd3..fda9911d92 100644 --- a/tests_e2e/tests/lib/ssh_client.py +++ b/tests_e2e/tests/lib/ssh_client.py @@ -34,7 +34,7 @@ def __init__(self, ip_address: str, username: str, private_key_file: Path, port: def run_command(self, command: str, use_sudo: bool = False) -> str: """ Executes the given command over SSH and returns its stdout. If the command returns a non-zero exit code, - the function raises a RunCommandException. + the function raises a CommandError. """ if re.match(r"^\s*sudo\s*", command): raise Exception("Do not include 'sudo' in the 'command' argument, use the 'use_sudo' parameter instead") diff --git a/tests_e2e/tests/no_outbound_connections/check_fallback_to_hgap.py b/tests_e2e/tests/no_outbound_connections/check_fallback_to_hgap.py new file mode 100755 index 0000000000..b767dc93cd --- /dev/null +++ b/tests_e2e/tests/no_outbound_connections/check_fallback_to_hgap.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from assertpy import assert_that + +from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.ssh_client import SshClient + + +class NoOutboundConnections(AgentTest): + """ + Check the agent log to verify that the default channel was changed to HostGAPlugin before executing any extensions. + """ + def run(self): + # 2023-04-14T14:49:43.005530Z INFO ExtHandler ExtHandler Default channel changed to HostGAPlugin channel. + # 2023-04-14T14:49:44.625061Z INFO ExtHandler [Microsoft.Azure.Monitor.AzureMonitorLinuxAgent-1.25.2] Target handler state: enabled [incarnation_2] + + ssh_client: SshClient = self._context.create_ssh_client() + log.info("Parsing agent log on the test VM") + output = ssh_client.run_command("grep -E 'INFO ExtHandler.*(Default channel changed to HostGAPlugin)|(Target handler state:)' /var/log/waagent.log | head").split('\n') + log.info("Output (first 10 lines) from the agent log:\n\t\t%s", '\n\t\t'.join(output)) + + assert_that(len(output) > 1).is_true().described_as( + "The agent log should contain multiple matching records" + ) + assert_that(output[0]).contains("Default channel changed to HostGAPlugin").described_as( + "The agent log should contain a record indicating that the default channel was changed to HostGAPlugin before executing any extensions" + ) + + log.info("The agent log indicates that the default channel was changed to HostGAPlugin before executing any extensions") + + +if __name__ == "__main__": + NoOutboundConnections.run_from_command_line() + diff --git a/tests_e2e/tests/no_outbound_connections/check_no_outbound_connections.py b/tests_e2e/tests/no_outbound_connections/check_no_outbound_connections.py new file mode 100755 index 0000000000..66cc707d26 --- /dev/null +++ b/tests_e2e/tests/no_outbound_connections/check_no_outbound_connections.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# 
Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from assertpy import fail + +from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.shell import CommandError +from tests_e2e.tests.lib.ssh_client import SshClient + + +class CheckNoOutboundConnections(AgentTest): + """ + Verifies that there is no outbound connectivity on the test VM. + """ + def run(self): + # This script is executed on the test VM. It tries to connect to a well-known DNS server (DNS is on port 53). 
+ script: str = """ +import socket, sys + +try: + socket.setdefaulttimeout(5) + socket.socket(socket.AF_INET, socket.SOCK_STREAM).connect(("8.8.8.8", 53)) +except socket.timeout: + print("No outbound connectivity [expected]") + exit(0) +print("There is outbound connectivity [unexpected: the custom ARM template should not allow it]", file=sys.stderr) +exit(1) +""" + ssh_client: SshClient = self._context.create_ssh_client() + try: + log.info("Verifying that there is no outbound connectivity on the test VM") + ssh_client.run_command("pypy3 -c '{0}'".format(script.replace('"', '\"'))) + log.info("There is no outbound connectivity, as expected.") + except CommandError as e: + if e.exit_code == 1 and "There is outbound connectivity" in e.stderr: + fail("There is outbound connectivity on the test VM, the custom ARM template should not allow it") + else: + raise Exception(f"Unexpected error while checking outbound connectivity on the test VM: {e}") + + +if __name__ == "__main__": + CheckNoOutboundConnections.run_from_command_line() + diff --git a/tests_e2e/tests/no_outbound_connections/nsg_template.py b/tests_e2e/tests/no_outbound_connections/nsg_template.py new file mode 100755 index 0000000000..8a7421f00e --- /dev/null +++ b/tests_e2e/tests/no_outbound_connections/nsg_template.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import json +from typing import Any + + +def update_arm_template(template: Any) -> None: + """ + Updates the ARM template to add a network security group that denies all outbound connections. + """ + resources = template["resources"] + + # Append the NSG to the list of resources + resources.append(json.loads(""" + { + "type": "Microsoft.Network/networkSecurityGroups", + "name": "no-outbound-connections", + "location": "[parameters('location')]", + "apiVersion": "2020-05-01", + "properties": { + "securityRules": [ + { + "name": "ssh_rule", + "properties": { + "description": "Allows inbound SSH connections.", + "protocol": "Tcp", + "sourcePortRange": "*", + "destinationPortRange": "22", + "sourceAddressPrefix": "*", + "destinationAddressPrefix": "*", + "access": "Allow", + "priority": 110, + "direction": "Inbound" + } + }, + { + "name": "outbound_rule", + "properties": { + "description": "Denies all outbound connections.", + "protocol": "*", + "sourcePortRange": "*", + "destinationPortRange": "*", + "sourceAddressPrefix": "*", + "destinationAddressPrefix": "Internet", + "access": "Deny", + "priority": 200, + "direction": "Outbound" + } + } + ] + } + } + """)) + + # Add a dependency of the deployment on the NSG + deployment_resource = _get_resource(resources, "Microsoft.Resources/deployments") + deployment_resource["dependsOn"].append("[resourceId('Microsoft.Network/networkSecurityGroups', 'no-outbound-connections')]") + + # Add reference to the NSG to the properties of the network interface + template_resources = deployment_resource["properties"]["template"]["resources"] + network_interface_resource = _get_resource(template_resources, "Microsoft.Network/networkInterfaces") + network_interface_resource["properties"].update(json.loads( + """ + { + "networkSecurityGroup": { + "id": "[resourceId('Microsoft.Network/networkSecurityGroups', 'no-outbound-connections')]" + } + } + """)) + + +def _get_resource(resources: Any, type_name: str) -> Any: + for item in 
resources: + if item["type"] == type_name: + return item + raise Exception(f"Cannot find a resource of type {type_name} in the ARM template") + + From 4ba20de52653286f535dd617ba2486fc6809120c Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Thu, 20 Apr 2023 17:18:56 -0700 Subject: [PATCH 004/240] Use cloud when validating test location (#2806) * Use cloud when validating test location --------- Co-authored-by: narrieta --- tests_e2e/orchestrator/lib/agent_test_loader.py | 9 +++++++-- .../orchestrator/lib/agent_test_suite_combinator.py | 4 ++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_loader.py b/tests_e2e/orchestrator/lib/agent_test_loader.py index 201d398131..193ee9f4da 100644 --- a/tests_e2e/orchestrator/lib/agent_test_loader.py +++ b/tests_e2e/orchestrator/lib/agent_test_loader.py @@ -79,11 +79,14 @@ class AgentTestLoader(object): """ Loads a given set of test suites from the YAML configuration files. """ - def __init__(self, test_suites: str): + def __init__(self, test_suites: str, cloud: str): """ Loads the specified 'test_suites', which are given as a string of comma-separated suite names or a YAML description of a single test_suite. + The 'cloud' parameter indicates the cloud on which the tests will run. It is used to validate any restrictions on the test suite and/or + images location. + When given as a comma-separated list, each item must correspond to the name of the YAML files describing s suite (those files are located under the .../WALinuxAgent/tests_e2e/test_suites directory). For example, if test_suites == "agent_bvt, fast_track" then this method will load files agent_bvt.yml and fast_track.yml. 
@@ -97,6 +100,7 @@ def __init__(self, test_suites: str): - "bvts/vm_access.py" """ self.__test_suites: List[TestSuiteInfo] = self._load_test_suites(test_suites) + self.__cloud: str = cloud self.__images: Dict[str, List[VmImageInfo]] = self._load_images() self._validate() @@ -130,7 +134,8 @@ def _validate(self): for image in self.images[suite_image]: # If the image has a location restriction, validate that it is available on the location the suite must run on if image.locations: - if not any(suite.location in l for l in image.locations.values()): + locations = image.locations.get(self.__cloud) + if locations is not None and not any(suite.location in l for l in locations): raise Exception(f"Test suite {suite.name} must be executed in {suite.location}, but <{image.urn}> is not available in that location") @staticmethod diff --git a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py index 839f39613d..efb0e6f212 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py @@ -116,7 +116,7 @@ def _next(self) -> Optional[Dict[str, Any]]: } def create_environment_for_existing_vm(self) -> List[Dict[str, Any]]: - loader = AgentTestLoader(self.runbook.test_suites) + loader = AgentTestLoader(self.runbook.test_suites, self.runbook.cloud) environment: List[Dict[str, Any]] = [ { @@ -137,7 +137,7 @@ def create_environment_for_existing_vm(self) -> List[Dict[str, Any]]: return environment def create_environment_list(self) -> List[Dict[str, Any]]: - loader = AgentTestLoader(self.runbook.test_suites) + loader = AgentTestLoader(self.runbook.test_suites, self.runbook.cloud) # # If the runbook provides any of 'image', 'location', or 'vm_size', those values From f02b31f1fbbb03a6c4a4513edbb7decac092d512 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Tue, 25 Apr 2023 09:39:40 -0700 Subject: [PATCH 005/240] Redact access tokens from extension's 
output (#2811) * Redact access tokens from extension's output * python 2.6 --------- Co-authored-by: narrieta --- .../common/utils/extensionprocessutil.py | 10 ++++++- tests/ga/test_extension.py | 27 +++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/azurelinuxagent/common/utils/extensionprocessutil.py b/azurelinuxagent/common/utils/extensionprocessutil.py index 9038f6145c..137f3aa2dd 100644 --- a/azurelinuxagent/common/utils/extensionprocessutil.py +++ b/azurelinuxagent/common/utils/extensionprocessutil.py @@ -18,6 +18,7 @@ # import os +import re import signal import time @@ -87,6 +88,9 @@ def handle_process_completion(process, command, timeout, stdout, stderr, error_c return process_output +SAS_TOKEN_RE = re.compile(r'(https://\S+\?)((sv|st|se|sr|sp|sip|spr|sig)=\S+)+', flags=re.IGNORECASE) + + def read_output(stdout, stderr): """ Read the output of the process sent to stdout and stderr and trim them to the max appropriate length. @@ -103,7 +107,11 @@ def read_output(stdout, stderr): stderr = ustr(stderr.read(TELEMETRY_MESSAGE_MAX_LEN), encoding='utf-8', errors='backslashreplace') - return format_stdout_stderr(stdout, stderr) + def redact(s): + # redact query strings that look like SAS tokens + return SAS_TOKEN_RE.sub(r'\1', s) + + return format_stdout_stderr(redact(stdout), redact(stderr)) except Exception as e: return format_stdout_stderr("", "Cannot read stdout/stderr: {0}".format(ustr(e))) diff --git a/tests/ga/test_extension.py b/tests/ga/test_extension.py index 2272a1907b..10f442749d 100644 --- a/tests/ga/test_extension.py +++ b/tests/ga/test_extension.py @@ -3418,6 +3418,33 @@ def http_get_handler(url, *_, **kwargs): self._assert_handler_status(protocol.report_vm_status, "Ready", 1, "1.0.0") self.assertEqual("1", protocol.report_vm_status.call_args[0][0].vmAgent.vm_artifacts_aggregate_status.goal_state_aggregate_status.in_svd_seq_no, "SVD sequence number mismatch") + def 
test_it_should_redact_access_tokens_in_extension_output(self): + original = r'''ONE https://foo.blob.core.windows.net/bar?sv=2000&ss=bfqt&srt=sco&sp=rw&se=2025&st=2022&spr=https&sig=SI%3D + TWO:HTTPS://bar.blob.core.com/foo/bar/foo.txt?sv=2018&sr=b&sig=Yx%3D&st=2023%3A52Z&se=9999%3A59%3A59Z&sp=r TWO + https://bar.com/foo?uid=2018&sr=b THREE''' + expected = r'''ONE https://foo.blob.core.windows.net/bar? + TWO:HTTPS://bar.blob.core.com/foo/bar/foo.txt? TWO + https://bar.com/foo?uid=2018&sr=b THREE''' + + with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + exthandlers_handler = get_exthandlers_handler(protocol) + + original_popen = subprocess.Popen + + def mock_popen(cmd, *args, **kwargs): + if cmd.endswith("sample.py -enable"): + cmd = "echo '{0}'; >&2 echo '{0}'; exit 1".format(original) + return original_popen(cmd, *args, **kwargs) + + with patch.object(subprocess, 'Popen', side_effect=mock_popen): + exthandlers_handler.run() + + status = exthandlers_handler.report_ext_handlers_status() + self.assertEqual(1, len(status.vmAgent.extensionHandlers), 'Expected exactly 1 extension status') + message = status.vmAgent.extensionHandlers[0].message + self.assertIn('[stdout]\n{0}'.format(expected), message, "The extension's stdout was not redacted correctly") + self.assertIn('[stderr]\n{0}'.format(expected), message, "The extension's stderr was not redacted correctly") + if __name__ == '__main__': unittest.main() From a46b9d2538d50fd5fc68a24022d0911073e15797 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 26 Apr 2023 16:44:45 -0700 Subject: [PATCH 006/240] Add @gabstamsft as code owner (#2813) Co-authored-by: narrieta --- CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CODEOWNERS b/CODEOWNERS index 8707e60a58..aebbe4c94d 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -21,4 +21,4 @@ # # Linux Agent team # -* @narrieta @ZhidongPeng @nagworld9 @maddieford +* @narrieta @ZhidongPeng @nagworld9 @maddieford @gabstamsft From 
5382a5e7b44ce17bccf3f30006db582258682f42 Mon Sep 17 00:00:00 2001 From: Long Li Date: Mon, 8 May 2023 11:21:42 -0700 Subject: [PATCH 007/240] Fix name of single IB device when provisioning RDMA (#2814) The current code assumes the ipoib interface name is ib0 when single IB interface is provisioned. This is not always true when udev rules are used to rename to other names like ibPxxxxx. Fix this by searching any interface name starting with "ib". --- azurelinuxagent/common/rdma.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/azurelinuxagent/common/rdma.py b/azurelinuxagent/common/rdma.py index 299b1a8a51..aabd05541e 100644 --- a/azurelinuxagent/common/rdma.py +++ b/azurelinuxagent/common/rdma.py @@ -419,28 +419,33 @@ def update_iboip_interfaces(self, mac_ip_array): @staticmethod def update_iboip_interface(ipv4_addr, timeout_sec, check_interval_sec): - logger.info("Wait for ib0 become available") + logger.info("Wait for ib become available") total_retries = timeout_sec / check_interval_sec n = 0 - found_ib0 = None - while not found_ib0 and n < total_retries: + found_ib = None + while not found_ib and n < total_retries: ret, output = shellutil.run_get_output("ifconfig -a") if ret != 0: raise Exception("Failed to list network interfaces") - found_ib0 = re.search("ib0", output, re.IGNORECASE) - if found_ib0: + found_ib = re.search(r"(ib\S+):", output, re.IGNORECASE) + if found_ib: break time.sleep(check_interval_sec) n += 1 - if not found_ib0: - raise Exception("ib0 is not available") + if not found_ib: + raise Exception("ib is not available") + + ibname = found_ib.groups()[0] + if shellutil.run("ifconfig {0} up".format(ibname)) != 0: + raise Exception("Could not run ifconfig {0} up".format(ibname)) netmask = 16 logger.info("RDMA: configuring IPv4 addr and netmask on ipoib interface") addr = '{0}/{1}'.format(ipv4_addr, netmask) - if shellutil.run("ifconfig ib0 {0}".format(addr)) != 0: - raise Exception("Could set addr to {0} 
on ib0".format(addr)) + if shellutil.run("ifconfig {0} {1}".format(ibname, addr)) != 0: + raise Exception("Could not set addr to {0} on {1}".format(addr, ibname)) + logger.info("RDMA: ipoib address and netmask configured on interface") @staticmethod From 0b7505e58f8bded9d472d68e81b9c1dbcdb1c279 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 10 May 2023 11:28:37 -0700 Subject: [PATCH 008/240] Allow tests to run on random images (#2817) * Allow tests to run on random images * PR feedback --------- Co-authored-by: narrieta --- .../orchestrator/lib/agent_test_loader.py | 7 + .../lib/agent_test_suite_combinator.py | 268 +++++++++++------- tests_e2e/orchestrator/runbook.yml | 2 +- .../test_suites/no_outbound_connections.yml | 3 +- 4 files changed, 171 insertions(+), 109 deletions(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_loader.py b/tests_e2e/orchestrator/lib/agent_test_loader.py index 193ee9f4da..fcfd35ae3c 100644 --- a/tests_e2e/orchestrator/lib/agent_test_loader.py +++ b/tests_e2e/orchestrator/lib/agent_test_loader.py @@ -15,6 +15,7 @@ # limitations under the License. # import importlib.util +import re # E0401: Unable to import 'yaml' (import-error) import yaml # pylint: disable=E0401 @@ -118,6 +119,9 @@ def images(self) -> Dict[str, List[VmImageInfo]]: """ return self.__images + # Matches a reference to a random subset of images within a set with an optional count: random(, []), e.g. 
random(endorsed, 3), random(endorsed) + RANDOM_IMAGES_RE = re.compile(r"random\((?P[^,]+)(\s*,\s*(?P\d+))?\)") + def _validate(self): """ Performs some basic validations on the data loaded from the YAML description files @@ -125,6 +129,9 @@ def _validate(self): for suite in self.test_suites: # Validate that the images the suite must run on are in images.yml for image in suite.images: + match = AgentTestLoader.RANDOM_IMAGES_RE.match(image) + if match is not None: + image = match.group('image_set') if image not in self.images: raise Exception(f"Invalid image reference in test suite {suite.name}: Can't find {image} in images.yml") diff --git a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py index efb0e6f212..2dcc41ac1d 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py @@ -1,6 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. 
import logging +import random import re import urllib.parse @@ -17,7 +18,7 @@ from lisa.combinator import Combinator # pylint: disable=E0401 from lisa.util import field_metadata # pylint: disable=E0401 -from tests_e2e.orchestrator.lib.agent_test_loader import AgentTestLoader, VmImageInfo +from tests_e2e.orchestrator.lib.agent_test_loader import AgentTestLoader, VmImageInfo, TestSuiteInfo @dataclass_json() @@ -118,138 +119,91 @@ def _next(self) -> Optional[Dict[str, Any]]: def create_environment_for_existing_vm(self) -> List[Dict[str, Any]]: loader = AgentTestLoader(self.runbook.test_suites, self.runbook.cloud) - environment: List[Dict[str, Any]] = [ - { - "c_env_name": self.runbook.vm_name, - "c_vm_name": self.runbook.vm_name, - "c_location": self.runbook.location, - "c_test_suites": loader.test_suites, - } - ] + environment: Dict[str, Any] = { + "c_env_name": self.runbook.vm_name, + "c_vm_name": self.runbook.vm_name, + "c_location": self.runbook.location, + "c_test_suites": loader.test_suites, + } log: logging.Logger = logging.getLogger("lisa") - log.info("******** Environment for existing VMs *****") - log.info( - "{ c_env_name: '%s', c_vm_name: '%s', c_location: '%s', c_test_suites: '%s' }", - environment[0]['c_env_name'], environment[0]['c_vm_name'], environment[0]['c_location'], [s.name for s in environment[0]['c_test_suites']]) - log.info("***************************") + log.info("******** Waagent: Settings for existing VM *****") + log.info("") + log.info("Settings for %s:\n%s\n", environment['c_env_name'], self._get_env_settings(environment)) + log.info("") - return environment + return [environment] def create_environment_list(self) -> List[Dict[str, Any]]: + """ + Examines the test_suites specified in the runbook and returns a list of the environments (i.e. test VMs) that need to be + created in order to execute these suites. 
+ + Note that if the runbook provides an 'image', 'location', or 'vm_size', those values override any values provided in the + configuration of the test suites. + """ + environments: List[Dict[str, Any]] = [] + shared_environments: Dict[str, Dict[str, Any]] = {} # environments shared by multiple test suites + loader = AgentTestLoader(self.runbook.test_suites, self.runbook.cloud) - # - # If the runbook provides any of 'image', 'location', or 'vm_size', those values - # override any configuration values on the test suite. - # - # Check 'images' first and add them to 'runbook_images', if any - # - if self.runbook.image == "": - runbook_images = [] - else: - runbook_images = loader.images.get(self.runbook.image) - if runbook_images is None: - if not self._is_urn(self.runbook.image) and not self._is_vhd(self.runbook.image): - raise Exception(f"The 'image' parameter must be an image or image set name, a urn, or a vhd: {self.runbook.image}") - i = VmImageInfo() - i.urn = self.runbook.image # Note that this could be a URN or the URI for a VHD - i.locations = [] - i.vm_sizes = [] - runbook_images = [i] - - # - # Now walk through all the test_suites and create a list of the environments (test VMs) that need to be created. - # - environment_list: List[Dict[str, Any]] = [] - shared_environments: Dict[str, Dict[str, Any]] = {} + runbook_images = self._get_runbook_images(loader) for suite_info in loader.test_suites: if len(runbook_images) > 0: - images_info = runbook_images + images_info: List[VmImageInfo] = runbook_images else: - # The test suite may be referencing multiple image sets, and sets can intersect, so we need to ensure - # we eliminate any duplicates. 
- unique_images: Dict[str, str] = {} - for image in suite_info.images: - for i in loader.images[image]: - unique_images[i] = i - images_info = unique_images.values() + images_info: List[VmImageInfo] = self._get_test_suite_images(suite_info, loader) for image in images_info: - # The URN can actually point to a VHD if the runbook provided a VHD in the 'images' parameter + # 'image.urn' can actually be the URL to a VHD if the runbook provided it in the 'image' parameter if self._is_vhd(image.urn): - marketplace_image = "" - vhd = image.urn - name = "vhd" + c_marketplace_image = "" + c_vhd = image.urn + image_name = "vhd" else: - marketplace_image = image.urn - vhd = "" - match = AgentTestSuitesCombinator._URN.match(image.urn) - if match is None: - raise Exception(f"Invalid URN: {image.urn}") - name = f"{match.group('offer')}-{match.group('sku')}" - - location: str = None - # If the runbook specified a location, use it. - if self.runbook.location != "": - location = self.runbook.location - # Then try the suite location, if any. - elif suite_info.location != '': - location = suite_info.location - # If the image has a location restriction, use any location where it is available. - # However, if it is not available on any location, skip the image. - elif image.locations: - image_locations = image.locations.get(self.runbook.cloud) - if image_locations is not None: - if len(image_locations) == 0: - continue - location = image_locations[0] - # If no location has been selected, use the default. - if location is None: - location = AgentTestSuitesCombinator._DEFAULT_LOCATIONS[self.runbook.cloud] - - # If the runbook specified a VM size, use it. Else if the image specifies a list of VM sizes, use any of them. Otherwise, - # set the size to empty and let LISA choose it. 
- if self.runbook.vm_size != '': - vm_size = self.runbook.vm_size - elif len(image.vm_sizes) > 0: - vm_size = image.vm_sizes[0] - else: - vm_size = "" + c_marketplace_image = image.urn + c_vhd = "" + image_name = self._get_image_name(image.urn) + + c_location: str = self._get_location(suite_info, image) + if c_location is None: + continue + + c_vm_size = self._get_vm_size(image) # Note: Disabling "W0640: Cell variable 'foo' defined in loop (cell-var-from-loop)". This is a false positive, the closure is OK # to use, since create_environment() is called within the same iteration of the loop. # pylint: disable=W0640 - def create_environment(env_name: str) -> Dict[str, Any]: - tags = {} + def create_environment(c_env_name: str) -> Dict[str, Any]: + c_vm_tags = {} if suite_info.template != '': - tags["templates"] = suite_info.template + c_vm_tags["templates"] = suite_info.template return { - "c_marketplace_image": marketplace_image, + "c_marketplace_image": c_marketplace_image, "c_cloud": self.runbook.cloud, - "c_location": location, - "c_vm_size": vm_size, - "c_vhd": vhd, + "c_location": c_location, + "c_vm_size": c_vm_size, + "c_vhd": c_vhd, "c_test_suites": [suite_info], - "c_env_name": env_name, + "c_env_name": c_env_name, "c_marketplace_image_information_location": self._MARKETPLACE_IMAGE_INFORMATION_LOCATIONS[self.runbook.cloud], "c_shared_resource_group_location": self._SHARED_RESOURCE_GROUP_LOCATIONS[self.runbook.cloud], - "c_vm_tags": tags + "c_vm_tags": c_vm_tags } # pylint: enable=W0640 if suite_info.owns_vm: # create an environment for exclusive use by this suite - environment_list.append(create_environment(f"{name}-{suite_info.name}")) + environments.append(create_environment(f"{image_name}-{suite_info.name}")) else: # add this suite to the shared environments - key: str = f"{name}-{location}" - environment = shared_environments.get(key) - if environment is not None: - environment["c_test_suites"].append(suite_info) + key: str = f"{image_name}-{c_location}" 
+ env = shared_environments.get(key) + if env is not None: + env["c_test_suites"].append(suite_info) if suite_info.template != '': - vm_tags = environment["c_vm_tags"] + vm_tags = env["c_vm_tags"] if "templates" in vm_tags: vm_tags["templates"] += ", " + suite_info.template else: @@ -257,21 +211,123 @@ def create_environment(env_name: str) -> Dict[str, Any]: else: shared_environments[key] = create_environment(key) - environment_list.extend(shared_environments.values()) + environments.extend(shared_environments.values()) - if len(environment_list) == 0: + if len(environments) == 0: raise Exception("No VM images were found to execute the test suites.") log: logging.Logger = logging.getLogger("lisa") log.info("") log.info("******** Waagent: Test Environments *****") log.info("") - for environment in environment_list: - test_suites = [s.name for s in environment['c_test_suites']] - log.info("Settings for %s:\n%s\n", environment['c_env_name'], '\n'.join([f"\t{name}: {value if name != 'c_test_suites' else test_suites}" for name, value in environment.items()])) + for env in environments: + log.info("Settings for %s:\n%s\n", env['c_env_name'], self._get_env_settings(env)) log.info("") - return environment_list + return environments + + def _get_runbook_images(self, loader: AgentTestLoader) -> List[VmImageInfo]: + """ + Returns the images specified in the runbook, or an empty list if none are specified. 
+ """ + if self.runbook.image == "": + return [] + + images = loader.images.get(self.runbook.image) + if images is not None: + return images + + # If it is not image or image set, it must be a URN or VHD + if not self._is_urn(self.runbook.image) and not self._is_vhd(self.runbook.image): + raise Exception(f"The 'image' parameter must be an image, an image set name, a urn, or a vhd: {self.runbook.image}") + + i = VmImageInfo() + i.urn = self.runbook.image # Note that this could be a URN or the URI for a VHD + i.locations = [] + i.vm_sizes = [] + + return [i] + + @staticmethod + def _get_test_suite_images(suite: TestSuiteInfo, loader: AgentTestLoader) -> List[VmImageInfo]: + """ + Returns the images used by a test suite. + + A test suite may be reference multiple image sets and sets can intersect; this method eliminates any duplicates. + """ + unique: Dict[str, VmImageInfo] = {} + for image in suite.images: + match = AgentTestLoader.RANDOM_IMAGES_RE.match(image) + if match is None: + image_list = loader.images[image] + else: + count = match.group('count') + if count is None: + count = 1 + matching_images = loader.images[match.group('image_set')].copy() + random.shuffle(matching_images) + image_list = matching_images[0:count] + for i in image_list: + unique[i.urn] = i + return [v for k, v in unique.items()] + + def _get_location(self, suite_info: TestSuiteInfo, image: VmImageInfo) -> str: + """ + Returns the location on which the test VM for the given test suite and image should be created. + + If the image is not available on any location, returns None, to indicate that the test suite should be skipped. + """ + # If the runbook specified a location, use it. + if self.runbook.location != "": + return self.runbook.location + + # Then try the suite location, if any. + if suite_info.location != '': + return suite_info.location + + # If the image has a location restriction, use any location where it is available. 
+ # However, if it is not available on any location, skip the image (return None) + if image.locations: + image_locations = image.locations.get(self.runbook.cloud) + if image_locations is not None: + if len(image_locations) == 0: + return None + return image_locations[0] + + # Else use the default. + return AgentTestSuitesCombinator._DEFAULT_LOCATIONS[self.runbook.cloud] + + def _get_vm_size(self, image: VmImageInfo) -> str: + """ + Returns the VM size that should be used to create the test VM for the given image. + + If the size is set to an empty string, LISA will choose an appropriate size + """ + # If the runbook specified a VM size, use it. + if self.runbook.vm_size != '': + return self.runbook.vm_size + + # If the image specifies a list of VM sizes, use any of them. + if len(image.vm_sizes) > 0: + return image.vm_sizes[0] + + # Otherwise, set the size to empty and LISA will select an appropriate size. + return "" + + @staticmethod + def _get_image_name(urn: str) -> str: + """ + Creates an image name ("offer-sku") given its URN + """ + match = AgentTestSuitesCombinator._URN.match(urn) + if match is None: + raise Exception(f"Invalid URN: {urn}") + return f"{match.group('offer')}-{match.group('sku')}" + + @staticmethod + def _get_env_settings(environment: Dict[str, Any]): + suite_names = [s.name for s in environment['c_test_suites']] + return '\n'.join([f"\t{name}: {value if name != 'c_test_suites' else suite_names}" for name, value in environment.items()]) _URN = re.compile(r"(?P[^\s:]+)[\s:](?P[^\s:]+)[\s:](?P[^\s:]+)[\s:](?P[^\s:]+)") diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index 15c39fff6f..eb0ad5afec 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -49,7 +49,7 @@ variable: # # The test suites to execute - name: test_suites - value: "agent_bvt" + value: "agent_bvt, no_outbound_connections" - name: cloud value: "AzureCloud" - name: image diff --git 
a/tests_e2e/test_suites/no_outbound_connections.yml b/tests_e2e/test_suites/no_outbound_connections.yml index 23e3ef1ec2..6cf6c490f7 100644 --- a/tests_e2e/test_suites/no_outbound_connections.yml +++ b/tests_e2e/test_suites/no_outbound_connections.yml @@ -15,7 +15,6 @@ tests: - "bvts/run_command.py" - "bvts/vm_access.py" - "no_outbound_connections/check_fallback_to_hgap.py" -images: - - "ubuntu_2004" +images: "random(endorsed)" template: "no_outbound_connections/nsg_template.py" owns_vm: true From 328421b53564d9484581538740d3eccfec34ffd4 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Tue, 16 May 2023 13:57:47 -0700 Subject: [PATCH 009/240] Bug fixes for end-to-end tests (#2820) Co-authored-by: narrieta --- tests_e2e/orchestrator/lib/agent_test_suite.py | 6 ++---- tests_e2e/orchestrator/lib/agent_test_suite_combinator.py | 1 - tests_e2e/orchestrator/runbook.yml | 4 +--- tests_e2e/orchestrator/sample_runbooks/existing_vm.yml | 4 +--- tests_e2e/tests/lib/vm_extension.py | 4 +--- 5 files changed, 5 insertions(+), 14 deletions(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index 373ad826a4..d09361a963 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -135,7 +135,7 @@ def _initialize(self, node: Node, variables: Dict[str, Any], lisa_working_path: self.__context = self._Context( vm=VmIdentifier( - cloud=self._get_required_parameter(variables, "c_cloud"), + cloud=self._get_required_parameter(variables, "cloud"), location=self._get_required_parameter(variables, "c_location"), subscription=node.features._platform.subscription_id, resource_group=node_context.resource_group_name, @@ -454,8 +454,6 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool: suite_full_name = f"{suite_name}-{self.context.environment_name}" suite_start_time: datetime.datetime = datetime.datetime.now() - success: bool = True # True if all the tests succeed - 
with _set_thread_name(suite_full_name): # The thread name is added to the LISA log log_path: Path = self.context.log_path/f"{suite_full_name}.log" with set_current_thread_log(log_path): @@ -550,7 +548,7 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool: if not suite_success: self._mark_log_as_failed() - return success + return suite_success def _check_agent_log(self) -> bool: """ diff --git a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py index 2dcc41ac1d..bc14cd7427 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py @@ -181,7 +181,6 @@ def create_environment(c_env_name: str) -> Dict[str, Any]: c_vm_tags["templates"] = suite_info.template return { "c_marketplace_image": c_marketplace_image, - "c_cloud": self.runbook.cloud, "c_location": c_location, "c_vm_size": c_vm_size, "c_vhd": c_vhd, diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index eb0ad5afec..7579c1d88c 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -52,6 +52,7 @@ variable: value: "agent_bvt, no_outbound_connections" - name: cloud value: "AzureCloud" + is_case_visible: true - name: image value: "" - name: location @@ -82,9 +83,6 @@ variable: value: "" - name: c_vm_size value: "" - - name: c_cloud - value: "" - is_case_visible: true - name: c_location value: "" is_case_visible: true diff --git a/tests_e2e/orchestrator/sample_runbooks/existing_vm.yml b/tests_e2e/orchestrator/sample_runbooks/existing_vm.yml index f1e480311e..b3fafa0a67 100644 --- a/tests_e2e/orchestrator/sample_runbooks/existing_vm.yml +++ b/tests_e2e/orchestrator/sample_runbooks/existing_vm.yml @@ -33,6 +33,7 @@ variable: # - name: cloud value: "AzureCloud" + is_case_visible: true - name: subscription_id value: "" - name: resource_group_name @@ -88,9 +89,6 @@ variable: value: "" - name: 
c_shared_resource_group_location value: "" - - name: c_cloud - value: "" - is_case_visible: true - name: c_location value: "" is_case_visible: true diff --git a/tests_e2e/tests/lib/vm_extension.py b/tests_e2e/tests/lib/vm_extension.py index abefcc723f..bf7a41a44d 100644 --- a/tests_e2e/tests/lib/vm_extension.py +++ b/tests_e2e/tests/lib/vm_extension.py @@ -107,9 +107,7 @@ def enable( extension_parameters ).result(timeout=_TIMEOUT)) - if result.provisioning_state not in ('Succeeded', 'Updating'): - raise Exception(f"Enable {self._identifier} failed. Provisioning state: {result.provisioning_state}") - log.info("Enable completed (provisioning state: %s).", result.provisioning_state) + log.info("Enable completed. Provisioning state: %s", result.provisioning_state) def get_instance_view(self) -> VirtualMachineExtensionInstanceView: # TODO: Check type for scale sets """ From df630ff9d54dec9693e0ab8ee1f617d9b8d8c813 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 17 May 2023 06:46:32 -0700 Subject: [PATCH 010/240] Enable all Azure clouds on end-to-end tests (#2821) Co-authored-by: narrieta --- tests_e2e/orchestrator/runbook.yml | 9 +++------ tests_e2e/orchestrator/sample_runbooks/existing_vm.yml | 9 +++------ tests_e2e/pipeline/pipeline-cleanup.yml | 7 ++----- 3 files changed, 8 insertions(+), 17 deletions(-) diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index 7579c1d88c..d24e68f290 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -116,12 +116,9 @@ platform: keep_environment: $(keep_environment) azure: deploy: True -# -# TODO: Enable these parameters once LISA supports all Azure clouds -# -# cloud: $(cloud) -# marketplace_image_information_location: $(c_marketplace_image_information_location) -# shared_resource_group_location: $(c_shared_resource_group_location) + cloud: $(cloud) + marketplace_image_information_location: $(c_marketplace_image_information_location) + 
shared_resource_group_location: $(c_shared_resource_group_location) subscription_id: $(subscription_id) wait_delete: false vm_tags: $(c_vm_tags) diff --git a/tests_e2e/orchestrator/sample_runbooks/existing_vm.yml b/tests_e2e/orchestrator/sample_runbooks/existing_vm.yml index b3fafa0a67..8ef5baba28 100644 --- a/tests_e2e/orchestrator/sample_runbooks/existing_vm.yml +++ b/tests_e2e/orchestrator/sample_runbooks/existing_vm.yml @@ -116,12 +116,9 @@ platform: admin_username: $(user) admin_private_key_file: $(identity_file) azure: -# -# TODO: Enable these parameters once LISA supports all Azure clouds -# -# cloud: $(cloud) -# marketplace_image_information_location: $(c_marketplace_image_information_location) -# shared_resource_group_location: $(c_shared_resource_group_location) + cloud: $(cloud) + marketplace_image_information_location: $(c_marketplace_image_information_location) + shared_resource_group_location: $(c_shared_resource_group_location) resource_group_name: $(resource_group_name) deploy: false subscription_id: $(subscription_id) diff --git a/tests_e2e/pipeline/pipeline-cleanup.yml b/tests_e2e/pipeline/pipeline-cleanup.yml index b82ad53eea..961778cf68 100644 --- a/tests_e2e/pipeline/pipeline-cleanup.yml +++ b/tests_e2e/pipeline/pipeline-cleanup.yml @@ -19,11 +19,8 @@ parameters: type: object default: - azuremanagement -# -# TODO: Enable these services connections once we create test pipelines for all Azure clouds -# -# - azuremanagement.china -# - azuremanagement.government + - azuremanagement.china + - azuremanagement.government pool: vmImage: ubuntu-latest From c7bef385d87f70a887e2cca6ad1c30c103450827 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 17 May 2023 10:06:28 -0700 Subject: [PATCH 011/240] Add Azure CLI to container image (#2822) Co-authored-by: narrieta --- tests_e2e/orchestrator/docker/Dockerfile | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tests_e2e/orchestrator/docker/Dockerfile 
b/tests_e2e/orchestrator/docker/Dockerfile index a748ff0b83..f248f8007c 100644 --- a/tests_e2e/orchestrator/docker/Dockerfile +++ b/tests_e2e/orchestrator/docker/Dockerfile @@ -42,10 +42,25 @@ RUN \ # \ groupadd waagent && \ useradd --shell /bin/bash --create-home -g waagent waagent && \ + \ + # \ + # Install the Azure CLI \ + # \ + apt-get install ca-certificates curl apt-transport-https lsb-release gnupg && \ + mkdir -p /etc/apt/keyrings && \ + curl -sLS https://packages.microsoft.com/keys/microsoft.asc \ + | gpg --dearmor \ + | tee /etc/apt/keyrings/microsoft.gpg > /dev/null && \ + chmod go+r /etc/apt/keyrings/microsoft.gpg && \ + AZ_REPO=$(lsb_release -cs) && \ + echo "deb [arch=`dpkg --print-architecture` signed-by=/etc/apt/keyrings/microsoft.gpg] https://packages.microsoft.com/repos/azure-cli/ $AZ_REPO main" \ + | tee /etc/apt/sources.list.d/azure-cli.list && \ + apt-get update && \ + apt-get install azure-cli && \ : # -# Do the Poetry and LISA setup as waagent +# Install LISA as user waagent # USER waagent From 3d713a2d89e1be974085f12cb863e982fcf3c4b4 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Thu, 18 May 2023 12:10:41 -0700 Subject: [PATCH 012/240] Fixes for Azure clouds (#2823) * Fixes for Azure clouds * add debug info --------- Co-authored-by: narrieta --- tests_e2e/orchestrator/lib/agent_junit.py | 14 ++++++++++---- .../lib/agent_test_suite_combinator.py | 1 + tests_e2e/orchestrator/scripts/install-agent | 11 ++++++++--- tests_e2e/test_suites/images.yml | 11 ++++++++++- 4 files changed, 29 insertions(+), 8 deletions(-) diff --git a/tests_e2e/orchestrator/lib/agent_junit.py b/tests_e2e/orchestrator/lib/agent_junit.py index a8ff8eb6c5..78b7e35845 100644 --- a/tests_e2e/orchestrator/lib/agent_junit.py +++ b/tests_e2e/orchestrator/lib/agent_junit.py @@ -29,6 +29,7 @@ from lisa.messages import ( # pylint: disable=E0401 MessageBase, TestResultMessage, + TestStatus ) @@ -48,19 +49,24 @@ def type_schema(cls) -> Type[schema.TypedSchema]: return 
AgentJUnitSchema def _received_message(self, message: MessageBase) -> None: - # The Agent sends its own TestResultMessage and marks them as "AgentTestResultMessage"; for the - # test results sent by LISA itself, we change the suite name to "_Runbook_" in order to separate them - # from actual test results. + # The Agent sends its own TestResultMessages setting their type as "AgentTestResultMessage". + # Any other message types are sent by LISA. if isinstance(message, TestResultMessage) and message.type != "AgentTestResultMessage": if "Unexpected error in AgentTestSuite" in message.message: # Ignore these errors, they are already reported as AgentTestResultMessages return + # Change the suite name to "_Runbook_" for LISA messages in order to separate them + # from actual test results. message.suite_full_name = "_Runbook_" message.suite_name = message.suite_full_name image = message.information.get('image') if image is not None: - # NOTE: message.information['environment'] is similar to "[generated_2]" and can be correlated + # NOTE: The value of message.information['environment'] is similar to "[generated_2]" and can be correlated # with the main LISA log to find the specific VM for the message. message.full_name = f"{image} [{message.information['environment']}]" message.name = message.full_name + # LISA silently skips tests on situations that should be errors (e.g. trying to create a test VM using an image that is not available). + # Mark these messages as failed so that the JUnit report shows them as errors. 
+ if message.status == TestStatus.SKIPPED: + message.status = TestStatus.FAILED super()._received_message(message) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py index bc14cd7427..82915dcfb0 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py @@ -219,6 +219,7 @@ def create_environment(c_env_name: str) -> Dict[str, Any]: log.info("") log.info("******** Waagent: Test Environments *****") log.info("") + log.info("Will execute tests on %d environments:\n\n\t%s\n", len(environments), '\n\t'.join([env['c_env_name'] for env in environments])) for env in environments: log.info("Settings for %s:\n%s\n", env['c_env_name'], self._get_env_settings(env)) log.info("") diff --git a/tests_e2e/orchestrator/scripts/install-agent b/tests_e2e/orchestrator/scripts/install-agent index 14663d0b8d..513487b211 100755 --- a/tests_e2e/orchestrator/scripts/install-agent +++ b/tests_e2e/orchestrator/scripts/install-agent @@ -70,16 +70,21 @@ if service-status walinuxagent > /dev/null 2>&1;then else service_name="waagent" fi -echo "Service name: $service_name" # # Output the initial version of the agent # python=$(get-agent-python) waagent=$(get-agent-bin-path) -echo "Agent's path: $waagent" + +echo "============================================================" +echo "Service Name: $service_name" +echo "Agent Path: $waagent" +echo "Agent Version:" $python "$waagent" --version -printf "\n" +echo "Service Status:" +service-status $service_name +echo "============================================================" # # Install the package diff --git a/tests_e2e/test_suites/images.yml b/tests_e2e/test_suites/images.yml index 6fef5314dd..a19105710b 100644 --- a/tests_e2e/test_suites/images.yml +++ b/tests_e2e/test_suites/images.yml @@ -80,6 +80,7 @@ images: urn: "Debian debian-11 11-backports-arm64 latest" locations: AzureUSGovernment: [] 
+ AzureChinaCloud: [] flatcar: urn: "kinvolk flatcar-container-linux-free stable latest" locations: @@ -89,6 +90,7 @@ images: urn: "kinvolk flatcar-container-linux-corevm stable latest" locations: AzureChinaCloud: [] + AzureUSGovernment: [] vm_sizes: - "Standard_D2pls_v5" mariner_1: @@ -100,6 +102,8 @@ images: urn: "microsoftcblmariner cbl-mariner cbl-mariner-2-arm64 latest" locations: AzureCloud: ["eastus"] + AzureChinaCloud: [] + AzureUSGovernment: [] vm_sizes: - "Standard_D2pls_v5" rocky_9: @@ -125,8 +129,13 @@ images: urn: "RedHat rhel-arm64 9_0-arm64 latest" locations: AzureChinaCloud: [] + AzureUSGovernment: [] ubuntu_1604: "Canonical UbuntuServer 16.04-LTS latest" ubuntu_1804: "Canonical UbuntuServer 18.04-LTS latest" ubuntu_2004: "Canonical 0001-com-ubuntu-server-focal 20_04-lts latest" ubuntu_2204: "Canonical 0001-com-ubuntu-server-jammy 22_04-lts latest" - ubuntu_2204_arm64: "Canonical 0001-com-ubuntu-server-jammy 22_04-lts-arm64 latest" + ubuntu_2204_arm64: + urn: "Canonical 0001-com-ubuntu-server-jammy 22_04-lts-arm64 latest" + locations: + AzureChinaCloud: [] + AzureUSGovernment: [] From 93b95ba85778ef5a57184a59ded26c1a1fefa7d9 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Tue, 23 May 2023 10:21:27 -0700 Subject: [PATCH 013/240] Add test for extensions disabled; refactor VirtualMachine and VmExtension (#2824) * Add test for extensions disabled; refactor VirtualMachine and VmExtension --------- Co-authored-by: narrieta --- test-requirements.txt | 1 + tests_e2e/orchestrator/docker/Dockerfile | 2 +- .../orchestrator/lib/agent_test_suite.py | 21 ++- tests_e2e/orchestrator/runbook.yml | 2 +- tests_e2e/orchestrator/scripts/agent-service | 85 ++++++++++ .../orchestrator/scripts/update-waagent-conf | 41 +++++ tests_e2e/pipeline/pipeline.yml | 2 +- tests_e2e/test_suites/extensions_disabled.yml | 9 + tests_e2e/tests/bvts/extension_operations.py | 8 +- tests_e2e/tests/bvts/run_command.py | 8 +- tests_e2e/tests/bvts/vm_access.py | 4 +- 
tests_e2e/tests/extensions_disabled.py | 86 ++++++++++ tests_e2e/tests/lib/azure_client.py | 44 +++++ tests_e2e/tests/lib/virtual_machine.py | 155 ------------------ tests_e2e/tests/lib/virtual_machine_client.py | 122 ++++++++++++++ ...py => virtual_machine_extension_client.py} | 150 +++++------------ 16 files changed, 456 insertions(+), 284 deletions(-) create mode 100755 tests_e2e/orchestrator/scripts/agent-service create mode 100755 tests_e2e/orchestrator/scripts/update-waagent-conf create mode 100644 tests_e2e/test_suites/extensions_disabled.yml create mode 100755 tests_e2e/tests/extensions_disabled.py create mode 100644 tests_e2e/tests/lib/azure_client.py delete mode 100644 tests_e2e/tests/lib/virtual_machine.py create mode 100644 tests_e2e/tests/lib/virtual_machine_client.py rename tests_e2e/tests/lib/{vm_extension.py => virtual_machine_extension_client.py} (59%) diff --git a/test-requirements.txt b/test-requirements.txt index 3576621706..89a2bb2c5d 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -19,3 +19,4 @@ azure-identity azure-mgmt-compute>=22.1.0 azure-mgmt-resource>=15.0.0 msrestazure +pytz diff --git a/tests_e2e/orchestrator/docker/Dockerfile b/tests_e2e/orchestrator/docker/Dockerfile index f248f8007c..2d709c7913 100644 --- a/tests_e2e/orchestrator/docker/Dockerfile +++ b/tests_e2e/orchestrator/docker/Dockerfile @@ -80,7 +80,7 @@ RUN \ # \ # Install additional test dependencies \ # \ - python3 -m pip install distro msrestazure && \ + python3 -m pip install distro msrestazure pytz && \ python3 -m pip install azure-mgmt-compute --upgrade && \ \ # \ diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index d09361a963..2e84c59a3f 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -203,12 +203,26 @@ def context(self): # # Test suites within the same runbook may be executed concurrently, and setup needs to be done only 
once. - # We use this lock to allow only 1 thread to do the setup. Setup completion is marked using the 'completed' + # We use these locks to allow only 1 thread to do the setup. Setup completion is marked using the 'completed' # file: the thread doing the setup creates the file and threads that find that the file already exists # simply skip setup. # + _working_directory_lock = RLock() _setup_lock = RLock() + def _create_working_directory(self) -> None: + """ + Creates the working directory for the test suite. + """ + self._working_directory_lock.acquire() + + try: + if not self.context.working_directory.exists(): + log.info("Creating working directory: %s", self.context.working_directory) + self.context.working_directory.mkdir(parents=True) + finally: + self._working_directory_lock.release() + def _setup(self) -> None: """ Prepares the test suite for execution (currently, it just builds the agent package) @@ -228,9 +242,6 @@ def _setup(self) -> None: return self.context.lisa_log.info("Building test agent") - log.info("Creating working directory: %s", self.context.working_directory) - self.context.working_directory.mkdir(parents=True) - self._build_agent_package() log.info("Completed setup, creating %s", completed) @@ -407,6 +418,8 @@ def _execute(self, environment: Environment, variables: Dict[str, Any]): test_suite_success = True try: + self._create_working_directory() + if not self.context.skip_setup: self._setup() diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index d24e68f290..eb4ee2e996 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -49,7 +49,7 @@ variable: # # The test suites to execute - name: test_suites - value: "agent_bvt, no_outbound_connections" + value: "agent_bvt, no_outbound_connections, extensions_disabled" - name: cloud value: "AzureCloud" is_case_visible: true diff --git a/tests_e2e/orchestrator/scripts/agent-service b/tests_e2e/orchestrator/scripts/agent-service 
new file mode 100755 index 0000000000..d740ef8f41 --- /dev/null +++ b/tests_e2e/orchestrator/scripts/agent-service @@ -0,0 +1,85 @@ +#!/usr/bin/env bash + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -euo pipefail + +# +# The service name is walinuxagent in Ubuntu/debian and waagent elsewhere +# + +usage() ( + echo "Usage: agent-service command" + exit 1 +) + +if [ "$#" -lt 1 ]; then + usage +fi +cmd=$1 +shift + +if [ "$#" -ne 0 ] || [ -z ${cmd+x} ] ; then + usage +fi + +if command -v systemctl &> /dev/null; then + service-status() { systemctl --no-pager -l status $1; } + service-stop() { systemctl stop $1; } + service-restart() { systemctl restart $1; } + service-start() { systemctl start $1; } +else + service-status() { service $1 status; } + service-stop() { service $1 stop; } + service-restart() { service $1 restart; } + service-start() { service $1 start; } +fi + +python=$(get-agent-python) +distro=$($python -c 'from azurelinuxagent.common.version import get_distro; print(get_distro()[0])') +distro=$(echo $distro | tr '[:upper:]' '[:lower:]') + +if [[ $distro == *"ubuntu"* || $distro == *"debian"* ]]; then + service_name="walinuxagent" +else + service_name="waagent" +fi + +echo "Service name: $service_name" + +if [[ "$cmd" == "restart" ]]; then + echo "Restarting service..." + service-restart $service_name + echo "Service status..." 
+ service-status $service_name +fi + +if [[ "$cmd" == "start" ]]; then + echo "Starting service..." + service-start $service_name +fi + +if [[ "$cmd" == "stop" ]]; then + echo "Stopping service..." + service-stop $service_name +fi + +if [[ "$cmd" == "status" ]]; then + echo "Service status..." + service-status $service_name +fi diff --git a/tests_e2e/orchestrator/scripts/update-waagent-conf b/tests_e2e/orchestrator/scripts/update-waagent-conf new file mode 100755 index 0000000000..13cfd45401 --- /dev/null +++ b/tests_e2e/orchestrator/scripts/update-waagent-conf @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# Updates waagent.conf with the specified setting and value and restarts the Agent. 
+# + +set -euo pipefail + +if [[ $# -ne 2 ]]; then + echo "Usage: update-waagent-conf " + exit 1 +fi + +name=$1 +value=$2 + +PYTHON=$(get-agent-python) +waagent_conf=$($PYTHON -c 'from azurelinuxagent.common.osutil import get_osutil; print(get_osutil().agent_conf_file_path)') +echo "Setting $name=$value in $waagent_conf" +sed -i -E "/^$name=/d" "$waagent_conf" +sed -i -E "\$a $name=$value" "$waagent_conf" +updated=$(grep "$name" "$waagent_conf") +echo "Updated value: $updated" +agent-service restart \ No newline at end of file diff --git a/tests_e2e/pipeline/pipeline.yml b/tests_e2e/pipeline/pipeline.yml index 255947859f..99d48d99ce 100644 --- a/tests_e2e/pipeline/pipeline.yml +++ b/tests_e2e/pipeline/pipeline.yml @@ -9,7 +9,7 @@ parameters: - name: test_suites displayName: Test Suites type: string - default: agent_bvt, no_outbound_connections + default: agent_bvt, no_outbound_connections, extensions_disabled # NOTES: # * 'image', 'location' and 'vm_size' override any values in the test suites/images definition diff --git a/tests_e2e/test_suites/extensions_disabled.yml b/tests_e2e/test_suites/extensions_disabled.yml new file mode 100644 index 0000000000..3fbff2ebde --- /dev/null +++ b/tests_e2e/test_suites/extensions_disabled.yml @@ -0,0 +1,9 @@ +# +# The test suite disables extension processing and verifies that extensions +# are not processed, but the agent continues reporting status. 
+# +name: "ExtensionsDisabled" +tests: + - "extensions_disabled.py" +images: "random(endorsed)" +owns_vm: true diff --git a/tests_e2e/tests/bvts/extension_operations.py b/tests_e2e/tests/bvts/extension_operations.py index e8a45ee449..0815728740 100755 --- a/tests_e2e/tests/bvts/extension_operations.py +++ b/tests_e2e/tests/bvts/extension_operations.py @@ -35,7 +35,7 @@ from tests_e2e.tests.lib.identifiers import VmExtensionIds, VmExtensionIdentifier from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.ssh_client import SshClient -from tests_e2e.tests.lib.vm_extension import VmExtension +from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient class ExtensionOperationsBvt(AgentTest): @@ -47,7 +47,7 @@ def run(self): is_arm64: bool = ssh_client.get_architecture() == "aarch64" - custom_script_2_0 = VmExtension( + custom_script_2_0 = VirtualMachineExtensionClient( self._context.vm, VmExtensionIds.CustomScript, resource_name="CustomScript") @@ -65,7 +65,7 @@ def run(self): ) custom_script_2_0.assert_instance_view(expected_version="2.0", expected_message=message) - custom_script_2_1 = VmExtension( + custom_script_2_1 = VirtualMachineExtensionClient( self._context.vm, VmExtensionIdentifier(VmExtensionIds.CustomScript.publisher, VmExtensionIds.CustomScript.type, "2.1"), resource_name="CustomScript") @@ -73,7 +73,7 @@ def run(self): if is_arm64: log.info("Installing %s", custom_script_2_1) else: - log.info("Updating %s to %s", custom_script_2_0, custom_script_2_1) + log.info("Updating %s", custom_script_2_0) message = f"Hello {uuid.uuid4()}!" 
custom_script_2_1.enable( diff --git a/tests_e2e/tests/bvts/run_command.py b/tests_e2e/tests/bvts/run_command.py index 5dc548583b..494458eab4 100755 --- a/tests_e2e/tests/bvts/run_command.py +++ b/tests_e2e/tests/bvts/run_command.py @@ -35,12 +35,12 @@ from tests_e2e.tests.lib.identifiers import VmExtensionIds from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.ssh_client import SshClient -from tests_e2e.tests.lib.vm_extension import VmExtension +from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient class RunCommandBvt(AgentTest): class TestCase: - def __init__(self, extension: VmExtension, get_settings: Callable[[str], Dict[str, str]]): + def __init__(self, extension: VirtualMachineExtensionClient, get_settings: Callable[[str], Dict[str, str]]): self.extension = extension self.get_settings = get_settings @@ -49,7 +49,7 @@ def run(self): test_cases = [ RunCommandBvt.TestCase( - VmExtension(self._context.vm, VmExtensionIds.RunCommand, resource_name="RunCommand"), + VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.RunCommand, resource_name="RunCommand"), lambda s: { "script": base64.standard_b64encode(bytearray(s, 'utf-8')).decode('utf-8') }) @@ -60,7 +60,7 @@ def run(self): else: test_cases.append( RunCommandBvt.TestCase( - VmExtension(self._context.vm, VmExtensionIds.RunCommandHandler, resource_name="RunCommandHandler"), + VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.RunCommandHandler, resource_name="RunCommandHandler"), lambda s: { "source": { "script": s diff --git a/tests_e2e/tests/bvts/vm_access.py b/tests_e2e/tests/bvts/vm_access.py index 1db3780480..7983d41479 100755 --- a/tests_e2e/tests/bvts/vm_access.py +++ b/tests_e2e/tests/bvts/vm_access.py @@ -33,7 +33,7 @@ from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.ssh_client import SshClient -from tests_e2e.tests.lib.vm_extension import VmExtension +from 
tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient class VmAccessBvt(AgentTest): @@ -58,7 +58,7 @@ def run(self): public_key = f.read() # Invoke the extension - vm_access = VmExtension(self._context.vm, VmExtensionIds.VmAccess, resource_name="VmAccess") + vm_access = VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.VmAccess, resource_name="VmAccess") vm_access.enable( protected_settings={ 'username': username, diff --git a/tests_e2e/tests/extensions_disabled.py b/tests_e2e/tests/extensions_disabled.py new file mode 100755 index 0000000000..98f74dc361 --- /dev/null +++ b/tests_e2e/tests/extensions_disabled.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This test disables extension processing on waagent.conf and verifies that extensions are not processed, but the +# agent continues reporting status. 
+# + +import datetime +import pytz + +from assertpy import assert_that, fail + +from azure.mgmt.compute.models import VirtualMachineInstanceView + +from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.identifiers import VmExtensionIds +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.ssh_client import SshClient +from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient +from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient + + +class ExtensionsDisabled(AgentTest): + def run(self): + ssh_client: SshClient = self._context.create_ssh_client() + + # Disable extension processing on the test VM + log.info("Disabling extension processing on the test VM [%s]", self._context.vm.name) + output = ssh_client.run_command("update-waagent-conf Extensions.Enabled n", use_sudo=True) + log.info("Disable completed:\n%s", output) + + # From now on, extensions will time out; set the timeout to the minimum allowed(15 minutes) + log.info("Setting the extension timeout to 15 minutes") + vm: VirtualMachineClient = VirtualMachineClient(self._context.vm) + + vm.update({"extensionsTimeBudget": "PT15M"}) + + disabled_timestamp: datetime.datetime = datetime.datetime.utcnow() - datetime.timedelta(minutes=60) + + # + # Validate that the agent is not processing extensions by attempting to run CustomScript + # + log.info("Executing CustomScript; it should time out after 15 min or so.") + custom_script = VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.CustomScript, resource_name="CustomScript") + try: + custom_script.enable(settings={'commandToExecute': "date"}, force_update=True, timeout=20 * 60) + fail("CustomScript should have timed out") + except Exception as error: + assert_that("VMExtensionProvisioningTimeout" in str(error)) \ + .described_as(f"Expected a VMExtensionProvisioningTimeout: {error}") \ + .is_true() + log.info("CustomScript timed out as expected") + + # + # 
Validate that the agent continued reporting status even if it is not processing extensions + # + instance_view: VirtualMachineInstanceView = vm.get_instance_view() + log.info("Instance view of VM Agent:\n%s", instance_view.vm_agent.serialize()) + assert_that(instance_view.vm_agent.statuses).described_as("The VM agent should have exactly 1 status").is_length(1) + assert_that(instance_view.vm_agent.statuses[0].display_status).described_as("The VM Agent should be ready").is_equal_to('Ready') + # The time in the status is time zone aware and 'disabled_timestamp' is not; we need to make the latter time zone aware before comparing them + assert_that(instance_view.vm_agent.statuses[0].time)\ + .described_as("The VM Agent should be have reported status even after extensions were disabled")\ + .is_greater_than(pytz.utc.localize(disabled_timestamp)) + log.info("The VM Agent reported status after extensions were disabled, as expected.") + + +if __name__ == "__main__": + ExtensionsDisabled.run_from_command_line() diff --git a/tests_e2e/tests/lib/azure_client.py b/tests_e2e/tests/lib/azure_client.py new file mode 100644 index 0000000000..3e01762e8b --- /dev/null +++ b/tests_e2e/tests/lib/azure_client.py @@ -0,0 +1,44 @@ +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from typing import Any, Callable + +from azure.core.polling import LROPoller + +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.retry import execute_with_retry + + +class AzureClient: + """ + Utilities for classes using the Azure SDK. + """ + _DEFAULT_TIMEOUT = 10 * 60 # (in seconds) + + @staticmethod + def _execute_async_operation(operation: Callable[[], LROPoller], operation_name: str, timeout: int) -> Any: + """ + Starts an async operation and waits its completion. Returns the operation's result. + """ + log.info("Starting [%s]", operation_name) + poller: LROPoller = execute_with_retry(operation) + log.info("Waiting for [%s]", operation_name) + poller.wait(timeout=timeout) + if not poller.done(): + raise TimeoutError(f"[{operation_name}] did not complete within {timeout} seconds") + log.info("[%s] completed", operation_name) + return poller.result() diff --git a/tests_e2e/tests/lib/virtual_machine.py b/tests_e2e/tests/lib/virtual_machine.py deleted file mode 100644 index 79b86a6f3b..0000000000 --- a/tests_e2e/tests/lib/virtual_machine.py +++ /dev/null @@ -1,155 +0,0 @@ -# Microsoft Azure Linux Agent -# -# Copyright 2018 Microsoft Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# -# This module includes facilities to execute some operations on virtual machines and scale sets (list extensions, restart, etc). 
-# - -from abc import ABC, abstractmethod -from builtins import TimeoutError -from typing import Any, List - -from azure.core.polling import LROPoller -from azure.identity import DefaultAzureCredential -from azure.mgmt.compute import ComputeManagementClient -from azure.mgmt.compute.models import VirtualMachineExtension, VirtualMachineScaleSetExtension, VirtualMachineInstanceView, VirtualMachineScaleSetInstanceView -from azure.mgmt.resource import ResourceManagementClient -from msrestazure.azure_cloud import Cloud - -from tests_e2e.tests.lib.azure_clouds import AZURE_CLOUDS -from tests_e2e.tests.lib.identifiers import VmIdentifier -from tests_e2e.tests.lib.logging import log -from tests_e2e.tests.lib.retry import execute_with_retry - - -class VirtualMachineBaseClass(ABC): - """ - Abstract base class for VirtualMachine and VmScaleSet. - - Defines the interface common to both classes and provides the implementation of some methods in that interface. - """ - def __init__(self, vm: VmIdentifier): - super().__init__() - self._identifier: VmIdentifier = vm - cloud: Cloud = AZURE_CLOUDS[vm.cloud] - credential: DefaultAzureCredential = DefaultAzureCredential(authority=cloud.endpoints.active_directory) - self._compute_client = ComputeManagementClient( - credential=credential, - subscription_id=vm.subscription, - base_url=cloud.endpoints.resource_manager, - credential_scopes=[cloud.endpoints.resource_manager + "/.default"]) - self._resource_client = ResourceManagementClient( - credential=credential, - subscription_id=vm.subscription, - base_url=cloud.endpoints.resource_manager, - credential_scopes=[cloud.endpoints.resource_manager + "/.default"]) - - @abstractmethod - def get_instance_view(self) -> Any: # Returns VirtualMachineInstanceView or VirtualMachineScaleSetInstanceView - """ - Retrieves the instance view of the virtual machine or scale set - """ - - @abstractmethod - def get_extensions(self) -> Any: # Returns List[VirtualMachineExtension] or 
List[VirtualMachineScaleSetExtension] - """ - Retrieves the extensions installed on the virtual machine or scale set - """ - - def restart(self, timeout=5 * 60) -> None: - """ - Restarts the virtual machine or scale set - """ - log.info("Initiating restart of %s", self._identifier) - - poller: LROPoller = execute_with_retry(self._begin_restart) - - poller.wait(timeout=timeout) - - if not poller.done(): - raise TimeoutError(f"Failed to restart {self._identifier.name} after {timeout} seconds") - - log.info("Restarted %s", self._identifier.name) - - @abstractmethod - def _begin_restart(self) -> LROPoller: - """ - Derived classes must provide the implementation for this method using their corresponding begin_restart() implementation - """ - - def __str__(self): - return f"{self._identifier}" - - -class VirtualMachine(VirtualMachineBaseClass): - def get_instance_view(self) -> VirtualMachineInstanceView: - log.info("Retrieving instance view for %s", self._identifier) - return execute_with_retry(lambda: self._compute_client.virtual_machines.get( - resource_group_name=self._identifier.resource_group, - vm_name=self._identifier.name, - expand="instanceView" - ).instance_view) - - def get_extensions(self) -> List[VirtualMachineExtension]: - log.info("Retrieving extensions for %s", self._identifier) - return execute_with_retry(lambda: self._compute_client.virtual_machine_extensions.list( - resource_group_name=self._identifier.resource_group, - vm_name=self._identifier.name)) - - def _begin_restart(self) -> LROPoller: - return self._compute_client.virtual_machines.begin_restart( - resource_group_name=self._identifier.resource_group, - vm_name=self._identifier.name) - - -class VmScaleSet(VirtualMachineBaseClass): - def get_instance_view(self) -> VirtualMachineScaleSetInstanceView: - log.info("Retrieving instance view for %s", self._identifier) - - # TODO: Revisit this implementation. Currently this method returns the instance view of the first VM instance available. 
- # For the instance view of the complete VMSS, use the compute_client.virtual_machine_scale_sets function - # https://docs.microsoft.com/en-us/python/api/azure-mgmt-compute/azure.mgmt.compute.v2019_12_01.operations.virtualmachinescalesetsoperations?view=azure-python - for vm in execute_with_retry(lambda: self._compute_client.virtual_machine_scale_set_vms.list(self._identifier.resource_group, self._identifier.name)): - try: - return execute_with_retry(lambda: self._compute_client.virtual_machine_scale_set_vms.get_instance_view( - resource_group_name=self._identifier.resource_group, - vm_scale_set_name=self._identifier.name, - instance_id=vm.instance_id)) - except Exception as e: - log.warning("Unable to retrieve instance view for scale set instance %s. Trying out other instances.\nError: %s", vm, e) - - raise Exception(f"Unable to retrieve instance view of any instances for scale set {self._identifier}") - - - @property - def vm_func(self): - return self._compute_client.virtual_machine_scale_set_vms - - @property - def extension_func(self): - return self._compute_client.virtual_machine_scale_set_extensions - - def get_extensions(self) -> List[VirtualMachineScaleSetExtension]: - log.info("Retrieving extensions for %s", self._identifier) - return execute_with_retry(lambda: self._compute_client.virtual_machine_scale_set_extensions.list( - resource_group_name=self._identifier.resource_group, - vm_scale_set_name=self._identifier.name)) - - def _begin_restart(self) -> LROPoller: - return self._compute_client.virtual_machine_scale_sets.begin_restart( - resource_group_name=self._identifier.resource_group, - vm_scale_set_name=self._identifier.name) diff --git a/tests_e2e/tests/lib/virtual_machine_client.py b/tests_e2e/tests/lib/virtual_machine_client.py new file mode 100644 index 0000000000..f7e67a8236 --- /dev/null +++ b/tests_e2e/tests/lib/virtual_machine_client.py @@ -0,0 +1,122 @@ +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed 
under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This module includes facilities to execute operations on virtual machines (list extensions, restart, etc). +# + +from typing import Any, Dict, List + +from azure.identity import DefaultAzureCredential +from azure.mgmt.compute import ComputeManagementClient +from azure.mgmt.compute.models import VirtualMachineExtension, VirtualMachineInstanceView, VirtualMachine +from azure.mgmt.resource import ResourceManagementClient +from msrestazure.azure_cloud import Cloud + +from tests_e2e.tests.lib.azure_clouds import AZURE_CLOUDS +from tests_e2e.tests.lib.azure_client import AzureClient +from tests_e2e.tests.lib.identifiers import VmIdentifier +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.retry import execute_with_retry + + +class VirtualMachineClient(AzureClient): + """ + Provides operations on virtual machine (get instance view, update, restart, etc). 
+ """ + def __init__(self, vm: VmIdentifier): + super().__init__() + self._identifier: VmIdentifier = vm + cloud: Cloud = AZURE_CLOUDS[vm.cloud] + credential: DefaultAzureCredential = DefaultAzureCredential(authority=cloud.endpoints.active_directory) + self._compute_client = ComputeManagementClient( + credential=credential, + subscription_id=vm.subscription, + base_url=cloud.endpoints.resource_manager, + credential_scopes=[cloud.endpoints.resource_manager + "/.default"]) + self._resource_client = ResourceManagementClient( + credential=credential, + subscription_id=vm.subscription, + base_url=cloud.endpoints.resource_manager, + credential_scopes=[cloud.endpoints.resource_manager + "/.default"]) + + def get_description(self) -> VirtualMachine: + """ + Retrieves the description of the virtual machine. + """ + log.info("Retrieving description for %s", self._identifier) + return execute_with_retry( + lambda: self._compute_client.virtual_machines.get( + resource_group_name=self._identifier.resource_group, + vm_name=self._identifier.name)) + + def get_instance_view(self) -> VirtualMachineInstanceView: + """ + Retrieves the instance view of the virtual machine + """ + log.info("Retrieving instance view for %s", self._identifier) + return execute_with_retry(lambda: self._compute_client.virtual_machines.get( + resource_group_name=self._identifier.resource_group, + vm_name=self._identifier.name, + expand="instanceView" + ).instance_view) + + def get_extensions(self) -> List[VirtualMachineExtension]: + """ + Retrieves the extensions installed on the virtual machine + """ + log.info("Retrieving extensions for %s", self._identifier) + return execute_with_retry( + lambda: self._compute_client.virtual_machine_extensions.list( + resource_group_name=self._identifier.resource_group, + vm_name=self._identifier.name)) + + def update(self, properties: Dict[str, Any], timeout: int = AzureClient._DEFAULT_TIMEOUT) -> None: + """ + Updates a set of properties on the virtual machine + """ + 
# location is a required by begin_create_or_update, always add it + properties_copy = properties.copy() + properties_copy["location"] = self._identifier.location + + log.info("Updating %s with properties: %s", self._identifier, properties_copy) + + self._execute_async_operation( + lambda: self._compute_client.virtual_machines.begin_create_or_update( + self._identifier.resource_group, + self._identifier.name, + properties_copy), + operation_name=f"Update {self._identifier}", + timeout=timeout) + + def restart(self, timeout: int = AzureClient._DEFAULT_TIMEOUT) -> None: + """ + Restarts the virtual machine or scale set + """ + self._execute_async_operation( + lambda: self._compute_client.virtual_machines.begin_restart( + resource_group_name=self._identifier.resource_group, + vm_name=self._identifier.name), + operation_name=f"Restart {self._identifier}", + timeout=timeout) + + def __str__(self): + return f"{self._identifier}" + + + + diff --git a/tests_e2e/tests/lib/vm_extension.py b/tests_e2e/tests/lib/virtual_machine_extension_client.py similarity index 59% rename from tests_e2e/tests/lib/vm_extension.py rename to tests_e2e/tests/lib/virtual_machine_extension_client.py index bf7a41a44d..d94226e6ea 100644 --- a/tests_e2e/tests/lib/vm_extension.py +++ b/tests_e2e/tests/lib/virtual_machine_extension_client.py @@ -16,37 +16,29 @@ # # -# This module includes facilities to execute VM extension operations (enable, remove, etc) on single virtual machines (using -# class VmExtension) or virtual machine scale sets (using class VmssExtension). +# This module includes facilities to execute VM extension operations (enable, remove, etc). 
# import uuid -from abc import ABC, abstractmethod from assertpy import assert_that, soft_assertions -from typing import Any, Callable, Dict, Type +from typing import Any, Callable, Dict -from azure.core.polling import LROPoller from azure.mgmt.compute import ComputeManagementClient -from azure.mgmt.compute.models import VirtualMachineExtension, VirtualMachineScaleSetExtension, VirtualMachineExtensionInstanceView +from azure.mgmt.compute.models import VirtualMachineExtension, VirtualMachineExtensionInstanceView from azure.identity import DefaultAzureCredential from msrestazure.azure_cloud import Cloud from tests_e2e.tests.lib.azure_clouds import AZURE_CLOUDS +from tests_e2e.tests.lib.azure_client import AzureClient from tests_e2e.tests.lib.identifiers import VmIdentifier, VmExtensionIdentifier from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.retry import execute_with_retry -_TIMEOUT = 5 * 60 # Timeout for extension operations (in seconds) - - -class _VmExtensionBaseClass(ABC): +class VirtualMachineExtensionClient(AzureClient): """ - Abstract base class for VmExtension and VmssExtension. - - Implements the operations that are common to virtual machines and scale sets. Derived classes must provide the specific types and methods for the - virtual machine or scale set. + Client for operations virtual machine extensions. 
""" def __init__(self, vm: VmIdentifier, extension: VmExtensionIdentifier, resource_name: str): super().__init__() @@ -61,18 +53,32 @@ def __init__(self, vm: VmIdentifier, extension: VmExtensionIdentifier, resource_ base_url=cloud.endpoints.resource_manager, credential_scopes=[cloud.endpoints.resource_manager + "/.default"]) + def get_instance_view(self) -> VirtualMachineExtensionInstanceView: + """ + Retrieves the instance view of the extension + """ + log.info("Retrieving instance view for %s...", self._identifier) + + return execute_with_retry(lambda: self._compute_client.virtual_machine_extensions.get( + resource_group_name=self._vm.resource_group, + vm_name=self._vm.name, + vm_extension_name=self._resource_name, + expand="instanceView" + ).instance_view) + def enable( self, settings: Dict[str, Any] = None, protected_settings: Dict[str, Any] = None, auto_upgrade_minor_version: bool = True, force_update: bool = False, - force_update_tag: str = None + force_update_tag: str = None, + timeout: int = AzureClient._DEFAULT_TIMEOUT ) -> None: """ Performs an enable operation on the extension. - NOTE: 'force_update' is not a parameter of the actual ARM API. It is provided for convenience: If set to True, + NOTE: 'force_update' is not a parameter of the actual ARM API. It is provided here for convenience: If set to True, the 'force_update_tag' can be left unspecified and this method will generate a random tag. 
""" if force_update_tag is not None and not force_update: @@ -81,7 +87,7 @@ def enable( if force_update and force_update_tag is None: force_update_tag = str(uuid.uuid4()) - extension_parameters = self._ExtensionType( + extension_parameters = VirtualMachineExtension( publisher=self._identifier.publisher, location=self._vm.location, type_properties_type=self._identifier.type, @@ -99,28 +105,28 @@ def enable( # Now set the actual protected settings before invoking the extension extension_parameters.protected_settings = protected_settings - result: VirtualMachineExtension = execute_with_retry( - lambda: self._begin_create_or_update( + result: VirtualMachineExtension = self._execute_async_operation( + lambda: self._compute_client.virtual_machine_extensions.begin_create_or_update( self._vm.resource_group, self._vm.name, self._resource_name, - extension_parameters - ).result(timeout=_TIMEOUT)) + extension_parameters), + operation_name=f"Enable {self._identifier}", + timeout=timeout) - log.info("Enable completed. 
Provisioning state: %s", result.provisioning_state) + log.info("Provisioning state: %s", result.provisioning_state) - def get_instance_view(self) -> VirtualMachineExtensionInstanceView: # TODO: Check type for scale sets + def delete(self, timeout: int = AzureClient._DEFAULT_TIMEOUT) -> None: """ - Retrieves the instance view of the extension + Performs a delete operation on the extension """ - log.info("Retrieving instance view for %s...", self._identifier) - - return execute_with_retry(lambda: self._get( - resource_group_name=self._vm.resource_group, - vm_name=self._vm.name, - vm_extension_name=self._resource_name, - expand="instanceView" - ).instance_view) + self._execute_async_operation( + lambda: self._compute_client.virtual_machine_extensions.begin_delete( + self._vm.resource_group, + self._vm.name, + self._resource_name), + operation_name=f"Delete {self._identifier}", + timeout=timeout) def assert_instance_view( self, @@ -157,89 +163,9 @@ def assert_instance_view( log.info("The instance view matches the expected values") - @abstractmethod - def delete(self) -> None: - """ - Performs a delete operation on the extension - """ - - @property - @abstractmethod - def _ExtensionType(self) -> Type: - """ - Type of the extension object for the virtual machine or scale set (i.e. VirtualMachineExtension or VirtualMachineScaleSetExtension) - """ - - @property - @abstractmethod - def _begin_create_or_update(self) -> Callable[[str, str, str, Any], LROPoller[Any]]: # "Any" can be VirtualMachineExtension or VirtualMachineScaleSetExtension - """ - The begin_create_or_update method for the virtual machine or scale set extension - """ - - @property - @abstractmethod - def _get(self) -> Any: # VirtualMachineExtension or VirtualMachineScaleSetExtension - """ - The get method for the virtual machine or scale set extension - """ - def __str__(self): return f"{self._identifier}" -class VmExtension(_VmExtensionBaseClass): - """ - Extension operations on a single virtual machine. 
- """ - @property - def _ExtensionType(self) -> Type: - return VirtualMachineExtension - - @property - def _begin_create_or_update(self) -> Callable[[str, str, str, VirtualMachineExtension], LROPoller[VirtualMachineExtension]]: - return self._compute_client.virtual_machine_extensions.begin_create_or_update - @property - def _get(self) -> VirtualMachineExtension: - return self._compute_client.virtual_machine_extensions.get - - def delete(self) -> None: - log.info("Deleting %s", self._identifier) - - execute_with_retry(lambda: self._compute_client.virtual_machine_extensions.begin_delete( - self._vm.resource_group, - self._vm.name, - self._resource_name - ).wait(timeout=_TIMEOUT)) - - -class VmssExtension(_VmExtensionBaseClass): - """ - Extension operations on virtual machine scale sets. - """ - @property - def _ExtensionType(self) -> Type: - return VirtualMachineScaleSetExtension - - @property - def _begin_create_or_update(self) -> Callable[[str, str, str, VirtualMachineScaleSetExtension], LROPoller[VirtualMachineScaleSetExtension]]: - return self._compute_client.virtual_machine_scale_set_extensions.begin_create_or_update - - @property - def _get(self) -> VirtualMachineScaleSetExtension: - return self._compute_client.virtual_machine_scale_set_extensions.get - - def delete(self) -> None: # TODO: Implement this method - raise NotImplementedError() - - def delete_from_instance(self, instance_id: str) -> None: - log.info("Deleting %s from scale set instance %s", self._identifier, instance_id) - - execute_with_retry(lambda: self._compute_client.virtual_machine_scale_set_vm_extensions.begin_delete( - resource_group_name=self._vm.resource_group, - vm_scale_set_name=self._vm.name, - vm_extension_name=self._resource_name, - instance_id=instance_id - ).wait(timeout=_TIMEOUT)) From 841ee1ddb1e3c7c2d90a6e3461c3802e5c0dbd90 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 24 May 2023 16:08:59 -0700 Subject: [PATCH 014/240] Fixes for end-to-end tests (#2827) 
Co-authored-by: narrieta --- tests_e2e/orchestrator/lib/agent_test_suite.py | 2 +- tests_e2e/orchestrator/scripts/install-agent | 13 +++++++++---- tests_e2e/tests/lib/agent_log.py | 2 +- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index 2e84c59a3f..98dd58583a 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -349,7 +349,7 @@ def _setup_node(self) -> None: # log.info('Installing tools on the test node') command = f"tar xf {target_path/tarball_path.name} && ~/bin/install-tools" - log.info("%s\n%s", command, self.context.ssh_client.run_command(command)) + log.info("Remote command [%s] completed:\n%s", command, self.context.ssh_client.run_command(command)) if self.context.is_vhd: log.info("Using a VHD; will not install the Test Agent.") diff --git a/tests_e2e/orchestrator/scripts/install-agent b/tests_e2e/orchestrator/scripts/install-agent index 513487b211..e7f78cb349 100755 --- a/tests_e2e/orchestrator/scripts/install-agent +++ b/tests_e2e/orchestrator/scripts/install-agent @@ -77,18 +77,18 @@ fi python=$(get-agent-python) waagent=$(get-agent-bin-path) -echo "============================================================" +echo "========== Initial Status ==========" echo "Service Name: $service_name" echo "Agent Path: $waagent" echo "Agent Version:" $python "$waagent" --version echo "Service Status:" service-status $service_name -echo "============================================================" # # Install the package # +echo "========== Installing Agent ==========" echo "Installing $package as version $version..." unzip.py "$package" "/var/lib/waagent/WALinuxAgent-$version" @@ -116,7 +116,8 @@ service-start $service_name echo "Verifying agent installation..." 
check-version() { - for i in {0..5} + # We need to wait for the extension handler to start, give it a couple of minutes + for i in {1..12} do if $python "$waagent" --version | grep -E "Goal state agent:\s+$version" > /dev/null; then return 0 @@ -131,10 +132,14 @@ if check-version "$version"; then printf "\nThe agent was installed successfully\n" exit_code=0 else - printf "\nFailed to install agent.\n" + printf "************************************\n" + printf " * ERROR: Failed to install agent. *\n" + printf "************************************\n" exit_code=1 fi +printf "\n" +echo "========== Final Status ==========" $python "$waagent" --version printf "\n" service-status $service_name diff --git a/tests_e2e/tests/lib/agent_log.py b/tests_e2e/tests/lib/agent_log.py index 657b729282..e958d34d54 100644 --- a/tests_e2e/tests/lib/agent_log.py +++ b/tests_e2e/tests/lib/agent_log.py @@ -393,7 +393,7 @@ def matches_ignore_rule(record: AgentLogRecord, ignore_rules: List[Dict[str, Any # Extension: 2021/03/30 19:45:31 Azure Monitoring Agent for Linux started to handle. 
# 2021/03/30 19:45:31 [Microsoft.Azure.Monitor.AzureMonitorLinuxAgent-1.7.0] cwd is /var/lib/waagent/Microsoft.Azure.Monitor.AzureMonitorLinuxAgent-1.7.0 # - _NEWER_AGENT_RECORD = re.compile(r'(?P[\d-]+T[\d:.]+Z)\s(?PVERBOSE|INFO|WARNING|ERROR)\s(?P\S+)\s(?P(Daemon)|(ExtHandler)|(\[\S+\]))\s(?P.*)') + _NEWER_AGENT_RECORD = re.compile(r'(?P[\d-]+T[\d:.]+Z)\s(?PVERBOSE|INFO|WARNING|ERROR)\s(?P\S+)\s(?P(Daemon)|(ExtHandler)|(LogCollector)|(\[\S+\]))\s(?P.*)') _2_2_46_AGENT_RECORD = re.compile(r'(?P[\d-]+T[\d:.]+Z)\s(?PVERBOSE|INFO|WARNING|ERROR)\s(?P)(?PDaemon|ExtHandler|\[\S+\])\s(?P.*)') _OLDER_AGENT_RECORD = re.compile(r'(?P[\d/]+\s[\d:.]+)\s(?PVERBOSE|INFO|WARNING|ERROR)\s(?P)(?P\S*)\s(?P.*)') _EXTENSION_RECORD = re.compile(r'(?P[\d/]+\s[\d:.]+)\s(?P)(?P)((?P\[[^\]]+\])\s)?(?P.*)') From c864b458e421fa6ff6f9b8dbcb0b5a592fc4133e Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Fri, 26 May 2023 12:28:40 -0700 Subject: [PATCH 015/240] Add test for osProfile.linuxConfiguration.provisionVMAgent (#2826) * Add test for osProfile.linuxConfiguration.provisionVMAgent * add files * pylint * added messages * ssh issue --------- Co-authored-by: narrieta --- azurelinuxagent/daemon/main.py | 5 +- .../orchestrator/lib/agent_test_loader.py | 11 ++- .../orchestrator/lib/agent_test_suite.py | 23 +++-- tests_e2e/orchestrator/runbook.yml | 2 +- tests_e2e/orchestrator/scripts/install-agent | 2 +- tests_e2e/pipeline/pipeline.yml | 2 +- .../test_suites/agent_not_provisioned.yml | 12 +++ .../test_suites/no_outbound_connections.yml | 2 +- .../agent_not_provisioned.py | 90 +++++++++++++++++++ .../tests/agent_not_provisioned/template.py | 86 ++++++++++++++++++ tests_e2e/tests/extensions_disabled.py | 6 ++ tests_e2e/tests/lib/agent_log.py | 9 ++ .../{nsg_template.py => template.py} | 0 13 files changed, 233 insertions(+), 17 deletions(-) create mode 100644 tests_e2e/test_suites/agent_not_provisioned.yml create mode 100755 tests_e2e/tests/agent_not_provisioned/agent_not_provisioned.py 
create mode 100755 tests_e2e/tests/agent_not_provisioned/template.py rename tests_e2e/tests/no_outbound_connections/{nsg_template.py => template.py} (100%) diff --git a/azurelinuxagent/daemon/main.py b/azurelinuxagent/daemon/main.py index 1eb58ec99b..d051af202b 100644 --- a/azurelinuxagent/daemon/main.py +++ b/azurelinuxagent/daemon/main.py @@ -105,9 +105,8 @@ def sleep_if_disabled(self): agent_disabled_file_path = conf.get_disable_agent_file_path() if os.path.exists(agent_disabled_file_path): import threading - logger.warn("Disabling the guest agent by sleeping forever; " - "to re-enable, remove {0} and restart" - .format(agent_disabled_file_path)) + logger.warn("Disabling the guest agent by sleeping forever; to re-enable, remove {0} and restart".format(agent_disabled_file_path)) + logger.warn("To enable VM extensions, also ensure that the VM's osProfile.allowExtensionOperations property is set to true.") self.running = False disable_event = threading.Event() disable_event.wait() diff --git a/tests_e2e/orchestrator/lib/agent_test_loader.py b/tests_e2e/orchestrator/lib/agent_test_loader.py index fcfd35ae3c..a2576f9b6b 100644 --- a/tests_e2e/orchestrator/lib/agent_test_loader.py +++ b/tests_e2e/orchestrator/lib/agent_test_loader.py @@ -57,6 +57,8 @@ class TestSuiteInfo(object): location: str # Whether this suite must run on its own test VM owns_vm: bool + # Whether to install the test Agent on the test VM + install_test_agent: bool # Customization for the ARM template used when creating the test VM template: str @@ -170,7 +172,7 @@ def _load_test_suite(description_file: Path) -> TestSuiteInfo: """ Loads the description of a TestSuite from its YAML file. - A test suite has 5 properties: name, tests, images, location, and owns_vm. For example: + A test suite is described by the properties listed below. 
Sample test suite: name: "AgentBvt" tests: @@ -180,6 +182,8 @@ def _load_test_suite(description_file: Path) -> TestSuiteInfo: images: "endorsed" location: "eastuseaup" owns_vm: true + install_test_agent: true + template: "bvts/template.py" * name - A string used to identify the test suite * tests - A list of the tests in the suite. Each test can be specified by a string (the path for its source code relative to @@ -199,6 +203,9 @@ def _load_test_suite(description_file: Path) -> TestSuiteInfo: This is useful for suites that modify the test VMs in such a way that the setup may cause problems in other test suites (for example, some tests targeted to the HGAP block internet access in order to force the agent to use the HGAP). + * install_test_agent - [Optional; boolean] By default the setup process installs the test Agent on the test VMs; set this property + to False to skip the installation. + * template - [Optional; string] If given, the ARM template for the test VM is customized using the given Python module. """ test_suite: Dict[str, Any] = AgentTestLoader._load_file(description_file) @@ -232,7 +239,7 @@ def _load_test_suite(description_file: Path) -> TestSuiteInfo: test_suite_info.location = "" test_suite_info.owns_vm = "owns_vm" in test_suite and test_suite["owns_vm"] - + test_suite_info.install_test_agent = "install_test_agent" not in test_suite or test_suite["install_test_agent"] test_suite_info.template = test_suite.get("template", "") return test_suite_info diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index 98dd58583a..24f7525b19 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -275,7 +275,7 @@ def _clean_up(self) -> None: Cleans up any leftovers from the test suite run. Currently just an empty placeholder for future use. 
""" - def _setup_node(self) -> None: + def _setup_node(self, install_test_agent: bool) -> None: """ Prepares the remote node for executing the test suite (installs tools and the test agent, etc) """ @@ -317,11 +317,14 @@ def _setup_node(self) -> None: tarball_path: Path = Path("/tmp/waagent.tar") log.info("Creating %s with the files need on the test node", tarball_path) log.info("Adding orchestrator/scripts") - run_command(['tar', 'cvf', str(tarball_path), '--transform=s,.*/,bin/,', '-C', str(self.context.test_source_directory/"orchestrator"/"scripts"), '.']) - # log.info("Adding tests/scripts") - # run_command(['tar', 'rvf', str(tarball_path), '--transform=s,.*/,bin/,', '-C', str(self.context.test_source_directory/"tests"/"scripts"), '.']) + command = "cd {0} ; tar cvf {1} --transform='s,^,bin/,' *".format(self.context.test_source_directory/"orchestrator"/"scripts", str(tarball_path)) + log.info("%s\n%s", command, run_command(command, shell=True)) + log.info("Adding tests/scripts") + # command = "cd {0} ; tar cvf {1} --transform='s,^,bin/,' *".format(self.context.test_source_directory/"tests"/"scripts", str(tarball_path)) + # log.info("%s\n%s", command, run_command(command, shell=True)) log.info("Adding tests/lib") - run_command(['tar', 'rvf', str(tarball_path), '--transform=s,^,lib/,', '-C', str(self.context.test_source_directory.parent), '--exclude=__pycache__', 'tests_e2e/tests/lib']) + command = "cd {0} ; tar rvf {1} --transform='s,^,lib/,' --exclude=__pycache__ tests_e2e/tests/lib".format(self.context.test_source_directory.parent, str(tarball_path)) + log.info("%s\n%s", command, run_command(command, shell=True)) log.info("Contents of %s:\n\n%s", tarball_path, run_command(['tar', 'tvf', str(tarball_path)])) # @@ -353,6 +356,8 @@ def _setup_node(self) -> None: if self.context.is_vhd: log.info("Using a VHD; will not install the Test Agent.") + elif not install_test_agent: + log.info("Will not install the Test Agent per the test suite configuration.") else: 
log.info("Installing the Test Agent on the test node") command = f"install-agent --package ~/tmp/{agent_package_path.name} --version {AGENT_VERSION}" @@ -424,10 +429,12 @@ def _execute(self, environment: Environment, variables: Dict[str, Any]): self._setup() if not self.context.skip_setup: - self._setup_node() + # pylint seems to think self.context.test_suites is not iterable. Suppressing this warning here and a few lines below, since + # its type is List[AgentTestSuite]. + # E1133: Non-iterable value self.context.test_suites is used in an iterating context (not-an-iterable) + install_test_agent = all([suite.install_test_agent for suite in self.context.test_suites]) # pylint: disable=E1133 + self._setup_node(install_test_agent) - # pylint seems to think self.context.test_suites is not iterable. Suppressing warning, since its type is List[AgentTestSuite] - # E1133: Non-iterable value self.context.test_suites is used in an iterating context (not-an-iterable) for suite in self.context.test_suites: # pylint: disable=E1133 log.info("Executing test suite %s", suite.name) self.context.lisa_log.info("Executing Test Suite %s", suite.name) diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index eb4ee2e996..6b0c46e75e 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -49,7 +49,7 @@ variable: # # The test suites to execute - name: test_suites - value: "agent_bvt, no_outbound_connections, extensions_disabled" + value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned" - name: cloud value: "AzureCloud" is_case_visible: true diff --git a/tests_e2e/orchestrator/scripts/install-agent b/tests_e2e/orchestrator/scripts/install-agent index e7f78cb349..0811ebe920 100755 --- a/tests_e2e/orchestrator/scripts/install-agent +++ b/tests_e2e/orchestrator/scripts/install-agent @@ -129,7 +129,7 @@ check-version() { } if check-version "$version"; then - printf "\nThe agent was installed 
successfully\n" + printf "The agent was installed successfully\n" exit_code=0 else printf "************************************\n" diff --git a/tests_e2e/pipeline/pipeline.yml b/tests_e2e/pipeline/pipeline.yml index 99d48d99ce..9a1cd0e4e7 100644 --- a/tests_e2e/pipeline/pipeline.yml +++ b/tests_e2e/pipeline/pipeline.yml @@ -9,7 +9,7 @@ parameters: - name: test_suites displayName: Test Suites type: string - default: agent_bvt, no_outbound_connections, extensions_disabled + default: agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned # NOTES: # * 'image', 'location' and 'vm_size' override any values in the test suites/images definition diff --git a/tests_e2e/test_suites/agent_not_provisioned.yml b/tests_e2e/test_suites/agent_not_provisioned.yml new file mode 100644 index 0000000000..07dd5ccce2 --- /dev/null +++ b/tests_e2e/test_suites/agent_not_provisioned.yml @@ -0,0 +1,12 @@ +# +# Disables Agent provisioning using osProfile.linuxConfiguration.provisionVMAgent and verifies that the agent is disabled +# and extension operations are not allowed. 
+# +name: "AgentNotProvisioned" +tests: + - "agent_not_provisioned/agent_not_provisioned.py" +images: "random(endorsed)" +template: "agent_not_provisioned/template.py" +owns_vm: true +install_test_agent: false + diff --git a/tests_e2e/test_suites/no_outbound_connections.yml b/tests_e2e/test_suites/no_outbound_connections.yml index 6cf6c490f7..eaf6268860 100644 --- a/tests_e2e/test_suites/no_outbound_connections.yml +++ b/tests_e2e/test_suites/no_outbound_connections.yml @@ -16,5 +16,5 @@ tests: - "bvts/vm_access.py" - "no_outbound_connections/check_fallback_to_hgap.py" images: "random(endorsed)" -template: "no_outbound_connections/nsg_template.py" +template: "no_outbound_connections/template.py" owns_vm: true diff --git a/tests_e2e/tests/agent_not_provisioned/agent_not_provisioned.py b/tests_e2e/tests/agent_not_provisioned/agent_not_provisioned.py new file mode 100755 index 0000000000..45ff903c39 --- /dev/null +++ b/tests_e2e/tests/agent_not_provisioned/agent_not_provisioned.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from assertpy import fail, assert_that +from typing import Any, Dict, List + +from azure.mgmt.compute.models import VirtualMachineInstanceView + +from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.identifiers import VmExtensionIds +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.shell import CommandError +from tests_e2e.tests.lib.ssh_client import SshClient +from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient +from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient + + +class AgentNotProvisioned(AgentTest): + """ + When osProfile.linuxConfiguration.provisionVMAgent is set to 'false', this test verifies that + the agent is disabled and that extension operations are not allowed. + """ + def run(self): + # + # Check the agent's log for the messages that indicate it is disabled. + # + ssh_client: SshClient = self._context.create_ssh_client() + + log.info("Checking the Agent's log to verify that it is disabled.") + try: + output = ssh_client.run_command(""" + grep -E 'WARNING.*Daemon.*Disabling guest agent in accordance with ovf-env.xml' /var/log/waagent.log || \ + grep -E 'WARNING.*Daemon.*Disabling the guest agent by sleeping forever; to re-enable, remove /var/lib/waagent/disable_agent and restart' /var/log/waagent.log + """) + log.info("The Agent is disabled, log message: [%s]", output.rstrip()) + except CommandError as e: + fail(f"The agent's log does not contain the expected messages: {e}") + + # + # Validate that the agent is not reporting status. + # + log.info("Verifying that the Agent status is 'Not Ready' (i.e. 
it is not reporting status).") + vm: VirtualMachineClient = VirtualMachineClient(self._context.vm) + instance_view: VirtualMachineInstanceView = vm.get_instance_view() + log.info("Instance view of VM Agent:\n%s", instance_view.vm_agent.serialize()) + assert_that(instance_view.vm_agent.statuses).described_as("The VM agent should have exactly 1 status").is_length(1) + assert_that(instance_view.vm_agent.statuses[0].code).described_as("The VM Agent should not be available").is_equal_to('ProvisioningState/Unavailable') + assert_that(instance_view.vm_agent.statuses[0].display_status).described_as("The VM Agent should not ready").is_equal_to('Not Ready') + log.info("The Agent status is 'Not Ready'") + + # + # Validate that extensions cannot be executed. + # + log.info("Verifying that extension processing is disabled.") + log.info("Executing CustomScript; it should fail.") + custom_script = VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.CustomScript, resource_name="CustomScript") + try: + custom_script.enable(settings={'commandToExecute': "date"}, force_update=True, timeout=20 * 60) + fail("CustomScript should have failed") + except Exception as error: + assert_that("OperationNotAllowed" in str(error)) \ + .described_as(f"Expected an OperationNotAllowed: {error}") \ + .is_true() + log.info("CustomScript failed, as expected: %s", error) + + def get_ignore_error_rules(self) -> List[Dict[str, Any]]: + return [ + {'message': 'Disabling guest agent in accordance with ovf-env.xml'}, + {'message': 'Disabling the guest agent by sleeping forever; to re-enable, remove /var/lib/waagent/disable_agent and restart'} + ] + + +if __name__ == "__main__": + AgentNotProvisioned.run_from_command_line() + diff --git a/tests_e2e/tests/agent_not_provisioned/template.py b/tests_e2e/tests/agent_not_provisioned/template.py new file mode 100755 index 0000000000..c45b69f295 --- /dev/null +++ b/tests_e2e/tests/agent_not_provisioned/template.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 + 
+# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from typing import Any + + +def update_arm_template(template: Any) -> None: + """ + Customizes the ARM template to set osProfile.linuxConfiguration.provisionVMAgent to false. + """ + # + # NOTE: LISA's template uses this function to generate the value for osProfile.linuxConfiguration. The function is + # under the 'lisa' namespace. + # + # "getLinuxConfiguration": { + # "parameters": [ + # { + # "name": "keyPath", + # "type": "string" + # }, + # { + # "name": "publicKeyData", + # "type": "string" + # } + # ], + # "output": { + # "type": "object", + # "value": { + # "disablePasswordAuthentication": true, + # "ssh": { + # "publicKeys": [ + # { + # "path": "[parameters('keyPath')]", + # "keyData": "[parameters('publicKeyData')]" + # } + # ] + # }, + # "provisionVMAgent": true + # } + # } + # } + # + # The code below sets template['functions'][i]['members']['getLinuxConfiguration']['output']['value']['provisionVMAgent'] to True, + # where template['functions'][i] is the 'lisa' namespace. 
+ # + functions = template.get("functions") + if functions is None: + raise Exception('Cannot find "functions" in the LISA template.') + for namespace in functions: + name = namespace.get("namespace") + if name is None: + raise Exception(f'Cannot find "namespace" in the LISA template: {namespace}') + if name == "lisa": + members = namespace.get('members') + if members is None: + raise Exception(f'Cannot find the members of the lisa namespace in the LISA template: {namespace}') + get_linux_configuration = members.get('getLinuxConfiguration') + if get_linux_configuration is None: + raise Exception(f'Cannot find the "getLinuxConfiguration" function the lisa namespace in the LISA template: {namespace}') + output = get_linux_configuration.get('output') + if output is None: + raise Exception(f'Cannot find the "output" of the getLinuxConfiguration function in the LISA template: {get_linux_configuration}') + value = output.get('value') + if value is None: + raise Exception(f"Cannot find the output's value of the getLinuxConfiguration function in the LISA template: {get_linux_configuration}") + value['provisionVMAgent'] = False + break + else: + raise Exception(f'Cannot find the "lisa" namespace in the LISA template: {functions}') + diff --git a/tests_e2e/tests/extensions_disabled.py b/tests_e2e/tests/extensions_disabled.py index 98f74dc361..1ec9e58da3 100755 --- a/tests_e2e/tests/extensions_disabled.py +++ b/tests_e2e/tests/extensions_disabled.py @@ -26,6 +26,7 @@ import pytz from assertpy import assert_that, fail +from typing import Any, Dict, List from azure.mgmt.compute.models import VirtualMachineInstanceView @@ -81,6 +82,11 @@ def run(self): .is_greater_than(pytz.utc.localize(disabled_timestamp)) log.info("The VM Agent reported status after extensions were disabled, as expected.") + def get_ignore_error_rules(self) -> List[Dict[str, Any]]: + return [ + {'message': 'No handler status found for Microsoft.Azure.Extensions.CustomScript'}, + ] + if __name__ == "__main__": 
ExtensionsDisabled.run_from_command_line() diff --git a/tests_e2e/tests/lib/agent_log.py b/tests_e2e/tests/lib/agent_log.py index e958d34d54..27953a0f17 100644 --- a/tests_e2e/tests/lib/agent_log.py +++ b/tests_e2e/tests/lib/agent_log.py @@ -209,6 +209,15 @@ def get_errors(self) -> List[AgentLogRecord]: 'if': lambda r: DISTRO_NAME == 'ubuntu' and DISTRO_VERSION >= '22.00' }, # + # Old daemons can produce this message + # + # 2023-05-24T18:04:27.467009Z WARNING Daemon Daemon Could not mount cgroups: [Errno 1] Operation not permitted: '/sys/fs/cgroup/cpu,cpuacct' -> '/sys/fs/cgroup/cpu' + # + { + 'message': r"Could not mount cgroups: \[Errno 1\] Operation not permitted", + 'if': lambda r: r.prefix == 'Daemon' + }, + # # 2022-02-09T04:50:37.384810Z ERROR ExtHandler ExtHandler Error fetching the goal state: [ProtocolError] GET vmSettings [correlation ID: 2bed9b62-188e-4668-b1a8-87c35cfa4927 eTag: 7031887032544600793]: [Internal error in HostGAPlugin] [HTTP Failed] [502: Bad Gateway] b'{ "errorCode": "VMArtifactsProfileBlobContentNotFound", "message": "VM artifacts profile blob has no content in it.", "details": ""}' # # Fetching the goal state may catch the HostGAPlugin in the process of computing the vmSettings. This can be ignored, if the issue persist the log would include other errors as well. 
diff --git a/tests_e2e/tests/no_outbound_connections/nsg_template.py b/tests_e2e/tests/no_outbound_connections/template.py similarity index 100% rename from tests_e2e/tests/no_outbound_connections/nsg_template.py rename to tests_e2e/tests/no_outbound_connections/template.py From ded1f10fd58210f7514eb7884065a5d7905317da Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Fri, 26 May 2023 12:35:48 -0700 Subject: [PATCH 016/240] Enable suppression rules for waagent.log (#2829) Co-authored-by: narrieta --- tests_e2e/tests/lib/agent_log.py | 33 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/tests_e2e/tests/lib/agent_log.py b/tests_e2e/tests/lib/agent_log.py index 27953a0f17..0816033e4d 100644 --- a/tests_e2e/tests/lib/agent_log.py +++ b/tests_e2e/tests/lib/agent_log.py @@ -92,14 +92,6 @@ def get_errors(self) -> List[AgentLogRecord]: # # NOTE: This list was taken from the older agent tests and needs to be cleaned up. Feel free to un-comment rules as new tests are added. 
# - # # This warning is expected on CentOS/RedHat 7.4, 7.8 and Redhat 7.6 - # { - # 'message': r"Move rules file 70-persistent-net.rules to /var/lib/waagent/70-persistent-net.rules", - # 'if': lambda r: - # re.match(r"(((centos|redhat)7\.[48])|(redhat7\.6)|(redhat8\.2))\D*", DISTRO_NAME, flags=re.IGNORECASE) is not None - # and r.level == "WARNING" - # and r.prefix == "ExtHandler" and r.thread in ("", "EnvHandler") - # }, # # This warning is expected on SUSE 12 # { # 'message': r"WARNING EnvHandler ExtHandler Move rules file 75-persistent-net-generator.rules to /var/lib/waagent/75-persistent-net-generator.rules", @@ -146,15 +138,13 @@ def get_errors(self) -> List[AgentLogRecord]: # and r.level == "ERROR" # and r.prefix == "Daemon" # }, - # # - # # 2022-01-20T06:52:21.515447Z WARNING Daemon Daemon Fetch failed: [HttpError] [HTTP Failed] GET https://dcrgajhx62.blob.core.windows.net/$system/edprpwqbj6.5c2ddb5b-d6c3-4d73-9468-54419ca87a97.vmSettings -- IOError timed out -- 6 attempts made - # # - # # The daemon does not need the artifacts profile blob, but the request is done as part of protocol initialization. This timeout can be ignored, if the issue persist the log would include additional instances. - # # - # { - # 'message': r"\[HTTP Failed\] GET https://.*\.vmSettings -- IOError timed out", - # 'if': lambda r: r.level == "WARNING" and r.prefix == "Daemon" - # }, + # + # The environment thread performs this operation periodically + # + { + 'message': r"Move rules file 70-persistent-net.rules to /var/lib/waagent/70-persistent-net.rules", + 'if': lambda r: r.level == "WARNING" + }, # # Probably the agent should log this as INFO, but for now it is a warning # e.g. @@ -218,6 +208,15 @@ def get_errors(self) -> List[AgentLogRecord]: 'if': lambda r: r.prefix == 'Daemon' }, # + # The daemon does not need the artifacts profile blob, but the request is done as part of protocol initialization. 
This timeout can be ignored, if the issue persist the log would include additional instances. + # + # 2022-01-20T06:52:21.515447Z WARNING Daemon Daemon Fetch failed: [HttpError] [HTTP Failed] GET https://dcrgajhx62.blob.core.windows.net/$system/edprpwqbj6.5c2ddb5b-d6c3-4d73-9468-54419ca87a97.vmSettings -- IOError timed out -- 6 attempts made + # + { + 'message': r"\[HTTP Failed\] GET https://.*\.vmSettings -- IOError timed out", + 'if': lambda r: r.level == "WARNING" and r.prefix == "Daemon" + }, + # # 2022-02-09T04:50:37.384810Z ERROR ExtHandler ExtHandler Error fetching the goal state: [ProtocolError] GET vmSettings [correlation ID: 2bed9b62-188e-4668-b1a8-87c35cfa4927 eTag: 7031887032544600793]: [Internal error in HostGAPlugin] [HTTP Failed] [502: Bad Gateway] b'{ "errorCode": "VMArtifactsProfileBlobContentNotFound", "message": "VM artifacts profile blob has no content in it.", "details": ""}' # # Fetching the goal state may catch the HostGAPlugin in the process of computing the vmSettings. This can be ignored, if the issue persist the log would include other errors as well. From 82d486f07c4107288e7e4e93227a44a2c9061fab Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Mon, 5 Jun 2023 09:37:58 -0700 Subject: [PATCH 017/240] Wait for service start when setting up test VMs; collect VM logs when setup fails (#2830) Co-authored-by: narrieta --- tests_e2e/orchestrator/lib/agent_test_suite.py | 6 +++++- tests_e2e/orchestrator/scripts/install-agent | 17 ++++++++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index 24f7525b19..3cd91ba4cb 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -433,7 +433,11 @@ def _execute(self, environment: Environment, variables: Dict[str, Any]): # its type is List[AgentTestSuite]. 
# E1133: Non-iterable value self.context.test_suites is used in an iterating context (not-an-iterable) install_test_agent = all([suite.install_test_agent for suite in self.context.test_suites]) # pylint: disable=E1133 - self._setup_node(install_test_agent) + try: + self._setup_node(install_test_agent) + except: + test_suite_success = False + raise for suite in self.context.test_suites: # pylint: disable=E1133 log.info("Executing test suite %s", suite.name) diff --git a/tests_e2e/orchestrator/scripts/install-agent b/tests_e2e/orchestrator/scripts/install-agent index 0811ebe920..4b0c8f2497 100755 --- a/tests_e2e/orchestrator/scripts/install-agent +++ b/tests_e2e/orchestrator/scripts/install-agent @@ -83,7 +83,22 @@ echo "Agent Path: $waagent" echo "Agent Version:" $python "$waagent" --version echo "Service Status:" -service-status $service_name + +# Sometimes the service can take a while to start; give it a few minutes, +started=false +for i in {1..6} +do + if service-status $service_name; then + started=true + break + fi + echo "Waiting for service to start..." + sleep 30 +done +if [ $started == false ]; then + echo "Service failed to start." 
+ exit 1 +fi # # Install the package From cce96659ca08bca24153d5ca935840ed0d0e617a Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Tue, 6 Jun 2023 14:18:31 -0700 Subject: [PATCH 018/240] Add vm arch to heartbeat telemetry (#2818) (#2838) * Add VM Arch to heartbeat telemetry * Remove outdated vmsize heartbeat tesT * Remove unused import * Use platform to get vmarch (cherry picked from commit 66e8b3d782fdf2ebc443212bbb731a89599201f6) --- azurelinuxagent/ga/update.py | 12 ++++++---- tests/ga/test_update.py | 43 ------------------------------------ 2 files changed, 8 insertions(+), 47 deletions(-) diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index cd758b972a..2b0975b05b 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -19,6 +19,7 @@ import glob import json import os +import platform import re import shutil import signal @@ -462,6 +463,9 @@ def _get_vm_size(self, protocol): return self._vm_size + def _get_vm_arch(self): + return platform.machine() + def _check_daemon_running(self, debug): # Check that the parent process (the agent's daemon) is still running if not debug and self._is_orphaned: @@ -1265,13 +1269,13 @@ def _send_heartbeat_telemetry(self, protocol): if datetime.utcnow() >= (self._last_telemetry_heartbeat + UpdateHandler.TELEMETRY_HEARTBEAT_PERIOD): dropped_packets = self.osutil.get_firewall_dropped_packets(protocol.get_endpoint()) auto_update_enabled = 1 if conf.get_autoupdate_enabled() else 0 - # Include VMSize in the heartbeat message because the kusto table does not have - # a separate column for it (or architecture). - vmsize = self._get_vm_size(protocol) + # Include vm architecture in the heartbeat message because the kusto table does not have + # a separate column for it. 
+ vmarch = self._get_vm_arch() telemetry_msg = "{0};{1};{2};{3};{4};{5}".format(self._heartbeat_counter, self._heartbeat_id, dropped_packets, self._heartbeat_update_goal_state_error_count, - auto_update_enabled, vmsize) + auto_update_enabled, vmarch) debug_log_msg = "[DEBUG HeartbeatCounter: {0};HeartbeatId: {1};DroppedPackets: {2};" \ "UpdateGSErrors: {3};AutoUpdate: {4}]".format(self._heartbeat_counter, self._heartbeat_id, dropped_packets, diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 1b84d6f1c4..e5f15fbd07 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -20,7 +20,6 @@ from datetime import datetime, timedelta from threading import current_thread -from azurelinuxagent.common.protocol.imds import ComputeInfo from tests.common.osutil.test_default import TestOSUtil import azurelinuxagent.common.osutil.default as osutil @@ -2773,48 +2772,6 @@ def test_telemetry_heartbeat_creates_event(self, patch_add_event, patch_info, *_ self.assertTrue(any(call_args[0] == "[HEARTBEAT] Agent {0} is running as the goal state agent {1}" for call_args in patch_info.call_args), "The heartbeat was not written to the agent's log") - @patch("azurelinuxagent.ga.update.add_event") - @patch("azurelinuxagent.common.protocol.imds.ImdsClient") - def test_telemetry_heartbeat_retries_failed_vm_size_fetch(self, mock_imds_factory, patch_add_event, *_): - - def validate_single_heartbeat_event_matches_vm_size(vm_size): - heartbeat_event_kwargs = [ - kwargs for _, kwargs in patch_add_event.call_args_list - if kwargs.get('op', None) == WALAEventOperation.HeartBeat - ] - - self.assertEqual(1, len(heartbeat_event_kwargs), "Expected exactly one HeartBeat event, got {0}"\ - .format(heartbeat_event_kwargs)) - - telemetry_message = heartbeat_event_kwargs[0].get("message", "") - self.assertTrue(telemetry_message.endswith(vm_size), - "Expected HeartBeat message ('{0}') to end with the test vmSize value, {1}."\ - .format(telemetry_message, vm_size)) - - with 
mock_wire_protocol(mockwiredata.DATA_FILE) as mock_protocol: - update_handler = get_update_handler() - update_handler.protocol_util.get_protocol = Mock(return_value=mock_protocol) - - # Zero out the _vm_size parameter for test resiliency - update_handler._vm_size = None - - mock_imds_client = mock_imds_factory.return_value = Mock() - - # First force a vmSize retrieval failure - mock_imds_client.get_compute.side_effect = HttpError(msg="HTTP Test Failure") - update_handler._last_telemetry_heartbeat = datetime.utcnow() - timedelta(hours=1) - update_handler._send_heartbeat_telemetry(mock_protocol) - - validate_single_heartbeat_event_matches_vm_size("unknown") - patch_add_event.reset_mock() - - # Now provide a vmSize - mock_imds_client.get_compute = lambda: ComputeInfo(vmSize="TestVmSizeValue") - update_handler._last_telemetry_heartbeat = datetime.utcnow() - timedelta(hours=1) - update_handler._send_heartbeat_telemetry(mock_protocol) - - validate_single_heartbeat_event_matches_vm_size("TestVmSizeValue") - class AgentMemoryCheckTestCase(AgentTestCase): From f9dcda9c960182f7b91feababf37fe58580bc34e Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Thu, 8 Jun 2023 09:29:42 -0700 Subject: [PATCH 019/240] Add regular expression to match logs from very old agents (#2839) Co-authored-by: narrieta --- tests_e2e/tests/lib/agent_log.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests_e2e/tests/lib/agent_log.py b/tests_e2e/tests/lib/agent_log.py index 0816033e4d..6094e033e7 100644 --- a/tests_e2e/tests/lib/agent_log.py +++ b/tests_e2e/tests/lib/agent_log.py @@ -398,12 +398,15 @@ def matches_ignore_rule(record: AgentLogRecord, ignore_rules: List[Dict[str, Any # # Older Agent: 2021/03/30 19:35:35.971742 INFO Daemon Azure Linux Agent Version:2.2.45 # + # Oldest Agent: 2023/06/07 08:04:35.336313 WARNING Disabling guest agent in accordance with ovf-env.xml + # # Extension: 2021/03/30 19:45:31 Azure Monitoring Agent for Linux started to handle. 
# 2021/03/30 19:45:31 [Microsoft.Azure.Monitor.AzureMonitorLinuxAgent-1.7.0] cwd is /var/lib/waagent/Microsoft.Azure.Monitor.AzureMonitorLinuxAgent-1.7.0 # _NEWER_AGENT_RECORD = re.compile(r'(?P[\d-]+T[\d:.]+Z)\s(?PVERBOSE|INFO|WARNING|ERROR)\s(?P\S+)\s(?P(Daemon)|(ExtHandler)|(LogCollector)|(\[\S+\]))\s(?P.*)') _2_2_46_AGENT_RECORD = re.compile(r'(?P[\d-]+T[\d:.]+Z)\s(?PVERBOSE|INFO|WARNING|ERROR)\s(?P)(?PDaemon|ExtHandler|\[\S+\])\s(?P.*)') - _OLDER_AGENT_RECORD = re.compile(r'(?P[\d/]+\s[\d:.]+)\s(?PVERBOSE|INFO|WARNING|ERROR)\s(?P)(?P\S*)\s(?P.*)') + _OLDER_AGENT_RECORD = re.compile(r'(?P[\d/]+\s[\d:.]+)\s(?PVERBOSE|INFO|WARNING|ERROR)\s(?P)(?PDaemon|ExtHandler)\s(?P.*)') + _OLDEST_AGENT_RECORD = re.compile(r'(?P[\d/]+\s[\d:.]+)\s(?PVERBOSE|INFO|WARNING|ERROR)\s(?P)(?P)(?P.*)') _EXTENSION_RECORD = re.compile(r'(?P[\d/]+\s[\d:.]+)\s(?P)(?P)((?P\[[^\]]+\])\s)?(?P.*)') def read(self) -> Iterable[AgentLogRecord]: @@ -420,7 +423,7 @@ def read(self) -> Iterable[AgentLogRecord]: raise IOError('{0} does not exist'.format(self._path)) def match_record(): - for regex in [self._NEWER_AGENT_RECORD, self._2_2_46_AGENT_RECORD, self._OLDER_AGENT_RECORD]: + for regex in [self._NEWER_AGENT_RECORD, self._2_2_46_AGENT_RECORD, self._OLDER_AGENT_RECORD, self._OLDEST_AGENT_RECORD]: m = regex.match(line) if m is not None: return m From 9cd9ed898ef1b7f08c4395f79b43ff95b186cb3b Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Mon, 12 Jun 2023 16:24:44 -0700 Subject: [PATCH 020/240] Increase concurrency level for end-to-end tests (#2841) Co-authored-by: narrieta --- tests_e2e/orchestrator/runbook.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index 6b0c46e75e..f0caabdac6 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -139,7 +139,7 @@ combinator: location: $(location) vm_size: $(vm_size) -concurrency: 16 +concurrency: 32 notifier: - type: 
agent.junit From 37a014b277a79341c363800d86405ab599240d7d Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Wed, 14 Jun 2023 10:17:32 -0700 Subject: [PATCH 021/240] Agent update refactor supports GA versioning (#2810) * agent update refactor (#2706) * agent update refactor * address PR comments * updated available agents * fix pylint warn * updated test case warning * added kill switch flag * fix pylint warning * move last update attempt variables * report GA versioning supported feature. (#2752) * control agent updates in e2e tests and fix uts (#2743) * disable agent updates in dcr and fix uts * address comments * fix uts * report GA versioning feature * Don't report SF flag idf auto update is disabled (#2754) * fix uts (#2759) * agent versioning test_suite (#2770) * agent versioning test_suite * address PR comments * fix pylint warning * fix update assertion * fix pylint error * logging manifest type and don't log same error until next period in agent update. (#2778) * improve logging and don't log same error until next period * address comments * update comment * update comment * Added self-update time window. 
(#2794) * Added self-update time window * address comment * Wait and retry for rsm goal state (#2801) * wait for rsm goal state * address comments * Not sharing agent update tests vms and added scenario to daily run (#2809) * add own vm property * add agent_update to daily run * merge conflicts * address comments * address comments * additional comments addressed * fix pylint warning --- .../common/agent_supported_feature.py | 23 +- azurelinuxagent/common/conf.py | 12 +- azurelinuxagent/common/protocol/goal_state.py | 2 +- azurelinuxagent/common/protocol/wire.py | 4 +- azurelinuxagent/ga/agent_update_handler.py | 338 ++++++ azurelinuxagent/ga/guestagent.py | 316 ++++++ azurelinuxagent/ga/update.py | 679 +----------- makepkg.py | 5 +- tests/common/test_agent_supported_feature.py | 18 + tests/data/wire/ext_conf_missing_family.xml | 19 - ...requested_version_missing_in_manifest.xml} | 0 tests/data/wire/ga_manifest_no_uris.xml | 39 + tests/ga/mocks.py | 73 +- tests/ga/test_agent_update_handler.py | 372 +++++++ tests/ga/test_extension.py | 2 +- tests/ga/test_guestagent.py | 309 ++++++ tests/ga/test_report_status.py | 12 +- tests/ga/test_update.py | 964 +++++------------- tests/protocol/mockwiredata.py | 6 + tests/protocol/test_wire.py | 91 +- tests/test_agent.py | 1 + .../orchestrator/lib/agent_test_suite.py | 4 +- tests_e2e/orchestrator/scripts/install-agent | 22 +- .../orchestrator/scripts/waagent-version | 25 + tests_e2e/pipeline/pipeline.yml | 3 +- tests_e2e/test_suites/agent_update.yml | 6 + tests_e2e/tests/agent_update/__init__.py | 0 tests_e2e/tests/agent_update/rsm_update.py | 198 ++++ tests_e2e/tests/lib/retry.py | 20 + .../tests/scripts/modify-agent-version-config | 33 + .../scripts/verify_agent_supported_feature.py | 53 + .../tests/scripts/wait_for_rsm_goal_state.py | 74 ++ 32 files changed, 2272 insertions(+), 1451 deletions(-) create mode 100644 azurelinuxagent/ga/agent_update_handler.py create mode 100644 azurelinuxagent/ga/guestagent.py rename 
tests/data/wire/{ext_conf_missing_requested_version.xml => ext_conf_requested_version_missing_in_manifest.xml} (100%) create mode 100644 tests/data/wire/ga_manifest_no_uris.xml create mode 100644 tests/ga/test_agent_update_handler.py create mode 100644 tests/ga/test_guestagent.py create mode 100755 tests_e2e/orchestrator/scripts/waagent-version create mode 100644 tests_e2e/test_suites/agent_update.yml create mode 100644 tests_e2e/tests/agent_update/__init__.py create mode 100644 tests_e2e/tests/agent_update/rsm_update.py create mode 100755 tests_e2e/tests/scripts/modify-agent-version-config create mode 100755 tests_e2e/tests/scripts/verify_agent_supported_feature.py create mode 100755 tests_e2e/tests/scripts/wait_for_rsm_goal_state.py diff --git a/azurelinuxagent/common/agent_supported_feature.py b/azurelinuxagent/common/agent_supported_feature.py index d7f93e2245..c3e83c5142 100644 --- a/azurelinuxagent/common/agent_supported_feature.py +++ b/azurelinuxagent/common/agent_supported_feature.py @@ -14,6 +14,7 @@ # # Requires Python 2.6+ and Openssl 1.0+ # +from azurelinuxagent.common import conf class SupportedFeatureNames(object): @@ -23,6 +24,7 @@ class SupportedFeatureNames(object): MultiConfig = "MultipleExtensionsPerHandler" ExtensionTelemetryPipeline = "ExtensionTelemetryPipeline" FastTrack = "FastTrack" + GAVersioningGovernance = "VersioningGovernance" # Guest Agent Versioning class AgentSupportedFeature(object): @@ -72,9 +74,28 @@ def __init__(self): supported=self.__SUPPORTED) +class _GAVersioningGovernanceFeature(AgentSupportedFeature): + """ + CRP would drive the RSM upgrade version if agent reports that it does support RSM upgrades with this flag otherwise CRP fallback to largest version. + Agent doesn't report supported feature flag if auto update is disabled or old version of agent running that doesn't understand GA versioning. + + Note: Especially Windows need this flag to report to CRP that GA doesn't support the updates. 
So linux adopted same flag to have a common solution. + """ + + __NAME = SupportedFeatureNames.GAVersioningGovernance + __VERSION = "1.0" + __SUPPORTED = conf.get_autoupdate_enabled() + + def __init__(self): + super(_GAVersioningGovernanceFeature, self).__init__(name=self.__NAME, + version=self.__VERSION, + supported=self.__SUPPORTED) + + # This is the list of features that Agent supports and we advertise to CRP __CRP_ADVERTISED_FEATURES = { - SupportedFeatureNames.MultiConfig: _MultiConfigFeature() + SupportedFeatureNames.MultiConfig: _MultiConfigFeature(), + SupportedFeatureNames.GAVersioningGovernance: _GAVersioningGovernanceFeature() } diff --git a/azurelinuxagent/common/conf.py b/azurelinuxagent/common/conf.py index 46765ea989..cb929e433a 100644 --- a/azurelinuxagent/common/conf.py +++ b/azurelinuxagent/common/conf.py @@ -129,6 +129,7 @@ def load_conf_from_file(conf_file_path, conf=__conf__): "ResourceDisk.EnableSwapEncryption": False, "AutoUpdate.Enabled": True, "EnableOverProvisioning": True, + "GAUpdates.Enabled": True, # # "Debug" options are experimental and may be removed in later # versions of the Agent. @@ -502,6 +503,14 @@ def get_monitor_network_configuration_changes(conf=__conf__): return conf.get_switch("Monitor.NetworkConfigurationChanges", False) +def get_ga_updates_enabled(conf=__conf__): + """ + If True, the agent go through update logic to look for new agents otherwise it will stop agent updates. + NOTE: This option is needed in e2e tests to control agent updates. + """ + return conf.get_switch("GAUpdates.Enabled", True) + + def get_cgroup_check_period(conf=__conf__): """ How often to perform checks on cgroups (are the processes in the cgroups as expected, @@ -629,10 +638,9 @@ def get_normal_upgrade_frequency(conf=__conf__): def get_enable_ga_versioning(conf=__conf__): """ If True, the agent uses GA Versioning for auto-updating the agent vs automatically auto-updating to the highest version. 
- NOTE: This option is experimental and may be removed in later versions of the Agent. """ - return conf.get_switch("Debug.EnableGAVersioning", False) + return conf.get_switch("Debug.EnableGAVersioning", True) def get_firewall_rules_log_period(conf=__conf__): diff --git a/azurelinuxagent/common/protocol/goal_state.py b/azurelinuxagent/common/protocol/goal_state.py index 0980ca9d02..3d02268ced 100644 --- a/azurelinuxagent/common/protocol/goal_state.py +++ b/azurelinuxagent/common/protocol/goal_state.py @@ -185,7 +185,7 @@ def fetch_extension_manifest(self, extension_name, uris): def _fetch_manifest(self, manifest_type, name, uris): try: is_fast_track = self.extensions_goal_state.source == GoalStateSource.FastTrack - xml_text = self._wire_client.fetch_manifest(uris, use_verify_header=is_fast_track) + xml_text = self._wire_client.fetch_manifest(manifest_type, uris, use_verify_header=is_fast_track) self._history.save_manifest(name, xml_text) return ExtensionManifest(xml_text) except Exception as e: diff --git a/azurelinuxagent/common/protocol/wire.py b/azurelinuxagent/common/protocol/wire.py index 38a3e0621d..ea397f0497 100644 --- a/azurelinuxagent/common/protocol/wire.py +++ b/azurelinuxagent/common/protocol/wire.py @@ -582,8 +582,8 @@ def call_storage_service(http_req, *args, **kwargs): def fetch_artifacts_profile_blob(self, uri): return self._fetch_content("artifacts profile blob", [uri], use_verify_header=False)[1] # _fetch_content returns a (uri, content) tuple - def fetch_manifest(self, uris, use_verify_header): - uri, content = self._fetch_content("manifest", uris, use_verify_header=use_verify_header) + def fetch_manifest(self, manifest_type, uris, use_verify_header): + uri, content = self._fetch_content("{0} manifest".format(manifest_type), uris, use_verify_header=use_verify_header) self.get_host_plugin().update_manifest_uri(uri) return content diff --git a/azurelinuxagent/ga/agent_update_handler.py b/azurelinuxagent/ga/agent_update_handler.py new file mode 
100644 index 0000000000..3acb5b14cf --- /dev/null +++ b/azurelinuxagent/ga/agent_update_handler.py @@ -0,0 +1,338 @@ +import datetime +import glob +import os +import shutil + +from azurelinuxagent.common import conf, logger +from azurelinuxagent.common.event import add_event, WALAEventOperation +from azurelinuxagent.common.exception import AgentUpgradeExitException +from azurelinuxagent.common.future import ustr +from azurelinuxagent.common.logger import LogLevel +from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateSource +from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatuses, VMAgentUpdateStatus +from azurelinuxagent.common.utils import fileutil, textutil +from azurelinuxagent.common.utils.flexible_version import FlexibleVersion +from azurelinuxagent.common.version import CURRENT_VERSION, AGENT_NAME, AGENT_DIR_PATTERN +from azurelinuxagent.ga.guestagent import GuestAgent, GAUpdateReportState + + +def get_agent_update_handler(protocol): + return AgentUpdateHandler(protocol) + + +class AgentUpgradeType(object): + """ + Enum for different modes of Agent Upgrade + """ + Hotfix = "Hotfix" + Normal = "Normal" + + +class AgentUpdateHandlerUpdateState(object): + """ + This class is primarily used to maintain the in-memory persistent state for the agent updates. + This state will be persisted throughout the current service run. 
+ """ + def __init__(self): + self.last_attempted_requested_version_update_time = datetime.datetime.min + self.last_attempted_hotfix_update_time = datetime.datetime.min + self.last_attempted_normal_update_time = datetime.datetime.min + self.last_warning = "" + self.last_warning_time = datetime.datetime.min + + +class AgentUpdateHandler(object): + + def __init__(self, protocol): + self._protocol = protocol + self._ga_family = conf.get_autoupdate_gafamily() + self._autoupdate_enabled = conf.get_autoupdate_enabled() + self._gs_id = "unknown" + self._is_requested_version_update = True # This is to track the current update type(requested version or self update) + self.update_state = AgentUpdateHandlerUpdateState() + + def __should_update_agent(self, requested_version): + """ + requested version update: + update is allowed once per (as specified in the conf.get_autoupdate_frequency()) + return false when we don't allow updates. + largest version update(self-update): + update is allowed once per (as specified in the conf.get_hotfix_upgrade_frequency() or conf.get_normal_upgrade_frequency()) + return false when we don't allow updates. + """ + now = datetime.datetime.now() + + if self._is_requested_version_update: + if self.update_state.last_attempted_requested_version_update_time != datetime.datetime.min: + next_attempt_time = self.update_state.last_attempted_requested_version_update_time + datetime.timedelta(seconds=conf.get_autoupdate_frequency()) + else: + next_attempt_time = now + + if next_attempt_time > now: + return False + # The time limit elapsed for us to allow updates. 
+ return True + else: + next_hotfix_time, next_normal_time = self.__get_next_upgrade_times(now) + upgrade_type = self.__get_agent_upgrade_type(requested_version) + + if (upgrade_type == AgentUpgradeType.Hotfix and next_hotfix_time <= now) or ( + upgrade_type == AgentUpgradeType.Normal and next_normal_time <= now): + return True + return False + + def __update_last_attempt_update_times(self): + now = datetime.datetime.now() + if self._is_requested_version_update: + self.update_state.last_attempted_requested_version_update_time = now + else: + self.update_state.last_attempted_normal_update_time = now + self.update_state.last_attempted_hotfix_update_time = now + + @staticmethod + def __get_agent_upgrade_type(requested_version): + # We follow semantic versioning for the agent, if .. is same, then has changed. + # In this case, we consider it as a Hotfix upgrade. Else we consider it a Normal upgrade. + if requested_version.major == CURRENT_VERSION.major and requested_version.minor == CURRENT_VERSION.minor and requested_version.patch == CURRENT_VERSION.patch: + return AgentUpgradeType.Hotfix + return AgentUpgradeType.Normal + + def __get_next_upgrade_times(self, now): + """ + Get the next upgrade times + return: Next Hotfix Upgrade Time, Next Normal Upgrade Time + """ + + def get_next_process_time(last_val, frequency): + return now if last_val == datetime.datetime.min else last_val + datetime.timedelta(seconds=frequency) + + next_hotfix_time = get_next_process_time(self.update_state.last_attempted_hotfix_update_time, + conf.get_hotfix_upgrade_frequency()) + next_normal_time = get_next_process_time(self.update_state.last_attempted_normal_update_time, + conf.get_normal_upgrade_frequency()) + + return next_hotfix_time, next_normal_time + + def __get_agent_family_manifests(self, goal_state): + """ + Get the agent_family from last GS for the given family + Returns: first entry of Manifest + Exception if no manifests found in the last GS + """ + family = self._ga_family + 
agent_families = goal_state.extensions_goal_state.agent_families + family_found = False + agent_family_manifests = [] + for m in agent_families: + if m.name == family: + family_found = True + if len(m.uris) > 0: + agent_family_manifests.append(m) + + if not family_found: + raise Exception(u"Agent family: {0} not found in the goal state, skipping agent update".format(family)) + + if len(agent_family_manifests) == 0: + raise Exception( + u"No manifest links found for agent family: {0} for incarnation: {1}, skipping agent update".format( + self._ga_family, self._gs_id)) + return agent_family_manifests[0] + + @staticmethod + def __get_requested_version(agent_family): + """ + Get the requested version from agent family + Returns: Requested version if supported and available in the GS + None if requested version missing or GA versioning not enabled + """ + if conf.get_enable_ga_versioning() and agent_family.is_requested_version_specified: + if agent_family.requested_version is not None: + return FlexibleVersion(agent_family.requested_version) + return None + + @staticmethod + def __get_largest_version(agent_manifest): + largest_version = FlexibleVersion("0.0.0.0") + for pkg in agent_manifest.pkg_list.versions: + pkg_version = FlexibleVersion(pkg.version) + if pkg_version > largest_version: + largest_version = pkg_version + return largest_version + + def __download_and_get_agent(self, goal_state, agent_family, agent_manifest, requested_version): + """ + This function downloads the new agent(requested version) and returns the downloaded version. 
+ """ + if agent_manifest is None: # Fetch agent manifest if it's not already done + agent_manifest = goal_state.fetch_agent_manifest(agent_family.name, agent_family.uris) + package_to_download = self.__get_agent_package_to_download(agent_manifest, requested_version) + is_fast_track_goal_state = goal_state.extensions_goal_state.source == GoalStateSource.FastTrack + agent = GuestAgent.from_agent_package(package_to_download, self._protocol, is_fast_track_goal_state) + return agent + + def __get_agent_package_to_download(self, agent_manifest, version): + """ + Returns the package of the given Version found in the manifest. If not found, returns exception + """ + for pkg in agent_manifest.pkg_list.versions: + if FlexibleVersion(pkg.version) == version: + # Found a matching package, only download that one + return pkg + + raise Exception("No matching package found in the agent manifest for requested version: {0} in goal state incarnation: {1}, " + "skipping agent update".format(str(version), self._gs_id)) + + @staticmethod + def __purge_extra_agents_from_disk(current_version, known_agents): + """ + Remove from disk all directories and .zip files of unknown agents + (without removing the current, running agent). 
+ """ + path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME)) + + known_versions = [agent.version for agent in known_agents] + known_versions.append(current_version) + + for agent_path in glob.iglob(path): + try: + name = fileutil.trim_ext(agent_path, "zip") + m = AGENT_DIR_PATTERN.match(name) + if m is not None and FlexibleVersion(m.group(1)) not in known_versions: + if os.path.isfile(agent_path): + logger.info(u"Purging outdated Agent file {0}", agent_path) + os.remove(agent_path) + else: + logger.info(u"Purging outdated Agent directory {0}", agent_path) + shutil.rmtree(agent_path) + except Exception as e: + logger.warn(u"Purging {0} raised exception: {1}", agent_path, ustr(e)) + + def __proceed_with_update(self, requested_version): + """ + If requested version is specified, upgrade/downgrade to the specified version. + Raises: AgentUpgradeExitException + """ + if requested_version < CURRENT_VERSION: + # In case of a downgrade, we mark the current agent as bad version to avoid starting it back up ever again + # (the expectation here being that if we get request to a downgrade, + # there's a good reason for not wanting the current version). 
+ prefix = "downgrade" + try: + # We should always have an agent directory for the CURRENT_VERSION + agents_on_disk = AgentUpdateHandler.__get_available_agents_on_disk() + current_agent = next(agent for agent in agents_on_disk if agent.version == CURRENT_VERSION) + msg = "Marking the agent {0} as bad version since a downgrade was requested in the GoalState, " \ + "suggesting that we really don't want to execute any extensions using this version".format(CURRENT_VERSION) + self.__log_event(LogLevel.INFO, msg) + current_agent.mark_failure(is_fatal=True, reason=msg) + except StopIteration: + logger.warn( + "Could not find a matching agent with current version {0} to blacklist, skipping it".format( + CURRENT_VERSION)) + else: + # In case of an upgrade, we don't need to exclude anything as the daemon will automatically + # start the next available highest version which would be the target version + prefix = "upgrade" + raise AgentUpgradeExitException("Agent update found, Exiting current process to {0} to the new Agent version {1}".format(prefix, requested_version)) + + @staticmethod + def __get_available_agents_on_disk(): + available_agents = [agent for agent in AgentUpdateHandler.__get_all_agents_on_disk() if agent.is_available] + return sorted(available_agents, key=lambda agent: agent.version, reverse=True) + + @staticmethod + def __get_all_agents_on_disk(): + path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME)) + return [GuestAgent.from_installed_agent(path=agent_dir) for agent_dir in glob.iglob(path) if os.path.isdir(agent_dir)] + + @staticmethod + def __log_event(level, msg, success=True): + if level == LogLevel.INFO: + logger.info(msg) + elif level == LogLevel.WARNING: + logger.warn(msg) + elif level == LogLevel.ERROR: + logger.error(msg) + add_event(op=WALAEventOperation.AgentUpgrade, is_success=success, message=msg, log_event=False) + + def run(self, goal_state): + try: + # Ignore new agents if update is disabled. 
The latter flag only used in e2e tests. + if not self._autoupdate_enabled or not conf.get_ga_updates_enabled(): + return + + self._gs_id = goal_state.extensions_goal_state.id + agent_family = self.__get_agent_family_manifests(goal_state) + requested_version = self.__get_requested_version(agent_family) + agent_manifest = None # This is to make sure fetch agent manifest once per update + warn_msg = "" + if requested_version is None: + if conf.get_enable_ga_versioning(): # log the warning only when ga versioning is enabled + warn_msg = "Missing requested version in agent family: {0} for incarnation: {1}, fallback to largest version update".format(self._ga_family, self._gs_id) + GAUpdateReportState.report_error_msg = warn_msg + agent_manifest = goal_state.fetch_agent_manifest(agent_family.name, agent_family.uris) + requested_version = self.__get_largest_version(agent_manifest) + self._is_requested_version_update = False + else: + self._is_requested_version_update = True + # Save the requested version to report back + GAUpdateReportState.report_expected_version = requested_version + # Remove the missing requested version warning once requested version becomes available + if "Missing requested version" in GAUpdateReportState.report_error_msg: + GAUpdateReportState.report_error_msg = "" + + if requested_version == CURRENT_VERSION: + return + + # Check if an update is allowed + if not self.__should_update_agent(requested_version): + return + + if warn_msg != "": + self.__log_event(LogLevel.WARNING, warn_msg) + + msg = "Goal state {0} is requesting a new agent version {1}, will update the agent before processing the goal state.".format( + self._gs_id, str(requested_version)) + self.__log_event(LogLevel.INFO, msg) + + try: + agent = self.__download_and_get_agent(goal_state, agent_family, agent_manifest, requested_version) + + if agent.is_blacklisted or not agent.is_downloaded: + msg = "Downloaded agent version is in bad state : {0} , skipping agent update".format( + 
str(agent.version)) + self.__log_event(LogLevel.WARNING, msg) + return + + # We delete the directory and the zip package from the filesystem except current version and target version + self.__purge_extra_agents_from_disk(CURRENT_VERSION, known_agents=[agent]) + self.__proceed_with_update(requested_version) + + finally: + self.__update_last_attempt_update_times() + + except Exception as err: + if isinstance(err, AgentUpgradeExitException): + raise err + if "Missing requested version" not in GAUpdateReportState.report_error_msg: + GAUpdateReportState.report_error_msg = "Unable to update Agent: {0}".format(textutil.format_exception(err)) + self.__log_event(LogLevel.WARNING, GAUpdateReportState.report_error_msg, success=False) + + def get_vmagent_update_status(self): + """ + This function gets the VMAgent update status as per the last attempted update. + Returns: None if fail to report or update never attempted with requested version + """ + try: + if conf.get_enable_ga_versioning(): + if not GAUpdateReportState.report_error_msg: + status = VMAgentUpdateStatuses.Success + code = 0 + else: + status = VMAgentUpdateStatuses.Error + code = 1 + return VMAgentUpdateStatus(expected_version=str(GAUpdateReportState.report_expected_version), status=status, code=code, message=GAUpdateReportState.report_error_msg) + except Exception as err: + self.__log_event(LogLevel.WARNING, "Unable to report agent update status: {0}".format( + textutil.format_exception(err)), success=False) + return None diff --git a/azurelinuxagent/ga/guestagent.py b/azurelinuxagent/ga/guestagent.py new file mode 100644 index 0000000000..56f3142447 --- /dev/null +++ b/azurelinuxagent/ga/guestagent.py @@ -0,0 +1,316 @@ +import json +import os +import shutil +import time + +from azurelinuxagent.common.event import add_event, WALAEventOperation +from azurelinuxagent.common.future import ustr +from azurelinuxagent.common.utils import textutil + +from azurelinuxagent.common import logger, conf +from 
azurelinuxagent.common.exception import UpdateError +from azurelinuxagent.common.utils.flexible_version import FlexibleVersion +from azurelinuxagent.common.version import AGENT_DIR_PATTERN, AGENT_NAME, CURRENT_VERSION +from azurelinuxagent.ga.exthandlers import HandlerManifest + +AGENT_ERROR_FILE = "error.json" # File name for agent error record +AGENT_MANIFEST_FILE = "HandlerManifest.json" +MAX_FAILURE = 3 # Max failure allowed for agent before declare bad agent + + +class GAUpdateReportState(object): + """ + This class is primarily used to maintain the in-memory persistent state for the agent updates. + This state will be persisted throughout the current service run and might be modified by external classes. + """ + report_error_msg = "" + report_expected_version = FlexibleVersion("0.0.0.0") + + +class GuestAgent(object): + def __init__(self, path, pkg, protocol, is_fast_track_goal_state): + """ + If 'path' is given, the object is initialized to the version installed under that path. + + If 'pkg' is given, the version specified in the package information is downloaded and the object is + initialized to that version. + + 'is_fast_track_goal_state' and 'protocol' are used only when a package is downloaded. 
+ + NOTE: Prefer using the from_installed_agent and from_agent_package methods instead of calling __init__ directly + """ + self._is_fast_track_goal_state = is_fast_track_goal_state + self.pkg = pkg + self._protocol = protocol + version = None + if path is not None: + m = AGENT_DIR_PATTERN.match(path) + if m is None: + raise UpdateError(u"Illegal agent directory: {0}".format(path)) + version = m.group(1) + elif self.pkg is not None: + version = pkg.version + + if version is None: + raise UpdateError(u"Illegal agent version: {0}".format(version)) + self.version = FlexibleVersion(version) + + location = u"disk" if path is not None else u"package" + logger.verbose(u"Loading Agent {0} from {1}", self.name, location) + + self.error = GuestAgentError(self.get_agent_error_file()) + self.error.load() + + try: + self._ensure_downloaded() + self._ensure_loaded() + except Exception as e: + # If we're unable to download/unpack the agent, delete the Agent directory + try: + if os.path.isdir(self.get_agent_dir()): + shutil.rmtree(self.get_agent_dir(), ignore_errors=True) + except Exception as err: + logger.warn("Unable to delete Agent files: {0}".format(err)) + msg = u"Agent {0} install failed with exception:".format( + self.name) + detailed_msg = '{0} {1}'.format(msg, textutil.format_exception(e)) + if "Missing requested version" not in GAUpdateReportState.report_error_msg: + GAUpdateReportState.report_error_msg = detailed_msg # capture the download errors to report back + add_event( + AGENT_NAME, + version=self.version, + op=WALAEventOperation.Install, + is_success=False, + message=detailed_msg) + + @staticmethod + def from_installed_agent(path): + """ + Creates an instance of GuestAgent using the agent installed in the given 'path'. 
+ """ + return GuestAgent(path, None, None, False) + + @staticmethod + def from_agent_package(package, protocol, is_fast_track_goal_state): + """ + Creates an instance of GuestAgent using the information provided in the 'package'; if that version of the agent is not installed it, it installs it. + """ + return GuestAgent(None, package, protocol, is_fast_track_goal_state) + + @property + def name(self): + return "{0}-{1}".format(AGENT_NAME, self.version) + + def get_agent_cmd(self): + return self.manifest.get_enable_command() + + def get_agent_dir(self): + return os.path.join(conf.get_lib_dir(), self.name) + + def get_agent_error_file(self): + return os.path.join(conf.get_lib_dir(), self.name, AGENT_ERROR_FILE) + + def get_agent_manifest_path(self): + return os.path.join(self.get_agent_dir(), AGENT_MANIFEST_FILE) + + def get_agent_pkg_path(self): + return ".".join((os.path.join(conf.get_lib_dir(), self.name), "zip")) + + def clear_error(self): + self.error.clear() + self.error.save() + + @property + def is_available(self): + return self.is_downloaded and not self.is_blacklisted + + @property + def is_blacklisted(self): + return self.error is not None and self.error.is_blacklisted + + @property + def is_downloaded(self): + return self.is_blacklisted or \ + os.path.isfile(self.get_agent_manifest_path()) + + def mark_failure(self, is_fatal=False, reason=''): + try: + if not os.path.isdir(self.get_agent_dir()): + os.makedirs(self.get_agent_dir()) + self.error.mark_failure(is_fatal=is_fatal, reason=reason) + self.error.save() + if self.error.is_blacklisted: + msg = u"Agent {0} is permanently blacklisted".format(self.name) + logger.warn(msg) + add_event(op=WALAEventOperation.AgentBlacklisted, is_success=False, message=msg, log_event=False, + version=self.version) + except Exception as e: + logger.warn(u"Agent {0} failed recording error state: {1}", self.name, ustr(e)) + + def _ensure_downloaded(self): + logger.verbose(u"Ensuring Agent {0} is downloaded", self.name) + + if 
self.is_downloaded: + logger.verbose(u"Agent {0} was previously downloaded - skipping download", self.name) + return + + if self.pkg is None: + raise UpdateError(u"Agent {0} is missing package and download URIs".format( + self.name)) + + self._download() + + msg = u"Agent {0} downloaded successfully".format(self.name) + logger.verbose(msg) + add_event( + AGENT_NAME, + version=self.version, + op=WALAEventOperation.Install, + is_success=True, + message=msg) + + def _ensure_loaded(self): + self._load_manifest() + self._load_error() + + def _download(self): + try: + self._protocol.client.download_zip_package("agent package", self.pkg.uris, self.get_agent_pkg_path(), self.get_agent_dir(), use_verify_header=self._is_fast_track_goal_state) + except Exception as exception: + msg = "Unable to download Agent {0}: {1}".format(self.name, ustr(exception)) + add_event( + AGENT_NAME, + op=WALAEventOperation.Download, + version=CURRENT_VERSION, + is_success=False, + message=msg) + raise UpdateError(msg) + + def _load_error(self): + try: + self.error = GuestAgentError(self.get_agent_error_file()) + self.error.load() + logger.verbose(u"Agent {0} error state: {1}", self.name, ustr(self.error)) + except Exception as e: + logger.warn(u"Agent {0} failed loading error state: {1}", self.name, ustr(e)) + + def _load_manifest(self): + path = self.get_agent_manifest_path() + if not os.path.isfile(path): + msg = u"Agent {0} is missing the {1} file".format(self.name, AGENT_MANIFEST_FILE) + raise UpdateError(msg) + + with open(path, "r") as manifest_file: + try: + manifests = json.load(manifest_file) + except Exception as e: + msg = u"Agent {0} has a malformed {1} ({2})".format(self.name, AGENT_MANIFEST_FILE, ustr(e)) + raise UpdateError(msg) + if type(manifests) is list: + if len(manifests) <= 0: + msg = u"Agent {0} has an empty {1}".format(self.name, AGENT_MANIFEST_FILE) + raise UpdateError(msg) + manifest = manifests[0] + else: + manifest = manifests + + try: + self.manifest = 
HandlerManifest(manifest) # pylint: disable=W0201
+ if len(self.manifest.get_enable_command()) <= 0:
+ raise Exception(u"Manifest is missing the enable command")
+ except Exception as e:
+ msg = u"Agent {0} has an illegal {1}: {2}".format(
+ self.name,
+ AGENT_MANIFEST_FILE,
+ ustr(e))
+ raise UpdateError(msg)
+
+ logger.verbose(
+ u"Agent {0} loaded manifest from {1}",
+ self.name,
+ self.get_agent_manifest_path())
+ logger.verbose(u"Successfully loaded Agent {0} {1}: {2}",
+ self.name,
+ AGENT_MANIFEST_FILE,
+ ustr(self.manifest.data))
+ return
+
+
+class GuestAgentError(object):
+ def __init__(self, path):
+ self.last_failure = 0.0
+ self.was_fatal = False
+ if path is None:
+ raise UpdateError(u"GuestAgentError requires a path")
+ self.path = path
+ self.failure_count = 0
+ self.reason = ''
+
+ self.clear()
+ return
+
+ def mark_failure(self, is_fatal=False, reason=''):
+ self.last_failure = time.time()
+ self.failure_count += 1
+ self.was_fatal = is_fatal
+ self.reason = reason
+ return
+
+ def clear(self):
+ self.last_failure = 0.0
+ self.failure_count = 0
+ self.was_fatal = False
+ self.reason = ''
+ return
+
+ @property
+ def is_blacklisted(self):
+ return self.was_fatal or self.failure_count >= MAX_FAILURE
+
+ def load(self):
+ if self.path is not None and os.path.isfile(self.path):
+ try:
+ with open(self.path, 'r') as f:
+ self.from_json(json.load(f))
+ except Exception as error:
+ # The error.json file is only supposed to be written by the agent.
+ # If for whatever reason the file is malformed, just delete it to reset state of the errors.
+ logger.warn(
+ "Ran into error when trying to load error file {0}, deleting it to clean state. 
Error: {1}".format( + self.path, textutil.format_exception(error))) + try: + os.remove(self.path) + except Exception: + # We try best case efforts to delete the file, ignore error if we're unable to do so + pass + return + + def save(self): + if os.path.isdir(os.path.dirname(self.path)): + with open(self.path, 'w') as f: + json.dump(self.to_json(), f) + return + + def from_json(self, data): + self.last_failure = max(self.last_failure, data.get(u"last_failure", 0.0)) + self.failure_count = max(self.failure_count, data.get(u"failure_count", 0)) + self.was_fatal = self.was_fatal or data.get(u"was_fatal", False) + reason = data.get(u"reason", '') + self.reason = reason if reason != '' else self.reason + return + + def to_json(self): + data = { + u"last_failure": self.last_failure, + u"failure_count": self.failure_count, + u"was_fatal": self.was_fatal, + u"reason": ustr(self.reason) + } + return data + + def __str__(self): + return "Last Failure: {0}, Total Failures: {1}, Fatal: {2}, Reason: {3}".format( + self.last_failure, + self.failure_count, + self.was_fatal, + self.reason) diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 2b0975b05b..6f666156f4 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -17,7 +17,6 @@ # Requires Python 2.6+ and Openssl 1.0+ # import glob -import json import os import platform import re @@ -34,18 +33,17 @@ from azurelinuxagent.common import logger from azurelinuxagent.common.protocol.imds import get_imds_client from azurelinuxagent.common.utils import fileutil, textutil -from azurelinuxagent.common.agent_supported_feature import get_supported_feature_by_name, SupportedFeatureNames +from azurelinuxagent.common.agent_supported_feature import get_supported_feature_by_name, SupportedFeatureNames, \ + get_agent_supported_features_list_for_crp from azurelinuxagent.common.cgroupconfigurator import CGroupConfigurator from azurelinuxagent.common.event import add_event, 
initialize_event_logger_vminfo_common_parameters, \ WALAEventOperation, EVENTS_DIRECTORY -from azurelinuxagent.common.exception import UpdateError, ExitException, AgentUpgradeExitException, AgentMemoryExceededException +from azurelinuxagent.common.exception import ExitException, AgentUpgradeExitException, AgentMemoryExceededException from azurelinuxagent.common.future import ustr from azurelinuxagent.common.osutil import get_osutil, systemd from azurelinuxagent.common.persist_firewall_rules import PersistFirewallRulesHandler -from azurelinuxagent.common.protocol.goal_state import GoalStateSource from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol, VmSettingsNotSupported -from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatus, VMAgentUpdateStatuses, ExtHandlerPackageList, \ - VERSION_0 +from azurelinuxagent.common.protocol.restapi import VERSION_0 from azurelinuxagent.common.protocol.util import get_protocol_util from azurelinuxagent.common.utils import shellutil from azurelinuxagent.common.utils.archive import StateArchiver, AGENT_STATUS_FILE @@ -55,16 +53,16 @@ from azurelinuxagent.common.version import AGENT_LONG_NAME, AGENT_NAME, AGENT_DIR_PATTERN, CURRENT_AGENT, AGENT_VERSION, \ CURRENT_VERSION, DISTRO_NAME, DISTRO_VERSION, get_lis_version, \ has_logrotate, PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO, get_daemon_version +from azurelinuxagent.ga.agent_update_handler import get_agent_update_handler from azurelinuxagent.ga.collect_logs import get_collect_logs_handler, is_log_collection_allowed from azurelinuxagent.ga.collect_telemetry_events import get_collect_telemetry_events_handler from azurelinuxagent.ga.env import get_env_handler -from azurelinuxagent.ga.exthandlers import HandlerManifest, ExtHandlersHandler, list_agent_lib_directory, \ +from azurelinuxagent.ga.exthandlers import ExtHandlersHandler, list_agent_lib_directory, \ ExtensionStatusValue, ExtHandlerStatusValue +from azurelinuxagent.ga.guestagent import 
GuestAgent from azurelinuxagent.ga.monitor import get_monitor_handler from azurelinuxagent.ga.send_telemetry_events import get_send_telemetry_events_handler -AGENT_ERROR_FILE = "error.json" # File name for agent error record -AGENT_MANIFEST_FILE = "HandlerManifest.json" AGENT_PARTITION_FILE = "partition" CHILD_HEALTH_INTERVAL = 15 * 60 @@ -72,8 +70,6 @@ CHILD_LAUNCH_RESTART_MAX = 3 CHILD_POLL_INTERVAL = 60 -MAX_FAILURE = 3 # Max failure allowed for agent before blacklisted - GOAL_STATE_PERIOD_EXTENSIONS_DISABLED = 5 * 60 ORPHAN_POLL_INTERVAL = 3 @@ -122,14 +118,6 @@ def __str__(self): return ustr(self.summary) -class AgentUpgradeType(object): - """ - Enum for different modes of Agent Upgrade - """ - Hotfix = "Hotfix" - Normal = "Normal" - - def get_update_handler(): return UpdateHandler() @@ -144,11 +132,6 @@ def __init__(self): self._is_running = True - # Member variables to keep track of the Agent AutoUpgrade - self.last_attempt_time = None - self._last_hotfix_upgrade_time = None - self._last_normal_upgrade_time = None - self.agents = [] self.child_agent = None @@ -369,6 +352,7 @@ def run(self, debug=False): from azurelinuxagent.ga.remoteaccess import get_remote_access_handler remote_access_handler = get_remote_access_handler(protocol) + agent_update_handler = get_agent_update_handler(protocol) self._ensure_no_orphans() self._emit_restart_event() @@ -402,7 +386,7 @@ def run(self, debug=False): while self.is_running: self._check_daemon_running(debug) self._check_threads_running(all_thread_handlers) - self._process_goal_state(exthandlers_handler, remote_access_handler) + self._process_goal_state(exthandlers_handler, remote_access_handler, agent_update_handler) self._send_heartbeat_telemetry(protocol) self._check_agent_memory_usage() time.sleep(self._goal_state_period) @@ -523,80 +507,6 @@ def _try_update_goal_state(self, protocol): return True - def __update_guest_agent(self, protocol): - """ - This function checks for new Agent updates and raises 
AgentUpgradeExitException if available. - There are 2 different ways the agent checks for an update - - 1) Requested Version is specified in the Goal State. - - In this case, the Agent will download the requested version and upgrade/downgrade instantly. - 2) No requested version. - - In this case, the agent will periodically check (1 hr) for new agent versions in GA Manifest. - - If available, it will download all versions > CURRENT_VERSION. - - Depending on the highest version > CURRENT_VERSION, - the agent will update within 4 hrs (for a Hotfix update) or 24 hrs (for a Normal update) - """ - - def log_next_update_time(): - next_normal_time, next_hotfix_time = self.__get_next_upgrade_times() - upgrade_type = self.__get_agent_upgrade_type(available_agent) - next_time = next_hotfix_time if upgrade_type == AgentUpgradeType.Hotfix else next_normal_time - message_ = "Discovered new {0} upgrade {1}; Will upgrade on or after {2}".format( - upgrade_type, available_agent.name, - datetime.utcfromtimestamp(next_time).strftime(logger.Logger.LogTimeFormatInUTC)) - add_event(AGENT_NAME, op=WALAEventOperation.AgentUpgrade, version=CURRENT_VERSION, is_success=True, - message=message_, log_event=False) - logger.info(message_) - - def handle_updates_for_requested_version(): - if requested_version < CURRENT_VERSION: - prefix = "downgrade" - # In case of a downgrade, we blacklist the current agent to avoid starting it back up ever again - # (the expectation here being that if RSM is asking us to a downgrade, - # there's a good reason for not wanting the current version). 
- try: - # We should always have an agent directory for the CURRENT_VERSION - # (unless the CURRENT_VERSION == daemon version, but since we don't support downgrading - # below daemon version, we will never reach this code path if that's the scenario) - current_agent = next(agent for agent in self.agents if agent.version == CURRENT_VERSION) - msg = "Blacklisting the agent {0} since a downgrade was requested in the GoalState, " \ - "suggesting that we really don't want to execute any extensions using this version".format( - CURRENT_VERSION) - logger.info(msg) - current_agent.mark_failure(is_fatal=True, reason=msg) - except StopIteration: - logger.warn( - "Could not find a matching agent with current version {0} to blacklist, skipping it".format( - CURRENT_VERSION)) - else: - # In case of an upgrade, we don't need to blacklist anything as the daemon will automatically - # start the next available highest version which would be the requested version - prefix = "upgrade" - raise AgentUpgradeExitException( - "Exiting current process to {0} to the request Agent version {1}".format(prefix, requested_version)) - - # Skip the update if there is no goal state yet or auto-update is disabled - if self._goal_state is None or not conf.get_autoupdate_enabled(): - return False - - if self._download_agent_if_upgrade_available(protocol): - # The call to get_latest_agent_greater_than_daemon() also finds all agents in directory and sets the self.agents property. - # This state is used to find the GuestAgent object with the current version later if requested version is available in last GS. 
- available_agent = self.get_latest_agent_greater_than_daemon() - requested_version, _ = self.__get_requested_version_and_agent_family_from_last_gs() - if requested_version is not None: - # If requested version specified, upgrade/downgrade to the specified version instantly as this is - # driven by the goal state (as compared to the agent periodically checking for new upgrades every hour) - handle_updates_for_requested_version() - elif available_agent is None: - # Legacy behavior: The current agent can become unavailable and needs to be reverted. - # In that case, self._upgrade_available() returns True and available_agent would be None. Handling it here. - raise AgentUpgradeExitException( - "Agent {0} is reverting to the installed agent -- exiting".format(CURRENT_AGENT)) - else: - log_next_update_time() - - self.__upgrade_agent_if_permitted() - def _processing_new_incarnation(self): """ True if we are currently processing a new incarnation (i.e. WireServer goal state) @@ -610,18 +520,18 @@ def _processing_new_extensions_goal_state(self): egs = self._goal_state.extensions_goal_state return self._goal_state is not None and egs.id != self._last_extensions_gs_id and not egs.is_outdated - def _process_goal_state(self, exthandlers_handler, remote_access_handler): + def _process_goal_state(self, exthandlers_handler, remote_access_handler, agent_update_handler): protocol = exthandlers_handler.protocol # update self._goal_state if not self._try_update_goal_state(protocol): - # agent updates and status reporting should be done even when the goal state is not updated - self.__update_guest_agent(protocol) - self._report_status(exthandlers_handler) + agent_update_handler.run(self._goal_state) + # status reporting should be done even when the goal state is not updated + self._report_status(exthandlers_handler, agent_update_handler) return # check for agent updates - self.__update_guest_agent(protocol) + agent_update_handler.run(self._goal_state) try: if 
self._processing_new_extensions_goal_state(): @@ -639,7 +549,7 @@ def _process_goal_state(self, exthandlers_handler, remote_access_handler): CGroupConfigurator.get_instance().check_cgroups(cgroup_metrics=[]) # report status before processing the remote access, since that operation can take a long time - self._report_status(exthandlers_handler) + self._report_status(exthandlers_handler, agent_update_handler) if self._processing_new_incarnation(): remote_access_handler.run() @@ -668,48 +578,11 @@ def _cleanup_legacy_goal_state_history(): except Exception as exception: logger.warn("Error removing legacy history files: {0}", ustr(exception)) - def __get_vmagent_update_status(self, goal_state_changed): - """ - This function gets the VMAgent update status as per the last GoalState. - Returns: None if the last GS does not ask for requested version else VMAgentUpdateStatus - """ - if not conf.get_enable_ga_versioning(): - return None - - update_status = None - - try: - requested_version, manifest = self.__get_requested_version_and_agent_family_from_last_gs() - if manifest is None and goal_state_changed: - logger.info("Unable to report update status as no matching manifest found for family: {0}".format( - conf.get_autoupdate_gafamily())) - return None - - if requested_version is not None: - if CURRENT_VERSION == requested_version: - status = VMAgentUpdateStatuses.Success - code = 0 - else: - status = VMAgentUpdateStatuses.Error - code = 1 - update_status = VMAgentUpdateStatus(expected_version=manifest.requested_version_string, status=status, - code=code) - except Exception as error: - if goal_state_changed: - err_msg = "[This error will only be logged once per goal state] " \ - "Ran into error when trying to fetch updateStatus for the agent, skipping reporting update satus. 
Error: {0}".format( - textutil.format_exception(error)) - logger.warn(err_msg) - add_event(op=WALAEventOperation.AgentUpgrade, is_success=False, message=err_msg, log_event=False) - - return update_status - - def _report_status(self, exthandlers_handler): - vm_agent_update_status = self.__get_vmagent_update_status(self._processing_new_extensions_goal_state()) + def _report_status(self, exthandlers_handler, agent_update_handler): # report_ext_handlers_status does its own error handling and returns None if an error occurred vm_status = exthandlers_handler.report_ext_handlers_status( goal_state_changed=self._processing_new_extensions_goal_state(), - vm_agent_update_status=vm_agent_update_status, vm_agent_supports_fast_track=self._supports_fast_track) + vm_agent_update_status=agent_update_handler.get_vmagent_update_status(), vm_agent_supports_fast_track=self._supports_fast_track) if vm_status is not None: self._report_extensions_summary(vm_status) @@ -831,6 +704,16 @@ def log_if_op_disabled(name, value): if not value: log_event("{0} is set to False, not processing the operation".format(name)) + def log_if_agent_versioning_feature_disabled(): + supports_ga_versioning = False + for _, feature in get_agent_supported_features_list_for_crp().items(): + if feature.name == SupportedFeatureNames.GAVersioningGovernance: + supports_ga_versioning = True + break + if not supports_ga_versioning: + msg = "Agent : {0} doesn't support GA Versioning".format(CURRENT_VERSION) + log_event(msg) + log_if_int_changed_from_default("Extensions.GoalStatePeriod", conf.get_goal_state_period(), "Changing this value affects how often extensions are processed and status for the VM is reported. 
Too small a value may report the VM as unresponsive") log_if_int_changed_from_default("Extensions.InitialGoalStatePeriod", conf.get_initial_goal_state_period(), @@ -851,6 +734,8 @@ def log_if_op_disabled(name, value): if conf.get_lib_dir() != "/var/lib/waagent": log_event("lib dir is in an unexpected location: {0}".format(conf.get_lib_dir())) + log_if_agent_versioning_feature_disabled() + except Exception as e: logger.warn("Failed to log changes in configuration: {0}", ustr(e)) @@ -1071,173 +956,6 @@ def _shutdown(self): str(e)) return - def __get_requested_version_and_agent_family_from_last_gs(self): - """ - Get the requested version and corresponding manifests from last GS if supported - Returns: (Requested Version, Manifest) if supported and available - (None, None) if no manifests found in the last GS - (None, manifest) if not supported or not specified in GS - """ - family_name = conf.get_autoupdate_gafamily() - agent_families = self._goal_state.extensions_goal_state.agent_families - agent_families = [m for m in agent_families if m.name == family_name and len(m.uris) > 0] - if len(agent_families) == 0: - return None, None - if conf.get_enable_ga_versioning() and agent_families[0].is_requested_version_specified: - return agent_families[0].requested_version, agent_families[0] - return None, agent_families[0] - - def _download_agent_if_upgrade_available(self, protocol, base_version=CURRENT_VERSION): - """ - This function downloads the new agent if an update is available. 
- If a requested version is available in goal state, then only that version is downloaded (new-update model) - Else, we periodically (1hr by default) checks if new Agent upgrade is available and download it on filesystem if available (old-update model) - rtype: Boolean - return: True if current agent is no longer available or an agent with a higher version number is available - else False - """ - - def report_error(msg_, version_=CURRENT_VERSION, op=WALAEventOperation.Download): - logger.warn(msg_) - add_event(AGENT_NAME, op=op, version=version_, is_success=False, message=msg_, log_event=False) - - def can_proceed_with_requested_version(): - if not gs_updated: - # If the goal state didn't change, don't process anything. - return False - - # With the new model, we will get a new GS when CRP wants us to auto-update using required version. - # If there's no new goal state, don't proceed with anything - msg_ = "Found requested version in manifest: {0} for goal state {1}".format( - requested_version, goal_state_id) - logger.info(msg_) - add_event(AGENT_NAME, op=WALAEventOperation.AgentUpgrade, is_success=True, message=msg_, log_event=False) - - if requested_version < daemon_version: - # Don't process the update if the requested version is lesser than daemon version, - # as we don't support downgrades below daemon versions. 
- report_error( - "Can't process the upgrade as the requested version: {0} is < current daemon version: {1}".format( - requested_version, daemon_version), op=WALAEventOperation.AgentUpgrade) - return False - - return True - - def agent_upgrade_time_elapsed(now_): - if self.last_attempt_time is not None: - next_attempt_time = self.last_attempt_time + conf.get_autoupdate_frequency() - else: - next_attempt_time = now_ - if next_attempt_time > now_: - return False - return True - - agent_family_name = conf.get_autoupdate_gafamily() - gs_updated = False - daemon_version = self.__get_daemon_version_for_update() - try: - # Fetch the agent manifests from the latest Goal State - goal_state_id = self._goal_state.extensions_goal_state.id - gs_updated = self._processing_new_extensions_goal_state() - requested_version, agent_family = self.__get_requested_version_and_agent_family_from_last_gs() - if agent_family is None: - logger.verbose( - u"No manifest links found for agent family: {0} for goal state {1}, skipping update check".format( - agent_family_name, goal_state_id)) - return False - except Exception as err: - # If there's some issues in fetching the agent manifests, report it only on goal state change - msg = u"Exception retrieving agent manifests: {0}".format(textutil.format_exception(err)) - if gs_updated: - report_error(msg) - else: - logger.verbose(msg) - return False - - if requested_version is not None: - # If GA versioning is enabled and requested version present in GS, and it's a new GS, follow new logic - if not can_proceed_with_requested_version(): - return False - else: - # If no requested version specified in the Goal State, follow the old auto-update logic - # Note: If the first Goal State contains a requested version, this timer won't start (i.e. self.last_attempt_time won't be updated). 
- # If any subsequent goal state does not contain requested version, this timer will start then, and we will - # download all versions available in PIR and auto-update to the highest available version on that goal state. - now = time.time() - if not agent_upgrade_time_elapsed(now): - return False - - logger.info("No requested version specified, checking for all versions for agent update (family: {0})", - agent_family_name) - self.last_attempt_time = now - - try: - # If we make it to this point, then either there is a requested version in a new GS (new auto-update model), - # or the 1hr time limit has elapsed for us to check the agent manifest for updates (old auto-update model). - pkg_list = ExtHandlerPackageList() - - # If the requested version is the current version, don't download anything; - # the call to purge() below will delete all other agents from disk - # In this case, no need to even fetch the GA family manifest as we don't need to download any agent. - if requested_version is not None and requested_version == CURRENT_VERSION: - packages_to_download = [] - msg = "The requested version is running as the current version: {0}".format(requested_version) - logger.info(msg) - add_event(AGENT_NAME, op=WALAEventOperation.AgentUpgrade, is_success=True, message=msg) - else: - agent_manifest = self._goal_state.fetch_agent_manifest(agent_family.name, agent_family.uris) - pkg_list = agent_manifest.pkg_list - packages_to_download = pkg_list.versions - - # Verify the requested version is in GA family manifest (if specified) - if requested_version is not None and requested_version != CURRENT_VERSION: - for pkg in pkg_list.versions: - if FlexibleVersion(pkg.version) == requested_version: - # Found a matching package, only download that one - packages_to_download = [pkg] - break - else: - msg = "No matching package found in the agent manifest for requested version: {0} in goal state {1}, skipping agent update".format( - requested_version, goal_state_id) - 
report_error(msg, version_=requested_version) - return False - - # Set the agents to those available for download at least as current as the existing agent - # or to the requested version (if specified) - is_fast_track_goal_state = self._goal_state.extensions_goal_state.source == GoalStateSource.FastTrack - agents_to_download = [GuestAgent.from_agent_package(pkg, protocol, is_fast_track_goal_state) for pkg in packages_to_download] - - # Filter out the agents that were downloaded/extracted successfully. If the agent was not installed properly, - # we delete the directory and the zip package from the filesystem - self._set_and_sort_agents([agent for agent in agents_to_download if agent.is_available]) - - # Remove from disk any agent no longer needed in the VM. - # If requested version is provided, this would delete all other agents present on the VM except - - # - the current version and the requested version if requested version != current version - # - only the current version if requested version == current version - # Note: - # The code leaves on disk available, but blacklisted, agents to preserve the state. - # Otherwise, those agents could be downloaded again and inappropriately retried. 
- self._purge_agents() - self._filter_blacklisted_agents() - - # If there are no agents available to upgrade/downgrade to, return False - if len(self.agents) == 0: - return False - - if requested_version is not None: - # In case of requested version, return True if an agent with a different version number than the - # current version is available that is higher than the current daemon version - return self.agents[0].version != base_version and self.agents[0].version > daemon_version - else: - # Else, return True if the highest agent is > base_version (CURRENT_VERSION) - return self.agents[0].version > base_version - - except Exception as err: - msg = u"Exception downloading agents for update: {0}".format(textutil.format_exception(err)) - report_error(msg) - return False - def _write_pid_file(self): pid_files = self._get_pid_files() @@ -1425,58 +1143,6 @@ def _execute_run_command(command): msg = "Error while checking ip table rules:{0}".format(ustr(e)) logger.error(msg) - def __get_next_upgrade_times(self): - """ - Get the next upgrade times - return: Next Normal Upgrade Time, Next Hotfix Upgrade Time - """ - - def get_next_process_time(last_val, frequency): - return now if last_val is None else last_val + frequency - - now = time.time() - next_hotfix_time = get_next_process_time(self._last_hotfix_upgrade_time, conf.get_hotfix_upgrade_frequency()) - next_normal_time = get_next_process_time(self._last_normal_upgrade_time, conf.get_normal_upgrade_frequency()) - - return next_normal_time, next_hotfix_time - - @staticmethod - def __get_agent_upgrade_type(available_agent): - # We follow semantic versioning for the agent, if . is same, then . has changed. - # In this case, we consider it as a Hotfix upgrade. Else we consider it a Normal upgrade. 
- if available_agent.version.major == CURRENT_VERSION.major and available_agent.version.minor == CURRENT_VERSION.minor: - return AgentUpgradeType.Hotfix - return AgentUpgradeType.Normal - - def __upgrade_agent_if_permitted(self): - """ - Check every 4hrs for a Hotfix Upgrade and 24 hours for a Normal upgrade and upgrade the agent if available. - raises: ExitException when a new upgrade is available in the relevant time window, else returns - """ - - next_normal_time, next_hotfix_time = self.__get_next_upgrade_times() - now = time.time() - # Not permitted to update yet for any of the AgentUpgradeModes - if next_hotfix_time > now and next_normal_time > now: - return - - # Update the last upgrade check time even if no new agent is available for upgrade - self._last_hotfix_upgrade_time = now if next_hotfix_time <= now else self._last_hotfix_upgrade_time - self._last_normal_upgrade_time = now if next_normal_time <= now else self._last_normal_upgrade_time - - available_agent = self.get_latest_agent_greater_than_daemon() - if available_agent is None or available_agent.version <= CURRENT_VERSION: - logger.verbose("No agent upgrade discovered") - return - - upgrade_type = self.__get_agent_upgrade_type(available_agent) - upgrade_message = "{0} Agent upgrade discovered, updating to {1} -- exiting".format(upgrade_type, - available_agent.name) - - if (upgrade_type == AgentUpgradeType.Hotfix and next_hotfix_time <= now) or ( - upgrade_type == AgentUpgradeType.Normal and next_normal_time <= now): - raise AgentUpgradeExitException(upgrade_message) - def _reset_legacy_blacklisted_agents(self): # Reset the state of all blacklisted agents that were blacklisted by legacy agents (i.e. 
not during auto-update) @@ -1489,290 +1155,3 @@ def _reset_legacy_blacklisted_agents(self): agent.clear_error() except Exception as err: logger.warn("Unable to reset legacy blacklisted agents due to: {0}".format(err)) - - -class GuestAgent(object): - def __init__(self, path, pkg, protocol, is_fast_track_goal_state): - """ - If 'path' is given, the object is initialized to the version installed under that path. - - If 'pkg' is given, the version specified in the package information is downloaded and the object is - initialized to that version. - - 'is_fast_track_goal_state' and 'protocol' are used only when a package is downloaded. - - NOTE: Prefer using the from_installed_agent and from_agent_package methods instead of calling __init__ directly - """ - self._is_fast_track_goal_state = is_fast_track_goal_state - self.pkg = pkg - self._protocol = protocol - version = None - if path is not None: - m = AGENT_DIR_PATTERN.match(path) - if m is None: - raise UpdateError(u"Illegal agent directory: {0}".format(path)) - version = m.group(1) - elif self.pkg is not None: - version = pkg.version - - if version is None: - raise UpdateError(u"Illegal agent version: {0}".format(version)) - self.version = FlexibleVersion(version) - - location = u"disk" if path is not None else u"package" - logger.verbose(u"Loading Agent {0} from {1}", self.name, location) - - self.error = GuestAgentError(self.get_agent_error_file()) - self.error.load() - - try: - self._ensure_downloaded() - self._ensure_loaded() - except Exception as e: - # If we're unable to download/unpack the agent, delete the Agent directory - try: - if os.path.isdir(self.get_agent_dir()): - shutil.rmtree(self.get_agent_dir(), ignore_errors=True) - except Exception as err: - logger.warn("Unable to delete Agent files: {0}".format(err)) - msg = u"Agent {0} install failed with exception:".format( - self.name) - detailed_msg = '{0} {1}'.format(msg, textutil.format_exception(e)) - add_event( - AGENT_NAME, - version=self.version, - 
op=WALAEventOperation.Install, - is_success=False, - message=detailed_msg) - - @staticmethod - def from_installed_agent(path): - """ - Creates an instance of GuestAgent using the agent installed in the given 'path'. - """ - return GuestAgent(path, None, None, False) - - @staticmethod - def from_agent_package(package, protocol, is_fast_track_goal_state): - """ - Creates an instance of GuestAgent using the information provided in the 'package'; if that version of the agent is not installed it, it installs it. - """ - return GuestAgent(None, package, protocol, is_fast_track_goal_state) - - @property - def name(self): - return "{0}-{1}".format(AGENT_NAME, self.version) - - def get_agent_cmd(self): - return self.manifest.get_enable_command() - - def get_agent_dir(self): - return os.path.join(conf.get_lib_dir(), self.name) - - def get_agent_error_file(self): - return os.path.join(conf.get_lib_dir(), self.name, AGENT_ERROR_FILE) - - def get_agent_manifest_path(self): - return os.path.join(self.get_agent_dir(), AGENT_MANIFEST_FILE) - - def get_agent_pkg_path(self): - return ".".join((os.path.join(conf.get_lib_dir(), self.name), "zip")) - - def clear_error(self): - self.error.clear() - self.error.save() - - @property - def is_available(self): - return self.is_downloaded and not self.is_blacklisted - - @property - def is_blacklisted(self): - return self.error is not None and self.error.is_blacklisted - - @property - def is_downloaded(self): - return self.is_blacklisted or \ - os.path.isfile(self.get_agent_manifest_path()) - - def mark_failure(self, is_fatal=False, reason=''): - try: - if not os.path.isdir(self.get_agent_dir()): - os.makedirs(self.get_agent_dir()) - self.error.mark_failure(is_fatal=is_fatal, reason=reason) - self.error.save() - if self.error.is_blacklisted: - msg = u"Agent {0} is permanently blacklisted".format(self.name) - logger.warn(msg) - add_event(op=WALAEventOperation.AgentBlacklisted, is_success=False, message=msg, log_event=False, - 
version=self.version) - except Exception as e: - logger.warn(u"Agent {0} failed recording error state: {1}", self.name, ustr(e)) - - def _ensure_downloaded(self): - logger.verbose(u"Ensuring Agent {0} is downloaded", self.name) - - if self.is_downloaded: - logger.verbose(u"Agent {0} was previously downloaded - skipping download", self.name) - return - - if self.pkg is None: - raise UpdateError(u"Agent {0} is missing package and download URIs".format( - self.name)) - - self._download() - - msg = u"Agent {0} downloaded successfully".format(self.name) - logger.verbose(msg) - add_event( - AGENT_NAME, - version=self.version, - op=WALAEventOperation.Install, - is_success=True, - message=msg) - - def _ensure_loaded(self): - self._load_manifest() - self._load_error() - - def _download(self): - try: - self._protocol.client.download_zip_package("agent package", self.pkg.uris, self.get_agent_pkg_path(), self.get_agent_dir(), use_verify_header=self._is_fast_track_goal_state) - except Exception as exception: - msg = "Unable to download Agent {0}: {1}".format(self.name, ustr(exception)) - add_event( - AGENT_NAME, - op=WALAEventOperation.Download, - version=CURRENT_VERSION, - is_success=False, - message=msg) - raise UpdateError(msg) - - def _load_error(self): - try: - self.error = GuestAgentError(self.get_agent_error_file()) - self.error.load() - logger.verbose(u"Agent {0} error state: {1}", self.name, ustr(self.error)) - except Exception as e: - logger.warn(u"Agent {0} failed loading error state: {1}", self.name, ustr(e)) - - def _load_manifest(self): - path = self.get_agent_manifest_path() - if not os.path.isfile(path): - msg = u"Agent {0} is missing the {1} file".format(self.name, AGENT_MANIFEST_FILE) - raise UpdateError(msg) - - with open(path, "r") as manifest_file: - try: - manifests = json.load(manifest_file) - except Exception as e: - msg = u"Agent {0} has a malformed {1} ({2})".format(self.name, AGENT_MANIFEST_FILE, ustr(e)) - raise UpdateError(msg) - if type(manifests) 
is list: - if len(manifests) <= 0: - msg = u"Agent {0} has an empty {1}".format(self.name, AGENT_MANIFEST_FILE) - raise UpdateError(msg) - manifest = manifests[0] - else: - manifest = manifests - - try: - self.manifest = HandlerManifest(manifest) # pylint: disable=W0201 - if len(self.manifest.get_enable_command()) <= 0: - raise Exception(u"Manifest is missing the enable command") - except Exception as e: - msg = u"Agent {0} has an illegal {1}: {2}".format( - self.name, - AGENT_MANIFEST_FILE, - ustr(e)) - raise UpdateError(msg) - - logger.verbose( - u"Agent {0} loaded manifest from {1}", - self.name, - self.get_agent_manifest_path()) - logger.verbose(u"Successfully loaded Agent {0} {1}: {2}", - self.name, - AGENT_MANIFEST_FILE, - ustr(self.manifest.data)) - return - - -class GuestAgentError(object): - def __init__(self, path): - self.last_failure = 0.0 - self.was_fatal = False - if path is None: - raise UpdateError(u"GuestAgentError requires a path") - self.path = path - self.failure_count = 0 - self.reason = '' - - self.clear() - return - - def mark_failure(self, is_fatal=False, reason=''): - self.last_failure = time.time() - self.failure_count += 1 - self.was_fatal = is_fatal - self.reason = reason - return - - def clear(self): - self.last_failure = 0.0 - self.failure_count = 0 - self.was_fatal = False - self.reason = '' - return - - @property - def is_blacklisted(self): - return self.was_fatal or self.failure_count >= MAX_FAILURE - - def load(self): - if self.path is not None and os.path.isfile(self.path): - try: - with open(self.path, 'r') as f: - self.from_json(json.load(f)) - except Exception as error: - # The error.json file is only supposed to be written only by the agent. - # If for whatever reason the file is malformed, just delete it to reset state of the errors. - logger.warn( - "Ran into error when trying to load error file {0}, deleting it to clean state. 
Error: {1}".format( - self.path, textutil.format_exception(error))) - try: - os.remove(self.path) - except Exception: - # We try best case efforts to delete the file, ignore error if we're unable to do so - pass - return - - def save(self): - if os.path.isdir(os.path.dirname(self.path)): - with open(self.path, 'w') as f: - json.dump(self.to_json(), f) - return - - def from_json(self, data): - self.last_failure = max(self.last_failure, data.get(u"last_failure", 0.0)) - self.failure_count = max(self.failure_count, data.get(u"failure_count", 0)) - self.was_fatal = self.was_fatal or data.get(u"was_fatal", False) - reason = data.get(u"reason", '') - self.reason = reason if reason != '' else self.reason - return - - def to_json(self): - data = { - u"last_failure": self.last_failure, - u"failure_count": self.failure_count, - u"was_fatal": self.was_fatal, - u"reason": ustr(self.reason) - } - return data - - def __str__(self): - return "Last Failure: {0}, Total Failures: {1}, Fatal: {2}, Reason: {3}".format( - self.last_failure, - self.failure_count, - self.was_fatal, - self.reason) diff --git a/makepkg.py b/makepkg.py index 5ec04d5d89..bc4aad4c3b 100755 --- a/makepkg.py +++ b/makepkg.py @@ -8,8 +8,9 @@ import subprocess import sys -from azurelinuxagent.common.version import AGENT_NAME, AGENT_VERSION, AGENT_LONG_VERSION -from azurelinuxagent.ga.update import AGENT_MANIFEST_FILE +from azurelinuxagent.common.version import AGENT_NAME, AGENT_VERSION, \ + AGENT_LONG_VERSION +from azurelinuxagent.ga.guestagent import AGENT_MANIFEST_FILE MANIFEST = '''[{{ "name": "{0}", diff --git a/tests/common/test_agent_supported_feature.py b/tests/common/test_agent_supported_feature.py index cf367f90ba..c2d3b1981e 100644 --- a/tests/common/test_agent_supported_feature.py +++ b/tests/common/test_agent_supported_feature.py @@ -53,3 +53,21 @@ def test_it_should_return_extension_supported_features_properly(self): self.assertEqual(SupportedFeatureNames.ExtensionTelemetryPipeline, 
get_supported_feature_by_name(SupportedFeatureNames.ExtensionTelemetryPipeline).name, "Invalid/Wrong feature returned") + + def test_it_should_return_ga_versioning_governance_feature_properly(self): + with patch("azurelinuxagent.common.agent_supported_feature._GAVersioningGovernanceFeature.is_supported", True): + self.assertIn(SupportedFeatureNames.GAVersioningGovernance, get_agent_supported_features_list_for_crp(), + "GAVersioningGovernance should be fetched in crp_supported_features") + + with patch("azurelinuxagent.common.agent_supported_feature._GAVersioningGovernanceFeature.is_supported", False): + self.assertNotIn(SupportedFeatureNames.GAVersioningGovernance, get_agent_supported_features_list_for_crp(), + "GAVersioningGovernance should not be fetched in crp_supported_features as not supported") + + self.assertEqual(SupportedFeatureNames.GAVersioningGovernance, + get_supported_feature_by_name(SupportedFeatureNames.GAVersioningGovernance).name, + "Invalid/Wrong feature returned") + + # Raise error if feature name not found + with self.assertRaises(NotImplementedError): + get_supported_feature_by_name("ABC") + diff --git a/tests/data/wire/ext_conf_missing_family.xml b/tests/data/wire/ext_conf_missing_family.xml index 058c40a881..10760a975b 100644 --- a/tests/data/wire/ext_conf_missing_family.xml +++ b/tests/data/wire/ext_conf_missing_family.xml @@ -7,25 +7,6 @@ Prod - - Test - - https://mock-goal-state/rdfepirv2bl2prdstr01.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/rdfepirv2bl2prdstr02.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/rdfepirv2bl2prdstr03.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - 
https://mock-goal-state/rdfepirv2bl2prdstr04.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/rdfepirv2bl3prdstr01.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/rdfepirv2bl3prdstr02.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/rdfepirv2bl3prdstr03.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/zrdfepirv2bl4prdstr01.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/zrdfepirv2bl4prdstr03.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/zrdfepirv2bl5prdstr02.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/zrdfepirv2bl5prdstr04.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/zrdfepirv2bl5prdstr06.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/zrdfepirv2bl5prdstr09a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - https://mock-goal-state/zrdfepirv2bl6prdstr02a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_useast_manifest.xml - - eastus diff --git a/tests/data/wire/ext_conf_missing_requested_version.xml b/tests/data/wire/ext_conf_requested_version_missing_in_manifest.xml similarity index 100% rename from tests/data/wire/ext_conf_missing_requested_version.xml rename to tests/data/wire/ext_conf_requested_version_missing_in_manifest.xml diff 
--git a/tests/data/wire/ga_manifest_no_uris.xml b/tests/data/wire/ga_manifest_no_uris.xml new file mode 100644 index 0000000000..89573ad63b --- /dev/null +++ b/tests/data/wire/ga_manifest_no_uris.xml @@ -0,0 +1,39 @@ + + + + + 1.0.0 + + http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__1.0.0 + + + + 1.1.0 + + http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__1.1.0 + + + + 1.2.0 + + http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__1.2.0 + + + + 2.0.0http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__2.0.0 + + + 2.1.0http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__2.1.0 + + + 9.9.9.10 + + http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__99999.0.0.0 + + + + 99999.0.0.0 + + + + diff --git a/tests/ga/mocks.py b/tests/ga/mocks.py index 6fbc63d7da..588825f780 100644 --- a/tests/ga/mocks.py +++ b/tests/ga/mocks.py @@ -18,6 +18,8 @@ import contextlib from mock import PropertyMock + +from azurelinuxagent.ga.agent_update_handler import AgentUpdateHandler from azurelinuxagent.ga.exthandlers import ExtHandlersHandler from azurelinuxagent.ga.remoteaccess import RemoteAccessHandler from azurelinuxagent.ga.update import UpdateHandler, get_update_handler @@ -30,6 +32,7 @@ def mock_update_handler(protocol, on_new_iteration=lambda _: None, exthandlers_handler=None, remote_access_handler=None, + agent_update_handler=None, autoupdate_enabled=False, check_daemon_running=False, start_background_threads=False, @@ -71,6 +74,9 @@ def is_running(*args): # mock for property UpdateHandler.is_running, which cont if remote_access_handler is None: remote_access_handler = RemoteAccessHandler(protocol) + if agent_update_handler is None: + agent_update_handler = AgentUpdateHandler(protocol) + cleanup_functions = [] def patch_object(target, attribute): @@ -80,39 +86,40 @@ def patch_object(target, attribute): try: with patch("azurelinuxagent.ga.exthandlers.get_exthandlers_handler", 
return_value=exthandlers_handler): - with patch("azurelinuxagent.ga.remoteaccess.get_remote_access_handler", return_value=remote_access_handler): - with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=autoupdate_enabled): - with patch.object(UpdateHandler, "is_running", PropertyMock(side_effect=is_running)): - with patch('azurelinuxagent.ga.update.time.sleep', side_effect=lambda _: mock_sleep(0.001)) as sleep: - with patch('sys.exit', side_effect=lambda _: 0) as mock_exit: - if not check_daemon_running: - patch_object(UpdateHandler, "_check_daemon_running") - if not start_background_threads: - patch_object(UpdateHandler, "_start_threads") - if not check_background_threads: - patch_object(UpdateHandler, "_check_threads_running") - - def get_exit_code(): - if mock_exit.call_count == 0: - raise Exception("The UpdateHandler did not exit") - if mock_exit.call_count != 1: - raise Exception("The UpdateHandler exited multiple times ({0})".format(mock_exit.call_count)) - args, _ = mock_exit.call_args - return args[0] - - def get_iterations(): - return iteration_count[0] - - def get_iterations_completed(): - return sleep.call_count - - update_handler = get_update_handler() - update_handler.protocol_util.get_protocol = Mock(return_value=protocol) - update_handler.get_exit_code = get_exit_code - update_handler.get_iterations = get_iterations - update_handler.get_iterations_completed = get_iterations_completed - - yield update_handler + with patch("azurelinuxagent.ga.agent_update_handler.get_agent_update_handler", return_value=agent_update_handler): + with patch("azurelinuxagent.ga.remoteaccess.get_remote_access_handler", return_value=remote_access_handler): + with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=autoupdate_enabled): + with patch.object(UpdateHandler, "is_running", PropertyMock(side_effect=is_running)): + with patch('azurelinuxagent.ga.update.time.sleep', side_effect=lambda _: mock_sleep(0.001)) as sleep: + with 
patch('sys.exit', side_effect=lambda _: 0) as mock_exit: + if not check_daemon_running: + patch_object(UpdateHandler, "_check_daemon_running") + if not start_background_threads: + patch_object(UpdateHandler, "_start_threads") + if not check_background_threads: + patch_object(UpdateHandler, "_check_threads_running") + + def get_exit_code(): + if mock_exit.call_count == 0: + raise Exception("The UpdateHandler did not exit") + if mock_exit.call_count != 1: + raise Exception("The UpdateHandler exited multiple times ({0})".format(mock_exit.call_count)) + args, _ = mock_exit.call_args + return args[0] + + def get_iterations(): + return iteration_count[0] + + def get_iterations_completed(): + return sleep.call_count + + update_handler = get_update_handler() + update_handler.protocol_util.get_protocol = Mock(return_value=protocol) + update_handler.get_exit_code = get_exit_code + update_handler.get_iterations = get_iterations + update_handler.get_iterations_completed = get_iterations_completed + + yield update_handler finally: for f in cleanup_functions: f() diff --git a/tests/ga/test_agent_update_handler.py b/tests/ga/test_agent_update_handler.py new file mode 100644 index 0000000000..5b7800b8c6 --- /dev/null +++ b/tests/ga/test_agent_update_handler.py @@ -0,0 +1,372 @@ +import contextlib +import json +import os + +from azurelinuxagent.common import conf +from azurelinuxagent.common.event import WALAEventOperation +from azurelinuxagent.common.exception import AgentUpgradeExitException +from azurelinuxagent.common.future import ustr, httpclient +from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatuses + +from azurelinuxagent.common.protocol.util import ProtocolUtil +from azurelinuxagent.common.version import CURRENT_VERSION +from azurelinuxagent.ga.agent_update_handler import get_agent_update_handler +from azurelinuxagent.ga.guestagent import GAUpdateReportState +from tests.ga.test_update import UpdateTestCase +from tests.protocol.HttpRequestPredicates 
import HttpRequestPredicates +from tests.protocol.mocks import mock_wire_protocol, MockHttpResponse +from tests.protocol.mockwiredata import DATA_FILE +from tests.tools import clear_singleton_instances, load_bin_data, patch + + +class TestAgentUpdate(UpdateTestCase): + + def setUp(self): + UpdateTestCase.setUp(self) + # Since ProtocolUtil is a singleton per thread, we need to clear it to ensure that the test cases do not + # reuse a previous state + clear_singleton_instances(ProtocolUtil) + + @contextlib.contextmanager + def __get_agent_update_handler(self, test_data=None, autoupdate_frequency=0.001, autoupdate_enabled=True): + # Default to DATA_FILE of test_data parameter raises the pylint warning + # W0102: Dangerous default value DATA_FILE (builtins.dict) as argument (dangerous-default-value) + test_data = DATA_FILE if test_data is None else test_data + + with mock_wire_protocol(test_data) as protocol: + + def get_handler(url, **kwargs): + if HttpRequestPredicates.is_agent_package_request(url): + agent_pkg = load_bin_data(self._get_agent_file_name(), self._agent_zip_dir) + protocol.mock_wire_data.call_counts['agentArtifact'] += 1 + return MockHttpResponse(status=httpclient.OK, body=agent_pkg) + return protocol.mock_wire_data.mock_http_get(url, **kwargs) + + def put_handler(url, *args, **_): + if HttpRequestPredicates.is_host_plugin_status_request(url): + # Skip reading the HostGA request data as its encoded + return MockHttpResponse(status=500) + protocol.aggregate_status = json.loads(args[0]) + return MockHttpResponse(status=201) + + protocol.set_http_handlers(http_get_handler=get_handler, http_put_handler=put_handler) + + with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=autoupdate_enabled): + with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=autoupdate_frequency): + with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"): + with 
patch("azurelinuxagent.ga.agent_update_handler.add_event") as mock_telemetry: + agent_update_handler = get_agent_update_handler(protocol) + agent_update_handler._protocol = protocol + yield agent_update_handler, mock_telemetry + + def __assert_agent_directories_available(self, versions): + for version in versions: + self.assertTrue(os.path.exists(self.agent_dir(version)), "Agent directory {0} not found".format(version)) + + def __assert_agent_directories_exist_and_others_dont_exist(self, versions): + self.__assert_agent_directories_available(versions=versions) + other_agents = [agent_dir for agent_dir in self.agent_dirs() if + agent_dir not in [self.agent_dir(version) for version in versions]] + self.assertFalse(any(other_agents), + "All other agents should be purged from agent dir: {0}".format(other_agents)) + + def __assert_agent_requested_version_in_goal_state(self, mock_telemetry, inc=1, version="9.9.9.10"): + upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + 'Goal state incarnation_{0} is requesting a new agent version {1}'.format(inc, version) in kwarg['message'] and kwarg[ + 'op'] == WALAEventOperation.AgentUpgrade] + self.assertEqual(1, len(upgrade_event_msgs), + "Did not find the event indicating that the agent requested version found. Got: {0}".format( + mock_telemetry.call_args_list)) + + def __assert_no_agent_package_telemetry_emitted(self, mock_telemetry, version="9.9.9.10"): + upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + 'Unable to update Agent: No matching package found in the agent manifest for requested version: {0}'.format(version) in kwarg['message'] and kwarg[ + 'op'] == WALAEventOperation.AgentUpgrade] + self.assertEqual(1, len(upgrade_event_msgs), + "Did not find the event indicating that the agent package not found. 
Got: {0}".format( + mock_telemetry.call_args_list)) + + def test_it_should_not_update_when_autoupdate_disabled(self): + self.prepare_agents(count=1) + with self.__get_agent_update_handler(autoupdate_enabled=False) as (agent_update_handler, mock_telemetry): + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)]) + self.assertEqual(0, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + "requesting a new agent version" in kwarg['message'] and kwarg[ + 'op'] == WALAEventOperation.AgentUpgrade]), "should not check for requested version") + + def test_it_should_update_to_largest_version_if_ga_versioning_disabled(self): + self.prepare_agents(count=1) + + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + with patch.object(conf, "get_enable_ga_versioning", return_value=False): + with self.assertRaises(AgentUpgradeExitException) as context: + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version="99999.0.0.0") + self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"]) + self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason)) + + def test_it_should_update_to_largest_version_if_time_window_not_elapsed(self): + self.prepare_agents(count=1) + + data_file = DATA_FILE.copy() + data_file["ga_manifest"] = "wire/ga_manifest_no_uris.xml" + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, _): + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), + "New agent directory should not be found") + 
agent_update_handler._protocol.mock_wire_data.set_ga_manifest("wire/ga_manifest.xml") + agent_update_handler._protocol.mock_wire_data.set_incarnation(2) + agent_update_handler._protocol.client.update_goal_state() + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), + "New agent directory should not be found") + + def test_it_should_update_to_largest_version_if_time_window_elapsed(self): + self.prepare_agents(count=1) + + data_file = DATA_FILE.copy() + data_file["ga_manifest"] = "wire/ga_manifest_no_uris.xml" + with patch("azurelinuxagent.common.conf.get_hotfix_upgrade_frequency", return_value=0.001): + with patch("azurelinuxagent.common.conf.get_normal_upgrade_frequency", return_value=0.001): + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + with self.assertRaises(AgentUpgradeExitException) as context: + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), + "New agent directory should not be found") + agent_update_handler._protocol.mock_wire_data.set_ga_manifest("wire/ga_manifest.xml") + agent_update_handler._protocol.mock_wire_data.set_incarnation(2) + agent_update_handler._protocol.client.update_goal_state() + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version="99999.0.0.0") + self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"]) + self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason)) + + def test_it_should_not_agent_update_if_last_attempted_update_time_not_elapsed(self): + self.prepare_agents(count=1) + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + version = "5.2.0.1" + with 
self.__get_agent_update_handler(test_data=data_file, autoupdate_frequency=10) as (agent_update_handler, mock_telemetry): + agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version(version) + agent_update_handler._protocol.mock_wire_data.set_incarnation(2) + agent_update_handler._protocol.client.update_goal_state() + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + + self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version=version) + self.__assert_no_agent_package_telemetry_emitted(mock_telemetry, version=version) + # Now we shouldn't check for download if update not allowed.This run should not add new logs + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version=version) + self.__assert_no_agent_package_telemetry_emitted(mock_telemetry, version=version) + + def test_it_should_update_to_largest_version_if_requested_version_not_available(self): + self.prepare_agents(count=1) + + data_file = DATA_FILE.copy() + data_file['ext_conf'] = "wire/ext_conf.xml" + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + with self.assertRaises(AgentUpgradeExitException) as context: + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version="99999.0.0.0") + self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"]) + self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason)) + + def test_it_should_not_agent_update_if_requested_version_is_same_as_current_version(self): + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + + # Set the test environment by adding 20 random agents to the agent directory + self.prepare_agents() + 
self.assertEqual(20, self.agent_count(), "Agent directories not set properly") + + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version( + str(CURRENT_VERSION)) + agent_update_handler._protocol.mock_wire_data.set_incarnation(2) + agent_update_handler._protocol.client.update_goal_state() + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.assertEqual(0, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + "requesting a new agent version" in kwarg['message'] and kwarg[ + 'op'] == WALAEventOperation.AgentUpgrade]), "requested version should be same as current version") + self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), + "New agent directory should not be found") + + def test_it_should_upgrade_agent_if_requested_version_is_available_greater_than_current_version(self): + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + + # Set the test environment by adding 20 random agents to the agent directory + self.prepare_agents() + self.assertEqual(20, self.agent_count(), "Agent directories not set properly") + + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + with self.assertRaises(AgentUpgradeExitException) as context: + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.__assert_agent_requested_version_in_goal_state(mock_telemetry, version="9.9.9.10") + self.__assert_agent_directories_exist_and_others_dont_exist(versions=["9.9.9.10", str(CURRENT_VERSION)]) + self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason)) + + def test_it_should_downgrade_agent_if_requested_version_is_available_less_than_current_version(self): + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + + 
# Set the test environment by adding 20 random agents to the agent directory + self.prepare_agents() + self.assertEqual(20, self.agent_count(), "Agent directories not set properly") + + downgraded_version = "1.2.0" + + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version(downgraded_version) + agent_update_handler._protocol.mock_wire_data.set_incarnation(2) + agent_update_handler._protocol.client.update_goal_state() + with self.assertRaises(AgentUpgradeExitException) as context: + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version=downgraded_version) + self.__assert_agent_directories_exist_and_others_dont_exist( + versions=[downgraded_version, str(CURRENT_VERSION)]) + self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason)) + + def test_handles_if_requested_version_not_found_in_pkgs_to_download(self): + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + + # Set the test environment by adding 20 random agents to the agent directory + self.prepare_agents() + self.assertEqual(20, self.agent_count(), "Agent directories not set properly") + + version = "5.2.0.4" + + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version(version) + agent_update_handler._protocol.mock_wire_data.set_incarnation(2) + agent_update_handler._protocol.client.update_goal_state() + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + + self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version=version) + self.assertFalse(os.path.exists(self.agent_dir(version)), + "New agent directory should not be 
found") + + self.__assert_no_agent_package_telemetry_emitted(mock_telemetry, version=version) + + def test_handles_missing_agent_family(self): + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_missing_family.xml" + + # Set the test environment by adding 20 random agents to the agent directory + self.prepare_agents() + self.assertEqual(20, self.agent_count(), "Agent directories not set properly") + + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + + self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), + "New agent directory should not be found") + + self.assertEqual(1, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + "No manifest links found for agent family" in kwarg[ + 'message'] and kwarg[ + 'op'] == WALAEventOperation.AgentUpgrade]), "Agent manifest should not be in GS") + + def test_it_should_report_update_status_with_success(self): + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, _): + GAUpdateReportState.report_error_msg = "" + agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version( + str(CURRENT_VERSION)) + agent_update_handler._protocol.mock_wire_data.set_incarnation(2) + agent_update_handler._protocol.client.update_goal_state() + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + vm_agent_update_status = agent_update_handler.get_vmagent_update_status() + self.assertEqual(VMAgentUpdateStatuses.Success, vm_agent_update_status.status) + self.assertEqual(0, vm_agent_update_status.code) + self.assertEqual(str(CURRENT_VERSION), vm_agent_update_status.expected_version) + + def test_it_should_report_update_status_with_error_on_download_fail(self): + data_file = DATA_FILE.copy() + 
data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + + @contextlib.contextmanager + def mock_agent_update_handler(test_data): + with mock_wire_protocol(test_data) as protocol: + + def get_handler(url, **kwargs): + if HttpRequestPredicates.is_agent_package_request(url): + return MockHttpResponse(status=httpclient.SERVICE_UNAVAILABLE) + return protocol.mock_wire_data.mock_http_get(url, **kwargs) + + protocol.set_http_handlers(http_get_handler=get_handler) + + with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=True): + with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=0.001): + with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"): + agent_update_handler_local = get_agent_update_handler(protocol) + yield agent_update_handler_local + + with mock_agent_update_handler(test_data=data_file) as (agent_update_handler): + GAUpdateReportState.report_error_msg = "" + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + vm_agent_update_status = agent_update_handler.get_vmagent_update_status() + self.assertEqual(VMAgentUpdateStatuses.Error, vm_agent_update_status.status) + self.assertEqual(1, vm_agent_update_status.code) + self.assertEqual("9.9.9.10", vm_agent_update_status.expected_version) + self.assertIn("Unable to download Agent", vm_agent_update_status.message) + + def test_it_should_report_update_status_with_missing_requested_version_error(self): + data_file = DATA_FILE.copy() + data_file['ext_conf'] = "wire/ext_conf.xml" + + @contextlib.contextmanager + def mock_agent_update_handler(test_data): + with mock_wire_protocol(test_data) as protocol: + def get_handler(url, **kwargs): + if HttpRequestPredicates.is_agent_package_request(url): + return MockHttpResponse(status=httpclient.SERVICE_UNAVAILABLE) + return protocol.mock_wire_data.mock_http_get(url, **kwargs) + + protocol.set_http_handlers(http_get_handler=get_handler) + + with 
patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=True): + with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=0.001): + with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"): + agent_update_handler_local = get_agent_update_handler(protocol) + yield agent_update_handler_local + + with mock_agent_update_handler(test_data=data_file) as (agent_update_handler): + GAUpdateReportState.report_error_msg = "" + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + vm_agent_update_status = agent_update_handler.get_vmagent_update_status() + self.assertEqual(VMAgentUpdateStatuses.Error, vm_agent_update_status.status) + self.assertEqual(1, vm_agent_update_status.code) + self.assertIn("Missing requested version", vm_agent_update_status.message) + + def test_it_should_not_log_same_error_next_hours(self): + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_missing_family.xml" + + # Set the test environment by adding 20 random agents to the agent directory + self.prepare_agents() + self.assertEqual(20, self.agent_count(), "Agent directories not set properly") + + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + + self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), + "New agent directory should not be found") + + self.assertEqual(1, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + "No manifest links found for agent family" in kwarg[ + 'message'] and kwarg[ + 'op'] == WALAEventOperation.AgentUpgrade]), "Agent manifest should not be in GS") + + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + + self.assertEqual(1, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + "No manifest links found for agent family" in kwarg[ + 'message'] and kwarg[ + 'op'] == 
WALAEventOperation.AgentUpgrade]), "Agent manifest should not be in GS") \ No newline at end of file diff --git a/tests/ga/test_extension.py b/tests/ga/test_extension.py index 10f442749d..5309b80566 100644 --- a/tests/ga/test_extension.py +++ b/tests/ga/test_extension.py @@ -3185,7 +3185,7 @@ def manifest_location_handler(url, **kwargs): wire._DOWNLOAD_TIMEOUT = datetime.timedelta(minutes=0) try: with self.assertRaises(ExtensionDownloadError): - protocol.client.fetch_manifest(ext_handlers[0].manifest_uris, use_verify_header=False) + protocol.client.fetch_manifest("extension", ext_handlers[0].manifest_uris, use_verify_header=False) finally: wire._DOWNLOAD_TIMEOUT = download_timeout diff --git a/tests/ga/test_guestagent.py b/tests/ga/test_guestagent.py new file mode 100644 index 0000000000..81e248bb04 --- /dev/null +++ b/tests/ga/test_guestagent.py @@ -0,0 +1,309 @@ +import json +import os + +from azurelinuxagent.common import conf +from azurelinuxagent.common.exception import UpdateError +from azurelinuxagent.ga.guestagent import GuestAgent, AGENT_MANIFEST_FILE, AGENT_ERROR_FILE, GuestAgentError, \ + MAX_FAILURE +from azurelinuxagent.common.future import httpclient +from azurelinuxagent.common.protocol.restapi import ExtHandlerPackage +from azurelinuxagent.common.version import AGENT_NAME +from tests.ga.test_update import UpdateTestCase, EMPTY_MANIFEST, WITH_ERROR, NO_ERROR +from tests.protocol import mockwiredata +from tests.protocol.mocks import MockHttpResponse, mock_wire_protocol +from tests.tools import load_bin_data, patch + + +class TestGuestAgent(UpdateTestCase): + def setUp(self): + UpdateTestCase.setUp(self) + self.copy_agents(self._get_agent_file_path()) + self.agent_path = os.path.join(self.tmp_dir, self._get_agent_name()) + + def test_creation(self): + with self.assertRaises(UpdateError): + GuestAgent.from_installed_agent("A very bad file name") + + with self.assertRaises(UpdateError): + 
GuestAgent.from_installed_agent("{0}-a.bad.version".format(AGENT_NAME)) + + self.expand_agents() + + agent = GuestAgent.from_installed_agent(self.agent_path) + self.assertNotEqual(None, agent) + self.assertEqual(self._get_agent_name(), agent.name) + self.assertEqual(self._get_agent_version(), agent.version) + + self.assertEqual(self.agent_path, agent.get_agent_dir()) + + path = os.path.join(self.agent_path, AGENT_MANIFEST_FILE) + self.assertEqual(path, agent.get_agent_manifest_path()) + + self.assertEqual( + os.path.join(self.agent_path, AGENT_ERROR_FILE), + agent.get_agent_error_file()) + + path = ".".join((os.path.join(conf.get_lib_dir(), self._get_agent_name()), "zip")) + self.assertEqual(path, agent.get_agent_pkg_path()) + + self.assertTrue(agent.is_downloaded) + self.assertFalse(agent.is_blacklisted) + self.assertTrue(agent.is_available) + + def test_clear_error(self): + self.expand_agents() + + agent = GuestAgent.from_installed_agent(self.agent_path) + agent.mark_failure(is_fatal=True) + + self.assertTrue(agent.error.last_failure > 0.0) + self.assertEqual(1, agent.error.failure_count) + self.assertTrue(agent.is_blacklisted) + self.assertEqual(agent.is_blacklisted, agent.error.is_blacklisted) + + agent.clear_error() + self.assertEqual(0.0, agent.error.last_failure) + self.assertEqual(0, agent.error.failure_count) + self.assertFalse(agent.is_blacklisted) + self.assertEqual(agent.is_blacklisted, agent.error.is_blacklisted) + + def test_is_available(self): + self.expand_agents() + + agent = GuestAgent.from_installed_agent(self.agent_path) + + self.assertTrue(agent.is_available) + agent.mark_failure(is_fatal=True) + self.assertFalse(agent.is_available) + + def test_is_blacklisted(self): + self.expand_agents() + + agent = GuestAgent.from_installed_agent(self.agent_path) + self.assertFalse(agent.is_blacklisted) + self.assertEqual(agent.is_blacklisted, agent.error.is_blacklisted) + + agent.mark_failure(is_fatal=True) + self.assertTrue(agent.is_blacklisted) + 
self.assertEqual(agent.is_blacklisted, agent.error.is_blacklisted) + + def test_is_downloaded(self): + self.expand_agents() + agent = GuestAgent.from_installed_agent(self.agent_path) + self.assertTrue(agent.is_downloaded) + + def test_mark_failure(self): + agent = GuestAgent.from_installed_agent(self.agent_path) + + agent.mark_failure() + self.assertEqual(1, agent.error.failure_count) + + agent.mark_failure(is_fatal=True) + self.assertEqual(2, agent.error.failure_count) + self.assertTrue(agent.is_blacklisted) + + def test_load_manifest(self): + self.expand_agents() + agent = GuestAgent.from_installed_agent(self.agent_path) + agent._load_manifest() + self.assertEqual(agent.manifest.get_enable_command(), + agent.get_agent_cmd()) + + def test_load_manifest_missing(self): + self.expand_agents() + agent = GuestAgent.from_installed_agent(self.agent_path) + os.remove(agent.get_agent_manifest_path()) + self.assertRaises(UpdateError, agent._load_manifest) + + def test_load_manifest_is_empty(self): + self.expand_agents() + agent = GuestAgent.from_installed_agent(self.agent_path) + self.assertTrue(os.path.isfile(agent.get_agent_manifest_path())) + + with open(agent.get_agent_manifest_path(), "w") as file: # pylint: disable=redefined-builtin + json.dump(EMPTY_MANIFEST, file) + self.assertRaises(UpdateError, agent._load_manifest) + + def test_load_manifest_is_malformed(self): + self.expand_agents() + agent = GuestAgent.from_installed_agent(self.agent_path) + self.assertTrue(os.path.isfile(agent.get_agent_manifest_path())) + + with open(agent.get_agent_manifest_path(), "w") as file: # pylint: disable=redefined-builtin + file.write("This is not JSON data") + self.assertRaises(UpdateError, agent._load_manifest) + + def test_load_error(self): + agent = GuestAgent.from_installed_agent(self.agent_path) + agent.error = None + + agent._load_error() + self.assertTrue(agent.error is not None) + + def test_download(self): + self.remove_agents() + 
self.assertFalse(os.path.isdir(self.agent_path)) + + agent_uri = 'https://foo.blob.core.windows.net/bar/OSTCExtensions.WALinuxAgent__1.0.0' + + def http_get_handler(uri, *_, **__): + if uri == agent_uri: + response = load_bin_data(self._get_agent_file_name(), self._agent_zip_dir) + return MockHttpResponse(status=httpclient.OK, body=response) + return None + + pkg = ExtHandlerPackage(version=str(self._get_agent_version())) + pkg.uris.append(agent_uri) + + with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + protocol.set_http_handlers(http_get_handler=http_get_handler) + agent = GuestAgent.from_agent_package(pkg, protocol, False) + + self.assertTrue(os.path.isdir(agent.get_agent_dir())) + self.assertTrue(agent.is_downloaded) + + def test_download_fail(self): + self.remove_agents() + self.assertFalse(os.path.isdir(self.agent_path)) + + agent_uri = 'https://foo.blob.core.windows.net/bar/OSTCExtensions.WALinuxAgent__1.0.0' + + def http_get_handler(uri, *_, **__): + if uri in (agent_uri, 'http://168.63.129.16:32526/extensionArtifact'): + return MockHttpResponse(status=httpclient.SERVICE_UNAVAILABLE) + return None + + pkg = ExtHandlerPackage(version=str(self._get_agent_version())) + pkg.uris.append(agent_uri) + + with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + protocol.set_http_handlers(http_get_handler=http_get_handler) + with patch("azurelinuxagent.ga.guestagent.add_event") as add_event: + agent = GuestAgent.from_agent_package(pkg, protocol, False) + + self.assertFalse(os.path.isfile(self.agent_path)) + + messages = [kwargs['message'] for _, kwargs in add_event.call_args_list if kwargs['op'] == 'Install' and kwargs['is_success'] == False] + self.assertEqual(1, len(messages), "Expected exactly 1 install error/ Got: {0}".format(add_event.call_args_list)) + self.assertIn('[UpdateError] Unable to download Agent WALinuxAgent-9.9.9.9', messages[0], "The install error does not include the expected message") + + self.assertFalse(agent.is_blacklisted, 
"Download failures should not blacklist the Agent") + + def test_invalid_agent_package_does_not_blacklist_the_agent(self): + agent_uri = 'https://foo.blob.core.windows.net/bar/OSTCExtensions.WALinuxAgent__9.9.9.9' + + def http_get_handler(uri, *_, **__): + if uri in (agent_uri, 'http://168.63.129.16:32526/extensionArtifact'): + response = load_bin_data("ga/WALinuxAgent-9.9.9.9-no_manifest.zip") + return MockHttpResponse(status=httpclient.OK, body=response) + return None + + pkg = ExtHandlerPackage(version="9.9.9.9") + pkg.uris.append(agent_uri) + + with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + protocol.set_http_handlers(http_get_handler=http_get_handler) + agent = GuestAgent.from_agent_package(pkg, protocol, False) + + self.assertFalse(agent.is_blacklisted, "The agent should not be blacklisted if unable to unpack/download") + self.assertFalse(os.path.exists(agent.get_agent_dir()), "Agent directory should be cleaned up") + + @patch("azurelinuxagent.ga.update.GuestAgent._download") + def test_ensure_download_skips_blacklisted(self, mock_download): + agent = GuestAgent.from_installed_agent(self.agent_path) + self.assertEqual(0, mock_download.call_count) + + agent.clear_error() + agent.mark_failure(is_fatal=True) + self.assertTrue(agent.is_blacklisted) + + pkg = ExtHandlerPackage(version=str(self._get_agent_version())) + pkg.uris.append(None) + # _download is mocked so there will be no http request; passing a None protocol + agent = GuestAgent.from_agent_package(pkg, None, False) + + self.assertEqual(1, agent.error.failure_count) + self.assertTrue(agent.error.was_fatal) + self.assertTrue(agent.is_blacklisted) + self.assertEqual(0, mock_download.call_count) + + +class TestGuestAgentError(UpdateTestCase): + def test_creation(self): + self.assertRaises(TypeError, GuestAgentError) + self.assertRaises(UpdateError, GuestAgentError, None) + + with self.get_error_file(error_data=WITH_ERROR) as path: + err = GuestAgentError(path.name) + err.load() + 
self.assertEqual(path.name, err.path) + self.assertNotEqual(None, err) + + self.assertEqual(WITH_ERROR["last_failure"], err.last_failure) + self.assertEqual(WITH_ERROR["failure_count"], err.failure_count) + self.assertEqual(WITH_ERROR["was_fatal"], err.was_fatal) + return + + def test_clear(self): + with self.get_error_file(error_data=WITH_ERROR) as path: + err = GuestAgentError(path.name) + err.load() + self.assertEqual(path.name, err.path) + self.assertNotEqual(None, err) + + err.clear() + self.assertEqual(NO_ERROR["last_failure"], err.last_failure) + self.assertEqual(NO_ERROR["failure_count"], err.failure_count) + self.assertEqual(NO_ERROR["was_fatal"], err.was_fatal) + return + + def test_save(self): + err1 = self.create_error() + err1.mark_failure() + err1.mark_failure(is_fatal=True) + + err2 = self.create_error(err1.to_json()) + self.assertEqual(err1.last_failure, err2.last_failure) + self.assertEqual(err1.failure_count, err2.failure_count) + self.assertEqual(err1.was_fatal, err2.was_fatal) + + def test_mark_failure(self): + err = self.create_error() + self.assertFalse(err.is_blacklisted) + + for i in range(0, MAX_FAILURE): # pylint: disable=unused-variable + err.mark_failure() + + # Agent failed >= MAX_FAILURE, it should be blacklisted + self.assertTrue(err.is_blacklisted) + self.assertEqual(MAX_FAILURE, err.failure_count) + return + + def test_mark_failure_permanent(self): + err = self.create_error() + + self.assertFalse(err.is_blacklisted) + + # Fatal errors immediately blacklist + err.mark_failure(is_fatal=True) + self.assertTrue(err.is_blacklisted) + self.assertTrue(err.failure_count < MAX_FAILURE) + return + + def test_str(self): + err = self.create_error(error_data=NO_ERROR) + s = "Last Failure: {0}, Total Failures: {1}, Fatal: {2}, Reason: {3}".format( + NO_ERROR["last_failure"], + NO_ERROR["failure_count"], + NO_ERROR["was_fatal"], + NO_ERROR["reason"]) + self.assertEqual(s, str(err)) + + err = self.create_error(error_data=WITH_ERROR) + s = "Last 
Failure: {0}, Total Failures: {1}, Fatal: {2}, Reason: {3}".format( + WITH_ERROR["last_failure"], + WITH_ERROR["failure_count"], + WITH_ERROR["was_fatal"], + WITH_ERROR["reason"]) + self.assertEqual(s, str(err)) + return diff --git a/tests/ga/test_report_status.py b/tests/ga/test_report_status.py index c5a20b5167..8f4ce58f4d 100644 --- a/tests/ga/test_report_status.py +++ b/tests/ga/test_report_status.py @@ -3,6 +3,7 @@ import json +from azurelinuxagent.ga.agent_update_handler import get_agent_update_handler from azurelinuxagent.ga.exthandlers import ExtHandlersHandler from azurelinuxagent.ga.update import get_update_handler from tests.ga.mocks import mock_update_handler @@ -78,21 +79,22 @@ def test_report_status_should_log_errors_only_once_per_goal_state(self): update_handler = get_update_handler() update_handler._goal_state = protocol.get_goal_state() # these tests skip the initialization of the goal state. so do that here exthandlers_handler = ExtHandlersHandler(protocol) - update_handler._report_status(exthandlers_handler) + agent_update_handler = get_agent_update_handler(protocol) + update_handler._report_status(exthandlers_handler, agent_update_handler) self.assertEqual(0, logger_warn.call_count, "UpdateHandler._report_status() should not report WARNINGS when there are no errors") with patch("azurelinuxagent.ga.update.ExtensionsSummary.__init__", side_effect=Exception("TEST EXCEPTION")): # simulate an error during _report_status() get_warnings = lambda: [args[0] for args, _ in logger_warn.call_args_list if "TEST EXCEPTION" in args[0]] - update_handler._report_status(exthandlers_handler) - update_handler._report_status(exthandlers_handler) - update_handler._report_status(exthandlers_handler) + update_handler._report_status(exthandlers_handler, agent_update_handler) + update_handler._report_status(exthandlers_handler, agent_update_handler) + update_handler._report_status(exthandlers_handler, agent_update_handler) self.assertEqual(1, len(get_warnings()), 
"UpdateHandler._report_status() should report only 1 WARNING when there are multiple errors within the same goal state") exthandlers_handler.protocol.mock_wire_data.set_incarnation(999) update_handler._try_update_goal_state(exthandlers_handler.protocol) - update_handler._report_status(exthandlers_handler) + update_handler._report_status(exthandlers_handler, agent_update_handler) self.assertEqual(2, len(get_warnings()), "UpdateHandler._report_status() should continue reporting errors after a new goal state") def test_update_handler_should_add_fast_track_to_supported_features_when_it_is_supported(self): diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index e5f15fbd07..b73ad3db8f 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -20,6 +20,8 @@ from datetime import datetime, timedelta from threading import current_thread +from azurelinuxagent.ga.guestagent import GuestAgent, GuestAgentError, \ + AGENT_ERROR_FILE, GAUpdateReportState from tests.common.osutil.test_default import TestOSUtil import azurelinuxagent.common.osutil.default as osutil @@ -27,7 +29,7 @@ from azurelinuxagent.common import conf from azurelinuxagent.common.event import EVENTS_DIRECTORY, WALAEventOperation -from azurelinuxagent.common.exception import ProtocolError, UpdateError, HttpError, \ +from azurelinuxagent.common.exception import HttpError, \ ExitException, AgentMemoryExceededException from azurelinuxagent.common.future import ustr, httpclient from azurelinuxagent.common.persist_firewall_rules import PersistFirewallRulesHandler @@ -41,13 +43,12 @@ from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.utils.networkutil import FirewallCmdDirectCommands, AddFirewallRules from azurelinuxagent.common.version import AGENT_PKG_GLOB, AGENT_DIR_GLOB, AGENT_NAME, AGENT_DIR_PATTERN, \ - AGENT_VERSION, CURRENT_AGENT, CURRENT_VERSION, set_daemon_version, \ - __DAEMON_VERSION_ENV_VARIABLE as DAEMON_VERSION_ENV_VARIABLE + 
AGENT_VERSION, CURRENT_AGENT, CURRENT_VERSION from azurelinuxagent.ga.exthandlers import ExtHandlersHandler, ExtHandlerInstance, HandlerEnvironment, ExtensionStatusValue -from azurelinuxagent.ga.update import GuestAgent, GuestAgentError, MAX_FAILURE, AGENT_MANIFEST_FILE, \ - get_update_handler, ORPHAN_POLL_INTERVAL, AGENT_PARTITION_FILE, AGENT_ERROR_FILE, ORPHAN_WAIT_INTERVAL, \ +from azurelinuxagent.ga.update import \ + get_update_handler, ORPHAN_POLL_INTERVAL, AGENT_PARTITION_FILE, ORPHAN_WAIT_INTERVAL, \ CHILD_LAUNCH_RESTART_MAX, CHILD_HEALTH_INTERVAL, GOAL_STATE_PERIOD_EXTENSIONS_DISABLED, UpdateHandler, \ - READONLY_FILE_GLOBS, ExtensionsSummary, AgentUpgradeType + READONLY_FILE_GLOBS, ExtensionsSummary from tests.ga.mocks import mock_update_handler from tests.protocol.mocks import mock_wire_protocol, MockHttpResponse from tests.protocol.mockwiredata import DATA_FILE, DATA_FILE_MULTIPLE_EXT, DATA_FILE_VM_SETTINGS @@ -56,6 +57,7 @@ from tests.protocol import mockwiredata from tests.protocol.HttpRequestPredicates import HttpRequestPredicates + NO_ERROR = { "last_failure": 0.0, "failure_count": 0, @@ -99,7 +101,7 @@ def faux_logger(): @contextlib.contextmanager -def _get_update_handler(iterations=1, test_data=None, protocol=None): +def _get_update_handler(iterations=1, test_data=None, protocol=None, autoupdate_enabled=True): """ This function returns a mocked version of the UpdateHandler object to be used for testing. It will only run the main loop [iterations] no of times. 
@@ -110,10 +112,10 @@ def _get_update_handler(iterations=1, test_data=None, protocol=None): with patch.object(HostPluginProtocol, "is_default_channel", False): if protocol is None: with mock_wire_protocol(test_data) as mock_protocol: - with mock_update_handler(mock_protocol, iterations=iterations, autoupdate_enabled=True) as update_handler: + with mock_update_handler(mock_protocol, iterations=iterations, autoupdate_enabled=autoupdate_enabled) as update_handler: yield update_handler, mock_protocol else: - with mock_update_handler(protocol, iterations=iterations, autoupdate_enabled=True) as update_handler: + with mock_update_handler(protocol, iterations=iterations, autoupdate_enabled=autoupdate_enabled) as update_handler: yield update_handler, protocol @@ -315,302 +317,6 @@ def replicate_agents(self, return dst_v -class TestGuestAgentError(UpdateTestCase): - def test_creation(self): - self.assertRaises(TypeError, GuestAgentError) - self.assertRaises(UpdateError, GuestAgentError, None) - - with self.get_error_file(error_data=WITH_ERROR) as path: - err = GuestAgentError(path.name) - err.load() - self.assertEqual(path.name, err.path) - self.assertNotEqual(None, err) - - self.assertEqual(WITH_ERROR["last_failure"], err.last_failure) - self.assertEqual(WITH_ERROR["failure_count"], err.failure_count) - self.assertEqual(WITH_ERROR["was_fatal"], err.was_fatal) - return - - def test_clear(self): - with self.get_error_file(error_data=WITH_ERROR) as path: - err = GuestAgentError(path.name) - err.load() - self.assertEqual(path.name, err.path) - self.assertNotEqual(None, err) - - err.clear() - self.assertEqual(NO_ERROR["last_failure"], err.last_failure) - self.assertEqual(NO_ERROR["failure_count"], err.failure_count) - self.assertEqual(NO_ERROR["was_fatal"], err.was_fatal) - return - - def test_save(self): - err1 = self.create_error() - err1.mark_failure() - err1.mark_failure(is_fatal=True) - - err2 = self.create_error(err1.to_json()) - self.assertEqual(err1.last_failure, 
err2.last_failure) - self.assertEqual(err1.failure_count, err2.failure_count) - self.assertEqual(err1.was_fatal, err2.was_fatal) - - def test_mark_failure(self): - err = self.create_error() - self.assertFalse(err.is_blacklisted) - - for i in range(0, MAX_FAILURE): # pylint: disable=unused-variable - err.mark_failure() - - # Agent failed >= MAX_FAILURE, it should be blacklisted - self.assertTrue(err.is_blacklisted) - self.assertEqual(MAX_FAILURE, err.failure_count) - return - - def test_mark_failure_permanent(self): - err = self.create_error() - - self.assertFalse(err.is_blacklisted) - - # Fatal errors immediately blacklist - err.mark_failure(is_fatal=True) - self.assertTrue(err.is_blacklisted) - self.assertTrue(err.failure_count < MAX_FAILURE) - return - - def test_str(self): - err = self.create_error(error_data=NO_ERROR) - s = "Last Failure: {0}, Total Failures: {1}, Fatal: {2}, Reason: {3}".format( - NO_ERROR["last_failure"], - NO_ERROR["failure_count"], - NO_ERROR["was_fatal"], - NO_ERROR["reason"]) - self.assertEqual(s, str(err)) - - err = self.create_error(error_data=WITH_ERROR) - s = "Last Failure: {0}, Total Failures: {1}, Fatal: {2}, Reason: {3}".format( - WITH_ERROR["last_failure"], - WITH_ERROR["failure_count"], - WITH_ERROR["was_fatal"], - WITH_ERROR["reason"]) - self.assertEqual(s, str(err)) - return - - -class TestGuestAgent(UpdateTestCase): - def setUp(self): - UpdateTestCase.setUp(self) - self.copy_agents(self._get_agent_file_path()) - self.agent_path = os.path.join(self.tmp_dir, self._get_agent_name()) - - def test_creation(self): - with self.assertRaises(UpdateError): - GuestAgent.from_installed_agent("A very bad file name") - - with self.assertRaises(UpdateError): - GuestAgent.from_installed_agent("{0}-a.bad.version".format(AGENT_NAME)) - - self.expand_agents() - - agent = GuestAgent.from_installed_agent(self.agent_path) - self.assertNotEqual(None, agent) - self.assertEqual(self._get_agent_name(), agent.name) - 
self.assertEqual(self._get_agent_version(), agent.version) - - self.assertEqual(self.agent_path, agent.get_agent_dir()) - - path = os.path.join(self.agent_path, AGENT_MANIFEST_FILE) - self.assertEqual(path, agent.get_agent_manifest_path()) - - self.assertEqual( - os.path.join(self.agent_path, AGENT_ERROR_FILE), - agent.get_agent_error_file()) - - path = ".".join((os.path.join(conf.get_lib_dir(), self._get_agent_name()), "zip")) - self.assertEqual(path, agent.get_agent_pkg_path()) - - self.assertTrue(agent.is_downloaded) - self.assertFalse(agent.is_blacklisted) - self.assertTrue(agent.is_available) - - def test_clear_error(self): - self.expand_agents() - - agent = GuestAgent.from_installed_agent(self.agent_path) - agent.mark_failure(is_fatal=True) - - self.assertTrue(agent.error.last_failure > 0.0) - self.assertEqual(1, agent.error.failure_count) - self.assertTrue(agent.is_blacklisted) - self.assertEqual(agent.is_blacklisted, agent.error.is_blacklisted) - - agent.clear_error() - self.assertEqual(0.0, agent.error.last_failure) - self.assertEqual(0, agent.error.failure_count) - self.assertFalse(agent.is_blacklisted) - self.assertEqual(agent.is_blacklisted, agent.error.is_blacklisted) - - def test_is_available(self): - self.expand_agents() - - agent = GuestAgent.from_installed_agent(self.agent_path) - - self.assertTrue(agent.is_available) - agent.mark_failure(is_fatal=True) - self.assertFalse(agent.is_available) - - def test_is_blacklisted(self): - self.expand_agents() - - agent = GuestAgent.from_installed_agent(self.agent_path) - self.assertFalse(agent.is_blacklisted) - self.assertEqual(agent.is_blacklisted, agent.error.is_blacklisted) - - agent.mark_failure(is_fatal=True) - self.assertTrue(agent.is_blacklisted) - self.assertEqual(agent.is_blacklisted, agent.error.is_blacklisted) - - def test_is_downloaded(self): - self.expand_agents() - agent = GuestAgent.from_installed_agent(self.agent_path) - self.assertTrue(agent.is_downloaded) - - def test_mark_failure(self): - 
agent = GuestAgent.from_installed_agent(self.agent_path) - - agent.mark_failure() - self.assertEqual(1, agent.error.failure_count) - - agent.mark_failure(is_fatal=True) - self.assertEqual(2, agent.error.failure_count) - self.assertTrue(agent.is_blacklisted) - - def test_load_manifest(self): - self.expand_agents() - agent = GuestAgent.from_installed_agent(self.agent_path) - agent._load_manifest() - self.assertEqual(agent.manifest.get_enable_command(), - agent.get_agent_cmd()) - - def test_load_manifest_missing(self): - self.expand_agents() - agent = GuestAgent.from_installed_agent(self.agent_path) - os.remove(agent.get_agent_manifest_path()) - self.assertRaises(UpdateError, agent._load_manifest) - - def test_load_manifest_is_empty(self): - self.expand_agents() - agent = GuestAgent.from_installed_agent(self.agent_path) - self.assertTrue(os.path.isfile(agent.get_agent_manifest_path())) - - with open(agent.get_agent_manifest_path(), "w") as file: # pylint: disable=redefined-builtin - json.dump(EMPTY_MANIFEST, file) - self.assertRaises(UpdateError, agent._load_manifest) - - def test_load_manifest_is_malformed(self): - self.expand_agents() - agent = GuestAgent.from_installed_agent(self.agent_path) - self.assertTrue(os.path.isfile(agent.get_agent_manifest_path())) - - with open(agent.get_agent_manifest_path(), "w") as file: # pylint: disable=redefined-builtin - file.write("This is not JSON data") - self.assertRaises(UpdateError, agent._load_manifest) - - def test_load_error(self): - agent = GuestAgent.from_installed_agent(self.agent_path) - agent.error = None - - agent._load_error() - self.assertTrue(agent.error is not None) - - def test_download(self): - self.remove_agents() - self.assertFalse(os.path.isdir(self.agent_path)) - - agent_uri = 'https://foo.blob.core.windows.net/bar/OSTCExtensions.WALinuxAgent__1.0.0' - - def http_get_handler(uri, *_, **__): - if uri == agent_uri: - response = load_bin_data(self._get_agent_file_name(), self._agent_zip_dir) - return 
MockHttpResponse(status=httpclient.OK, body=response) - return None - - pkg = ExtHandlerPackage(version=str(self._get_agent_version())) - pkg.uris.append(agent_uri) - - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: - protocol.set_http_handlers(http_get_handler=http_get_handler) - agent = GuestAgent.from_agent_package(pkg, protocol, False) - - self.assertTrue(os.path.isdir(agent.get_agent_dir())) - self.assertTrue(agent.is_downloaded) - - def test_download_fail(self): - self.remove_agents() - self.assertFalse(os.path.isdir(self.agent_path)) - - agent_uri = 'https://foo.blob.core.windows.net/bar/OSTCExtensions.WALinuxAgent__1.0.0' - - def http_get_handler(uri, *_, **__): - if uri in (agent_uri, 'http://168.63.129.16:32526/extensionArtifact'): - return MockHttpResponse(status=httpclient.SERVICE_UNAVAILABLE) - return None - - agent_version = self._get_agent_version() - pkg = ExtHandlerPackage(version=str(agent_version)) - pkg.uris.append(agent_uri) - - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: - protocol.set_http_handlers(http_get_handler=http_get_handler) - with patch("azurelinuxagent.ga.update.add_event") as add_event: - agent = GuestAgent.from_agent_package(pkg, protocol, False) - - self.assertFalse(os.path.isfile(self.agent_path)) - - messages = [kwargs['message'] for _, kwargs in add_event.call_args_list if kwargs['op'] == 'Install' and kwargs['is_success'] == False] - self.assertEqual(1, len(messages), "Expected exactly 1 install error/ Got: {0}".format(add_event.call_args_list)) - self.assertIn(str.format('[UpdateError] Unable to download Agent WALinuxAgent-{0}', agent_version), messages[0], "The install error does not include the expected message") - - self.assertFalse(agent.is_blacklisted, "Download failures should not blacklist the Agent") - - def test_invalid_agent_package_does_not_blacklist_the_agent(self): - agent_uri = 'https://foo.blob.core.windows.net/bar/OSTCExtensions.WALinuxAgent__9.9.9.9' - - def 
http_get_handler(uri, *_, **__): - if uri in (agent_uri, 'http://168.63.129.16:32526/extensionArtifact'): - response = load_bin_data("ga/WALinuxAgent-9.9.9.9-no_manifest.zip") - return MockHttpResponse(status=httpclient.OK, body=response) - return None - - pkg = ExtHandlerPackage(version="9.9.9.9") - pkg.uris.append(agent_uri) - - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: - protocol.set_http_handlers(http_get_handler=http_get_handler) - agent = GuestAgent.from_agent_package(pkg, protocol, False) - - self.assertFalse(agent.is_blacklisted, "The agent should not be blacklisted if unable to unpack/download") - self.assertFalse(os.path.exists(agent.get_agent_dir()), "Agent directory should be cleaned up") - - @patch("azurelinuxagent.ga.update.GuestAgent._download") - def test_ensure_download_skips_blacklisted(self, mock_download): - agent = GuestAgent.from_installed_agent(self.agent_path) - self.assertEqual(0, mock_download.call_count) - - agent.clear_error() - agent.mark_failure(is_fatal=True) - self.assertTrue(agent.is_blacklisted) - - pkg = ExtHandlerPackage(version=str(self._get_agent_version())) - pkg.uris.append(None) - # _download is mocked so there will be no http request; passing a None protocol - agent = GuestAgent.from_agent_package(pkg, None, False) - - self.assertEqual(1, agent.error.failure_count) - self.assertTrue(agent.error.was_fatal) - self.assertTrue(agent.is_blacklisted) - self.assertEqual(0, mock_download.call_count) - - class TestUpdate(UpdateTestCase): def setUp(self): UpdateTestCase.setUp(self) @@ -628,8 +334,6 @@ def setUp(self): clear_singleton_instances(ProtocolUtil) def test_creation(self): - self.assertEqual(None, self.update_handler.last_attempt_time) - self.assertEqual(0, len(self.update_handler.agents)) self.assertEqual(None, self.update_handler.child_agent) @@ -853,9 +557,6 @@ def test_get_latest_agent(self): def test_get_latest_agent_excluded(self): self.prepare_agent(AGENT_VERSION) - 
self.assertFalse(self._test_upgrade_available( - versions=self.agent_versions(), - count=1)) self.assertEqual(None, self.update_handler.get_latest_agent_greater_than_daemon()) def test_get_latest_agent_no_updates(self): @@ -1192,85 +893,6 @@ def test_shutdown_ignores_exceptions(self): except Exception as e: # pylint: disable=unused-variable self.assertTrue(False, "Unexpected exception") # pylint: disable=redundant-unittest-assert - def _test_upgrade_available( - self, - base_version=FlexibleVersion(AGENT_VERSION), - protocol=None, - versions=None, - count=20): - - if protocol is None: - protocol = self._create_protocol(count=count, versions=versions) - - self.update_handler.protocol_util = protocol - self.update_handler._goal_state = protocol.get_goal_state() - self.update_handler._goal_state.extensions_goal_state.is_outdated = False - conf.get_autoupdate_gafamily = Mock(return_value=protocol.family) - - return self.update_handler._download_agent_if_upgrade_available(protocol, base_version=base_version) - - def test_upgrade_available_returns_true_on_first_use(self): - self.assertTrue(self._test_upgrade_available()) - - def test_upgrade_available_handles_missing_family(self): - data_file = mockwiredata.DATA_FILE.copy() - data_file["ext_conf"] = "wire/ext_conf_missing_family.xml" - - with mock_wire_protocol(data_file) as protocol: - self.update_handler.protocol_util = protocol - with patch('azurelinuxagent.common.logger.warn') as mock_logger: - with patch('azurelinuxagent.common.protocol.goal_state.GoalState.fetch_agent_manifest', side_effect=ProtocolError): - self.assertFalse(self.update_handler._download_agent_if_upgrade_available(protocol, base_version=CURRENT_VERSION)) - self.assertEqual(0, mock_logger.call_count) - - def test_upgrade_available_includes_old_agents(self): - self.prepare_agents() - - old_version = self.agent_versions()[-1] - old_count = old_version.version[-1] - - self.replicate_agents(src_v=old_version, count=old_count, increment=-1) - all_count = 
len(self.agent_versions()) - - self.assertTrue(self._test_upgrade_available(versions=self.agent_versions())) - self.assertEqual(all_count, len(self.update_handler.agents)) - - def test_upgrade_available_purges_old_agents(self): - self.prepare_agents() - agent_count = self.agent_count() - self.assertEqual(20, agent_count) - - agent_versions = self.agent_versions()[:3] - self.assertTrue(self._test_upgrade_available(versions=agent_versions)) - self.assertEqual(len(agent_versions), len(self.update_handler.agents)) - - # Purging always keeps the running agent - if CURRENT_VERSION not in agent_versions: - agent_versions.append(CURRENT_VERSION) - self.assertEqual(agent_versions, self.agent_versions()) - - def test_upgrade_available_skips_if_too_frequent(self): - conf.get_autoupdate_frequency = Mock(return_value=10000) - self.update_handler.last_attempt_time = time.time() - self.assertFalse(self._test_upgrade_available()) - - def test_upgrade_available_skips_when_no_new_versions(self): - self.prepare_agents() - base_version = self.agent_versions()[0] + 1 - self.assertFalse(self._test_upgrade_available(base_version=base_version)) - - def test_upgrade_available_skips_when_no_versions(self): - self.assertFalse(self._test_upgrade_available(protocol=ProtocolMock())) - - def test_upgrade_available_sorts(self): - self.prepare_agents() - self._test_upgrade_available() - - v = FlexibleVersion("100000") - for a in self.update_handler.agents: - self.assertTrue(v > a.version) - v = a.version - def test_write_pid_file(self): for n in range(1112): fileutil.write_file(os.path.join(self.tmp_dir, str(n) + "_waagent.pid"), ustr(n + 1)) @@ -1295,7 +917,7 @@ def test_update_happens_when_extensions_disabled(self): behavior never changes. 
""" with patch('azurelinuxagent.common.conf.get_extensions_enabled', return_value=False): - with patch('azurelinuxagent.ga.update.UpdateHandler._download_agent_if_upgrade_available', return_value=True) as download_agent: + with patch('azurelinuxagent.ga.agent_update_handler.AgentUpdateHandler.run') as download_agent: with mock_wire_protocol(DATA_FILE) as protocol: with mock_update_handler(protocol, autoupdate_enabled=True) as update_handler: update_handler.run() @@ -1358,7 +980,7 @@ def match_expected_info(): def test_it_should_recreate_handler_env_on_service_startup(self): iterations = 5 - with _get_update_handler(iterations) as (update_handler, protocol): + with _get_update_handler(iterations, autoupdate_enabled=False) as (update_handler, protocol): update_handler.run(debug=True) expected_handler = self._get_test_ext_handler_instance(protocol) @@ -1375,7 +997,7 @@ def test_it_should_recreate_handler_env_on_service_startup(self): # re-runnning the update handler. Then,ensure that the HandlerEnvironment file is recreated with eventsFolder # flag in HandlerEnvironment.json file. 
self._add_write_permission_to_goal_state_files() - with _get_update_handler(iterations=1) as (update_handler, protocol): + with _get_update_handler(iterations=1, autoupdate_enabled=False) as (update_handler, protocol): with patch("azurelinuxagent.common.agent_supported_feature._ETPFeature.is_supported", True): update_handler.run(debug=True) @@ -1573,7 +1195,7 @@ def test_it_should_not_set_dns_tcp_iptable_if_drop_and_accept_available(self): @contextlib.contextmanager def _setup_test_for_ext_event_dirs_retention(self): try: - with _get_update_handler(test_data=DATA_FILE_MULTIPLE_EXT) as (update_handler, protocol): + with _get_update_handler(test_data=DATA_FILE_MULTIPLE_EXT, autoupdate_enabled=False) as (update_handler, protocol): with patch("azurelinuxagent.common.agent_supported_feature._ETPFeature.is_supported", True): update_handler.run(debug=True) expected_events_dirs = glob.glob(os.path.join(conf.get_ext_log_dir(), "*", EVENTS_DIRECTORY)) @@ -1623,62 +1245,69 @@ def test_it_should_recreate_extension_event_directories_for_existing_extensions_ def test_it_should_report_update_status_in_status_blob(self): with mock_wire_protocol(DATA_FILE) as protocol: - with patch.object(conf, "get_enable_ga_versioning", return_value=True): - with patch.object(conf, "get_autoupdate_gafamily", return_value="Prod"): - with patch("azurelinuxagent.common.logger.warn") as patch_warn: - - protocol.aggregate_status = None - protocol.incarnation = 1 - - def mock_http_put(url, *args, **_): - if HttpRequestPredicates.is_host_plugin_status_request(url): - # Skip reading the HostGA request data as its encoded - return MockHttpResponse(status=500) - protocol.aggregate_status = json.loads(args[0]) - return MockHttpResponse(status=201) - - def update_goal_state_and_run_handler(): - protocol.incarnation += 1 - protocol.mock_wire_data.set_incarnation(protocol.incarnation) - self._add_write_permission_to_goal_state_files() - with _get_update_handler(iterations=1, protocol=protocol) as 
(update_handler, _): - update_handler.run(debug=True) - self.assertEqual(0, update_handler.get_exit_code(), - "Exit code should be 0; List of all warnings logged by the agent: {0}".format( - patch_warn.call_args_list)) - - protocol.set_http_handlers(http_put_handler=mock_http_put) - - # Case 1: No requested version in GS; updateStatus should not be reported - update_goal_state_and_run_handler() - self.assertFalse("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], - "updateStatus should not be reported if not asked in GS") - - # Case 2: Requested version in GS != Current Version; updateStatus should be error - protocol.mock_wire_data.set_extension_config("wire/ext_conf_requested_version.xml") - update_goal_state_and_run_handler() - self.assertTrue("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], - "updateStatus should be in status blob. Warns: {0}".format(patch_warn.call_args_list)) - update_status = protocol.aggregate_status['aggregateStatus']['guestAgentStatus']["updateStatus"] - self.assertEqual(VMAgentUpdateStatuses.Error, update_status['status'], "Status should be an error") - self.assertEqual(update_status['expectedVersion'], "9.9.9.10", "incorrect version reported") - self.assertEqual(update_status['code'], 1, "incorrect code reported") - - # Case 3: Requested version in GS == Current Version; updateStatus should be Success - protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) - update_goal_state_and_run_handler() - self.assertTrue("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], - "updateStatus should be reported if asked in GS") - update_status = protocol.aggregate_status['aggregateStatus']['guestAgentStatus']["updateStatus"] - self.assertEqual(VMAgentUpdateStatuses.Success, update_status['status'], "Status should be successful") - self.assertEqual(update_status['expectedVersion'], str(CURRENT_VERSION), "incorrect version 
reported") - self.assertEqual(update_status['code'], 0, "incorrect code reported") - - # Case 4: Requested version removed in GS; no updateStatus should be reported - protocol.mock_wire_data.reload() - update_goal_state_and_run_handler() - self.assertFalse("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], - "updateStatus should not be reported if not asked in GS") + with patch.object(conf, "get_autoupdate_gafamily", return_value="Prod"): + with patch("azurelinuxagent.common.logger.warn") as patch_warn: + + protocol.aggregate_status = None + protocol.incarnation = 1 + + def get_handler(url, **kwargs): + if HttpRequestPredicates.is_agent_package_request(url): + return MockHttpResponse(status=httpclient.SERVICE_UNAVAILABLE) + return protocol.mock_wire_data.mock_http_get(url, **kwargs) + + def put_handler(url, *args, **_): + if HttpRequestPredicates.is_host_plugin_status_request(url): + # Skip reading the HostGA request data as its encoded + return MockHttpResponse(status=500) + protocol.aggregate_status = json.loads(args[0]) + return MockHttpResponse(status=201) + + def update_goal_state_and_run_handler(autoupdate_enabled = True): + protocol.incarnation += 1 + protocol.mock_wire_data.set_incarnation(protocol.incarnation) + self._add_write_permission_to_goal_state_files() + with _get_update_handler(iterations=1, protocol=protocol, autoupdate_enabled=autoupdate_enabled) as (update_handler, _): + GAUpdateReportState.report_error_msg = "" + update_handler.run(debug=True) + self.assertEqual(0, update_handler.get_exit_code(), + "Exit code should be 0; List of all warnings logged by the agent: {0}".format( + patch_warn.call_args_list)) + + protocol.set_http_handlers(http_get_handler=get_handler, http_put_handler=put_handler) + + # Case 1: Requested version removed in GS; report missing requested version errr + protocol.mock_wire_data.set_extension_config("wire/ext_conf.xml") + protocol.mock_wire_data.reload() + 
update_goal_state_and_run_handler() + self.assertTrue("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], + "updateStatus should be reported") + update_status = protocol.aggregate_status['aggregateStatus']['guestAgentStatus']["updateStatus"] + self.assertEqual(VMAgentUpdateStatuses.Error, update_status['status'], "Status should be an error") + self.assertEqual(update_status['code'], 1, "incorrect code reported") + self.assertIn("Missing requested version", update_status['formattedMessage']['message'], "incorrect message reported") + + # Case 2: Requested version in GS == Current Version; updateStatus should be Success + protocol.mock_wire_data.set_extension_config("wire/ext_conf_requested_version.xml") + protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) + update_goal_state_and_run_handler() + self.assertTrue("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], + "updateStatus should be reported if asked in GS") + update_status = protocol.aggregate_status['aggregateStatus']['guestAgentStatus']["updateStatus"] + self.assertEqual(VMAgentUpdateStatuses.Success, update_status['status'], "Status should be successful") + self.assertEqual(update_status['expectedVersion'], str(CURRENT_VERSION), "incorrect version reported") + self.assertEqual(update_status['code'], 0, "incorrect code reported") + + # Case 3: Requested version in GS != Current Version; update fail and report error + protocol.mock_wire_data.set_extension_config("wire/ext_conf_requested_version.xml") + protocol.mock_wire_data.set_extension_config_requested_version("5.2.0.1") + update_goal_state_and_run_handler() + self.assertTrue("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], + "updateStatus should be in status blob. 
Warns: {0}".format(patch_warn.call_args_list)) + update_status = protocol.aggregate_status['aggregateStatus']['guestAgentStatus']["updateStatus"] + self.assertEqual(VMAgentUpdateStatuses.Error, update_status['status'], "Status should be an error") + self.assertEqual(update_status['expectedVersion'], "5.2.0.1", "incorrect version reported") + self.assertEqual(update_status['code'], 1, "incorrect code reported") def test_it_should_wait_to_fetch_first_goal_state(self): with _get_update_handler() as (update_handler, protocol): @@ -1721,7 +1350,7 @@ def test_it_should_reset_legacy_blacklisted_agents_on_process_start(self): else: self.assertFalse(agent.is_blacklisted, "Agent {0} should not be blacklisted".format(agent.name)) - with _get_update_handler() as (update_handler, _): + with _get_update_handler(autoupdate_enabled=False) as (update_handler, _): update_handler.run(debug=True) self.assertEqual(20, self.agent_count(), "All agents should be available on disk") # Ensure none of the agents are blacklisted @@ -1776,11 +1405,6 @@ def _test_run(self, autoupdate_enabled=False, check_daemon_running=False, expect def test_run(self): self._test_run() - def test_run_stops_if_update_available(self): - with patch('azurelinuxagent.ga.update.UpdateHandler._download_agent_if_upgrade_available', return_value=True): - update_handler = self._test_run(autoupdate_enabled=True) - self.assertEqual(0, update_handler.get_iterations_completed()) - def test_run_stops_if_orphaned(self): with patch('os.getppid', return_value=1): update_handler = self._test_run(check_daemon_running=True) @@ -1791,7 +1415,7 @@ def test_run_clears_sentinel_on_successful_exit(self): self.assertFalse(os.path.isfile(update_handler._sentinel_file_path())) def test_run_leaves_sentinel_on_unsuccessful_exit(self): - with patch('azurelinuxagent.ga.update.UpdateHandler._download_agent_if_upgrade_available', side_effect=Exception): + with patch('azurelinuxagent.ga.agent_update_handler.AgentUpdateHandler.run', 
side_effect=Exception): update_handler = self._test_run(autoupdate_enabled=True,expected_exit_code=1) self.assertTrue(os.path.isfile(update_handler._sentinel_file_path())) @@ -1803,20 +1427,18 @@ def test_run_emits_restart_event(self): class TestAgentUpgrade(UpdateTestCase): @contextlib.contextmanager - def create_conf_mocks(self, hotfix_frequency, normal_frequency): + def create_conf_mocks(self, autoupdate_frequency, hotfix_frequency, normal_frequency): # Disabling extension processing to speed up tests as this class deals with testing agent upgrades with patch("azurelinuxagent.common.conf.get_extensions_enabled", return_value=False): - with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=0.001): - with patch("azurelinuxagent.common.conf.get_hotfix_upgrade_frequency", - return_value=hotfix_frequency): - with patch("azurelinuxagent.common.conf.get_normal_upgrade_frequency", - return_value=normal_frequency): + with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=autoupdate_frequency): + with patch("azurelinuxagent.common.conf.get_hotfix_upgrade_frequency", return_value=hotfix_frequency): + with patch("azurelinuxagent.common.conf.get_normal_upgrade_frequency", return_value=normal_frequency): with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"): yield @contextlib.contextmanager - def __get_update_handler(self, iterations=1, test_data=None, hotfix_frequency=1.0, normal_frequency=2.0, - reload_conf=None): + def __get_update_handler(self, iterations=1, test_data=None, + reload_conf=None, autoupdate_frequency=0.001, hotfix_frequency=1.0, normal_frequency=2.0): test_data = DATA_FILE if test_data is None else test_data @@ -1842,32 +1464,23 @@ def put_handler(url, *args, **_): return MockHttpResponse(status=201) protocol.set_http_handlers(http_get_handler=get_handler, http_put_handler=put_handler) - with self.create_conf_mocks(hotfix_frequency, normal_frequency): - with 
patch("azurelinuxagent.ga.update.add_event") as mock_telemetry: + with self.create_conf_mocks(autoupdate_frequency, hotfix_frequency, normal_frequency): + with patch("azurelinuxagent.common.event.EventLogger.add_event") as mock_telemetry: update_handler._protocol = protocol yield update_handler, mock_telemetry def __assert_exit_code_successful(self, update_handler): self.assertEqual(0, update_handler.get_exit_code(), "Exit code should be 0") - def __assert_upgrade_telemetry_emitted_for_requested_version(self, mock_telemetry, upgrade=True, version="99999.0.0.0"): + def __assert_upgrade_telemetry_emitted(self, mock_telemetry, upgrade=True, version="9.9.9.10"): upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if - 'Exiting current process to {0} to the request Agent version {1}'.format( + 'Agent update found, Exiting current process to {0} to the new Agent version {1}'.format( "upgrade" if upgrade else "downgrade", version) in kwarg['message'] and kwarg[ 'op'] == WALAEventOperation.AgentUpgrade] self.assertEqual(1, len(upgrade_event_msgs), "Did not find the event indicating that the agent was upgraded. Got: {0}".format( mock_telemetry.call_args_list)) - def __assert_upgrade_telemetry_emitted(self, mock_telemetry, upgrade_type=AgentUpgradeType.Normal): - upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if - '{0} Agent upgrade discovered, updating to WALinuxAgent-99999.0.0.0 -- exiting'.format( - upgrade_type) in kwarg['message'] and kwarg[ - 'op'] == WALAEventOperation.AgentUpgrade] - self.assertEqual(1, len(upgrade_event_msgs), - "Did not find the event indicating that the agent was upgraded. 
Got: {0}".format( - mock_telemetry.call_args_list)) - def __assert_agent_directories_available(self, versions): for version in versions: self.assertTrue(os.path.exists(self.agent_dir(version)), "Agent directory {0} not found".format(version)) @@ -1879,11 +1492,6 @@ def __assert_agent_directories_exist_and_others_dont_exist(self, versions): self.assertFalse(any(other_agents), "All other agents should be purged from agent dir: {0}".format(other_agents)) - def __assert_no_agent_upgrade_telemetry(self, mock_telemetry): - self.assertEqual(0, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if - "Agent upgrade discovered, updating to" in kwarg['message'] and kwarg[ - 'op'] == WALAEventOperation.AgentUpgrade]), "Unwanted upgrade") - def __assert_ga_version_in_status(self, aggregate_status, version=str(CURRENT_VERSION)): self.assertIsNotNone(aggregate_status, "Status should be reported") self.assertEqual(aggregate_status['aggregateStatus']['guestAgentStatus']['version'], version, @@ -1892,128 +1500,64 @@ def __assert_ga_version_in_status(self, aggregate_status, version=str(CURRENT_VE "Guest Agent should be reported as Ready") def test_it_should_upgrade_agent_on_process_start_if_auto_upgrade_enabled(self): - with self.__get_update_handler(iterations=10) as (update_handler, mock_telemetry): - + data_file = mockwiredata.DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + with self.__get_update_handler(test_data=data_file, iterations=10) as (update_handler, mock_telemetry): update_handler.run(debug=True) self.__assert_exit_code_successful(update_handler) self.assertEqual(1, update_handler.get_iterations(), "Update handler should've exited after the first run") - self.__assert_agent_directories_available(versions=["99999.0.0.0"]) + self.__assert_agent_directories_available(versions=["9.9.9.10"]) self.__assert_upgrade_telemetry_emitted(mock_telemetry) - def 
test_it_should_download_new_agents_and_not_auto_upgrade_if_not_permitted(self): + def test_it_should_not_update_agent_if_last_update_time_not_permitted(self): no_of_iterations = 10 data_file = DATA_FILE.copy() - data_file['ga_manifest'] = "wire/ga_manifest_no_upgrade.xml" - - def reload_conf(url, protocol): - mock_wire_data = protocol.mock_wire_data - # This function reloads the conf mid-run to mimic an actual customer scenario - if HttpRequestPredicates.is_ga_manifest_request(url) and mock_wire_data.call_counts["manifest_of_ga.xml"] >= no_of_iterations/2: - reload_conf.call_count += 1 - # Ensure the first set of versions were downloaded as part of the first manifest - self.__assert_agent_directories_available(versions=["1.0.0", "1.1.0", "1.2.0"]) - # As per our current agent upgrade model, we don't rely on an incarnation update to upgrade the agent. Mocking the same - mock_wire_data.data_files["ga_manifest"] = "wire/ga_manifest.xml" - mock_wire_data.reload() - - reload_conf.call_count = 0 - - with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, hotfix_frequency=10, - normal_frequency=10, reload_conf=reload_conf) as (update_handler, mock_telemetry): + data_file['ext_conf'] = "wire/ext_conf_requested_version.xml" + + self.prepare_agents(1) + test_frequency = 10 + with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, + autoupdate_frequency=test_frequency) as (update_handler, _): + update_handler._protocol.mock_wire_data.set_ga_manifest_version_version("5.2.0.1") + update_handler._protocol.mock_wire_data.set_incarnation(2) update_handler.run(debug=True) - self.assertGreater(reload_conf.call_count, 0, "Ensure the conf reload was called") self.__assert_exit_code_successful(update_handler) self.assertEqual(no_of_iterations, update_handler.get_iterations(), "Update handler should've run its course") - # Ensure the new agent versions were also downloaded once the manifest was updated - 
self.__assert_agent_directories_available(versions=["2.0.0", "2.1.0", "99999.0.0.0"]) - self.__assert_no_agent_upgrade_telemetry(mock_telemetry) - - def test_it_should_upgrade_agent_in_given_time_window_if_permitted(self): - data_file = DATA_FILE.copy() - data_file['ga_manifest'] = "wire/ga_manifest_no_upgrade.xml" - - def reload_conf(url, protocol): - mock_wire_data = protocol.mock_wire_data - # This function reloads the conf mid-run to mimic an actual customer scenario - if HttpRequestPredicates.is_ga_manifest_request(url) and mock_wire_data.call_counts["manifest_of_ga.xml"] >= 2: - reload_conf.call_count += 1 - # Ensure no new agent available so far - self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), "New agent directory should not be found") - # As per our current agent upgrade model, we don't rely on an incarnation update to upgrade the agent. Mocking the same - mock_wire_data.data_files["ga_manifest"] = "wire/ga_manifest.xml" - mock_wire_data.reload() - - reload_conf.call_count = 0 - test_normal_frequency = 0.1 - with self.__get_update_handler(iterations=50, test_data=data_file, reload_conf=reload_conf, - normal_frequency=test_normal_frequency) as (update_handler, mock_telemetry): - start_time = time.time() - update_handler.run(debug=True) - diff = time.time() - start_time - - self.assertGreater(reload_conf.call_count, 0, "Ensure the conf reload was called") - self.__assert_exit_code_successful(update_handler) - self.assertGreaterEqual(update_handler.get_iterations(), 3, - "Update handler should've run at least until the new GA was available") - # A bare-bone check to ensure that the agent waited for the new agent at least for the preset frequency time - self.assertGreater(diff, test_normal_frequency, "The test run should be at least greater than the set frequency") - self.__assert_agent_directories_available(versions=["99999.0.0.0"]) - self.__assert_upgrade_telemetry_emitted(mock_telemetry) + 
self.assertFalse(os.path.exists(self.agent_dir("5.2.0.1")), + "New agent directory should not be found") def test_it_should_not_auto_upgrade_if_auto_update_disabled(self): - with self.__get_update_handler(iterations=10) as (update_handler, mock_telemetry): + with self.__get_update_handler(iterations=10) as (update_handler, _): with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=False): update_handler.run(debug=True) self.__assert_exit_code_successful(update_handler) self.assertGreaterEqual(update_handler.get_iterations(), 10, "Update handler should've run 10 times") - self.__assert_no_agent_upgrade_telemetry(mock_telemetry) self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), "New agent directory should not be found") - def test_it_should_not_auto_upgrade_if_corresponding_time_not_elapsed(self): - # On Normal upgrade, should not upgrade if Hotfix time elapsed - no_of_iterations = 10 - data_file = DATA_FILE.copy() - data_file['ga_manifest'] = "wire/ga_manifest_no_upgrade.xml" - - def reload_conf(url, protocol): - mock_wire_data = protocol.mock_wire_data - # This function reloads the conf mid-run to mimic an actual customer scenario - if HttpRequestPredicates.is_ga_manifest_request(url) and mock_wire_data.call_counts["manifest_of_ga.xml"] >= no_of_iterations / 2: - reload_conf.call_count += 1 - # As per our current agent upgrade model, we don't rely on an incarnation update to upgrade the agent. 
Mocking the same - mock_wire_data.data_files["ga_manifest"] = "wire/ga_manifest.xml" - mock_wire_data.reload() - - reload_conf.call_count = 0 - - with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, hotfix_frequency=0.01, - normal_frequency=10, reload_conf=reload_conf) as (update_handler, mock_telemetry): + def test_it_should_download_only_requested_version_if_available(self): + data_file = mockwiredata.DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): update_handler.run(debug=True) - self.assertGreater(reload_conf.call_count, 0, "Ensure the conf reload was called") - self.__assert_exit_code_successful(update_handler) - self.assertEqual(no_of_iterations, update_handler.get_iterations(), "Update handler didn't run completely") - self.__assert_no_agent_upgrade_telemetry(mock_telemetry) - upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if - kwarg['op'] == WALAEventOperation.AgentUpgrade] - self.assertGreater(len([msg for msg in upgrade_event_msgs if - 'Discovered new {0} upgrade WALinuxAgent-99999.0.0.0; Will upgrade on or after'.format( - AgentUpgradeType.Normal) in msg]), 0, "Error message not propagated properly") + self.__assert_exit_code_successful(update_handler) + self.__assert_upgrade_telemetry_emitted(mock_telemetry, version="9.9.9.10") + self.__assert_agent_directories_exist_and_others_dont_exist(versions=["9.9.9.10"]) - def test_it_should_download_only_requested_version_if_available(self): + def test_it_should_download_largest_version_if_ga_versioning_disabled(self): data_file = mockwiredata.DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): - with patch.object(conf, "get_enable_ga_versioning", return_value=True): + with patch.object(conf, 
"get_enable_ga_versioning", return_value=False): update_handler.run(debug=True) - self.__assert_exit_code_successful(update_handler) - self.__assert_upgrade_telemetry_emitted_for_requested_version(mock_telemetry, version="9.9.9.10") - self.__assert_agent_directories_exist_and_others_dont_exist(versions=["9.9.9.10"]) + self.__assert_exit_code_successful(update_handler) + self.__assert_upgrade_telemetry_emitted(mock_telemetry, version="99999.0.0.0") + self.__assert_agent_directories_exist_and_others_dont_exist(versions=["99999.0.0.0"]) def test_it_should_cleanup_all_agents_except_requested_version_and_current_version(self): data_file = mockwiredata.DATA_FILE.copy() @@ -2024,37 +1568,36 @@ def test_it_should_cleanup_all_agents_except_requested_version_and_current_versi self.assertEqual(20, self.agent_count(), "Agent directories not set properly") with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): - with patch.object(conf, "get_enable_ga_versioning", return_value=True): - update_handler.run(debug=True) + update_handler.run(debug=True) - self.__assert_exit_code_successful(update_handler) - self.__assert_upgrade_telemetry_emitted_for_requested_version(mock_telemetry, version="9.9.9.10") - self.__assert_agent_directories_exist_and_others_dont_exist(versions=["9.9.9.10", str(CURRENT_VERSION)]) + self.__assert_exit_code_successful(update_handler) + self.__assert_upgrade_telemetry_emitted(mock_telemetry, version="9.9.9.10") + self.__assert_agent_directories_exist_and_others_dont_exist(versions=["9.9.9.10", str(CURRENT_VERSION)]) def test_it_should_not_update_if_requested_version_not_found_in_manifest(self): + self.prepare_agents(1) data_file = mockwiredata.DATA_FILE.copy() - data_file["ext_conf"] = "wire/ext_conf_missing_requested_version.xml" + data_file["ext_conf"] = "wire/ext_conf_requested_version_missing_in_manifest.xml" with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): - with 
patch.object(conf, "get_enable_ga_versioning", return_value=True): - update_handler.run(debug=True) + update_handler.run(debug=True) - self.__assert_exit_code_successful(update_handler) - self.__assert_no_agent_upgrade_telemetry(mock_telemetry) - agent_msgs = [kwarg for _, kwarg in mock_telemetry.call_args_list if - kwarg['op'] in (WALAEventOperation.AgentUpgrade, WALAEventOperation.Download)] - # This will throw if corresponding message not found so not asserting on that - requested_version_found = next(kwarg for kwarg in agent_msgs if - "Found requested version in manifest: 5.2.1.0 for goal state incarnation_1" in kwarg['message']) - self.assertTrue(requested_version_found['is_success'], - "The requested version found op should be reported as a success") - - skipping_update = next(kwarg for kwarg in agent_msgs if - "No matching package found in the agent manifest for requested version: 5.2.1.0 in goal state incarnation_1, skipping agent update" in kwarg['message']) - self.assertEqual(skipping_update['version'], FlexibleVersion("5.2.1.0"), - "The not found message should be reported from requested agent version") - self.assertFalse(skipping_update['is_success'], "The not found op should be reported as a failure") - - def test_it_should_only_try_downloading_requested_version_on_new_incarnation(self): + self.__assert_exit_code_successful(update_handler) + self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)]) + agent_msgs = [kwarg for _, kwarg in mock_telemetry.call_args_list if + kwarg['op'] in (WALAEventOperation.AgentUpgrade, WALAEventOperation.Download)] + # This will throw if corresponding message not found so not asserting on that + requested_version_found = next(kwarg for kwarg in agent_msgs if + "Goal state incarnation_1 is requesting a new agent version 5.2.1.0, will update the agent before processing the goal state" in kwarg['message']) + self.assertTrue(requested_version_found['is_success'], + "The requested version 
found op should be reported as a success") + + skipping_update = next(kwarg for kwarg in agent_msgs if + "No matching package found in the agent manifest for requested version: 5.2.1.0 in goal state incarnation: incarnation_1, skipping agent update" in kwarg['message']) + self.assertEqual(skipping_update['version'], str(CURRENT_VERSION), + "The not found message should be reported from current agent version") + self.assertFalse(skipping_update['is_success'], "The not found op should be reported as a failure") + + def test_it_should_try_downloading_requested_version_on_new_incarnation(self): no_of_iterations = 1000 # Set the test environment by adding 20 random agents to the agent directory @@ -2069,7 +1612,7 @@ def reload_conf(url, protocol): "goalstate"] >= 10 and mock_wire_data.call_counts["goalstate"] < 15: # Ensure we didn't try to download any agents except during the incarnation change - self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)]) + self.__assert_agent_directories_available(versions=[str(CURRENT_VERSION)]) # Update the requested version to "99999.0.0.0" update_handler._protocol.mock_wire_data.set_extension_config_requested_version("99999.0.0.0") @@ -2083,23 +1626,21 @@ def reload_conf(url, protocol): data_file = mockwiredata.DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" - with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf, - normal_frequency=0.01, hotfix_frequency=0.01) as (update_handler, mock_telemetry): - with patch.object(conf, "get_enable_ga_versioning", return_value=True): - update_handler._protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) - update_handler._protocol.mock_wire_data.set_incarnation(2) - update_handler.run(debug=True) + with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf) as (update_handler, mock_telemetry): + 
update_handler._protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) + update_handler._protocol.mock_wire_data.set_incarnation(2) + update_handler.run(debug=True) self.assertGreaterEqual(reload_conf.call_count, 1, "Reload conf not updated as expected") self.__assert_exit_code_successful(update_handler) - self.__assert_upgrade_telemetry_emitted_for_requested_version(mock_telemetry) + self.__assert_upgrade_telemetry_emitted(mock_telemetry, version="99999.0.0.0") self.__assert_agent_directories_exist_and_others_dont_exist(versions=["99999.0.0.0", str(CURRENT_VERSION)]) self.assertEqual(update_handler._protocol.mock_wire_data.call_counts['agentArtifact'], 1, "only 1 agent should've been downloaded - 1 per incarnation") - self.assertEqual(update_handler._protocol.mock_wire_data.call_counts["manifest_of_ga.xml"], 1, + self.assertGreaterEqual(update_handler._protocol.mock_wire_data.call_counts["manifest_of_ga.xml"], 1, "only 1 agent manifest call should've been made - 1 per incarnation") - def test_it_should_fallback_to_old_update_logic_if_requested_version_not_available(self): + def test_it_should_update_to_largest_version_if_requested_version_not_available(self): no_of_iterations = 100 # Set the test environment by adding 20 random agents to the agent directory @@ -2115,7 +1656,7 @@ def reload_conf(url, protocol): reload_conf.call_count += 1 # By this point, the GS with requested version should've been executed. 
Verify that - self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)]) + self.__assert_agent_directories_available(versions=[str(CURRENT_VERSION)]) # Update the ext-conf and incarnation and remove requested versions from GS, # this should download all versions requested in config @@ -2130,20 +1671,96 @@ def reload_conf(url, protocol): data_file = mockwiredata.DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf) as (update_handler, mock_telemetry): + update_handler._protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) + update_handler._protocol.mock_wire_data.set_incarnation(2) + update_handler.run(debug=True) + + self.assertGreater(reload_conf.call_count, 0, "Reload conf not updated") + self.__assert_exit_code_successful(update_handler) + self.__assert_upgrade_telemetry_emitted(mock_telemetry, version="99999.0.0.0") + self.__assert_agent_directories_exist_and_others_dont_exist(versions=["99999.0.0.0", str(CURRENT_VERSION)]) + + def test_it_should_not_update_largest_version_if_time_window_not_elapsed(self): + no_of_iterations = 20 + + # Set the test environment by adding 20 random agents to the agent directory + self.prepare_agents() + self.assertEqual(20, self.agent_count(), "Agent directories not set properly") + + def reload_conf(url, protocol): + mock_wire_data = protocol.mock_wire_data + + # This function reloads the conf mid-run to mimic an actual customer scenario + if HttpRequestPredicates.is_goal_state_request(url) and mock_wire_data.call_counts[ + "goalstate"] >= 5: + reload_conf.call_count += 1 + + self.__assert_agent_directories_available(versions=[str(CURRENT_VERSION)]) + + # Update the ga_manifest and incarnation to send largest version manifest + mock_wire_data.data_files["ga_manifest"] = "wire/ga_manifest.xml" + mock_wire_data.reload() + 
self._add_write_permission_to_goal_state_files() + reload_conf.incarnation += 1 + mock_wire_data.set_incarnation(reload_conf.incarnation) + + reload_conf.call_count = 0 + reload_conf.incarnation = 2 + + data_file = mockwiredata.DATA_FILE.copy() + # This is to fail the agent update at first attempt so that agent doesn't go through update + data_file["ga_manifest"] = "wire/ga_manifest_no_uris.xml" with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf, - normal_frequency=0.001) as (update_handler, mock_telemetry): - with patch.object(conf, "get_enable_ga_versioning", return_value=True): - update_handler._protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) - update_handler._protocol.mock_wire_data.set_incarnation(2) - update_handler.run(debug=True) + hotfix_frequency=10, normal_frequency=10) as (update_handler, _): + update_handler._protocol.mock_wire_data.set_incarnation(2) + update_handler.run(debug=True) self.assertGreater(reload_conf.call_count, 0, "Reload conf not updated") self.__assert_exit_code_successful(update_handler) - self.__assert_upgrade_telemetry_emitted(mock_telemetry) - self.__assert_agent_directories_exist_and_others_dont_exist( - versions=["1.0.0", "1.1.0", "1.2.0", "2.0.0", "2.1.0", "9.9.9.10", "99999.0.0.0", str(CURRENT_VERSION)]) + self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), + "New agent directory should not be found") + + def test_it_should_update_largest_version_if_time_window_elapsed(self): + no_of_iterations = 20 + + # Set the test environment by adding 20 random agents to the agent directory + self.prepare_agents() + self.assertEqual(20, self.agent_count(), "Agent directories not set properly") + + def reload_conf(url, protocol): + mock_wire_data = protocol.mock_wire_data + + # This function reloads the conf mid-run to mimic an actual customer scenario + if HttpRequestPredicates.is_goal_state_request(url) and mock_wire_data.call_counts[ + 
"goalstate"] >= 5: + reload_conf.call_count += 1 - def test_it_should_not_download_anything_if_requested_version_is_current_version_and_delete_all_agents(self): + self.__assert_agent_directories_available(versions=[str(CURRENT_VERSION)]) + + # Update the ga_manifest and incarnation to send largest version manifest + mock_wire_data.data_files["ga_manifest"] = "wire/ga_manifest.xml" + mock_wire_data.reload() + self._add_write_permission_to_goal_state_files() + reload_conf.incarnation += 1 + mock_wire_data.set_incarnation(reload_conf.incarnation) + + reload_conf.call_count = 0 + reload_conf.incarnation = 2 + + data_file = mockwiredata.DATA_FILE.copy() + data_file["ga_manifest"] = "wire/ga_manifest_no_uris.xml" + with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf, + hotfix_frequency=0.001, normal_frequency=0.001) as (update_handler, mock_telemetry): + update_handler._protocol.mock_wire_data.set_incarnation(2) + update_handler.run(debug=True) + + self.assertGreater(reload_conf.call_count, 0, "Reload conf not updated") + self.__assert_exit_code_successful(update_handler) + self.__assert_upgrade_telemetry_emitted(mock_telemetry, version="99999.0.0.0") + self.__assert_agent_directories_exist_and_others_dont_exist(versions=["99999.0.0.0", str(CURRENT_VERSION)]) + + def test_it_should_not_download_anything_if_requested_version_is_current_version(self): data_file = mockwiredata.DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" @@ -2151,15 +1768,14 @@ def test_it_should_not_download_anything_if_requested_version_is_current_version self.prepare_agents() self.assertEqual(20, self.agent_count(), "Agent directories not set properly") - with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): - with patch.object(conf, "get_enable_ga_versioning", return_value=True): - update_handler._protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) - 
update_handler._protocol.mock_wire_data.set_incarnation(2) - update_handler.run(debug=True) + with self.__get_update_handler(test_data=data_file) as (update_handler, _): + update_handler._protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) + update_handler._protocol.mock_wire_data.set_incarnation(2) + update_handler.run(debug=True) self.__assert_exit_code_successful(update_handler) - self.__assert_no_agent_upgrade_telemetry(mock_telemetry) - self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)]) + self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), + "New agent directory should not be found") def test_it_should_skip_wait_to_update_if_requested_version_available(self): no_of_iterations = 100 @@ -2185,18 +1801,18 @@ def reload_conf(url, protocol): data_file = mockwiredata.DATA_FILE.copy() data_file['ga_manifest'] = "wire/ga_manifest_no_upgrade.xml" - with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf, - normal_frequency=10, hotfix_frequency=10) as (update_handler, mock_telemetry): - with patch.object(conf, "get_enable_ga_versioning", return_value=True): - update_handler.run(debug=True) + with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf) as (update_handler, mock_telemetry): + update_handler._protocol.mock_wire_data.set_ga_manifest_version_version(str(CURRENT_VERSION)) + update_handler._protocol.mock_wire_data.set_incarnation(20) + update_handler.run(debug=True) self.assertGreater(reload_conf.call_count, 0, "Reload conf not updated") self.assertLess(update_handler.get_iterations(), no_of_iterations, "The code should've exited as soon as requested version was found") self.__assert_exit_code_successful(update_handler) - self.__assert_upgrade_telemetry_emitted_for_requested_version(mock_telemetry, version="9.9.9.10") + self.__assert_upgrade_telemetry_emitted(mock_telemetry, 
version="9.9.9.10") - def test_it_should_blacklist_current_agent_on_downgrade(self): + def test_it_should_mark_current_agent_as_bad_version_on_downgrade(self): # Create Agent directory for current agent self.prepare_agents(count=1) self.assertTrue(os.path.exists(self.agent_dir(CURRENT_VERSION))) @@ -2207,53 +1823,19 @@ def test_it_should_blacklist_current_agent_on_downgrade(self): data_file = mockwiredata.DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): - with patch.object(conf, "get_enable_ga_versioning", return_value=True): - update_handler._protocol.mock_wire_data.set_extension_config_requested_version(downgraded_version) - update_handler._protocol.mock_wire_data.set_incarnation(2) - try: - set_daemon_version("1.0.0.0") - update_handler.run(debug=True) - finally: - os.environ.pop(DAEMON_VERSION_ENV_VARIABLE) + update_handler._protocol.mock_wire_data.set_extension_config_requested_version(downgraded_version) + update_handler._protocol.mock_wire_data.set_incarnation(2) + update_handler.run(debug=True) self.__assert_exit_code_successful(update_handler) - self.__assert_upgrade_telemetry_emitted_for_requested_version(mock_telemetry, upgrade=False, + self.__assert_upgrade_telemetry_emitted(mock_telemetry, upgrade=False, version=downgraded_version) current_agent = next(agent for agent in self.agents() if agent.version == CURRENT_VERSION) self.assertTrue(current_agent.is_blacklisted, "The current agent should be blacklisted") - self.assertEqual(current_agent.error.reason, "Blacklisting the agent {0} since a downgrade was requested in the GoalState, " + self.assertEqual(current_agent.error.reason, "Marking the agent {0} as bad version since a downgrade was requested in the GoalState, " "suggesting that we really don't want to execute any extensions using this version".format(CURRENT_VERSION), "Invalid reason specified for blacklisting agent") - - def 
test_it_should_not_downgrade_below_daemon_version(self): - data_file = mockwiredata.DATA_FILE.copy() - data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" - with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): - with patch.object(conf, "get_enable_ga_versioning", return_value=True): - update_handler._protocol.mock_wire_data.set_extension_config_requested_version("1.0.0.0") - update_handler._protocol.mock_wire_data.set_incarnation(2) - - try: - set_daemon_version("1.2.3.4") - update_handler.run(debug=True) - finally: - os.environ.pop(DAEMON_VERSION_ENV_VARIABLE) - - self.__assert_exit_code_successful(update_handler) - upgrade_msgs = [kwarg for _, kwarg in mock_telemetry.call_args_list if - kwarg['op'] == WALAEventOperation.AgentUpgrade] - # This will throw if corresponding message not found so not asserting on that - requested_version_found = next(kwarg for kwarg in upgrade_msgs if - "Found requested version in manifest: 1.0.0.0 for goal state incarnation_2" in kwarg[ - 'message']) - self.assertTrue(requested_version_found['is_success'], - "The requested version found op should be reported as a success") - - skipping_update = next(kwarg for kwarg in upgrade_msgs if - "Can't process the upgrade as the requested version: 1.0.0.0 is < current daemon version: 1.2.3.4" in - kwarg['message']) - self.assertFalse(skipping_update['is_success'], "Failed Event should be reported as a failure") - self.__assert_ga_version_in_status(update_handler._protocol.aggregate_status) + self.__assert_agent_directories_exist_and_others_dont_exist(versions=[downgraded_version, str(CURRENT_VERSION)]) @patch('azurelinuxagent.ga.update.get_collect_telemetry_events_handler') @@ -2287,12 +1869,13 @@ def iterator(*_, **__): mock_is_running.__get__ = Mock(side_effect=iterator) with patch('azurelinuxagent.ga.exthandlers.get_exthandlers_handler'): with patch('azurelinuxagent.ga.remoteaccess.get_remote_access_handler'): - with 
patch('azurelinuxagent.ga.update.initialize_event_logger_vminfo_common_parameters'): - with patch('azurelinuxagent.common.cgroupapi.CGroupsApi.cgroups_supported', return_value=False): # skip all cgroup stuff - with patch('azurelinuxagent.ga.update.is_log_collection_allowed', return_value=True): - with patch('time.sleep'): - with patch('sys.exit'): - self.update_handler.run() + with patch('azurelinuxagent.ga.agent_update_handler.get_agent_update_handler'): + with patch('azurelinuxagent.ga.update.initialize_event_logger_vminfo_common_parameters'): + with patch('azurelinuxagent.common.cgroupapi.CGroupsApi.cgroups_supported', return_value=False): # skip all cgroup stuff + with patch('azurelinuxagent.ga.update.is_log_collection_allowed', return_value=True): + with patch('time.sleep'): + with patch('sys.exit'): + self.update_handler.run() def _setup_mock_thread_and_start_test_run(self, mock_thread, is_alive=True, invocations=0): thread = MagicMock() @@ -2622,34 +2205,41 @@ def test_it_should_process_goal_state_only_on_new_goal_state(self): update_handler = _create_update_handler() remote_access_handler = Mock() remote_access_handler.run = Mock() + agent_update_handler = Mock() + agent_update_handler.run = Mock() # process a goal state - update_handler._process_goal_state(exthandlers_handler, remote_access_handler) + update_handler._process_goal_state(exthandlers_handler, remote_access_handler, agent_update_handler) self.assertEqual(1, exthandlers_handler.run.call_count, "exthandlers_handler.run() should have been called on the first goal state") self.assertEqual(1, exthandlers_handler.report_ext_handlers_status.call_count, "exthandlers_handler.report_ext_handlers_status() should have been called on the first goal state") self.assertEqual(1, remote_access_handler.run.call_count, "remote_access_handler.run() should have been called on the first goal state") + self.assertEqual(1, agent_update_handler.run.call_count, "agent_update_handler.run() should have been called on the 
first goal state") # process the same goal state - update_handler._process_goal_state(exthandlers_handler, remote_access_handler) + update_handler._process_goal_state(exthandlers_handler, remote_access_handler, agent_update_handler) self.assertEqual(1, exthandlers_handler.run.call_count, "exthandlers_handler.run() should have not been called on the same goal state") self.assertEqual(2, exthandlers_handler.report_ext_handlers_status.call_count, "exthandlers_handler.report_ext_handlers_status() should have been called on the same goal state") self.assertEqual(1, remote_access_handler.run.call_count, "remote_access_handler.run() should not have been called on the same goal state") + self.assertEqual(2, agent_update_handler.run.call_count, "agent_update_handler.run() should have been called on the same goal state") # process a new goal state exthandlers_handler.protocol.mock_wire_data.set_incarnation(999) exthandlers_handler.protocol.client.update_goal_state() - update_handler._process_goal_state(exthandlers_handler, remote_access_handler) + update_handler._process_goal_state(exthandlers_handler, remote_access_handler, agent_update_handler) self.assertEqual(2, exthandlers_handler.run.call_count, "exthandlers_handler.run() should have been called on a new goal state") self.assertEqual(3, exthandlers_handler.report_ext_handlers_status.call_count, "exthandlers_handler.report_ext_handlers_status() should have been called on a new goal state") self.assertEqual(2, remote_access_handler.run.call_count, "remote_access_handler.run() should have been called on a new goal state") + self.assertEqual(3, agent_update_handler.run.call_count, "agent_update_handler.run() should have been called on the new goal state") def test_it_should_write_the_agent_status_to_the_history_folder(self): with _mock_exthandlers_handler() as exthandlers_handler: update_handler = _create_update_handler() remote_access_handler = Mock() remote_access_handler.run = Mock() + agent_update_handler = Mock() + 
agent_update_handler.run = Mock() - update_handler._process_goal_state(exthandlers_handler, remote_access_handler) + update_handler._process_goal_state(exthandlers_handler, remote_access_handler, agent_update_handler) incarnation = exthandlers_handler.protocol.get_goal_state().incarnation matches = glob.glob(os.path.join(conf.get_lib_dir(), ARCHIVE_DIRECTORY_NAME, "*_{0}".format(incarnation))) @@ -2850,16 +2440,17 @@ def test_update_handler_should_use_the_initial_goal_state_period_until_the_goal_ with patch('azurelinuxagent.common.conf.get_goal_state_period', return_value=goal_state_period): with _mock_exthandlers_handler([ExtensionStatusValue.transitioning, ExtensionStatusValue.success]) as exthandlers_handler: remote_access_handler = Mock() + agent_update_handler = Mock() update_handler = _create_update_handler() self.assertEqual(initial_goal_state_period, update_handler._goal_state_period, "Expected the initial goal state period") # the extension is transisioning, so we should still be using the initial goal state period - update_handler._process_goal_state(exthandlers_handler, remote_access_handler) + update_handler._process_goal_state(exthandlers_handler, remote_access_handler, agent_update_handler) self.assertEqual(initial_goal_state_period, update_handler._goal_state_period, "Expected the initial goal state period when the extension is transitioning") # the goal state converged (the extension succeeded), so we should switch to the regular goal state period - update_handler._process_goal_state(exthandlers_handler, remote_access_handler) + update_handler._process_goal_state(exthandlers_handler, remote_access_handler, agent_update_handler) self.assertEqual(goal_state_period, update_handler._goal_state_period, "Expected the regular goal state period after the goal state converged") def test_update_handler_should_switch_to_the_regular_goal_state_period_when_the_goal_state_does_not_converges(self): @@ -2868,17 +2459,18 @@ def 
test_update_handler_should_switch_to_the_regular_goal_state_period_when_the_ with patch('azurelinuxagent.common.conf.get_goal_state_period', return_value=goal_state_period): with _mock_exthandlers_handler([ExtensionStatusValue.transitioning, ExtensionStatusValue.transitioning]) as exthandlers_handler: remote_access_handler = Mock() + agent_update_handler = Mock() update_handler = _create_update_handler() self.assertEqual(initial_goal_state_period, update_handler._goal_state_period, "Expected the initial goal state period") # the extension is transisioning, so we should still be using the initial goal state period - update_handler._process_goal_state(exthandlers_handler, remote_access_handler) + update_handler._process_goal_state(exthandlers_handler, remote_access_handler, agent_update_handler) self.assertEqual(initial_goal_state_period, update_handler._goal_state_period, "Expected the initial goal state period when the extension is transitioning") # a new goal state arrives before the current goal state converged (the extension is transitioning), so we should switch to the regular goal state period exthandlers_handler.protocol.mock_wire_data.set_incarnation(100) - update_handler._process_goal_state(exthandlers_handler, remote_access_handler) + update_handler._process_goal_state(exthandlers_handler, remote_access_handler, agent_update_handler) self.assertEqual(goal_state_period, update_handler._goal_state_period, "Expected the regular goal state period when the goal state does not converge") diff --git a/tests/protocol/mockwiredata.py b/tests/protocol/mockwiredata.py index 196ed32db8..c3beabf566 100644 --- a/tests/protocol/mockwiredata.py +++ b/tests/protocol/mockwiredata.py @@ -460,5 +460,11 @@ def set_manifest_version(self, version): def set_extension_config(self, ext_conf_file): self.ext_conf = load_data(ext_conf_file) + def set_ga_manifest(self, ga_manifest): + self.ga_manifest = load_data(ga_manifest) + def set_extension_config_requested_version(self, version): 
self.ext_conf = WireProtocolData.replace_xml_element_value(self.ext_conf, "Version", version) + + def set_ga_manifest_version_version(self, version): + self.ga_manifest = WireProtocolData.replace_xml_element_value(self.ga_manifest, "Version", version) diff --git a/tests/protocol/test_wire.py b/tests/protocol/test_wire.py index 2a36fc2913..cc73b0fb45 100644 --- a/tests/protocol/test_wire.py +++ b/tests/protocol/test_wire.py @@ -360,41 +360,60 @@ def mock_http_put(url, *args, **__): exthandlers_handler = get_exthandlers_handler(protocol) with patch("azurelinuxagent.common.agent_supported_feature._MultiConfigFeature.is_supported", True): - exthandlers_handler.run() - exthandlers_handler.report_ext_handlers_status() - - self.assertIsNotNone(protocol.aggregate_status, "Aggregate status should not be None") - self.assertIn("supportedFeatures", protocol.aggregate_status, "supported features not reported") - multi_config_feature = get_supported_feature_by_name(SupportedFeatureNames.MultiConfig) - found = False - for feature in protocol.aggregate_status['supportedFeatures']: - if feature['Key'] == multi_config_feature.name and feature['Value'] == multi_config_feature.version: - found = True - break - self.assertTrue(found, "Multi-config name should be present in supportedFeatures") + with patch("azurelinuxagent.common.agent_supported_feature._GAVersioningGovernanceFeature.is_supported", True): + exthandlers_handler.run() + exthandlers_handler.report_ext_handlers_status() + + self.assertIsNotNone(protocol.aggregate_status, "Aggregate status should not be None") + self.assertIn("supportedFeatures", protocol.aggregate_status, "supported features not reported") + multi_config_feature = get_supported_feature_by_name(SupportedFeatureNames.MultiConfig) + found = False + for feature in protocol.aggregate_status['supportedFeatures']: + if feature['Key'] == multi_config_feature.name and feature['Value'] == multi_config_feature.version: + found = True + break + self.assertTrue(found, 
"Multi-config name should be present in supportedFeatures") + + ga_versioning_feature = get_supported_feature_by_name(SupportedFeatureNames.GAVersioningGovernance) + found = False + for feature in protocol.aggregate_status['supportedFeatures']: + if feature['Key'] == ga_versioning_feature.name and feature['Value'] == ga_versioning_feature.version: + found = True + break + self.assertTrue(found, "ga versioning name should be present in supportedFeatures") # Feature should not be reported if not present with patch("azurelinuxagent.common.agent_supported_feature._MultiConfigFeature.is_supported", False): - exthandlers_handler.run() - exthandlers_handler.report_ext_handlers_status() - - self.assertIsNotNone(protocol.aggregate_status, "Aggregate status should not be None") - if "supportedFeatures" not in protocol.aggregate_status: - # In the case Multi-config was the only feature available, 'supportedFeatures' should not be - # reported in the status blob as its not supported as of now. - # Asserting no other feature was available to report back to crp - self.assertEqual(0, len(get_agent_supported_features_list_for_crp()), - "supportedFeatures should be available if there are more features") - return - - # If there are other features available, confirm MultiConfig was not reported - multi_config_feature = get_supported_feature_by_name(SupportedFeatureNames.MultiConfig) - found = False - for feature in protocol.aggregate_status['supportedFeatures']: - if feature['Key'] == multi_config_feature.name and feature['Value'] == multi_config_feature.version: - found = True - break - self.assertFalse(found, "Multi-config name should be present in supportedFeatures") + with patch("azurelinuxagent.common.agent_supported_feature._GAVersioningGovernanceFeature.is_supported", False): + + exthandlers_handler.run() + exthandlers_handler.report_ext_handlers_status() + + self.assertIsNotNone(protocol.aggregate_status, "Aggregate status should not be None") + if "supportedFeatures" not in 
protocol.aggregate_status: + # In the case Multi-config and GA Versioning only features available, 'supportedFeatures' should not be + # reported in the status blob as its not supported as of now. + # Asserting no other feature was available to report back to crp + self.assertEqual(0, len(get_agent_supported_features_list_for_crp()), + "supportedFeatures should be available if there are more features") + return + + # If there are other features available, confirm MultiConfig and GA versioning was not reported + multi_config_feature = get_supported_feature_by_name(SupportedFeatureNames.MultiConfig) + found = False + for feature in protocol.aggregate_status['supportedFeatures']: + if feature['Key'] == multi_config_feature.name and feature['Value'] == multi_config_feature.version: + found = True + break + self.assertFalse(found, "Multi-config name should not be present in supportedFeatures") + + ga_versioning_feature = get_supported_feature_by_name(SupportedFeatureNames.GAVersioningGovernance) + found = False + for feature in protocol.aggregate_status['supportedFeatures']: + if feature['Key'] == ga_versioning_feature.name and feature['Value'] == ga_versioning_feature.version: + found = True + break + self.assertFalse(found, "ga versioning name should not be present in supportedFeatures") @patch("azurelinuxagent.common.utils.restutil.http_request") def test_send_encoded_event(self, mock_http_request, *args): @@ -665,7 +684,7 @@ def http_get_handler(url, *_, **__): with mock_wire_protocol(mockwiredata.DATA_FILE, http_get_handler=http_get_handler) as protocol: HostPluginProtocol.is_default_channel = False - manifest = protocol.client.fetch_manifest([manifest_url], use_verify_header=False) + manifest = protocol.client.fetch_manifest("test", [manifest_url], use_verify_header=False) urls = protocol.get_tracked_urls() self.assertEqual(manifest, manifest_xml, 'The expected manifest was not downloaded') @@ -688,7 +707,7 @@ def http_get_handler(url, *_, **kwargs): 
HostPluginProtocol.is_default_channel = False try: - manifest = protocol.client.fetch_manifest([manifest_url], use_verify_header=False) + manifest = protocol.client.fetch_manifest("test", [manifest_url], use_verify_header=False) urls = protocol.get_tracked_urls() self.assertEqual(manifest, manifest_xml, 'The expected manifest was not downloaded') @@ -725,7 +744,7 @@ def http_get_handler(url, *_, **kwargs): protocol.client.get_host_plugin() protocol.set_http_handlers(http_get_handler=http_get_handler) - manifest = protocol.client.fetch_manifest([manifest_url], use_verify_header=False) + manifest = protocol.client.fetch_manifest("test", [manifest_url], use_verify_header=False) urls = protocol.get_tracked_urls() self.assertEqual(manifest, manifest_xml) @@ -759,7 +778,7 @@ def http_get_handler(url, *_, **kwargs): protocol.set_http_handlers(http_get_handler=http_get_handler) with self.assertRaises(ExtensionDownloadError): - protocol.client.fetch_manifest([manifest_url], use_verify_header=False) + protocol.client.fetch_manifest("test", [manifest_url], use_verify_header=False) urls = protocol.get_tracked_urls() self.assertEqual(len(urls), 4, "Unexpected number of HTTP requests: [{0}]".format(urls)) diff --git a/tests/test_agent.py b/tests/test_agent.py index f0f773f059..f5e91405a6 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -51,6 +51,7 @@ Extensions.Enabled = True Extensions.GoalStatePeriod = 6 Extensions.InitialGoalStatePeriod = 6 +GAUpdates.Enabled = True HttpProxy.Host = None HttpProxy.Port = None Lib.Dir = /var/lib/waagent diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index 3cd91ba4cb..caa499b341 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -320,8 +320,8 @@ def _setup_node(self, install_test_agent: bool) -> None: command = "cd {0} ; tar cvf {1} --transform='s,^,bin/,' 
*".format(self.context.test_source_directory/"orchestrator"/"scripts", str(tarball_path)) log.info("%s\n%s", command, run_command(command, shell=True)) log.info("Adding tests/scripts") - # command = "cd {0} ; tar cvf {1} --transform='s,^,bin/,' *".format(self.context.test_source_directory/"tests"/"scripts", str(tarball_path)) - # log.info("%s\n%s", command, run_command(command, shell=True)) + command = "cd {0} ; tar rvf {1} --transform='s,^,bin/,' *".format(self.context.test_source_directory/"tests"/"scripts", str(tarball_path)) + log.info("%s\n%s", command, run_command(command, shell=True)) log.info("Adding tests/lib") command = "cd {0} ; tar rvf {1} --transform='s,^,lib/,' --exclude=__pycache__ tests_e2e/tests/lib".format(self.context.test_source_directory.parent, str(tarball_path)) log.info("%s\n%s", command, run_command(command, shell=True)) diff --git a/tests_e2e/orchestrator/scripts/install-agent b/tests_e2e/orchestrator/scripts/install-agent index 4b0c8f2497..b494ac8e28 100755 --- a/tests_e2e/orchestrator/scripts/install-agent +++ b/tests_e2e/orchestrator/scripts/install-agent @@ -73,6 +73,7 @@ fi # # Output the initial version of the agent + # python=$(get-agent-python) waagent=$(get-agent-bin-path) @@ -107,22 +108,25 @@ echo "========== Installing Agent ==========" echo "Installing $package as version $version..." unzip.py "$package" "/var/lib/waagent/WALinuxAgent-$version" -# Ensure that AutoUpdate is enabled. some distros, e.g. Flatcar, don't have a waagent.conf -# but AutoUpdate defaults to True so there is no need to do anything in that case. -if [[ -e /etc/waagent.conf ]]; then - sed -i 's/AutoUpdate.Enabled=n/AutoUpdate.Enabled=y/g' /etc/waagent.conf -fi +python=$(get-agent-python) +# Ensure that AutoUpdate is enabled. some distros, e.g. 
Flatcar have a waagent.conf in different path +waagent_conf_path=$($python -c 'from azurelinuxagent.common.osutil import get_osutil; osutil=get_osutil(); print(osutil.agent_conf_file_path)') +echo "Agent's conf path: $waagent_conf_path" +sed -i 's/AutoUpdate.Enabled=n/AutoUpdate.Enabled=y/g' "$waagent_conf_path" +# By default GAUpdates flag set to True, so that agent go through update logic to look for new agents. +# But in e2e tests this flag needs to be off in test version 9.9.9.9 to stop the agent updates, so that our scenarios run on 9.9.9.9. +sed -i '$a GAUpdates.Enabled=n' "$waagent_conf_path" # # Restart the service # echo "Restarting service..." -service-stop $service_name +agent-service stop # Rename the previous log to ensure the new log starts with the agent we just installed mv /var/log/waagent.log /var/log/waagent."$(date --iso-8601=seconds)".log -service-start $service_name +agent-service start # # Verify that the new agent is running and output its status. @@ -134,7 +138,7 @@ check-version() { # We need to wait for the extension handler to start, give it a couple of minutes for i in {1..12} do - if $python "$waagent" --version | grep -E "Goal state agent:\s+$version" > /dev/null; then + if waagent-version | grep -E "Goal state agent:\s+$version" > /dev/null; then return 0 fi sleep 10 @@ -157,6 +161,6 @@ printf "\n" echo "========== Final Status ==========" $python "$waagent" --version printf "\n" -service-status $service_name +agent-service status exit $exit_code diff --git a/tests_e2e/orchestrator/scripts/waagent-version b/tests_e2e/orchestrator/scripts/waagent-version new file mode 100755 index 0000000000..842ae91d29 --- /dev/null +++ b/tests_e2e/orchestrator/scripts/waagent-version @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# returns the version of the agent +# +set -euo pipefail + +python=$(get-agent-python) +waagent=$(get-agent-bin-path) +$python "$waagent" --version \ No newline at end of file diff --git a/tests_e2e/pipeline/pipeline.yml b/tests_e2e/pipeline/pipeline.yml index 9a1cd0e4e7..21d36d0b33 100644 --- a/tests_e2e/pipeline/pipeline.yml +++ b/tests_e2e/pipeline/pipeline.yml @@ -9,8 +9,7 @@ parameters: - name: test_suites displayName: Test Suites type: string - default: agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned - + default: agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, agent_update # NOTES: # * 'image', 'location' and 'vm_size' override any values in the test suites/images definition # files. 
Those parameters are useful for 1-off tests, like testing a VHD or checking if diff --git a/tests_e2e/test_suites/agent_update.yml b/tests_e2e/test_suites/agent_update.yml new file mode 100644 index 0000000000..77a0144d57 --- /dev/null +++ b/tests_e2e/test_suites/agent_update.yml @@ -0,0 +1,6 @@ +name: "AgentUpdate" +tests: + - "agent_update/rsm_update.py" +images: "endorsed" +location: "eastus2euap" +owns_vm: true \ No newline at end of file diff --git a/tests_e2e/tests/agent_update/__init__.py b/tests_e2e/tests/agent_update/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests_e2e/tests/agent_update/rsm_update.py b/tests_e2e/tests/agent_update/rsm_update.py new file mode 100644 index 0000000000..cfa1a7d18b --- /dev/null +++ b/tests_e2e/tests/agent_update/rsm_update.py @@ -0,0 +1,198 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# BVT for the agent update scenario +# +# The test verifies agent update for rsm workflow. This test covers three scenarios downgrade, upgrade and no update. + # For each scenario, we initiate the rsm request with target version and then verify agent updated to that target version. 
+# +import json +from typing import List, Dict, Any + +import requests +from azure.identity import DefaultAzureCredential +from azure.mgmt.compute.models import VirtualMachine +from msrestazure.azure_cloud import Cloud + +from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test_context import AgentTestContext +from tests_e2e.tests.lib.azure_clouds import AZURE_CLOUDS +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.retry import retry_if_false +from tests_e2e.tests.lib.ssh_client import SshClient +from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient + + +class RsmUpdateBvt(AgentTest): + + def __init__(self, context: AgentTestContext): + super().__init__(context) + self._ssh_client = SshClient( + ip_address=self._context.vm_ip_address, + username=self._context.username, + private_key_file=self._context.private_key_file) + + def get_ignore_error_rules(self) -> List[Dict[str, Any]]: + ignore_rules = [ + # + # This is expected as we validate the downgrade scenario + # + # WARNING ExtHandler ExtHandler Agent WALinuxAgent-9.9.9.9 is permanently blacklisted + # + { + 'message': r"Agent WALinuxAgent-9.9.9.9 is permanently blacklisted" + } + + ] + return ignore_rules + + def run(self) -> None: + # Allow agent to send supported feature flag + self._verify_agent_reported_supported_feature_flag() + + log.info("*******Verifying the Agent Downgrade scenario*******") + stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) + log.info("Current agent version running on the vm is \n%s", stdout) + downgrade_version: str = "1.3.0.0" + log.info("Attempting downgrade version %s", downgrade_version) + self._request_rsm_update(downgrade_version) + self._check_rsm_gs(downgrade_version) + self._prepare_agent() + + # Verify downgrade scenario + self._verify_guest_agent_update(downgrade_version) + + # Verify upgrade scenario + log.info("*******Verifying the Agent Upgrade scenario*******") + 
stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) + log.info("Current agent version running on the vm is \n%s", stdout) + upgrade_version: str = "1.3.1.0" + log.info("Attempting upgrade version %s", upgrade_version) + self._request_rsm_update(upgrade_version) + self._check_rsm_gs(upgrade_version) + self._verify_guest_agent_update(upgrade_version) + + # verify no version update. There is bug in CRP and will enable once it's fixed + log.info("*******Verifying the no version update scenario*******") + stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) + log.info("Current agent version running on the vm is \n%s", stdout) + version: str = "1.3.1.0" + log.info("Attempting update version same as current version %s", upgrade_version) + self._request_rsm_update(version) + self._verify_guest_agent_update(version) + + def _check_rsm_gs(self, requested_version: str) -> None: + # This checks if RSM GS available to the agent after we mock the rsm update request + output = self._ssh_client.run_command(f"wait_for_rsm_goal_state.py --version {requested_version}", use_sudo=True) + log.info('Verifying requested version GS available to the agent \n%s', output) + + def _prepare_agent(self) -> None: + """ + This method is to ensure agent is ready for accepting rsm updates. As part of that we update following flags + 1) Changing daemon version since daemon has a hard check on agent version in order to update agent. It doesn't allow versions which are less than daemon version. + 2) Updating GAFamily type "Test" and GAUpdates flag to process agent updates on test versions. 
+ """ + output = self._ssh_client.run_command("modify-agent-version-config", use_sudo=True) + log.info('Updating agent update required config \n%s', output) + + @staticmethod + def _verify_agent_update_flag_enabled(vm: VirtualMachineClient) -> bool: + result: VirtualMachine = vm.get_description() + flag: bool = result.os_profile.linux_configuration.enable_vm_agent_platform_updates + if flag is None: + return False + return flag + + def _enable_agent_update_flag(self, vm: VirtualMachineClient) -> None: + osprofile = { + "location": self._context.vm.location, # location is required field + "properties": { + "osProfile": { + "linuxConfiguration": { + "enableVMAgentPlatformUpdates": True + } + } + } + } + vm.update(osprofile) + + def _request_rsm_update(self, requested_version: str) -> None: + """ + This method is to simulate the rsm request. + First we ensure the PlatformUpdates enabled in the vm and then make a request using rest api + """ + vm: VirtualMachineClient = VirtualMachineClient(self._context.vm) + if not self._verify_agent_update_flag_enabled(vm): + # enable the flag + log.info("Attempting vm update to set the enableVMAgentPlatformUpdates flag") + self._enable_agent_update_flag(vm) + log.info("Set the enableVMAgentPlatformUpdates flag to True") + else: + log.info("Already enableVMAgentPlatformUpdates flag set to True") + + cloud: Cloud = AZURE_CLOUDS[self._context.vm.cloud] + credential: DefaultAzureCredential = DefaultAzureCredential(authority=cloud.endpoints.active_directory) + token = credential.get_token(cloud.endpoints.resource_manager + "/.default") + headers = {'Authorization': 'Bearer ' + token.token, 'Content-Type': 'application/json'} + # Later this api call will be replaced by azure-python-sdk wrapper + base_url = cloud.endpoints.resource_manager + url = base_url + "/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.Compute/virtualMachines/{2}/" \ + "UpgradeVMAgent?api-version=2022-08-01".format(self._context.vm.subscription, 
self._context.vm.resource_group, self._context.vm.name) + data = { + "target": "Microsoft.OSTCLinuxAgent.Test", + "targetVersion": requested_version + } + + response = requests.post(url, data=json.dumps(data), headers=headers) + if response.status_code == 202: + log.info("RSM upgrade request accepted") + else: + raise Exception("Error occurred while RSM upgrade request. Status code : {0} and msg: {1}".format(response.status_code, response.content)) + + def _verify_guest_agent_update(self, requested_version: str) -> None: + """ + Verify current agent version running on rsm requested version + """ + def _check_agent_version(requested_version: str) -> bool: + stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) + expected_version = f"Goal state agent: {requested_version}" + if expected_version in stdout: + return True + else: + raise Exception("Guest agent didn't update to requested version {0} but found \n {1}. \n " + "To debug verify if CRP has upgrade operation around that time and also check if agent log has any errors ".format(requested_version, stdout)) + + log.info("Verifying agent updated to requested version") + retry_if_false(lambda: _check_agent_version(requested_version)) + stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) + log.info(f"Verified agent updated to requested version. 
Current agent version running:\n {stdout}") + + def _verify_agent_reported_supported_feature_flag(self): + """ + RSM update rely on supported flag that agent sends to CRP.So, checking if GA reports feature flag from the agent log + """ + + log.info("Verifying agent reported supported feature flag") + self._ssh_client.run_command("verify_agent_supported_feature.py", use_sudo=True) + log.info("Agent reported VersioningGovernance supported feature flag") + + +if __name__ == "__main__": + RsmUpdateBvt.run_from_command_line() diff --git a/tests_e2e/tests/lib/retry.py b/tests_e2e/tests/lib/retry.py index e399efdda5..31ce94cb20 100644 --- a/tests_e2e/tests/lib/retry.py +++ b/tests_e2e/tests/lib/retry.py @@ -57,3 +57,23 @@ def retry_ssh_run(operation: Callable[[], Any]) -> Any: raise log.warning("The operation failed, retrying in 30 secs.\n%s", e) time.sleep(30) + + +def retry_if_false(operation: Callable[[], bool], attempts: int = 5, duration: int = 30) -> bool: + """ + This method attempts the given operation retrying a few times + (after a short delay) + Note: Method used for operations which are return True or False + """ + found: bool = False + while attempts > 0 and not found: + attempts -= 1 + try: + found = operation() + except Exception: + if attempts == 0: + raise + if not found: + log.info(f"Current execution didn't find it, retrying in {duration} secs.") + time.sleep(duration) + return found diff --git a/tests_e2e/tests/scripts/modify-agent-version-config b/tests_e2e/tests/scripts/modify-agent-version-config new file mode 100755 index 0000000000..f121e6f4b0 --- /dev/null +++ b/tests_e2e/tests/scripts/modify-agent-version-config @@ -0,0 +1,33 @@ +#!/usr/bin/env bash + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# This script to update necessary flags to make agent ready for rsm updates +# +set -euo pipefail + +PYTHON=$(get-agent-python) +echo "Agent's Python: $PYTHON" +# some distros return .pyc byte file instead source file .py. So, I retrieve parent directory first. +version_file_dir=$($PYTHON -c 'import azurelinuxagent.common.version as v; import os; print(os.path.dirname(v.__file__))') +version_file_full_path="$version_file_dir/version.py" +sed -E -i "s/AGENT_VERSION\s+=\s+'[0-9.]+'/AGENT_VERSION = '1.0.0.0'/" $version_file_full_path +waagent_conf_path=$($PYTHON -c 'from azurelinuxagent.common.osutil import get_osutil; osutil=get_osutil(); print(osutil.agent_conf_file_path)') +sed -i 's/GAUpdates.Enabled=n/GAUpdates.Enabled=y/g' "$waagent_conf_path" +sed -i '$a AutoUpdate.GAFamily=Test' "$waagent_conf_path" +echo "Restarting service..." +agent-service restart \ No newline at end of file diff --git a/tests_e2e/tests/scripts/verify_agent_supported_feature.py b/tests_e2e/tests/scripts/verify_agent_supported_feature.py new file mode 100755 index 0000000000..b6c1307e2d --- /dev/null +++ b/tests_e2e/tests/scripts/verify_agent_supported_feature.py @@ -0,0 +1,53 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Verify if the agent reported supportedfeature VersioningGovernance flag to CRP via status file +# +import glob +import json +import logging +import sys + +from tests_e2e.tests.lib.retry import retry_if_false + + +def check_agent_supports_versioning() -> bool: + agent_status_file = "/var/lib/waagent/history/*/waagent_status.json" + file_paths = glob.glob(agent_status_file, recursive=True) + for file in file_paths: + with open(file, 'r') as f: + data = json.load(f) + logging.info("Agent status file is %s and it's content %s", file, data) + status = data["__status__"] + supported_features = status["supportedFeatures"] + for supported_feature in supported_features: + if supported_feature["Key"] == "VersioningGovernance": + return True + return False + + +try: + found: bool = retry_if_false(check_agent_supports_versioning) + if not found: + raise Exception("Agent failed to report supported feature flag, so skipping agent update validations") + +except Exception as e: + print(f"{e}", file=sys.stderr) + sys.exit(1) + +sys.exit(0) diff --git a/tests_e2e/tests/scripts/wait_for_rsm_goal_state.py b/tests_e2e/tests/scripts/wait_for_rsm_goal_state.py new file mode 100755 index 0000000000..5905a5fe1d --- /dev/null +++ b/tests_e2e/tests/scripts/wait_for_rsm_goal_state.py @@ -0,0 +1,74 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Verify the latest goal state included rsm requested version and if not, retry +# +import argparse +import sys +import logging + +from azurelinuxagent.common.protocol.util import get_protocol_util +from azurelinuxagent.common.protocol.goal_state import GoalState, GoalStateProperties +from azurelinuxagent.common.protocol.wire import WireProtocol +from tests_e2e.tests.lib.retry import retry_if_false + + +def get_requested_version(gs: GoalState) -> str: + agent_families = gs.extensions_goal_state.agent_families + agent_family_manifests = [m for m in agent_families if m.name == "Test" and len(m.uris) > 0] + if len(agent_family_manifests) == 0: + raise Exception( + u"No manifest links found for agent family Test, skipping agent update verification") + manifest = agent_family_manifests[0] + if manifest.is_requested_version_specified and manifest.requested_version is not None: + return str(manifest.requested_version) + return "" + + +def verify_rsm_requested_version(wire_protocol: WireProtocol, expected_version: str) -> bool: + wire_protocol.client.update_goal_state() + goal_state = wire_protocol.client.get_goal_state() + requested_version = get_requested_version(goal_state) + if requested_version == expected_version: + return True + else: + return False + + +try: + parser = argparse.ArgumentParser() + parser.add_argument('-v', '--version', required=True) + args = parser.parse_args() + + protocol = get_protocol_util().get_protocol(init_goal_state=False) + protocol.client.reset_goal_state( + goal_state_properties=GoalStateProperties.ExtensionsGoalState) + + 
found: bool = retry_if_false(lambda: verify_rsm_requested_version(protocol, args.version)) + + if not found: + raise Exception("Latest GS does not include rsm requested version : {0}.".format(args.version)) + else: + logging.info("Latest GS includes rsm requested version : %s", args.version) + + +except Exception as e: + print(f"{e}", file=sys.stderr) + sys.exit(1) + +sys.exit(0) From d34fe4e2ca7abae133f1a4bb5ed5c6e981cb60d3 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 14 Jun 2023 13:20:31 -0700 Subject: [PATCH 022/240] Add test for FIPS (#2842) * Add test for FIPS * add test * increase sleep * remove unused file * added comment * check uptime --------- Co-authored-by: narrieta --- tests_e2e/orchestrator/runbook.yml | 2 +- tests_e2e/pipeline/pipeline.yml | 2 +- tests_e2e/test_suites/agent_bvt.yml | 6 +- tests_e2e/test_suites/extensions_disabled.yml | 2 +- tests_e2e/test_suites/fips.yml | 10 +++ .../extension_operations.py | 4 +- .../tests/{bvts => agent_bvt}/run_command.py | 0 .../tests/{bvts => agent_bvt}/vm_access.py | 0 tests_e2e/tests/bvts/__init__.py | 0 .../extensions_disabled.py | 0 tests_e2e/tests/fips/fips.py | 88 +++++++++++++++++++ tests_e2e/tests/lib/retry.py | 14 ++- tests_e2e/tests/lib/ssh_client.py | 26 +++--- tests_e2e/tests/lib/virtual_machine_client.py | 61 +++++++++++-- 14 files changed, 184 insertions(+), 31 deletions(-) create mode 100644 tests_e2e/test_suites/fips.yml rename tests_e2e/tests/{bvts => agent_bvt}/extension_operations.py (98%) rename tests_e2e/tests/{bvts => agent_bvt}/run_command.py (100%) rename tests_e2e/tests/{bvts => agent_bvt}/vm_access.py (100%) delete mode 100644 tests_e2e/tests/bvts/__init__.py rename tests_e2e/tests/{ => extensions_disabled}/extensions_disabled.py (100%) create mode 100755 tests_e2e/tests/fips/fips.py diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index f0caabdac6..661472e8b6 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ 
b/tests_e2e/orchestrator/runbook.yml @@ -49,7 +49,7 @@ variable: # # The test suites to execute - name: test_suites - value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned" + value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips" - name: cloud value: "AzureCloud" is_case_visible: true diff --git a/tests_e2e/pipeline/pipeline.yml b/tests_e2e/pipeline/pipeline.yml index 21d36d0b33..d5d3eaf6d1 100644 --- a/tests_e2e/pipeline/pipeline.yml +++ b/tests_e2e/pipeline/pipeline.yml @@ -9,7 +9,7 @@ parameters: - name: test_suites displayName: Test Suites type: string - default: agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, agent_update + default: agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, agent_update, fips # NOTES: # * 'image', 'location' and 'vm_size' override any values in the test suites/images definition # files. Those parameters are useful for 1-off tests, like testing a VHD or checking if diff --git a/tests_e2e/test_suites/agent_bvt.yml b/tests_e2e/test_suites/agent_bvt.yml index 1f0f91405f..8c840670fd 100644 --- a/tests_e2e/test_suites/agent_bvt.yml +++ b/tests_e2e/test_suites/agent_bvt.yml @@ -1,8 +1,8 @@ name: "AgentBvt" tests: - - "bvts/extension_operations.py" - - "bvts/run_command.py" - - "bvts/vm_access.py" + - "agent_bvt/extension_operations.py" + - "agent_bvt/run_command.py" + - "agent_bvt/vm_access.py" images: - "endorsed" - "endorsed-arm64" diff --git a/tests_e2e/test_suites/extensions_disabled.yml b/tests_e2e/test_suites/extensions_disabled.yml index 3fbff2ebde..1e98dd9cc7 100644 --- a/tests_e2e/test_suites/extensions_disabled.yml +++ b/tests_e2e/test_suites/extensions_disabled.yml @@ -4,6 +4,6 @@ # name: "ExtensionsDisabled" tests: - - "extensions_disabled.py" + - "extensions_disabled/extensions_disabled.py" images: "random(endorsed)" owns_vm: true diff --git a/tests_e2e/test_suites/fips.yml 
b/tests_e2e/test_suites/fips.yml new file mode 100644 index 0000000000..785671d0c1 --- /dev/null +++ b/tests_e2e/test_suites/fips.yml @@ -0,0 +1,10 @@ +# +# FIPS should not affect extension processing. The test enables FIPS and then executes an extension. +# +# NOTE: Enabling FIPS is very specific to the distro. This test is only executed on RHEL 9.0. +# +name: "FIPS" +tests: + - source: "fips/fips.py" +images: "rhel_90" +owns_vm: true diff --git a/tests_e2e/tests/bvts/extension_operations.py b/tests_e2e/tests/agent_bvt/extension_operations.py similarity index 98% rename from tests_e2e/tests/bvts/extension_operations.py rename to tests_e2e/tests/agent_bvt/extension_operations.py index 0815728740..e5c607c1d1 100755 --- a/tests_e2e/tests/bvts/extension_operations.py +++ b/tests_e2e/tests/agent_bvt/extension_operations.py @@ -58,7 +58,7 @@ def run(self): log.info("Installing %s", custom_script_2_0) message = f"Hello {uuid.uuid4()}!" custom_script_2_0.enable( - settings={ + protected_settings={ 'commandToExecute': f"echo \'{message}\'" }, auto_upgrade_minor_version=False @@ -77,7 +77,7 @@ def run(self): message = f"Hello {uuid.uuid4()}!" 
custom_script_2_1.enable( - settings={ + protected_settings={ 'commandToExecute': f"echo \'{message}\'" } ) diff --git a/tests_e2e/tests/bvts/run_command.py b/tests_e2e/tests/agent_bvt/run_command.py similarity index 100% rename from tests_e2e/tests/bvts/run_command.py rename to tests_e2e/tests/agent_bvt/run_command.py diff --git a/tests_e2e/tests/bvts/vm_access.py b/tests_e2e/tests/agent_bvt/vm_access.py similarity index 100% rename from tests_e2e/tests/bvts/vm_access.py rename to tests_e2e/tests/agent_bvt/vm_access.py diff --git a/tests_e2e/tests/bvts/__init__.py b/tests_e2e/tests/bvts/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests_e2e/tests/extensions_disabled.py b/tests_e2e/tests/extensions_disabled/extensions_disabled.py similarity index 100% rename from tests_e2e/tests/extensions_disabled.py rename to tests_e2e/tests/extensions_disabled/extensions_disabled.py diff --git a/tests_e2e/tests/fips/fips.py b/tests_e2e/tests/fips/fips.py new file mode 100755 index 0000000000..f8c27b900b --- /dev/null +++ b/tests_e2e/tests/fips/fips.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import uuid +from assertpy import fail +from typing import Any, Dict, List + +from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.shell import CommandError +from tests_e2e.tests.lib.ssh_client import SshClient +from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient +from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient +from tests_e2e.tests.lib.identifiers import VmExtensionIds + + +class Fips(AgentTest): + """ + Enables FIPS on the test VM, which is a RHEL 9 VM (see https://access.redhat.com/solutions/137833#rhel9), then executes the CustomScript extension. + + TODO: Investigate whether extensions with protected settings are supported on FIPS-enabled systems. The Agent has issues handling the tenant + certificate on those systems (additional configuration on FIPS may be needed). + """ + def run(self): + ssh_client: SshClient = self._context.create_ssh_client() + + try: + command = "fips-mode-setup --enable" + log.info("Enabling FIPS on the test VM [%s]", command) + output = ssh_client.run_command(command, use_sudo=True) + log.info("Enable FIPS completed\n%s", output) + except CommandError as e: + raise Exception(f"Failed to enable FIPS: {e}") + + log.info("Restarting test VM") + vm: VirtualMachineClient = VirtualMachineClient(self._context.vm) + vm.restart(wait_for_boot=True, ssh_client=ssh_client) + + try: + command = "fips-mode-setup --check" + log.info("Verifying that FIPS is enabled [%s]", command) + output = ssh_client.run_command(command).rstrip() + if output != "FIPS mode is enabled.": + fail(f"FIPS i not enabled - '{command}' returned '{output}'") + log.info(output) + except CommandError as e: + raise Exception(f"Failed to verify that FIPS is enabled: {e}") + + custom_script = VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.CustomScript, resource_name="CustomScript") + + log.info("Installing %s", 
custom_script) + message = f"Hello {uuid.uuid4()}!" + custom_script.enable( + settings={ + 'commandToExecute': f"echo \'{message}\'" + }, + auto_upgrade_minor_version=False + ) + custom_script.assert_instance_view(expected_version="2.0", expected_message=message) + + def get_ignore_error_rules(self) -> List[Dict[str, Any]]: + """ + Some extensions added by policy on the test subscription use protected settings, which produce this error. + """ + return [ + {'message': r'Failed to decrypt /var/lib/waagent/Certificates.p7m'} + ] + + +if __name__ == "__main__": + Fips.run_from_command_line() + diff --git a/tests_e2e/tests/lib/retry.py b/tests_e2e/tests/lib/retry.py index 31ce94cb20..3366aecdc9 100644 --- a/tests_e2e/tests/lib/retry.py +++ b/tests_e2e/tests/lib/retry.py @@ -40,24 +40,22 @@ def execute_with_retry(operation: Callable[[], Any]) -> Any: time.sleep(30) -def retry_ssh_run(operation: Callable[[], Any]) -> Any: +def retry_ssh_run(operation: Callable[[], Any], attempts: int, attempt_delay: int) -> Any: """ This method attempts to retry ssh run command a few times if operation failed with connection time out """ - attempts = 3 - while attempts > 0: - attempts -= 1 + i = 1 + while i <= attempts: try: return operation() except Exception as e: # We raise CommandError on !=0 exit codes in the called method if isinstance(e, CommandError): # Instance of 'Exception' has no 'exit_code' member (no-member) - Disabled: e is actually an CommandError - if e.exit_code != 255 or attempts == 0: # pylint: disable=no-member + if e.exit_code != 255 or i == attempts: # pylint: disable=no-member raise - log.warning("The operation failed, retrying in 30 secs.\n%s", e) - time.sleep(30) - + log.warning("The SSH operation failed, retrying in %s secs [Attempt %s/%s].\n%s", e, attempt_delay, i, attempts) + time.sleep(attempt_delay) def retry_if_false(operation: Callable[[], bool], attempts: int = 5, duration: int = 30) -> bool: """ diff --git a/tests_e2e/tests/lib/ssh_client.py 
b/tests_e2e/tests/lib/ssh_client.py index fda9911d92..3e0d7269c3 100644 --- a/tests_e2e/tests/lib/ssh_client.py +++ b/tests_e2e/tests/lib/ssh_client.py @@ -23,6 +23,9 @@ from tests_e2e.tests.lib import shell from tests_e2e.tests.lib.retry import retry_ssh_run +ATTEMPTS: int = 3 +ATTEMPT_DELAY: int = 30 + class SshClient(object): def __init__(self, ip_address: str, username: str, private_key_file: Path, port: int = 22): @@ -31,7 +34,7 @@ def __init__(self, ip_address: str, username: str, private_key_file: Path, port: self._private_key_file: Path = private_key_file self._port: int = port - def run_command(self, command: str, use_sudo: bool = False) -> str: + def run_command(self, command: str, use_sudo: bool = False, attempts: int = ATTEMPTS, attempt_delay: int = ATTEMPT_DELAY) -> str: """ Executes the given command over SSH and returns its stdout. If the command returns a non-zero exit code, the function raises a CommandError. @@ -44,9 +47,12 @@ def run_command(self, command: str, use_sudo: bool = False) -> str: # Note that we add ~/bin to the remote PATH, since Python (Pypy) and other test tools are installed there. 
# Note, too, that when using sudo we need to carry over the value of PATH to the sudo session sudo = "sudo env PATH=$PATH PYTHONPATH=$PYTHONPATH" if use_sudo else '' - return retry_ssh_run(lambda: shell.run_command([ - "ssh", "-o", "StrictHostKeyChecking=no", "-i", self._private_key_file, destination, - f"if [[ -e ~/bin/set-agent-env ]]; then source ~/bin/set-agent-env; fi; {sudo} {command}"])) + command = [ + "ssh", "-o", "StrictHostKeyChecking=no", "-i", self._private_key_file, + destination, + f"if [[ -e ~/bin/set-agent-env ]]; then source ~/bin/set-agent-env; fi; {sudo} {command}" + ] + return retry_ssh_run(lambda: shell.run_command(command), attempts, attempt_delay) @staticmethod def generate_ssh_key(private_key_file: Path): @@ -59,19 +65,19 @@ def generate_ssh_key(private_key_file: Path): def get_architecture(self): return self.run_command("uname -m").rstrip() - def copy_to_node(self, local_path: Path, remote_path: Path, recursive: bool = False) -> None: + def copy_to_node(self, local_path: Path, remote_path: Path, recursive: bool = False, attempts: int = ATTEMPTS, attempt_delay: int = ATTEMPT_DELAY) -> None: """ File copy to a remote node """ - self._copy(local_path, remote_path, remote_source=False, remote_target=True, recursive=recursive) + self._copy(local_path, remote_path, remote_source=False, remote_target=True, recursive=recursive, attempts=attempts, attempt_delay=attempt_delay) - def copy_from_node(self, remote_path: Path, local_path: Path, recursive: bool = False) -> None: + def copy_from_node(self, remote_path: Path, local_path: Path, recursive: bool = False, attempts: int = ATTEMPTS, attempt_delay: int = ATTEMPT_DELAY) -> None: """ File copy from a remote node """ - self._copy(remote_path, local_path, remote_source=True, remote_target=False, recursive=recursive) + self._copy(remote_path, local_path, remote_source=True, remote_target=False, recursive=recursive, attempts=attempts, attempt_delay=attempt_delay) - def _copy(self, source: Path, target: 
Path, remote_source: bool, remote_target: bool, recursive: bool) -> None: + def _copy(self, source: Path, target: Path, remote_source: bool, remote_target: bool, recursive: bool, attempts: int, attempt_delay: int) -> None: if remote_source: source = f"{self._username}@{self._ip_address}:{source}" if remote_target: @@ -82,4 +88,4 @@ def _copy(self, source: Path, target: Path, remote_source: bool, remote_target: command.append("-r") command.extend([str(source), str(target)]) - shell.run_command(command) + return retry_ssh_run(lambda: shell.run_command(command), attempts, attempt_delay) diff --git a/tests_e2e/tests/lib/virtual_machine_client.py b/tests_e2e/tests/lib/virtual_machine_client.py index f7e67a8236..38d35aee52 100644 --- a/tests_e2e/tests/lib/virtual_machine_client.py +++ b/tests_e2e/tests/lib/virtual_machine_client.py @@ -19,6 +19,9 @@ # This module includes facilities to execute operations on virtual machines (list extensions, restart, etc). # +import datetime +import json +import time from typing import Any, Dict, List from azure.identity import DefaultAzureCredential @@ -32,6 +35,8 @@ from tests_e2e.tests.lib.identifiers import VmIdentifier from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.retry import execute_with_retry +from tests_e2e.tests.lib.shell import CommandError +from tests_e2e.tests.lib.ssh_client import SshClient class VirtualMachineClient(AzureClient): @@ -54,11 +59,11 @@ def __init__(self, vm: VmIdentifier): base_url=cloud.endpoints.resource_manager, credential_scopes=[cloud.endpoints.resource_manager + "/.default"]) - def get_description(self) -> VirtualMachine: + def get_model(self) -> VirtualMachine: """ - Retrieves the description of the virtual machine. + Retrieves the model of the virtual machine. 
""" - log.info("Retrieving description for %s", self._identifier) + log.info("Retrieving VM model for %s", self._identifier) return execute_with_retry( lambda: self._compute_client.virtual_machines.get( resource_group_name=self._identifier.resource_group, @@ -103,10 +108,25 @@ def update(self, properties: Dict[str, Any], timeout: int = AzureClient._DEFAULT operation_name=f"Update {self._identifier}", timeout=timeout) - def restart(self, timeout: int = AzureClient._DEFAULT_TIMEOUT) -> None: + def restart( + self, + wait_for_boot, + ssh_client: SshClient = None, + boot_timeout: datetime.timedelta = datetime.timedelta(minutes=5), + timeout: int = AzureClient._DEFAULT_TIMEOUT) -> None: """ - Restarts the virtual machine or scale set + Restarts (reboots) the virtual machine. + + NOTES: + * If wait_for_boot is True, an SshClient must be provided in order to verify that the restart was successful. + * 'timeout' is the timeout for the restart operation itself, while 'boot_timeout' is the timeout for waiting + the boot to complete. 
""" + if wait_for_boot and ssh_client is None: + raise ValueError("An SshClient must be provided if wait_for_boot is True") + + before_restart = datetime.datetime.utcnow() + self._execute_async_operation( lambda: self._compute_client.virtual_machines.begin_restart( resource_group_name=self._identifier.resource_group, @@ -114,6 +134,37 @@ def restart(self, timeout: int = AzureClient._DEFAULT_TIMEOUT) -> None: operation_name=f"Restart {self._identifier}", timeout=timeout) + if not wait_for_boot: + return + + start = datetime.datetime.utcnow() + while datetime.datetime.utcnow() < start + boot_timeout: + log.info("Waiting for VM %s to boot", self._identifier) + time.sleep(15) # Note that we always sleep at least 1 time, to give the reboot time to start + instance_view = self.get_instance_view() + power_state = [s.code for s in instance_view.statuses if "PowerState" in s.code] + if len(power_state) != 1: + raise Exception(f"Could not find PowerState in the instance view statuses:\n{json.dumps(instance_view.statuses)}") + log.info("VM's Power State: %s", power_state[0]) + if power_state[0] == "PowerState/running": + # We may get an instance view captured before the reboot actually happened; verify + # that the reboot actually happened by checking the system's uptime. + log.info("Verifying VM's uptime to ensure the reboot has completed...") + try: + uptime = ssh_client.run_command("cat /proc/uptime | sed 's/ .*//'", attempts=1).rstrip() # The uptime is the first field in the file + log.info("Uptime: %s", uptime) + boot_time = datetime.datetime.utcnow() - datetime.timedelta(seconds=float(uptime)) + if boot_time > before_restart: + log.info("VM %s completed boot and is running. Boot time: %s", self._identifier, boot_time) + return + log.info("The VM has not rebooted yet. Restart time: %s. 
Boot time: %s", before_restart, boot_time) + except CommandError as e: + if e.exit_code == 255 and "Connection refused" in str(e): + log.info("VM %s is not yet accepting SSH connections", self._identifier) + else: + raise + raise Exception(f"VM {self._identifier} did not boot after {boot_timeout}") + def __str__(self): return f"{self._identifier}" From f86c003699404d2a10c1e36a00cfae2a3f720b20 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 14 Jun 2023 16:32:20 -0700 Subject: [PATCH 023/240] Eliminate duplicate list of test suites to run (#2844) * Eliminate duplicate list of test suites to run * fix paths * add agent update --------- Co-authored-by: narrieta --- tests_e2e/orchestrator/runbook.yml | 2 +- tests_e2e/pipeline/pipeline.yml | 11 +++++++---- tests_e2e/pipeline/scripts/execute_tests.sh | 9 +++++++-- tests_e2e/test_suites/no_outbound_connections.yml | 6 +++--- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index 661472e8b6..b19148ceec 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -49,7 +49,7 @@ variable: # # The test suites to execute - name: test_suites - value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips" + value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, agent_update, fips" - name: cloud value: "AzureCloud" is_case_visible: true diff --git a/tests_e2e/pipeline/pipeline.yml b/tests_e2e/pipeline/pipeline.yml index d5d3eaf6d1..fdc086a0af 100644 --- a/tests_e2e/pipeline/pipeline.yml +++ b/tests_e2e/pipeline/pipeline.yml @@ -5,17 +5,20 @@ # parameters: + # # See the test wiki for a description of the parameters - - name: test_suites - displayName: Test Suites - type: string - default: agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, agent_update, fips + # # NOTES: # * 'image', 'location' and 'vm_size' 
override any values in the test suites/images definition # files. Those parameters are useful for 1-off tests, like testing a VHD or checking if # an image is supported in a particular location. # * Azure Pipelines do not allow empty string for the parameter value, using "-" instead. # + - name: test_suites + displayName: Test Suites (comma-separated list of test suites to run) + type: string + default: "-" + - name: image displayName: Image (image/image set name, URN, or VHD) type: string diff --git a/tests_e2e/pipeline/scripts/execute_tests.sh b/tests_e2e/pipeline/scripts/execute_tests.sh index 15c9f0b5f6..2a76244920 100755 --- a/tests_e2e/pipeline/scripts/execute_tests.sh +++ b/tests_e2e/pipeline/scripts/execute_tests.sh @@ -39,6 +39,11 @@ docker pull waagenttests.azurecr.io/waagenttests:latest # Azure Pipelines does not allow an empty string as the value for a pipeline parameter; instead we use "-" to indicate # an empty value. Change "-" to "" for the variables that capture the parameter values. +if [[ $TEST_SUITES == "-" ]]; then + TEST_SUITES="" # Don't set the test_suites variable +else + TEST_SUITES="-v test_suites:\"$TEST_SUITES\"" +fi if [[ $IMAGE == "-" ]]; then IMAGE="" fi @@ -69,13 +74,13 @@ docker run --rm \ -v cloud:$CLOUD \ -v subscription_id:$SUBSCRIPTION_ID \ -v identity_file:\$HOME/.ssh/id_rsa \ - -v test_suites:\"$TEST_SUITES\" \ -v log_path:\$HOME/logs \ -v collect_logs:\"$COLLECT_LOGS\" \ -v keep_environment:\"$KEEP_ENVIRONMENT\" \ -v image:\"$IMAGE\" \ -v location:\"$LOCATION\" \ - -v vm_size:\"$VM_SIZE\"" \ + -v vm_size:\"$VM_SIZE\" \ + $TEST_SUITES" \ || echo "exit $?" 
> /tmp/exit.sh # diff --git a/tests_e2e/test_suites/no_outbound_connections.yml b/tests_e2e/test_suites/no_outbound_connections.yml index eaf6268860..2e2e2d1a84 100644 --- a/tests_e2e/test_suites/no_outbound_connections.yml +++ b/tests_e2e/test_suites/no_outbound_connections.yml @@ -11,9 +11,9 @@ name: "NoOutboundConnections" tests: - source: "no_outbound_connections/check_no_outbound_connections.py" blocks_suite: true # If the NSG is not setup correctly, there is no point in executing the rest of the tests. - - "bvts/extension_operations.py" - - "bvts/run_command.py" - - "bvts/vm_access.py" + - "agent_bvt/extension_operations.py" + - "agent_bvt/run_command.py" + - "agent_bvt/vm_access.py" - "no_outbound_connections/check_fallback_to_hgap.py" images: "random(endorsed)" template: "no_outbound_connections/template.py" From 01bc32064fc047f2ef613e11f129ff2e3bac0fbd Mon Sep 17 00:00:00 2001 From: sebastienb-stormshield Date: Thu, 15 Jun 2023 17:00:34 +0200 Subject: [PATCH 024/240] Port NSBSD system to the latest version of waagent (#2828) * nsbsd: adapt to recent dns.resolver * osutil: Provide a get_root_username function for systems where its not 'root' (like in nsbsd) * nsbsd: tune the configuration filepath * nsbsd: fix lib installation path --------- Co-authored-by: Norberto Arrieta --- azurelinuxagent/agent.py | 2 +- azurelinuxagent/common/osutil/default.py | 3 +++ azurelinuxagent/common/osutil/nsbsd.py | 7 ++++++- config/nsbsd/waagent.conf | 2 +- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/azurelinuxagent/agent.py b/azurelinuxagent/agent.py index 8c303482e8..af63c068b5 100644 --- a/azurelinuxagent/agent.py +++ b/azurelinuxagent/agent.py @@ -104,7 +104,7 @@ def __init__(self, verbose, conf_file_path=None): if os.path.isfile(ext_log_dir): raise Exception("{0} is a file".format(ext_log_dir)) if not os.path.isdir(ext_log_dir): - fileutil.mkdir(ext_log_dir, mode=0o755, owner="root") + fileutil.mkdir(ext_log_dir, mode=0o755, 
owner=self.osutil.get_root_username()) except Exception as e: logger.error( "Exception occurred while creating extension " diff --git a/azurelinuxagent/common/osutil/default.py b/azurelinuxagent/common/osutil/default.py index 9fb97f157f..6430f83ec8 100644 --- a/azurelinuxagent/common/osutil/default.py +++ b/azurelinuxagent/common/osutil/default.py @@ -374,6 +374,9 @@ def get_userentry(username): except KeyError: return None + def get_root_username(self): + return "root" + def is_sys_user(self, username): """ Check whether use is a system user. diff --git a/azurelinuxagent/common/osutil/nsbsd.py b/azurelinuxagent/common/osutil/nsbsd.py index 016f506f0d..00723aa0b5 100644 --- a/azurelinuxagent/common/osutil/nsbsd.py +++ b/azurelinuxagent/common/osutil/nsbsd.py @@ -28,6 +28,7 @@ class NSBSDOSUtil(FreeBSDOSUtil): def __init__(self): super(NSBSDOSUtil, self).__init__() + self.agent_conf_file_path = '/etc/waagent.conf' if self.resolver is None: # NSBSD doesn't have a system resolver, configure a python one @@ -37,7 +38,7 @@ def __init__(self): except ImportError: raise OSUtilError("Python DNS resolver not available. Cannot proceed!") - self.resolver = dns.resolver.Resolver() + self.resolver = dns.resolver.Resolver(configure=False) servers = [] cmd = "getconf /usr/Firewall/ConfigFiles/dns Servers | tail -n +2" ret, output = shellutil.run_get_output(cmd) # pylint: disable=W0612 @@ -47,6 +48,7 @@ def __init__(self): server = server[:-1] # remove last '=' cmd = "grep '{}' /etc/hosts".format(server) + " | awk '{print $1}'" ret, ip = shellutil.run_get_output(cmd) + ip = ip.strip() # Remove new line char servers.append(ip) self.resolver.nameservers = servers dns.resolver.override_system_resolver(self.resolver) @@ -74,6 +76,9 @@ def conf_sshd(self, disable_password): logger.info("{0} SSH password-based authentication methods." 
.format("Disabled" if disable_password else "Enabled")) + def get_root_username(self): + return "admin" + def useradd(self, username, expiration=None, comment=None): """ Create user account with 'username' diff --git a/config/nsbsd/waagent.conf b/config/nsbsd/waagent.conf index 9d0ce74d8b..d7f6f27595 100644 --- a/config/nsbsd/waagent.conf +++ b/config/nsbsd/waagent.conf @@ -80,7 +80,7 @@ OS.SudoersDir=/usr/local/etc/sudoers.d # DetectScvmmEnv=n # -Lib.Dir=/usr/Firewall/var/waagent +Lib.Dir=/usr/Firewall/lib/waagent # # DVD.MountPoint=/mnt/cdrom/secure From 42e084dcd94d1710121c22a837419e1bc91d5ddf Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Thu, 15 Jun 2023 10:06:09 -0700 Subject: [PATCH 025/240] Fix method name in update test (#2845) Co-authored-by: narrieta --- tests_e2e/tests/agent_update/rsm_update.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_e2e/tests/agent_update/rsm_update.py b/tests_e2e/tests/agent_update/rsm_update.py index cfa1a7d18b..05112fc19b 100644 --- a/tests_e2e/tests/agent_update/rsm_update.py +++ b/tests_e2e/tests/agent_update/rsm_update.py @@ -114,7 +114,7 @@ def _prepare_agent(self) -> None: @staticmethod def _verify_agent_update_flag_enabled(vm: VirtualMachineClient) -> bool: - result: VirtualMachine = vm.get_description() + result: VirtualMachine = vm.get_model() flag: bool = result.os_profile.linux_configuration.enable_vm_agent_platform_updates if flag is None: return False From b0a12e6aae000dc0aca917c500dd2b92c08945bc Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Thu, 15 Jun 2023 16:32:46 -0700 Subject: [PATCH 026/240] Expose run name as a runbook variable (#2846) Co-authored-by: narrieta --- tests_e2e/orchestrator/runbook.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index b19148ceec..a733b8abd4 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -1,4 +1,4 
@@ -name: WALinuxAgent +name: $(name) testcase: - criteria: @@ -8,6 +8,8 @@ extension: - "./lib" variable: + - name: name + value: "WALinuxAgent" # # These variables define parameters handled by LISA. # From 8ddadf87c2f28d4c159c9e79d735283c0564bdc3 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Mon, 19 Jun 2023 13:39:11 -0700 Subject: [PATCH 027/240] Collect test artifacts as a separate step in the test pipeline (#2848) * Collect test artifacts as a separate step in the test pipeline --------- Co-authored-by: narrieta --- tests_e2e/pipeline/pipeline.yml | 10 +++ .../pipeline/scripts/collect_artifacts.sh | 69 +++++++++++++++++++ tests_e2e/pipeline/scripts/execute_tests.sh | 49 +------------ 3 files changed, 81 insertions(+), 47 deletions(-) create mode 100755 tests_e2e/pipeline/scripts/collect_artifacts.sh diff --git a/tests_e2e/pipeline/pipeline.yml b/tests_e2e/pipeline/pipeline.yml index fdc086a0af..468203cd22 100644 --- a/tests_e2e/pipeline/pipeline.yml +++ b/tests_e2e/pipeline/pipeline.yml @@ -43,6 +43,11 @@ parameters: - failed - no + - name: collect_lisa_logs + displayName: Collect LISA logs + type: boolean + default: true + - name: keep_environment displayName: Keep the test VMs (do not delete them) type: string @@ -117,6 +122,11 @@ jobs: TEST_SUITES: ${{ parameters.test_suites }} VM_SIZE: ${{ parameters.vm_size }} + - bash: $(Build.SourcesDirectory)/tests_e2e/pipeline/scripts/collect_artifacts.sh + displayName: "Collect test artifacts" + env: + COLLECT_LISA_LOGS: ${{ parameters.collect_lisa_logs }} + - publish: $(Build.ArtifactStagingDirectory) artifact: 'artifacts' displayName: 'Publish test artifacts' diff --git a/tests_e2e/pipeline/scripts/collect_artifacts.sh b/tests_e2e/pipeline/scripts/collect_artifacts.sh new file mode 100755 index 0000000000..4dc8ae0f51 --- /dev/null +++ b/tests_e2e/pipeline/scripts/collect_artifacts.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +# +# Moves the relevant logs to the staging directory +# +set -euxo pipefail + +# +# 
The execute_test.sh script gives ownership of the log directory to the 'waagent' user in +# the Docker container; re-take ownership +# +sudo find "$LOGS_DIRECTORY" -exec chown "$USER" {} \; + +# +# Move the logs for failed tests to a temporary location +# +mkdir "$BUILD_ARTIFACTSTAGINGDIRECTORY"/tmp +for log in $(grep -l MARKER-LOG-WITH-ERRORS "$LOGS_DIRECTORY"/*.log); do + mv "$log" "$BUILD_ARTIFACTSTAGINGDIRECTORY"/tmp +done + +# +# Move the environment logs to "environment_logs" +# +if ls "$LOGS_DIRECTORY"/env-*.log > /dev/null 2>&1; then + mkdir "$BUILD_ARTIFACTSTAGINGDIRECTORY"/environment_logs + mv "$LOGS_DIRECTORY"/env-*.log "$BUILD_ARTIFACTSTAGINGDIRECTORY"/environment_logs +fi + +# +# Move the rest of the logs to "test_logs" +# +if ls "$LOGS_DIRECTORY"/*.log > /dev/null 2>&1; then + mkdir "$BUILD_ARTIFACTSTAGINGDIRECTORY"/test_logs + mv "$LOGS_DIRECTORY"/*.log "$BUILD_ARTIFACTSTAGINGDIRECTORY"/test_logs +fi + +# +# Move the logs for failed tests to the main directory +# +if ls "$BUILD_ARTIFACTSTAGINGDIRECTORY"/tmp/*.log > /dev/null 2>&1; then + mv "$BUILD_ARTIFACTSTAGINGDIRECTORY"/tmp/*.log "$BUILD_ARTIFACTSTAGINGDIRECTORY" +fi +rmdir "$BUILD_ARTIFACTSTAGINGDIRECTORY"/tmp + +# +# Move the logs collected from the test VMs to vm_logs +# +if ls "$LOGS_DIRECTORY"/*.tgz > /dev/null 2>&1; then + mkdir "$BUILD_ARTIFACTSTAGINGDIRECTORY"/vm_logs + mv "$LOGS_DIRECTORY"/*.tgz "$BUILD_ARTIFACTSTAGINGDIRECTORY"/vm_logs +fi + +# +# Move the main LISA log and the JUnit report to "runbook_logs" +# +# Note that files created by LISA are under .../lisa//" +# +mkdir "$BUILD_ARTIFACTSTAGINGDIRECTORY"/runbook_logs +mv "$LOGS_DIRECTORY"/lisa/*/*/lisa-*.log "$BUILD_ARTIFACTSTAGINGDIRECTORY"/runbook_logs +mv "$LOGS_DIRECTORY"/lisa/*/*/agent.junit.xml "$BUILD_ARTIFACTSTAGINGDIRECTORY"/runbook_logs + +# +# Move the rest of the LISA logs to "lisa_logs" +# +if [[ ${COLLECT_LISA_LOGS,,} == 'true' ]]; then # case-insensitive comparison + mkdir 
"$BUILD_ARTIFACTSTAGINGDIRECTORY"/lisa_logs + mv "$LOGS_DIRECTORY"/lisa/*/*/* "$BUILD_ARTIFACTSTAGINGDIRECTORY"/lisa_logs +fi + diff --git a/tests_e2e/pipeline/scripts/execute_tests.sh b/tests_e2e/pipeline/scripts/execute_tests.sh index 2a76244920..a822e2dd94 100755 --- a/tests_e2e/pipeline/scripts/execute_tests.sh +++ b/tests_e2e/pipeline/scripts/execute_tests.sh @@ -27,6 +27,7 @@ chmod a+w "$BUILD_SOURCESDIRECTORY" # Create the directory where the Docker container will create the test logs and give ownership to 'waagent' # LOGS_DIRECTORY="$HOME/logs" +echo "##vso[task.setvariable variable=logs_directory]$LOGS_DIRECTORY" mkdir "$LOGS_DIRECTORY" sudo chown "$WAAGENT_UID" "$LOGS_DIRECTORY" @@ -54,10 +55,6 @@ if [[ $VM_SIZE == "-" ]]; then VM_SIZE="" fi -# A test failure will cause automation to exit with an error code and we don't want this script to stop so we force the command -# to succeed and capture the exit code to return it at the end of the script. -echo "exit 0" > /tmp/exit.sh - docker run --rm \ --volume "$BUILD_SOURCESDIRECTORY:/home/waagent/WALinuxAgent" \ --volume "$HOME"/ssh:/home/waagent/.ssh \ @@ -80,46 +77,4 @@ docker run --rm \ -v image:\"$IMAGE\" \ -v location:\"$LOCATION\" \ -v vm_size:\"$VM_SIZE\" \ - $TEST_SUITES" \ -|| echo "exit $?" 
> /tmp/exit.sh - -# -# Re-take ownership of the logs directory -# -sudo find "$LOGS_DIRECTORY" -exec chown "$USER" {} \; - -# -# Move the relevant logs to the staging directory -# -# Move the logs for failed tests to a temporary location -mkdir "$BUILD_ARTIFACTSTAGINGDIRECTORY"/tmp -for log in $(grep -l MARKER-LOG-WITH-ERRORS "$LOGS_DIRECTORY"/*.log); do - mv "$log" "$BUILD_ARTIFACTSTAGINGDIRECTORY"/tmp -done -# Move the environment logs to "environment_logs" -if ls "$LOGS_DIRECTORY"/env-*.log > /dev/null 2>&1; then - mkdir "$BUILD_ARTIFACTSTAGINGDIRECTORY"/environment_logs - mv "$LOGS_DIRECTORY"/env-*.log "$BUILD_ARTIFACTSTAGINGDIRECTORY"/environment_logs -fi -# Move the rest of the logs to "test_logs" -if ls "$LOGS_DIRECTORY"/*.log > /dev/null 2>&1; then - mkdir "$BUILD_ARTIFACTSTAGINGDIRECTORY"/test_logs - mv "$LOGS_DIRECTORY"/*.log "$BUILD_ARTIFACTSTAGINGDIRECTORY"/test_logs -fi -# Move the logs for failed tests to the main directory -if ls "$BUILD_ARTIFACTSTAGINGDIRECTORY"/tmp/*.log > /dev/null 2>&1; then - mv "$BUILD_ARTIFACTSTAGINGDIRECTORY"/tmp/*.log "$BUILD_ARTIFACTSTAGINGDIRECTORY" -fi -rmdir "$BUILD_ARTIFACTSTAGINGDIRECTORY"/tmp -# Move the logs collected from the test VMs to vm_logs -if ls "$LOGS_DIRECTORY"/*.tgz > /dev/null 2>&1; then - mkdir "$BUILD_ARTIFACTSTAGINGDIRECTORY"/vm_logs - mv "$LOGS_DIRECTORY"/*.tgz "$BUILD_ARTIFACTSTAGINGDIRECTORY"/vm_logs -fi -# Files created by LISA are under .../lisa//" -mkdir "$BUILD_ARTIFACTSTAGINGDIRECTORY"/runbook_logs -mv "$LOGS_DIRECTORY"/lisa/*/*/lisa-*.log "$BUILD_ARTIFACTSTAGINGDIRECTORY"/runbook_logs -mv "$LOGS_DIRECTORY"/lisa/*/*/agent.junit.xml "$BUILD_ARTIFACTSTAGINGDIRECTORY"/runbook_logs - -cat /tmp/exit.sh -bash /tmp/exit.sh + $TEST_SUITES" From 2acdf7f58b90c2c5cf7a1be57267d019e54782aa Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 20 Jun 2023 16:42:34 -0700 Subject: [PATCH 028/240] remove agent update test and py27 version from build (#2853) 
--- .github/workflows/ci_pr.yml | 6 +++--- tests_e2e/orchestrator/runbook.yml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci_pr.yml b/.github/workflows/ci_pr.yml index 4e8b299671..da8519f8a6 100644 --- a/.github/workflows/ci_pr.yml +++ b/.github/workflows/ci_pr.yml @@ -56,9 +56,9 @@ jobs: fail-fast: false matrix: include: - - - python-version: 2.7 - PYLINTOPTS: "--rcfile=ci/2.7.pylintrc --ignore=tests_e2e,makepkg.py" +# py27 version no longer supported by github build runners. So disabling it until fix is found. +# - python-version: 2.7 +# PYLINTOPTS: "--rcfile=ci/2.7.pylintrc --ignore=tests_e2e,makepkg.py" - python-version: 3.5 PYLINTOPTS: "--rcfile=ci/3.6.pylintrc --ignore=tests_e2e,makepkg.py" diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index a733b8abd4..a1370bd6e7 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -51,7 +51,7 @@ variable: # # The test suites to execute - name: test_suites - value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, agent_update, fips" + value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips" - name: cloud value: "AzureCloud" is_case_visible: true From 8c6721057e65b6c47c420214763ae9d8551d85da Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 21 Jun 2023 13:16:34 -0700 Subject: [PATCH 029/240] Fix infinite retry loop in end to end tests (#2855) * Fix infinite retry loop * fix message --------- Co-authored-by: narrieta --- tests_e2e/tests/lib/retry.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests_e2e/tests/lib/retry.py b/tests_e2e/tests/lib/retry.py index 3366aecdc9..79a209ba44 100644 --- a/tests_e2e/tests/lib/retry.py +++ b/tests_e2e/tests/lib/retry.py @@ -44,19 +44,19 @@ def retry_ssh_run(operation: Callable[[], Any], attempts: int, attempt_delay: in """ This method attempts to retry ssh 
run command a few times if operation failed with connection time out """ - i = 1 - while i <= attempts: + i = 0 + while True: + i += 1 try: return operation() - except Exception as e: - # We raise CommandError on !=0 exit codes in the called method - if isinstance(e, CommandError): - # Instance of 'Exception' has no 'exit_code' member (no-member) - Disabled: e is actually an CommandError - if e.exit_code != 255 or i == attempts: # pylint: disable=no-member - raise - log.warning("The SSH operation failed, retrying in %s secs [Attempt %s/%s].\n%s", e, attempt_delay, i, attempts) + except CommandError as e: + retryable = e.exit_code == 255 and ("Connection timed out" in e.stderr or "Connection refused" in e.stderr) + if not retryable or i >= attempts: + raise + log.warning("The SSH operation failed, retrying in %s secs [Attempt %s/%s].\n%s", attempt_delay, i, attempts, e) time.sleep(attempt_delay) + def retry_if_false(operation: Callable[[], bool], attempts: int = 5, duration: int = 30) -> bool: """ This method attempts the given operation retrying a few times From e7d6628c78026141a16fe62e01230728d023630d Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 21 Jun 2023 13:16:55 -0700 Subject: [PATCH 030/240] Remove empty "distro" module (#2854) Co-authored-by: narrieta --- azurelinuxagent/distro/__init__.py | 17 ----------------- azurelinuxagent/distro/suse/__init__.py | 17 ----------------- 2 files changed, 34 deletions(-) delete mode 100644 azurelinuxagent/distro/__init__.py delete mode 100644 azurelinuxagent/distro/suse/__init__.py diff --git a/azurelinuxagent/distro/__init__.py b/azurelinuxagent/distro/__init__.py deleted file mode 100644 index de7be33642..0000000000 --- a/azurelinuxagent/distro/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright 2018 Microsoft Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Requires Python 2.6+ and Openssl 1.0+ -# - diff --git a/azurelinuxagent/distro/suse/__init__.py b/azurelinuxagent/distro/suse/__init__.py deleted file mode 100644 index de7be33642..0000000000 --- a/azurelinuxagent/distro/suse/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright 2018 Microsoft Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# Requires Python 2.6+ and Openssl 1.0+ -# - From 33493d04614f6896c3bf87bb039516447c66a773 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 21 Jun 2023 13:33:46 -0700 Subject: [PATCH 031/240] Enable Python 2.7 for unit tests (#2856) * Enable Python 2.7 for unit tests --------- Co-authored-by: narrieta --- .github/workflows/ci_pr.yml | 38 +++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci_pr.yml b/.github/workflows/ci_pr.yml index da8519f8a6..84b3ab68e7 100644 --- a/.github/workflows/ci_pr.yml +++ b/.github/workflows/ci_pr.yml @@ -8,7 +8,7 @@ on: workflow_dispatch: jobs: - test-legacy-python-versions: + test-python-2_6-and-3_4-versions: strategy: fail-fast: false @@ -50,16 +50,42 @@ jobs: ./ci/nosetests.sh exit $? + test-python-2_7: + + strategy: + fail-fast: false + + name: "Python 2.7 Unit Tests" + runs-on: ubuntu-20.04 + defaults: + run: + shell: bash -l {0} + + env: + NOSEOPTS: "--verbose" + + steps: + - uses: actions/checkout@v3 + + - name: Install Python 2.7 + run: | + apt-get update + apt-get install -y curl bzip2 sudo + curl https://dcrdata.blob.core.windows.net/python/python-2.7.tar.bz2 -o python-2.7.tar.bz2 + sudo tar xjvf python-2.7.tar.bz2 --directory / + + - name: Test with nosetests + run: | + source /home/waagent/virtualenv/python2.7.16/bin/activate + ./ci/nosetests.sh + exit $? + test-current-python-versions: strategy: fail-fast: false matrix: include: -# py27 version no longer supported by github build runners. So disabling it until fix is found. 
-# - python-version: 2.7 -# PYLINTOPTS: "--rcfile=ci/2.7.pylintrc --ignore=tests_e2e,makepkg.py" - - python-version: 3.5 PYLINTOPTS: "--rcfile=ci/3.6.pylintrc --ignore=tests_e2e,makepkg.py" @@ -125,4 +151,4 @@ jobs: if: matrix.python-version == 3.9 uses: codecov/codecov-action@v3 with: - file: ./coverage.xml \ No newline at end of file + file: ./coverage.xml From 9d90a2dcd9a9e9ed6b0cd21d66479866629fd72a Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Wed, 21 Jun 2023 13:40:54 -0700 Subject: [PATCH 032/240] Skip downgrade if requested version below daemon version (#2850) * skip downgrade for agent update * add test * report it in status * address comments * revert change * improved error msg * address comment --- azurelinuxagent/common/exception.py | 9 ++++ azurelinuxagent/ga/agent_update_handler.py | 48 +++++++++++++++------- tests/data/wire/ga_manifest.xml | 3 ++ tests/ga/test_agent_update_handler.py | 26 +++++++++++- tests/ga/test_update.py | 2 +- tests_e2e/tests/agent_update/rsm_update.py | 19 +++++++++ 6 files changed, 90 insertions(+), 17 deletions(-) diff --git a/azurelinuxagent/common/exception.py b/azurelinuxagent/common/exception.py index 0484662327..603ed1aa21 100644 --- a/azurelinuxagent/common/exception.py +++ b/azurelinuxagent/common/exception.py @@ -75,6 +75,15 @@ def __init__(self, msg=None, inner=None): super(AgentNetworkError, self).__init__(msg, inner) +class AgentUpdateError(AgentError): + """ + When agent failed to update. + """ + + def __init__(self, msg=None, inner=None): + super(AgentUpdateError, self).__init__(msg, inner) + + class CGroupsException(AgentError): """ Exception to classify any cgroups related issue. 
diff --git a/azurelinuxagent/ga/agent_update_handler.py b/azurelinuxagent/ga/agent_update_handler.py index 3acb5b14cf..9200075055 100644 --- a/azurelinuxagent/ga/agent_update_handler.py +++ b/azurelinuxagent/ga/agent_update_handler.py @@ -5,14 +5,14 @@ from azurelinuxagent.common import conf, logger from azurelinuxagent.common.event import add_event, WALAEventOperation -from azurelinuxagent.common.exception import AgentUpgradeExitException +from azurelinuxagent.common.exception import AgentUpgradeExitException, AgentUpdateError from azurelinuxagent.common.future import ustr from azurelinuxagent.common.logger import LogLevel from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateSource -from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatuses, VMAgentUpdateStatus +from azurelinuxagent.common.protocol.restapi import VERSION_0, VMAgentUpdateStatuses, VMAgentUpdateStatus from azurelinuxagent.common.utils import fileutil, textutil from azurelinuxagent.common.utils.flexible_version import FlexibleVersion -from azurelinuxagent.common.version import CURRENT_VERSION, AGENT_NAME, AGENT_DIR_PATTERN +from azurelinuxagent.common.version import get_daemon_version, CURRENT_VERSION, AGENT_NAME, AGENT_DIR_PATTERN from azurelinuxagent.ga.guestagent import GuestAgent, GAUpdateReportState @@ -37,8 +37,6 @@ def __init__(self): self.last_attempted_requested_version_update_time = datetime.datetime.min self.last_attempted_hotfix_update_time = datetime.datetime.min self.last_attempted_normal_update_time = datetime.datetime.min - self.last_warning = "" - self.last_warning_time = datetime.datetime.min class AgentUpdateHandler(object): @@ -130,10 +128,10 @@ def __get_agent_family_manifests(self, goal_state): agent_family_manifests.append(m) if not family_found: - raise Exception(u"Agent family: {0} not found in the goal state, skipping agent update".format(family)) + raise AgentUpdateError(u"Agent family: {0} not found in the goal state, skipping agent 
update".format(family)) if len(agent_family_manifests) == 0: - raise Exception( + raise AgentUpdateError( u"No manifest links found for agent family: {0} for incarnation: {1}, skipping agent update".format( self._ga_family, self._gs_id)) return agent_family_manifests[0] @@ -179,7 +177,7 @@ def __get_agent_package_to_download(self, agent_manifest, version): # Found a matching package, only download that one return pkg - raise Exception("No matching package found in the agent manifest for requested version: {0} in goal state incarnation: {1}, " + raise AgentUpdateError("No matching package found in the agent manifest for requested version: {0} in goal state incarnation: {1}, " "skipping agent update".format(str(version), self._gs_id)) @staticmethod @@ -245,6 +243,15 @@ def __get_all_agents_on_disk(): path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME)) return [GuestAgent.from_installed_agent(path=agent_dir) for agent_dir in glob.iglob(path) if os.path.isdir(agent_dir)] + @staticmethod + def __get_daemon_version_for_update(): + daemon_version = get_daemon_version() + if daemon_version != FlexibleVersion(VERSION_0): + return daemon_version + # We return 0.0.0.0 if daemon version is not specified. In that case, + # use the min version as 2.2.53 as we started setting the daemon version starting 2.2.53. 
+ return FlexibleVersion("2.2.53") + @staticmethod def __log_event(level, msg, success=True): if level == LogLevel.INFO: @@ -291,11 +298,20 @@ def run(self, goal_state): if warn_msg != "": self.__log_event(LogLevel.WARNING, warn_msg) - msg = "Goal state {0} is requesting a new agent version {1}, will update the agent before processing the goal state.".format( - self._gs_id, str(requested_version)) - self.__log_event(LogLevel.INFO, msg) - try: + daemon_version = self.__get_daemon_version_for_update() + if requested_version < daemon_version: + # Don't process the update if the requested version is less than daemon version, + # as historically we don't support downgrades below daemon versions. So daemon will not pickup that requested version rather start with + # installed latest version again. When that happens agent go into loop of downloading the requested version, exiting and start again with same version. + # + raise AgentUpdateError("The Agent received a request to downgrade to version {0}, but downgrading to a version less than " + "the Agent installed on the image ({1}) is not supported. 
Skipping downgrade.".format(requested_version, daemon_version)) + + msg = "Goal state {0} is requesting a new agent version {1}, will update the agent before processing the goal state.".format( + self._gs_id, str(requested_version)) + self.__log_event(LogLevel.INFO, msg) + agent = self.__download_and_get_agent(goal_state, agent_family, agent_manifest, requested_version) if agent.is_blacklisted or not agent.is_downloaded: @@ -314,9 +330,13 @@ def run(self, goal_state): except Exception as err: if isinstance(err, AgentUpgradeExitException): raise err + elif isinstance(err, AgentUpdateError): + error_msg = ustr(err) + else: + error_msg = "Unable to update Agent: {0}".format(textutil.format_exception(err)) + self.__log_event(LogLevel.WARNING, error_msg, success=False) if "Missing requested version" not in GAUpdateReportState.report_error_msg: - GAUpdateReportState.report_error_msg = "Unable to update Agent: {0}".format(textutil.format_exception(err)) - self.__log_event(LogLevel.WARNING, GAUpdateReportState.report_error_msg, success=False) + GAUpdateReportState.report_error_msg = error_msg def get_vmagent_update_status(self): """ diff --git a/tests/data/wire/ga_manifest.xml b/tests/data/wire/ga_manifest.xml index e12f054916..799e1f111c 100644 --- a/tests/data/wire/ga_manifest.xml +++ b/tests/data/wire/ga_manifest.xml @@ -25,6 +25,9 @@ 2.1.0http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__2.1.0 + + 2.5.0http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__2.5.0 + 9.9.9.10 diff --git a/tests/ga/test_agent_update_handler.py b/tests/ga/test_agent_update_handler.py index 5b7800b8c6..9e01d0b6c2 100644 --- a/tests/ga/test_agent_update_handler.py +++ b/tests/ga/test_agent_update_handler.py @@ -80,7 +80,7 @@ def __assert_agent_requested_version_in_goal_state(self, mock_telemetry, inc=1, def __assert_no_agent_package_telemetry_emitted(self, mock_telemetry, version="9.9.9.10"): upgrade_event_msgs = [kwarg['message'] for _, kwarg in 
mock_telemetry.call_args_list if - 'Unable to update Agent: No matching package found in the agent manifest for requested version: {0}'.format(version) in kwarg['message'] and kwarg[ + 'No matching package found in the agent manifest for requested version: {0}'.format(version) in kwarg['message'] and kwarg[ 'op'] == WALAEventOperation.AgentUpgrade] self.assertEqual(1, len(upgrade_event_msgs), "Did not find the event indicating that the agent package not found. Got: {0}".format( @@ -217,7 +217,7 @@ def test_it_should_downgrade_agent_if_requested_version_is_available_less_than_c self.prepare_agents() self.assertEqual(20, self.agent_count(), "Agent directories not set properly") - downgraded_version = "1.2.0" + downgraded_version = "2.5.0" with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version(downgraded_version) @@ -230,6 +230,28 @@ def test_it_should_downgrade_agent_if_requested_version_is_available_less_than_c versions=[downgraded_version, str(CURRENT_VERSION)]) self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason)) + def test_it_should_not_downgrade_below_daemon_version(self): + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + + # Set the test environment by adding 20 random agents to the agent directory + self.prepare_agents() + self.assertEqual(20, self.agent_count(), "Agent directories not set properly") + + downgraded_version = "1.2.0" + + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version(downgraded_version) + agent_update_handler._protocol.mock_wire_data.set_incarnation(2) + agent_update_handler._protocol.client.update_goal_state() + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + 
self.assertFalse(os.path.exists(self.agent_dir(downgraded_version)), + "New agent directory should not be found") + self.assertEqual(1, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + "The Agent received a request to downgrade to version" in kwarg[ + 'message'] and kwarg[ + 'op'] == WALAEventOperation.AgentUpgrade]), "We should allow downgrade above daemon version") + def test_handles_if_requested_version_not_found_in_pkgs_to_download(self): data_file = DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index b73ad3db8f..5b4babfd14 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -1818,7 +1818,7 @@ def test_it_should_mark_current_agent_as_bad_version_on_downgrade(self): self.assertTrue(os.path.exists(self.agent_dir(CURRENT_VERSION))) self.assertFalse(next(agent for agent in self.agents() if agent.version == CURRENT_VERSION).is_blacklisted, "The current agent should not be blacklisted") - downgraded_version = "1.2.0" + downgraded_version = "2.5.0" data_file = mockwiredata.DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" diff --git a/tests_e2e/tests/agent_update/rsm_update.py b/tests_e2e/tests/agent_update/rsm_update.py index 05112fc19b..0493efcad1 100644 --- a/tests_e2e/tests/agent_update/rsm_update.py +++ b/tests_e2e/tests/agent_update/rsm_update.py @@ -27,6 +27,7 @@ from typing import List, Dict, Any import requests +from assertpy import assert_that from azure.identity import DefaultAzureCredential from azure.mgmt.compute.models import VirtualMachine from msrestazure.azure_cloud import Cloud @@ -96,8 +97,19 @@ def run(self) -> None: version: str = "1.3.1.0" log.info("Attempting update version same as current version %s", upgrade_version) self._request_rsm_update(version) + self._check_rsm_gs(version) self._verify_guest_agent_update(version) + # verify requested version below daemon version + 
log.info("*******Verifying requested version below daemon version scenario*******") + stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) + log.info("Current agent version running on the vm before update \n%s", stdout) + version: str = "0.5.0" + log.info("Attempting requested version %s", version) + self._request_rsm_update(version) + self._check_rsm_gs(version) + self._verify_no_guest_agent_update(stdout) + def _check_rsm_gs(self, requested_version: str) -> None: # This checks if RSM GS available to the agent after we mock the rsm update request output = self._ssh_client.run_command(f"wait_for_rsm_goal_state.py --version {requested_version}", use_sudo=True) @@ -184,6 +196,13 @@ def _check_agent_version(requested_version: str) -> bool: stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) log.info(f"Verified agent updated to requested version. Current agent version running:\n {stdout}") + def _verify_no_guest_agent_update(self, previous_agent: str) -> None: + """ + verify current agent version is same as previous after update attempt + """ + current_agent: str = self._ssh_client.run_command("waagent-version", use_sudo=True) + assert_that(current_agent).is_equal_to(previous_agent).described_as(f"Agent version changed.\n Previous Agent {previous_agent} \n Current agent {current_agent}") + def _verify_agent_reported_supported_feature_flag(self): """ RSM update rely on supported flag that agent sends to CRP.So, checking if GA reports feature flag from the agent log From 51322761d8377d1db7af5767991b0bcd422e26e5 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Fri, 23 Jun 2023 17:08:58 -0700 Subject: [PATCH 033/240] update location schema and added skip clouds in suite yml (#2852) * update location schema in suite yml * address comments * . 
* pylint warn * comment --- .../orchestrator/lib/agent_test_loader.py | 52 ++++++++++++++----- .../lib/agent_test_suite_combinator.py | 12 ++++- tests_e2e/test_suites/agent_update.yml | 7 ++- 3 files changed, 55 insertions(+), 16 deletions(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_loader.py b/tests_e2e/orchestrator/lib/agent_test_loader.py index a2576f9b6b..31c6e52a40 100644 --- a/tests_e2e/orchestrator/lib/agent_test_loader.py +++ b/tests_e2e/orchestrator/lib/agent_test_loader.py @@ -53,14 +53,16 @@ class TestSuiteInfo(object): tests: List[TestInfo] # Images or image sets (as defined in images.yml) on which the suite must run. images: List[str] - # The location (region) on which the suite must run; if empty, the suite can run on any location - location: str + # The locations (regions) on which the suite must run; if empty, the suite can run on any location + locations: List[str] # Whether this suite must run on its own test VM owns_vm: bool # Whether to install the test Agent on the test VM install_test_agent: bool # Customization for the ARM template used when creating the test VM template: str + # skip test suite if the test not supposed to run on specific clouds + skip_on_clouds: List[str] def __str__(self): return self.name @@ -137,15 +139,24 @@ def _validate(self): if image not in self.images: raise Exception(f"Invalid image reference in test suite {suite.name}: Can't find {image} in images.yml") - # If the suite specifies a location, validate that the images it uses are available in that location - if suite.location != '': + # If the suite specifies a cloud and it's location, validate that location string is start with and then validate that the images it uses are available in that location + for suite_location in suite.locations: + if suite_location.startswith(self.__cloud + ":"): + suite_location = suite_location.split(":")[1] + else: + continue for suite_image in suite.images: for image in self.images[suite_image]: # If the image has a 
location restriction, validate that it is available on the location the suite must run on if image.locations: locations = image.locations.get(self.__cloud) - if locations is not None and not any(suite.location in l for l in locations): - raise Exception(f"Test suite {suite.name} must be executed in {suite.location}, but <{image.urn}> is not available in that location") + if locations is not None and not any(suite_location in l for l in locations): + raise Exception(f"Test suite {suite.name} must be executed in {suite_location}, but <{image.urn}> is not available in that location") + + # if the suite specifies skip clouds, validate that cloud used in our tests + for suite_skip_cloud in suite.skip_on_clouds: + if suite_skip_cloud not in ["AzureCloud", "AzureChinaCloud", "AzureUSGovernment"]: + raise Exception(f"Invalid cloud {suite_skip_cloud} for in {suite.name}") @staticmethod def _load_test_suites(test_suites: str) -> List[TestSuiteInfo]: @@ -180,7 +191,7 @@ def _load_test_suite(description_file: Path) -> TestSuiteInfo: - "bvts/run_command.py" - "bvts/vm_access.py" images: "endorsed" - location: "eastuseaup" + locations: "AzureCloud:eastuseaup" owns_vm: true install_test_agent: true template: "bvts/template.py" @@ -195,8 +206,8 @@ def _load_test_suite(description_file: Path) -> TestSuiteInfo: * images - A string, or a list of strings, specifying the images on which the test suite must be executed. Each value can be the name of a single image (e.g."ubuntu_2004"), or the name of an image set (e.g. "endorsed"). The names for images and image sets are defined in WALinuxAgent/tests_e2e/tests_suites/images.yml. - * location - [Optional; string] If given, the test suite must be executed on that location. If not specified, - or set to an empty string, the test suite will be executed in the default location. This is useful + * locations - [Optional; string or list of strings] If given, the test suite must be executed on that cloud location(e.g. "AzureCloud:eastus2euap"). 
+ If not specified, or set to an empty string, the test suite will be executed in the default location. This is useful for test suites that exercise a feature that is enabled only in certain regions. * owns_vm - [Optional; boolean] By default all suites in a test run are executed on the same test VMs; if this value is set to True, new test VMs will be created and will be used exclusively for this test suite. @@ -206,6 +217,9 @@ def _load_test_suite(description_file: Path) -> TestSuiteInfo: * install_test_agent - [Optional; boolean] By default the setup process installs the test Agent on the test VMs; set this property to False to skip the installation. * template - [Optional; string] If given, the ARM template for the test VM is customized using the given Python module. + * skip_on_clouds - [Optional; string or list of strings] If given, the test suite will be skipped in the specified cloud(e.g. "AzureCloud"). + If not specified, the test suite will be executed in all the clouds that we use. This is useful + if you want to skip a test suite validation in a particular cloud when certain feature is not available in that cloud. 
""" test_suite: Dict[str, Any] = AgentTestLoader._load_file(description_file) @@ -234,14 +248,28 @@ def _load_test_suite(description_file: Path) -> TestSuiteInfo: else: test_suite_info.images = images - test_suite_info.location = test_suite.get("location") - if test_suite_info.location is None: - test_suite_info.location = "" + locations = test_suite.get("locations") + if locations is None: + test_suite_info.locations = [] + else: + if isinstance(locations, str): + test_suite_info.locations = [locations] + else: + test_suite_info.locations = locations test_suite_info.owns_vm = "owns_vm" in test_suite and test_suite["owns_vm"] test_suite_info.install_test_agent = "install_test_agent" not in test_suite or test_suite["install_test_agent"] test_suite_info.template = test_suite.get("template", "") + skip_on_clouds = test_suite.get("skip_on_clouds") + if skip_on_clouds is not None: + if isinstance(skip_on_clouds, str): + test_suite_info.skip_on_clouds = [skip_on_clouds] + else: + test_suite_info.skip_on_clouds = skip_on_clouds + else: + test_suite_info.skip_on_clouds = [] + return test_suite_info @staticmethod diff --git a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py index 82915dcfb0..5efdf54bf5 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py @@ -149,7 +149,11 @@ def create_environment_list(self) -> List[Dict[str, Any]]: runbook_images = self._get_runbook_images(loader) + skip_test_suites: List[str] = [] for suite_info in loader.test_suites: + if self.runbook.cloud in suite_info.skip_on_clouds: + skip_test_suites.append(suite_info.name) + continue if len(runbook_images) > 0: images_info: List[VmImageInfo] = runbook_images else: @@ -216,6 +220,9 @@ def create_environment(c_env_name: str) -> Dict[str, Any]: raise Exception("No VM images were found to execute the test suites.") log: logging.Logger = 
logging.getLogger("lisa") + if len(skip_test_suites) > 0: + log.info("") + log.info("Test suites skipped on %s:\n\n\t%s\n", self.runbook.cloud, '\n\t'.join(skip_test_suites)) log.info("") log.info("******** Waagent: Test Environments *****") log.info("") @@ -282,8 +289,9 @@ def _get_location(self, suite_info: TestSuiteInfo, image: VmImageInfo) -> str: return self.runbook.location # Then try the suite location, if any. - if suite_info.location != '': - return suite_info.location + for location in suite_info.locations: + if location.startswith(self.runbook.cloud + ":"): + return location.split(":")[1] # If the image has a location restriction, use any location where it is available. # However, if it is not available on any location, skip the image (return None) diff --git a/tests_e2e/test_suites/agent_update.yml b/tests_e2e/test_suites/agent_update.yml index 77a0144d57..865fa89ca5 100644 --- a/tests_e2e/test_suites/agent_update.yml +++ b/tests_e2e/test_suites/agent_update.yml @@ -2,5 +2,8 @@ name: "AgentUpdate" tests: - "agent_update/rsm_update.py" images: "endorsed" -location: "eastus2euap" -owns_vm: true \ No newline at end of file +locations: "AzureCloud:eastus2euap" +owns_vm: true +skip_on_clouds: + - "AzureChinaCloud" + - "AzureUSGovernment" \ No newline at end of file From b490692bb63fadb48d333e02c1ca115ebd257791 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Tue, 27 Jun 2023 07:38:19 -0700 Subject: [PATCH 034/240] Do not collect LISA logs by default (#2857) Co-authored-by: narrieta --- tests_e2e/pipeline/pipeline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_e2e/pipeline/pipeline.yml b/tests_e2e/pipeline/pipeline.yml index 468203cd22..dff8985da8 100644 --- a/tests_e2e/pipeline/pipeline.yml +++ b/tests_e2e/pipeline/pipeline.yml @@ -46,7 +46,7 @@ parameters: - name: collect_lisa_logs displayName: Collect LISA logs type: boolean - default: true + default: false - name: keep_environment displayName: Keep the test VMs (do not 
delete them) From faebcdbf57e585bcfe0a299d980ffa019d82a4b6 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 28 Jun 2023 11:04:14 -0700 Subject: [PATCH 035/240] Add check for noexec on Permission denied errors (#2859) * Add check for noexec on Permission denied errors * remove type annotation --------- Co-authored-by: narrieta --- azurelinuxagent/common/event.py | 1 + .../common/utils/extensionprocessutil.py | 54 +++++++++++++++++-- 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/azurelinuxagent/common/event.py b/azurelinuxagent/common/event.py index 1f903a9faa..4679608067 100644 --- a/azurelinuxagent/common/event.py +++ b/azurelinuxagent/common/event.py @@ -104,6 +104,7 @@ class WALAEventOperation: InitializeHostPlugin = "InitializeHostPlugin" Log = "Log" LogCollection = "LogCollection" + NoExec = "NoExec" OSInfo = "OSInfo" Partition = "Partition" PersistFirewallRules = "PersistFirewallRules" diff --git a/azurelinuxagent/common/utils/extensionprocessutil.py b/azurelinuxagent/common/utils/extensionprocessutil.py index 137f3aa2dd..c24ebf1946 100644 --- a/azurelinuxagent/common/utils/extensionprocessutil.py +++ b/azurelinuxagent/common/utils/extensionprocessutil.py @@ -22,7 +22,9 @@ import signal import time +from azurelinuxagent.common import conf from azurelinuxagent.common import logger +from azurelinuxagent.common.event import WALAEventOperation, add_event from azurelinuxagent.common.exception import ExtensionErrorCodes, ExtensionOperationError, ExtensionError from azurelinuxagent.common.future import ustr @@ -74,7 +76,7 @@ def handle_process_completion(process, command, timeout, stdout, stderr, error_c process_output = read_output(stdout, stderr) if timed_out: - if cpu_cgroup is not None:# Report CPUThrottledTime when timeout happens + if cpu_cgroup is not None: # Report CPUThrottledTime when timeout happens raise ExtensionError("Timeout({0});CPUThrottledTime({1}secs): {2}\n{3}".format(timeout, throttled_time, command, process_output), 
code=ExtensionErrorCodes.PluginHandlerScriptTimedout) @@ -82,12 +84,58 @@ def handle_process_completion(process, command, timeout, stdout, stderr, error_c code=ExtensionErrorCodes.PluginHandlerScriptTimedout) if return_code != 0: - raise ExtensionOperationError("Non-zero exit code: {0}, {1}\n{2}".format(return_code, command, process_output), - code=error_code, exit_code=return_code) + noexec_warning = "" + if return_code == 126: # Permission denied + noexec_path = _check_noexec() + if noexec_path is not None: + noexec_warning = "\nWARNING: {0} is mounted with the noexec flag, which can prevent execution of VM Extensions.".format(noexec_path) + raise ExtensionOperationError( + "Non-zero exit code: {0}, {1}{2}\n{3}".format(return_code, command, noexec_warning, process_output), + code=error_code, + exit_code=return_code) return process_output +# +# Collect a sample of errors while checking for the noexec flag. Consider removing this telemetry after a few releases. +# +_COLLECT_NOEXEC_ERRORS = True + + +def _check_noexec(): + """ + Check if /var is mounted with the noexec flag. + """ + try: + agent_dir = conf.get_lib_dir() + with open('/proc/mounts', 'r') as f: + while True: + line = f.readline() + if line == "": # EOF + break + # The mount point is on the second column, and the flags are on the fourth. e.g. + # + # # grep /var /proc/mounts + # /dev/mapper/rootvg-varlv /var xfs rw,seclabel,noexec,relatime,attr2,inode64,logbufs=8,logbsize=32k,noquota 0 0 + # + columns = line.split() + mount_point = columns[1] + flags = columns[3] + if agent_dir.startswith(mount_point) and "noexec" in flags: + message = "The noexec flag is set on {0}. 
This can prevent extensions from executing.".format(mount_point) + logger.warn(message) + add_event(op=WALAEventOperation.NoExec, is_success=False, message=message) + return mount_point + except Exception as e: + message = "Error while checking the noexec flag: {0}".format(e) + logger.warn(message) + if _COLLECT_NOEXEC_ERRORS: + _COLLECT_NOEXEC_ERRORS = False + add_event(op=WALAEventOperation.NoExec, is_success=False, log_event=False, message="Error while checking the noexec flag: {0}".format(e)) + return None + + SAS_TOKEN_RE = re.compile(r'(https://\S+\?)((sv|st|se|sr|sp|sip|spr|sig)=\S+)+', flags=re.IGNORECASE) From 8bd63636b28a19ca20520332923f8cf8606e4b6d Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Thu, 29 Jun 2023 15:37:00 -0700 Subject: [PATCH 036/240] Wait for log message in AgentNotProvisioned test (#2861) * Wait for log message in AgentNotProvisioned test * hardcoded value --------- Co-authored-by: narrieta --- .../agent_not_provisioned.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tests_e2e/tests/agent_not_provisioned/agent_not_provisioned.py b/tests_e2e/tests/agent_not_provisioned/agent_not_provisioned.py index 45ff903c39..490fba3b8d 100755 --- a/tests_e2e/tests/agent_not_provisioned/agent_not_provisioned.py +++ b/tests_e2e/tests/agent_not_provisioned/agent_not_provisioned.py @@ -44,8 +44,19 @@ def run(self): log.info("Checking the Agent's log to verify that it is disabled.") try: output = ssh_client.run_command(""" - grep -E 'WARNING.*Daemon.*Disabling guest agent in accordance with ovf-env.xml' /var/log/waagent.log || \ - grep -E 'WARNING.*Daemon.*Disabling the guest agent by sleeping forever; to re-enable, remove /var/lib/waagent/disable_agent and restart' /var/log/waagent.log + # We need to wait for the agent to start and hit the disable code, give it a few minutes + n=18 + for i in $(seq $n); do + grep -E 'WARNING.*Daemon.*Disabling guest agent in accordance with ovf-env.xml' /var/log/waagent.log || 
\ + grep -E 'WARNING.*Daemon.*Disabling the guest agent by sleeping forever; to re-enable, remove /var/lib/waagent/disable_agent and restart' /var/log/waagent.log + if [[ $? == 0 ]]; then + exit 0 + fi + echo "Did not find the expected message in the agent's log, retrying after sleeping for a few seconds (attempt $i/$n)..." + sleep 10 + done + echo "Did not find the expected message in the agent's log, giving up." + exit 1 """) log.info("The Agent is disabled, log message: [%s]", output.rstrip()) except CommandError as e: From 077f66d1a0d168769cc565f13ae035446f3d7e9f Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Fri, 30 Jun 2023 10:41:28 -0700 Subject: [PATCH 037/240] Always collect logs on end-to-end tests (#2863) * Always collect logs * cleanup --------- Co-authored-by: narrieta --- tests_e2e/pipeline/pipeline.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests_e2e/pipeline/pipeline.yml b/tests_e2e/pipeline/pipeline.yml index dff8985da8..2cf0979575 100644 --- a/tests_e2e/pipeline/pipeline.yml +++ b/tests_e2e/pipeline/pipeline.yml @@ -46,7 +46,7 @@ parameters: - name: collect_lisa_logs displayName: Collect LISA logs type: boolean - default: false + default: true - name: keep_environment displayName: Keep the test VMs (do not delete them) @@ -124,6 +124,8 @@ jobs: - bash: $(Build.SourcesDirectory)/tests_e2e/pipeline/scripts/collect_artifacts.sh displayName: "Collect test artifacts" + # Collect artifacts even if the previous step is cancelled (e.g. 
timeout) + condition: always() env: COLLECT_LISA_LOGS: ${{ parameters.collect_lisa_logs }} From 2ddd73617c4958e6a8fcda77c97220e036d7d1d3 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Fri, 30 Jun 2023 12:30:38 -0700 Subject: [PATCH 038/240] agent publish scenario (#2847) * agent publish * remove vm size * address comments * deamom version fallback * daemon versionfix * address comments * fix pylint error * address comment * added error handling --- azurelinuxagent/common/version.py | 16 ++- azurelinuxagent/ga/agent_update_handler.py | 15 +-- tests/common/test_version.py | 11 +- tests/ga/test_agent_update_handler.py | 10 +- tests/ga/test_extension.py | 3 +- tests/ga/test_report_status.py | 101 ++++++++-------- tests/ga/test_update.py | 18 ++- .../orchestrator/lib/agent_test_loader.py | 14 ++- .../lib/agent_test_suite_combinator.py | 2 +- .../orchestrator/scripts/update-waagent-conf | 29 +++-- tests_e2e/test_suites/agent_publish.yml | 11 ++ tests_e2e/test_suites/agent_update.yml | 2 +- tests_e2e/test_suites/images.yml | 5 - .../tests/agent_publish/agent_publish.py | 100 ++++++++++++++++ tests_e2e/tests/agent_update/__init__.py | 0 tests_e2e/tests/agent_update/rsm_update.py | 61 ++++++---- .../extensions_disabled.py | 2 +- .../scripts/agent_publish-check_update.py | 112 ++++++++++++++++++ ...nfig => agent_update-modify_agent_version} | 12 +- ...ate-verify_agent_reported_update_status.py | 59 +++++++++ ...te-verify_versioning_supported_feature.py} | 0 ...ate.py => agent_update-wait_for_rsm_gs.py} | 0 22 files changed, 456 insertions(+), 127 deletions(-) create mode 100644 tests_e2e/test_suites/agent_publish.yml create mode 100644 tests_e2e/tests/agent_publish/agent_publish.py delete mode 100644 tests_e2e/tests/agent_update/__init__.py create mode 100755 tests_e2e/tests/scripts/agent_publish-check_update.py rename tests_e2e/tests/scripts/{modify-agent-version-config => agent_update-modify_agent_version} (75%) create 
mode 100755 tests_e2e/tests/scripts/agent_update-verify_agent_reported_update_status.py rename tests_e2e/tests/scripts/{verify_agent_supported_feature.py => agent_update-verify_versioning_supported_feature.py} (100%) rename tests_e2e/tests/scripts/{wait_for_rsm_goal_state.py => agent_update-wait_for_rsm_gs.py} (100%) diff --git a/azurelinuxagent/common/version.py b/azurelinuxagent/common/version.py index ff9c903b93..08c01b5ceb 100644 --- a/azurelinuxagent/common/version.py +++ b/azurelinuxagent/common/version.py @@ -21,6 +21,7 @@ import sys import azurelinuxagent.common.conf as conf +from azurelinuxagent.common import logger import azurelinuxagent.common.utils.shellutil as shellutil from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.future import ustr, get_linux_distribution @@ -48,12 +49,21 @@ def get_daemon_version(): The value indicates the version of the daemon that started the current agent process or, if the current process is the daemon, the version of the current process. If the variable is not set (because the agent is < 2.2.53, or the process was not started by the daemon and - the process is not the daemon itself) the function returns "0.0.0.0" + the process is not the daemon itself) the function returns version of agent which started by the python """ if __DAEMON_VERSION_ENV_VARIABLE in os.environ: return FlexibleVersion(os.environ[__DAEMON_VERSION_ENV_VARIABLE]) - return FlexibleVersion("0.0.0.0") - + else: + # The agent process which execute the extensions can have different version(after upgrades) and importing version from that process may provide wrong version for daemon. + # so launching new process with sys.executable python provides the correct version for daemon which preinstalled in the image. 
+ try: + cmd = ["{0}".format(sys.executable), "-c", "\'from azurelinuxagent.common.version import AGENT_VERSION; print(AGENT_VERSION)\'"] + version = shellutil.run_command(cmd) + return FlexibleVersion(version) + except Exception as e: # Make the best effort to get the daemon version, but don't fail the update if we can't. So default to 2.2.53 as env variable is not set < 2.2.53 + logger.warn("Failed to get the daemon version: {0}", ustr(e)) + return FlexibleVersion("2.2.53") + def get_f5_platform(): """ diff --git a/azurelinuxagent/ga/agent_update_handler.py b/azurelinuxagent/ga/agent_update_handler.py index 9200075055..8de6cfd81e 100644 --- a/azurelinuxagent/ga/agent_update_handler.py +++ b/azurelinuxagent/ga/agent_update_handler.py @@ -9,7 +9,7 @@ from azurelinuxagent.common.future import ustr from azurelinuxagent.common.logger import LogLevel from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateSource -from azurelinuxagent.common.protocol.restapi import VERSION_0, VMAgentUpdateStatuses, VMAgentUpdateStatus +from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatuses, VMAgentUpdateStatus from azurelinuxagent.common.utils import fileutil, textutil from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.version import get_daemon_version, CURRENT_VERSION, AGENT_NAME, AGENT_DIR_PATTERN @@ -231,7 +231,7 @@ def __proceed_with_update(self, requested_version): # In case of an upgrade, we don't need to exclude anything as the daemon will automatically # start the next available highest version which would be the target version prefix = "upgrade" - raise AgentUpgradeExitException("Agent update found, Exiting current process to {0} to the new Agent version {1}".format(prefix, requested_version)) + raise AgentUpgradeExitException("Agent update found, exiting current process to {0} to the new Agent version {1}".format(prefix, requested_version)) @staticmethod def 
__get_available_agents_on_disk(): @@ -243,15 +243,6 @@ def __get_all_agents_on_disk(): path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME)) return [GuestAgent.from_installed_agent(path=agent_dir) for agent_dir in glob.iglob(path) if os.path.isdir(agent_dir)] - @staticmethod - def __get_daemon_version_for_update(): - daemon_version = get_daemon_version() - if daemon_version != FlexibleVersion(VERSION_0): - return daemon_version - # We return 0.0.0.0 if daemon version is not specified. In that case, - # use the min version as 2.2.53 as we started setting the daemon version starting 2.2.53. - return FlexibleVersion("2.2.53") - @staticmethod def __log_event(level, msg, success=True): if level == LogLevel.INFO: @@ -299,7 +290,7 @@ def run(self, goal_state): self.__log_event(LogLevel.WARNING, warn_msg) try: - daemon_version = self.__get_daemon_version_for_update() + daemon_version = get_daemon_version() if requested_version < daemon_version: # Don't process the update if the requested version is less than daemon version, # as historically we don't support downgrades below daemon versions. So daemon will not pickup that requested version rather start with diff --git a/tests/common/test_version.py b/tests/common/test_version.py index ba1fb76720..625c0bcfc3 100644 --- a/tests/common/test_version.py +++ b/tests/common/test_version.py @@ -136,11 +136,12 @@ def test_get_daemon_version_should_return_the_version_that_was_previously_set(se finally: os.environ.pop(DAEMON_VERSION_ENV_VARIABLE) - def test_get_daemon_version_should_return_zero_when_the_version_has_not_been_set(self): - self.assertEqual( - FlexibleVersion("0.0.0.0"), get_daemon_version(), - "The daemon version should not be defined. 
Environment={0}".format(os.environ) - ) + def test_get_daemon_version_from_fallback_when_the_version_has_not_been_set(self): + with patch("azurelinuxagent.common.utils.shellutil.run_command", return_value=FlexibleVersion("2.2.53")): + self.assertEqual( + FlexibleVersion("2.2.53"), get_daemon_version(), + "The daemon version should not be defined. Environment={0}".format(os.environ) + ) class TestCurrentAgentName(AgentTestCase): diff --git a/tests/ga/test_agent_update_handler.py b/tests/ga/test_agent_update_handler.py index 9e01d0b6c2..49b0dc7628 100644 --- a/tests/ga/test_agent_update_handler.py +++ b/tests/ga/test_agent_update_handler.py @@ -106,7 +106,7 @@ def test_it_should_update_to_largest_version_if_ga_versioning_disabled(self): agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version="99999.0.0.0") self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"]) - self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason)) + self.assertIn("Agent update found, exiting current process", ustr(context.exception.reason)) def test_it_should_update_to_largest_version_if_time_window_not_elapsed(self): self.prepare_agents(count=1) @@ -142,7 +142,7 @@ def test_it_should_update_to_largest_version_if_time_window_elapsed(self): agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version="99999.0.0.0") self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"]) - self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason)) + self.assertIn("Agent update found, exiting current process", ustr(context.exception.reason)) def test_it_should_not_agent_update_if_last_attempted_update_time_not_elapsed(self): 
self.prepare_agents(count=1) @@ -172,7 +172,7 @@ def test_it_should_update_to_largest_version_if_requested_version_not_available( agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version="99999.0.0.0") self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"]) - self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason)) + self.assertIn("Agent update found, exiting current process", ustr(context.exception.reason)) def test_it_should_not_agent_update_if_requested_version_is_same_as_current_version(self): data_file = DATA_FILE.copy() @@ -207,7 +207,7 @@ def test_it_should_upgrade_agent_if_requested_version_is_available_greater_than_ agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) self.__assert_agent_requested_version_in_goal_state(mock_telemetry, version="9.9.9.10") self.__assert_agent_directories_exist_and_others_dont_exist(versions=["9.9.9.10", str(CURRENT_VERSION)]) - self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason)) + self.assertIn("Agent update found, exiting current process", ustr(context.exception.reason)) def test_it_should_downgrade_agent_if_requested_version_is_available_less_than_current_version(self): data_file = DATA_FILE.copy() @@ -228,7 +228,7 @@ def test_it_should_downgrade_agent_if_requested_version_is_available_less_than_c self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version=downgraded_version) self.__assert_agent_directories_exist_and_others_dont_exist( versions=[downgraded_version, str(CURRENT_VERSION)]) - self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason)) + self.assertIn("Agent update found, exiting current process", ustr(context.exception.reason)) def test_it_should_not_downgrade_below_daemon_version(self): data_file = 
DATA_FILE.copy() diff --git a/tests/ga/test_extension.py b/tests/ga/test_extension.py index 5309b80566..76dde881d2 100644 --- a/tests/ga/test_extension.py +++ b/tests/ga/test_extension.py @@ -3209,7 +3209,8 @@ def tearDown(self): AgentTestCase.tearDown(self) @patch('time.gmtime', MagicMock(return_value=time.gmtime(0))) - def test_ext_handler_reporting_status_file(self): + @patch("azurelinuxagent.common.version.get_daemon_version", return_value=FlexibleVersion("0.0.0.0")) + def test_ext_handler_reporting_status_file(self, _): with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: def mock_http_put(url, *args, **_): diff --git a/tests/ga/test_report_status.py b/tests/ga/test_report_status.py index 8f4ce58f4d..1dcfe33edc 100644 --- a/tests/ga/test_report_status.py +++ b/tests/ga/test_report_status.py @@ -3,6 +3,7 @@ import json +from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.ga.agent_update_handler import get_agent_update_handler from azurelinuxagent.ga.exthandlers import ExtHandlersHandler from azurelinuxagent.ga.update import get_update_handler @@ -35,67 +36,69 @@ def on_new_iteration(iteration): exthandlers_handler = ExtHandlersHandler(protocol) with patch.object(exthandlers_handler, "run", wraps=exthandlers_handler.run) as exthandlers_handler_run: with mock_update_handler(protocol, iterations=2, on_new_iteration=on_new_iteration, exthandlers_handler=exthandlers_handler) as update_handler: - update_handler.run(debug=True) - - self.assertEqual(1, exthandlers_handler_run.call_count, "Extensions should have been executed only once.") - self.assertEqual(2, len(protocol.mock_wire_data.status_blobs), "Status should have been reported for the 2 iterations.") - - # - # Verify that we reported status for the extension in the test data - # - first_status = json.loads(protocol.mock_wire_data.status_blobs[0]) - - handler_aggregate_status = first_status.get('aggregateStatus', {}).get("handlerAggregateStatus") - 
self.assertIsNotNone(handler_aggregate_status, "Could not find the handlerAggregateStatus") - self.assertEqual(1, len(handler_aggregate_status), "Expected 1 extension status. Got: {0}".format(handler_aggregate_status)) - extension_status = handler_aggregate_status[0] - self.assertEqual("OSTCExtensions.ExampleHandlerLinux", extension_status["handlerName"], "The status does not correspond to the test data") - - # - # Verify that we reported the same status (minus timestamps) in the 2 iterations - # - second_status = json.loads(protocol.mock_wire_data.status_blobs[1]) - - def remove_timestamps(x): - if isinstance(x, list): - for v in x: - remove_timestamps(v) - elif isinstance(x, dict): - for k, v in x.items(): - if k == "timestampUTC": - x[k] = '' - else: + with patch("azurelinuxagent.common.version.get_daemon_version", return_value=FlexibleVersion("2.2.53")): + update_handler.run(debug=True) + + self.assertEqual(1, exthandlers_handler_run.call_count, "Extensions should have been executed only once.") + self.assertEqual(2, len(protocol.mock_wire_data.status_blobs), "Status should have been reported for the 2 iterations.") + + # + # Verify that we reported status for the extension in the test data + # + first_status = json.loads(protocol.mock_wire_data.status_blobs[0]) + + handler_aggregate_status = first_status.get('aggregateStatus', {}).get("handlerAggregateStatus") + self.assertIsNotNone(handler_aggregate_status, "Could not find the handlerAggregateStatus") + self.assertEqual(1, len(handler_aggregate_status), "Expected 1 extension status. 
Got: {0}".format(handler_aggregate_status)) + extension_status = handler_aggregate_status[0] + self.assertEqual("OSTCExtensions.ExampleHandlerLinux", extension_status["handlerName"], "The status does not correspond to the test data") + + # + # Verify that we reported the same status (minus timestamps) in the 2 iterations + # + second_status = json.loads(protocol.mock_wire_data.status_blobs[1]) + + def remove_timestamps(x): + if isinstance(x, list): + for v in x: remove_timestamps(v) + elif isinstance(x, dict): + for k, v in x.items(): + if k == "timestampUTC": + x[k] = '' + else: + remove_timestamps(v) - remove_timestamps(first_status) - remove_timestamps(second_status) + remove_timestamps(first_status) + remove_timestamps(second_status) - self.assertEqual(first_status, second_status) + self.assertEqual(first_status, second_status) def test_report_status_should_log_errors_only_once_per_goal_state(self): with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=False): # skip agent update with patch("azurelinuxagent.ga.update.logger.warn") as logger_warn: - update_handler = get_update_handler() - update_handler._goal_state = protocol.get_goal_state() # these tests skip the initialization of the goal state. so do that here - exthandlers_handler = ExtHandlersHandler(protocol) - agent_update_handler = get_agent_update_handler(protocol) - update_handler._report_status(exthandlers_handler, agent_update_handler) - self.assertEqual(0, logger_warn.call_count, "UpdateHandler._report_status() should not report WARNINGS when there are no errors") + with patch("azurelinuxagent.common.version.get_daemon_version", return_value=FlexibleVersion("2.2.53")): + update_handler = get_update_handler() + update_handler._goal_state = protocol.get_goal_state() # these tests skip the initialization of the goal state. 
so do that here + exthandlers_handler = ExtHandlersHandler(protocol) + agent_update_handler = get_agent_update_handler(protocol) + update_handler._report_status(exthandlers_handler, agent_update_handler) + self.assertEqual(0, logger_warn.call_count, "UpdateHandler._report_status() should not report WARNINGS when there are no errors") - with patch("azurelinuxagent.ga.update.ExtensionsSummary.__init__", side_effect=Exception("TEST EXCEPTION")): # simulate an error during _report_status() - get_warnings = lambda: [args[0] for args, _ in logger_warn.call_args_list if "TEST EXCEPTION" in args[0]] + with patch("azurelinuxagent.ga.update.ExtensionsSummary.__init__", side_effect=Exception("TEST EXCEPTION")): # simulate an error during _report_status() + get_warnings = lambda: [args[0] for args, _ in logger_warn.call_args_list if "TEST EXCEPTION" in args[0]] - update_handler._report_status(exthandlers_handler, agent_update_handler) - update_handler._report_status(exthandlers_handler, agent_update_handler) - update_handler._report_status(exthandlers_handler, agent_update_handler) + update_handler._report_status(exthandlers_handler, agent_update_handler) + update_handler._report_status(exthandlers_handler, agent_update_handler) + update_handler._report_status(exthandlers_handler, agent_update_handler) - self.assertEqual(1, len(get_warnings()), "UpdateHandler._report_status() should report only 1 WARNING when there are multiple errors within the same goal state") + self.assertEqual(1, len(get_warnings()), "UpdateHandler._report_status() should report only 1 WARNING when there are multiple errors within the same goal state") - exthandlers_handler.protocol.mock_wire_data.set_incarnation(999) - update_handler._try_update_goal_state(exthandlers_handler.protocol) - update_handler._report_status(exthandlers_handler, agent_update_handler) - self.assertEqual(2, len(get_warnings()), "UpdateHandler._report_status() should continue reporting errors after a new goal state") + 
exthandlers_handler.protocol.mock_wire_data.set_incarnation(999) + update_handler._try_update_goal_state(exthandlers_handler.protocol) + update_handler._report_status(exthandlers_handler, agent_update_handler) + self.assertEqual(2, len(get_warnings()), "UpdateHandler._report_status() should continue reporting errors after a new goal state") def test_update_handler_should_add_fast_track_to_supported_features_when_it_is_supported(self): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 5b4babfd14..e342fec7d3 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -43,7 +43,7 @@ from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.utils.networkutil import FirewallCmdDirectCommands, AddFirewallRules from azurelinuxagent.common.version import AGENT_PKG_GLOB, AGENT_DIR_GLOB, AGENT_NAME, AGENT_DIR_PATTERN, \ - AGENT_VERSION, CURRENT_AGENT, CURRENT_VERSION + AGENT_VERSION, CURRENT_AGENT, CURRENT_VERSION, set_daemon_version, __DAEMON_VERSION_ENV_VARIABLE as DAEMON_VERSION_ENV_VARIABLE from azurelinuxagent.ga.exthandlers import ExtHandlersHandler, ExtHandlerInstance, HandlerEnvironment, ExtensionStatusValue from azurelinuxagent.ga.update import \ get_update_handler, ORPHAN_POLL_INTERVAL, AGENT_PARTITION_FILE, ORPHAN_WAIT_INTERVAL, \ @@ -135,11 +135,16 @@ def setUpClass(cls): source = os.path.join(data_dir, "ga", sample_agent_zip) target = os.path.join(UpdateTestCase._agent_zip_dir, test_agent_zip) shutil.copyfile(source, target) + # The update_handler inherently calls agent update handler, which in turn calls daemon version. So now daemon version logic has fallback if env variable is not set. + # The fallback calls popen which is not mocked. So we set the env variable to avoid the fallback. + # This will not change any of the test validations. At the ene of all update test validations, we reset the env variable. 
+ set_daemon_version("1.2.3.4") @classmethod def tearDownClass(cls): super(UpdateTestCase, cls).tearDownClass() shutil.rmtree(UpdateTestCase._test_suite_tmp_dir) + os.environ.pop(DAEMON_VERSION_ENV_VARIABLE) @staticmethod def _get_agent_pkgs(in_dir=None): @@ -328,7 +333,6 @@ def setUp(self): self.update_handler._goal_state = Mock() self.update_handler._goal_state.extensions_goal_state = Mock() self.update_handler._goal_state.extensions_goal_state.source = "Fabric" - # Since ProtocolUtil is a singleton per thread, we need to clear it to ensure that the test cases do not reuse # a previous state clear_singleton_instances(ProtocolUtil) @@ -1474,7 +1478,7 @@ def __assert_exit_code_successful(self, update_handler): def __assert_upgrade_telemetry_emitted(self, mock_telemetry, upgrade=True, version="9.9.9.10"): upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if - 'Agent update found, Exiting current process to {0} to the new Agent version {1}'.format( + 'Agent update found, exiting current process to {0} to the new Agent version {1}'.format( "upgrade" if upgrade else "downgrade", version) in kwarg['message'] and kwarg[ 'op'] == WALAEventOperation.AgentUpgrade] self.assertEqual(1, len(upgrade_event_msgs), @@ -2293,9 +2297,11 @@ def test_it_should_clear_the_timestamp_for_the_most_recent_fast_track_goal_state raise Exception("The test setup did not save the Fast Track state") with patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=False): - with mock_wire_protocol(data_file) as protocol: - with mock_update_handler(protocol) as update_handler: - update_handler.run() + with patch("azurelinuxagent.common.version.get_daemon_version", + return_value=FlexibleVersion("2.2.53")): + with mock_wire_protocol(data_file) as protocol: + with mock_update_handler(protocol) as update_handler: + update_handler.run() self.assertEqual(HostPluginProtocol.get_fast_track_timestamp(), timeutil.create_timestamp(datetime.min), "The Fast Track 
state was not cleared") diff --git a/tests_e2e/orchestrator/lib/agent_test_loader.py b/tests_e2e/orchestrator/lib/agent_test_loader.py index 31c6e52a40..fbd6cfe8f8 100644 --- a/tests_e2e/orchestrator/lib/agent_test_loader.py +++ b/tests_e2e/orchestrator/lib/agent_test_loader.py @@ -130,12 +130,19 @@ def _validate(self): """ Performs some basic validations on the data loaded from the YAML description files """ + def _parse_image(image: str) -> str: + """ + Parses a reference to an image or image set and returns the name of the image or image set + """ + match = AgentTestLoader.RANDOM_IMAGES_RE.match(image) + if match is not None: + return match.group('image_set') + return image + for suite in self.test_suites: # Validate that the images the suite must run on are in images.yml for image in suite.images: - match = AgentTestLoader.RANDOM_IMAGES_RE.match(image) - if match is not None: - image = match.group('image_set') + image = _parse_image(image) if image not in self.images: raise Exception(f"Invalid image reference in test suite {suite.name}: Can't find {image} in images.yml") @@ -146,6 +153,7 @@ def _validate(self): else: continue for suite_image in suite.images: + suite_image = _parse_image(suite_image) for image in self.images[suite_image]: # If the image has a location restriction, validate that it is available on the location the suite must run on if image.locations: diff --git a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py index 5efdf54bf5..be72cc4c70 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py @@ -273,7 +273,7 @@ def _get_test_suite_images(suite: TestSuiteInfo, loader: AgentTestLoader) -> Lis count = 1 matching_images = loader.images[match.group('image_set')].copy() random.shuffle(matching_images) - image_list = matching_images[0:count] + image_list = matching_images[0:int(count)] for i in 
image_list: unique[i.urn] = i return [v for k, v in unique.items()] diff --git a/tests_e2e/orchestrator/scripts/update-waagent-conf b/tests_e2e/orchestrator/scripts/update-waagent-conf index 13cfd45401..43dadeee27 100755 --- a/tests_e2e/orchestrator/scripts/update-waagent-conf +++ b/tests_e2e/orchestrator/scripts/update-waagent-conf @@ -18,24 +18,31 @@ # # -# Updates waagent.conf with the specified setting and value and restarts the Agent. +# Updates waagent.conf with the specified setting and value(allows multiple) and restarts the Agent. # set -euo pipefail -if [[ $# -ne 2 ]]; then - echo "Usage: update-waagent-conf " +if [[ $# -lt 1 ]]; then + echo "Usage: update-waagent-conf []" exit 1 fi -name=$1 -value=$2 - PYTHON=$(get-agent-python) waagent_conf=$($PYTHON -c 'from azurelinuxagent.common.osutil import get_osutil; print(get_osutil().agent_conf_file_path)') -echo "Setting $name=$value in $waagent_conf" -sed -i -E "/^$name=/d" "$waagent_conf" -sed -i -E "\$a $name=$value" "$waagent_conf" -updated=$(grep "$name" "$waagent_conf") -echo "Updated value: $updated" +for setting_value in "$@"; do + IFS='=' read -r -a setting_value_array <<< "$setting_value" + name=${setting_value_array[0]} + value=${setting_value_array[1]} + + if [[ -z "$name" || -z "$value" ]]; then + echo "Invalid setting=value: $setting_value" + exit 1 + fi + echo "Setting $name=$value in $waagent_conf" + sed -i -E "/^$name=/d" "$waagent_conf" + sed -i -E "\$a $name=$value" "$waagent_conf" + updated=$(grep "$name" "$waagent_conf") + echo "Updated value: $updated" +done agent-service restart \ No newline at end of file diff --git a/tests_e2e/test_suites/agent_publish.yml b/tests_e2e/test_suites/agent_publish.yml new file mode 100644 index 0000000000..9b855f4ce4 --- /dev/null +++ b/tests_e2e/test_suites/agent_publish.yml @@ -0,0 +1,11 @@ +# +# This test is used to verify that the agent will be updated after publishing a new version to the agent update channel. 
+# +name: "AgentPublish" +tests: + - "agent_publish/agent_publish.py" +images: + - "random(endorsed, 10)" + - "random(endorsed-arm64, 2)" +locations: "AzureCloud:centraluseuap" +owns_vm: true \ No newline at end of file diff --git a/tests_e2e/test_suites/agent_update.yml b/tests_e2e/test_suites/agent_update.yml index 865fa89ca5..7ef477e00b 100644 --- a/tests_e2e/test_suites/agent_update.yml +++ b/tests_e2e/test_suites/agent_update.yml @@ -1,7 +1,7 @@ name: "AgentUpdate" tests: - "agent_update/rsm_update.py" -images: "endorsed" +images: "random(endorsed, 10)" locations: "AzureCloud:eastus2euap" owns_vm: true skip_on_clouds: diff --git a/tests_e2e/test_suites/images.yml b/tests_e2e/test_suites/images.yml index a19105710b..5440486c25 100644 --- a/tests_e2e/test_suites/images.yml +++ b/tests_e2e/test_suites/images.yml @@ -91,8 +91,6 @@ images: locations: AzureChinaCloud: [] AzureUSGovernment: [] - vm_sizes: - - "Standard_D2pls_v5" mariner_1: urn: "microsoftcblmariner cbl-mariner cbl-mariner-1 latest" locations: @@ -101,11 +99,8 @@ images: mariner_2_arm64: urn: "microsoftcblmariner cbl-mariner cbl-mariner-2-arm64 latest" locations: - AzureCloud: ["eastus"] AzureChinaCloud: [] AzureUSGovernment: [] - vm_sizes: - - "Standard_D2pls_v5" rocky_9: urn: "erockyenterprisesoftwarefoundationinc1653071250513 rockylinux-9 rockylinux-9 latest" locations: diff --git a/tests_e2e/tests/agent_publish/agent_publish.py b/tests_e2e/tests/agent_publish/agent_publish.py new file mode 100644 index 0000000000..d476414414 --- /dev/null +++ b/tests_e2e/tests/agent_publish/agent_publish.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import uuid +from typing import Any, Dict, List + +from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test_context import AgentTestContext +from tests_e2e.tests.lib.identifiers import VmExtensionIds, VmExtensionIdentifier +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.ssh_client import SshClient +from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient + + +class AgentPublishTest(AgentTest): + """ + This script verifies if the agent update performed in the vm. + """ + + def __init__(self, context: AgentTestContext): + super().__init__(context) + self._ssh_client: SshClient = self._context.create_ssh_client() + + def run(self): + """ + we run the scenario in the following steps: + 1. Print the current agent version before the update + 2. Prepare the agent for the update + 3. Check for agent update from the log + 4. Print the agent version after the update + 5. 
Ensure CSE is working + """ + self._get_agent_info() + self._prepare_agent() + self._check_update() + self._get_agent_info() + self._check_cse() + + def _get_agent_info(self) -> None: + stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) + log.info('Agent info \n%s', stdout) + + def _prepare_agent(self) -> None: + log.info("Modifying agent update related config flags") + output = self._ssh_client.run_command("update-waagent-conf GAUpdates.Enabled=y AutoUpdate.GAFamily=Test", use_sudo=True) + log.info('Updated agent-update related config flags \n%s', output) + + def _check_update(self) -> None: + log.info("Verifying for agent update status") + output = self._ssh_client.run_command("agent_publish-check_update.py") + log.info('Checked the agent update \n%s', output) + + def _check_cse(self) -> None: + custom_script_2_1 = VirtualMachineExtensionClient( + self._context.vm, + VmExtensionIdentifier(VmExtensionIds.CustomScript.publisher, VmExtensionIds.CustomScript.type, "2.1"), + resource_name="CustomScript") + + log.info("Installing %s", custom_script_2_1) + message = f"Hello {uuid.uuid4()}!" 
+ custom_script_2_1.enable( + settings={ + 'commandToExecute': f"echo \'{message}\'" + }, + auto_upgrade_minor_version=False + ) + custom_script_2_1.assert_instance_view(expected_version="2.1", expected_message=message) + + def get_ignore_error_rules(self) -> List[Dict[str, Any]]: + ignore_rules = [ + # + # This is expected as latest version can be the less than test version + # + # WARNING ExtHandler ExtHandler Agent WALinuxAgent-9.9.9.9 is permanently blacklisted + # + { + 'message': r"Agent WALinuxAgent-9.9.9.9 is permanently blacklisted" + } + + ] + return ignore_rules + + +if __name__ == "__main__": + AgentPublishTest.run_from_command_line() diff --git a/tests_e2e/tests/agent_update/__init__.py b/tests_e2e/tests/agent_update/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests_e2e/tests/agent_update/rsm_update.py b/tests_e2e/tests/agent_update/rsm_update.py index 0493efcad1..a6a41ec3d0 100644 --- a/tests_e2e/tests/agent_update/rsm_update.py +++ b/tests_e2e/tests/agent_update/rsm_update.py @@ -21,7 +21,7 @@ # BVT for the agent update scenario # # The test verifies agent update for rsm workflow. This test covers three scenarios downgrade, upgrade and no update. - # For each scenario, we initiate the rsm request with target version and then verify agent updated to that target version. +# For each scenario, we initiate the rsm request with target version and then verify agent updated to that target version. 
# import json from typing import List, Dict, Any @@ -70,59 +70,71 @@ def run(self) -> None: log.info("*******Verifying the Agent Downgrade scenario*******") stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) - log.info("Current agent version running on the vm is \n%s", stdout) + log.info("Current agent version running on the vm before update is \n%s", stdout) downgrade_version: str = "1.3.0.0" log.info("Attempting downgrade version %s", downgrade_version) self._request_rsm_update(downgrade_version) self._check_rsm_gs(downgrade_version) self._prepare_agent() - # Verify downgrade scenario self._verify_guest_agent_update(downgrade_version) + self._verify_agent_reported_update_status(downgrade_version) + # Verify upgrade scenario log.info("*******Verifying the Agent Upgrade scenario*******") stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) - log.info("Current agent version running on the vm is \n%s", stdout) + log.info("Current agent version running on the vm before update is \n%s", stdout) upgrade_version: str = "1.3.1.0" log.info("Attempting upgrade version %s", upgrade_version) self._request_rsm_update(upgrade_version) self._check_rsm_gs(upgrade_version) self._verify_guest_agent_update(upgrade_version) + self._verify_agent_reported_update_status(upgrade_version) # verify no version update. 
There is bug in CRP and will enable once it's fixed log.info("*******Verifying the no version update scenario*******") stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) - log.info("Current agent version running on the vm is \n%s", stdout) + log.info("Current agent version running on the vm before update is \n%s", stdout) version: str = "1.3.1.0" log.info("Attempting update version same as current version %s", upgrade_version) self._request_rsm_update(version) self._check_rsm_gs(version) self._verify_guest_agent_update(version) - - # verify requested version below daemon version - log.info("*******Verifying requested version below daemon version scenario*******") - stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) - log.info("Current agent version running on the vm before update \n%s", stdout) - version: str = "0.5.0" - log.info("Attempting requested version %s", version) - self._request_rsm_update(version) - self._check_rsm_gs(version) - self._verify_no_guest_agent_update(stdout) + self._verify_agent_reported_update_status(version) + + # disabled until the new daemon version logic is released in test versions + # # verify requested version below daemon version + # log.info("*******Verifying requested version below daemon version scenario*******") + # # changing back to 1.3.1.0 from 1.0.0.0 as there is no pkg below than 1.0.0.0 available in PIR, Otherwise we will get pkg not found error + # self._prepare_agent("1.3.0.1", update_config=False) + # stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) + # log.info("Current agent version running on the vm before update \n%s", stdout) + # version: str = "1.3.0.0" + # log.info("Attempting requested version %s", version) + # self._request_rsm_update(version) + # self._check_rsm_gs(version) + # self._verify_no_guest_agent_update(stdout) + # self._verify_agent_reported_update_status(version) def _check_rsm_gs(self, requested_version: str) -> None: # 
This checks if RSM GS available to the agent after we mock the rsm update request - output = self._ssh_client.run_command(f"wait_for_rsm_goal_state.py --version {requested_version}", use_sudo=True) + output = self._ssh_client.run_command(f"agent_update-wait_for_rsm_gs.py --version {requested_version}", use_sudo=True) log.info('Verifying requested version GS available to the agent \n%s', output) - def _prepare_agent(self) -> None: + def _prepare_agent(self, daemon_version="1.0.0.0", update_config=True) -> None: """ This method is to ensure agent is ready for accepting rsm updates. As part of that we update following flags 1) Changing daemon version since daemon has a hard check on agent version in order to update agent. It doesn't allow versions which are less than daemon version. 2) Updating GAFamily type "Test" and GAUpdates flag to process agent updates on test versions. """ - output = self._ssh_client.run_command("modify-agent-version-config", use_sudo=True) - log.info('Updating agent update required config \n%s', output) + log.info('Modifying agent installed version') + output = self._ssh_client.run_command(f"agent_update-modify_agent_version {daemon_version}", use_sudo=True) + log.info('Updated agent installed version \n%s', output) + if update_config: + log.info('Modifying agent update config flags') + output = self._ssh_client.run_command("update-waagent-conf GAUpdates.Enabled=y AutoUpdate.GAFamily=Test", use_sudo=True) + log.info('updated agent update required config \n%s', output) @staticmethod def _verify_agent_update_flag_enabled(vm: VirtualMachineClient) -> bool: @@ -209,9 +221,18 @@ def _verify_agent_reported_supported_feature_flag(self): """ log.info("Verifying agent reported supported feature flag") - self._ssh_client.run_command("verify_agent_supported_feature.py", use_sudo=True) + self._ssh_client.run_command("agent_update-verify_versioning_supported_feature.py", use_sudo=True) log.info("Agent reported VersioningGovernance supported feature flag") 
+ def _verify_agent_reported_update_status(self, version: str): + """ + Verify if the agent reported update status to CRP after update performed + """ + + log.info("Verifying agent reported update status for version {0}".format(version)) + self._ssh_client.run_command(f"agent_update-verify_agent_reported_update_status.py --version {version}", use_sudo=True) + log.info("Successfully Agent reported update status for version {0}".format(version)) + if __name__ == "__main__": RsmUpdateBvt.run_from_command_line() diff --git a/tests_e2e/tests/extensions_disabled/extensions_disabled.py b/tests_e2e/tests/extensions_disabled/extensions_disabled.py index 1ec9e58da3..66cafcfc1c 100755 --- a/tests_e2e/tests/extensions_disabled/extensions_disabled.py +++ b/tests_e2e/tests/extensions_disabled/extensions_disabled.py @@ -44,7 +44,7 @@ def run(self): # Disable extension processing on the test VM log.info("Disabling extension processing on the test VM [%s]", self._context.vm.name) - output = ssh_client.run_command("update-waagent-conf Extensions.Enabled n", use_sudo=True) + output = ssh_client.run_command("update-waagent-conf Extensions.Enabled=n", use_sudo=True) log.info("Disable completed:\n%s", output) # From now on, extensions will time out; set the timeout to the minimum allowed(15 minutes) diff --git a/tests_e2e/tests/scripts/agent_publish-check_update.py b/tests_e2e/tests/scripts/agent_publish-check_update.py new file mode 100755 index 0000000000..9f8f66c4f2 --- /dev/null +++ b/tests_e2e/tests/scripts/agent_publish-check_update.py @@ -0,0 +1,112 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import re +import sys +import logging + +from tests_e2e.tests.lib.agent_log import AgentLog +from tests_e2e.tests.lib.retry import retry_if_false + + +# pylint: disable=W0105 +""" +Post the _LOG_PATTERN_00 changes, the last group sometimes might not have the 'Agent' part at the start of the sentence; thus making it optional. + +> WALinuxAgent-2.2.18 discovered WALinuxAgent-2.2.47 as an update and will exit +(None, 'WALinuxAgent-2.2.18', '2.2.47') +""" +_UPDATE_PATTERN_00 = re.compile(r'(.*Agent\s)?(\S*)\sdiscovered\sWALinuxAgent-(\S*)\sas an update and will exit') + +""" +> Agent WALinuxAgent-2.2.45 discovered update WALinuxAgent-2.2.47 -- exiting +('Agent', 'WALinuxAgent-2.2.45', '2.2.47') +""" +_UPDATE_PATTERN_01 = re.compile(r'(.*Agent)?\s(\S*) discovered update WALinuxAgent-(\S*) -- exiting') + +""" +> Normal Agent upgrade discovered, updating to WALinuxAgent-2.9.1.0 -- exiting +('Normal Agent', WALinuxAgent, '2.9.1.0 ') +""" +_UPDATE_PATTERN_02 = re.compile(r'(.*Agent) upgrade discovered, updating to (WALinuxAgent)-(\S*) -- exiting') + +""" +> Agent update found, exiting current process to downgrade to the new Agent version 1.3.0.0 +(Agent, 'downgrade', '1.3.0.0') +""" +_UPDATE_PATTERN_03 = re.compile(r'(.*Agent) update found, exiting current process to (\S*) to the new Agent version (\S*)') + +""" +> Agent WALinuxAgent-2.2.47 is running as the goal state agent +('2.2.47',) +""" +_RUNNING_PATTERN_00 = re.compile(r'.*Agent\sWALinuxAgent-(\S*)\sis running as the goal state agent') + + +def verify_agent_update_from_log(): + + exit_code = 0 + 
detected_update = False + update_successful = False + update_version = '' + + log = AgentLog() + + for record in log.read(): + if 'TelemetryData' in record.text: + continue + + for p in [_UPDATE_PATTERN_00, _UPDATE_PATTERN_01, _UPDATE_PATTERN_02, _UPDATE_PATTERN_03]: + update_match = re.match(p, record.text) + if update_match: + detected_update = True + update_version = update_match.groups()[2] + logging.info('found the agent update log: %s', record.text) + break + + if detected_update: + running_match = re.match(_RUNNING_PATTERN_00, record.text) + if running_match and update_version == running_match.groups()[0]: + update_successful = True + logging.info('found the agent started new version log: %s', record.text) + + if detected_update: + logging.info('update was detected: %s', update_version) + if update_successful: + logging.info('update was successful') + else: + logging.warning('update was not successful') + exit_code = 1 + else: + logging.warning('update was not detected') + exit_code = 1 + + return exit_code == 0 + + +# This method will trace agent update messages in the agent log and determine if the update was successful or not. 
+try: + logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.DEBUG, stream=sys.stdout) + found: bool = retry_if_false(verify_agent_update_from_log) + if not found: + raise Exception('update was not found in the logs') +except Exception as e: + logging.error(e) + sys.exit(1) + +sys.exit(0) diff --git a/tests_e2e/tests/scripts/modify-agent-version-config b/tests_e2e/tests/scripts/agent_update-modify_agent_version similarity index 75% rename from tests_e2e/tests/scripts/modify-agent-version-config rename to tests_e2e/tests/scripts/agent_update-modify_agent_version index f121e6f4b0..c8011e0094 100755 --- a/tests_e2e/tests/scripts/modify-agent-version-config +++ b/tests_e2e/tests/scripts/agent_update-modify_agent_version @@ -20,14 +20,18 @@ # set -euo pipefail + +if [[ $# -ne 1 ]]; then + echo "Usage: agent_update-modify_agent_version " + exit 1 +fi + +version=$1 PYTHON=$(get-agent-python) echo "Agent's Python: $PYTHON" # some distros return .pyc byte file instead source file .py. So, I retrieve parent directory first. version_file_dir=$($PYTHON -c 'import azurelinuxagent.common.version as v; import os; print(os.path.dirname(v.__file__))') version_file_full_path="$version_file_dir/version.py" -sed -E -i "s/AGENT_VERSION\s+=\s+'[0-9.]+'/AGENT_VERSION = '1.0.0.0'/" $version_file_full_path -waagent_conf_path=$($PYTHON -c 'from azurelinuxagent.common.osutil import get_osutil; osutil=get_osutil(); print(osutil.agent_conf_file_path)') -sed -i 's/GAUpdates.Enabled=n/GAUpdates.Enabled=y/g' "$waagent_conf_path" -sed -i '$a AutoUpdate.GAFamily=Test' "$waagent_conf_path" +sed -E -i "s/AGENT_VERSION\s+=\s+'[0-9.]+'/AGENT_VERSION = '$version'/" $version_file_full_path echo "Restarting service..." 
agent-service restart \ No newline at end of file diff --git a/tests_e2e/tests/scripts/agent_update-verify_agent_reported_update_status.py b/tests_e2e/tests/scripts/agent_update-verify_agent_reported_update_status.py new file mode 100755 index 0000000000..8e8d50a482 --- /dev/null +++ b/tests_e2e/tests/scripts/agent_update-verify_agent_reported_update_status.py @@ -0,0 +1,59 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Verify if the agent reported update status to CRP via status file +# +import argparse +import glob +import json +import logging +import sys + +from tests_e2e.tests.lib.retry import retry_if_false + + +def check_agent_reported_update_status(expected_version: str) -> bool: + agent_status_file = "/var/lib/waagent/history/*/waagent_status.json" + file_paths = glob.glob(agent_status_file, recursive=True) + for file in file_paths: + with open(file, 'r') as f: + data = json.load(f) + logging.info("Agent status file is %s and it's content %s", file, data) + status = data["__status__"] + guest_agent_status = status["aggregateStatus"]["guestAgentStatus"] + if "updateStatus" in guest_agent_status.keys(): + if guest_agent_status["updateStatus"]["expectedVersion"] == expected_version: + return True + return False + + +try: + + parser = argparse.ArgumentParser() + parser.add_argument('-v', '--version', required=True) + args = parser.parse_args() + + found: bool = retry_if_false(lambda: check_agent_reported_update_status(args.version)) + if not found: + raise Exception("Agent failed to report update status, so skipping rest of the agent update validations") + +except Exception as e: + print(f"{e}", file=sys.stderr) + sys.exit(1) + +sys.exit(0) diff --git a/tests_e2e/tests/scripts/verify_agent_supported_feature.py b/tests_e2e/tests/scripts/agent_update-verify_versioning_supported_feature.py similarity index 100% rename from tests_e2e/tests/scripts/verify_agent_supported_feature.py rename to tests_e2e/tests/scripts/agent_update-verify_versioning_supported_feature.py diff --git a/tests_e2e/tests/scripts/wait_for_rsm_goal_state.py b/tests_e2e/tests/scripts/agent_update-wait_for_rsm_gs.py similarity index 100% rename from tests_e2e/tests/scripts/wait_for_rsm_goal_state.py rename to tests_e2e/tests/scripts/agent_update-wait_for_rsm_gs.py From 6a32b50f469e484dc405829ed16f98dee774b9fa Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> 
Date: Fri, 30 Jun 2023 12:56:18 -0700 Subject: [PATCH 039/240] add time window for agent manifest download (#2860) * add time window for agent manifest download * address comments * address comments --- azurelinuxagent/common/conf.py | 17 +++-- azurelinuxagent/ga/agent_update_handler.py | 23 +++++- tests/ga/test_agent_update_handler.py | 76 ++++++++++---------- tests/test_agent.py | 4 +- tests_e2e/orchestrator/scripts/install-agent | 2 +- 5 files changed, 72 insertions(+), 50 deletions(-) diff --git a/azurelinuxagent/common/conf.py b/azurelinuxagent/common/conf.py index cb929e433a..0e0eb7f18e 100644 --- a/azurelinuxagent/common/conf.py +++ b/azurelinuxagent/common/conf.py @@ -129,7 +129,6 @@ def load_conf_from_file(conf_file_path, conf=__conf__): "ResourceDisk.EnableSwapEncryption": False, "AutoUpdate.Enabled": True, "EnableOverProvisioning": True, - "GAUpdates.Enabled": True, # # "Debug" options are experimental and may be removed in later # versions of the Agent. @@ -137,9 +136,10 @@ def load_conf_from_file(conf_file_path, conf=__conf__): "Debug.CgroupLogMetrics": False, "Debug.CgroupDisableOnProcessCheckFailure": True, "Debug.CgroupDisableOnQuotaCheckFailure": True, + "Debug.DownloadNewAgents": True, "Debug.EnableAgentMemoryUsageCheck": False, "Debug.EnableFastTrack": True, - "Debug.EnableGAVersioning": False + "Debug.EnableGAVersioning": True } @@ -503,12 +503,15 @@ def get_monitor_network_configuration_changes(conf=__conf__): return conf.get_switch("Monitor.NetworkConfigurationChanges", False) -def get_ga_updates_enabled(conf=__conf__): +def get_download_new_agents(conf=__conf__): """ - If True, the agent go through update logic to look for new agents otherwise it will stop agent updates. - NOTE: This option is needed in e2e tests to control agent updates. + If True, the agent go through update logic to look for new agents to download otherwise it will stop agent updates. 
+ NOTE: AutoUpdate.Enabled controls whether the Agent downloads new update and also whether any downloaded updates are started or not, while DownloadNewAgents controls only the former. + AutoUpdate.Enabled == false -> Agent preinstalled on the image will process extensions and will not update (regardless of DownloadNewAgents flag) + AutoUpdate.Enabled == true and DownloadNewAgents == true, any update already downloaded will be started, and agent look for future updates + AutoUpdate.Enabled == true and DownloadNewAgents == false, any update already downloaded will be started, but the agent will not look for future updates """ - return conf.get_switch("GAUpdates.Enabled", True) + return conf.get_switch("Debug.DownloadNewAgents", True) def get_cgroup_check_period(conf=__conf__): @@ -637,7 +640,7 @@ def get_normal_upgrade_frequency(conf=__conf__): def get_enable_ga_versioning(conf=__conf__): """ - If True, the agent uses GA Versioning for auto-updating the agent vs automatically auto-updating to the highest version. + If True, the agent looks for rsm updates(checking requested version in GS) otherwise it will fall back to self-update and finds the highest version from PIR. NOTE: This option is experimental and may be removed in later versions of the Agent. 
""" return conf.get_switch("Debug.EnableGAVersioning", True) diff --git a/azurelinuxagent/ga/agent_update_handler.py b/azurelinuxagent/ga/agent_update_handler.py index 8de6cfd81e..47a8fa27dd 100644 --- a/azurelinuxagent/ga/agent_update_handler.py +++ b/azurelinuxagent/ga/agent_update_handler.py @@ -37,6 +37,7 @@ def __init__(self): self.last_attempted_requested_version_update_time = datetime.datetime.min self.last_attempted_hotfix_update_time = datetime.datetime.min self.last_attempted_normal_update_time = datetime.datetime.min + self.last_attempted_manifest_download_time = datetime.datetime.min class AgentUpdateHandler(object): @@ -86,6 +87,23 @@ def __update_last_attempt_update_times(self): else: self.update_state.last_attempted_normal_update_time = now self.update_state.last_attempted_hotfix_update_time = now + self.update_state.last_attempted_manifest_download_time = now + + def __should_agent_attempt_manifest_download(self): + """ + The agent should attempt to download the manifest if + the agent has not attempted to download the manifest in the last 1 hour + """ + now = datetime.datetime.now() + + if self.update_state.last_attempted_manifest_download_time != datetime.datetime.min: + next_attempt_time = self.update_state.last_attempted_manifest_download_time + datetime.timedelta(seconds=conf.get_autoupdate_frequency()) + else: + next_attempt_time = now + + if next_attempt_time > now: + return False + return True @staticmethod def __get_agent_upgrade_type(requested_version): @@ -256,7 +274,7 @@ def __log_event(level, msg, success=True): def run(self, goal_state): try: # Ignore new agents if update is disabled. The latter flag only used in e2e tests. 
- if not self._autoupdate_enabled or not conf.get_ga_updates_enabled(): + if not self._autoupdate_enabled or not conf.get_download_new_agents(): return self._gs_id = goal_state.extensions_goal_state.id @@ -265,6 +283,9 @@ def run(self, goal_state): agent_manifest = None # This is to make sure fetch agent manifest once per update warn_msg = "" if requested_version is None: + # Do not proceed with update if self-update needs to download the manifest again with in an hour + if not self.__should_agent_attempt_manifest_download(): + return if conf.get_enable_ga_versioning(): # log the warning only when ga versioning is enabled warn_msg = "Missing requested version in agent family: {0} for incarnation: {1}, fallback to largest version update".format(self._ga_family, self._gs_id) GAUpdateReportState.report_error_msg = warn_msg diff --git a/tests/ga/test_agent_update_handler.py b/tests/ga/test_agent_update_handler.py index 49b0dc7628..62dfd6488d 100644 --- a/tests/ga/test_agent_update_handler.py +++ b/tests/ga/test_agent_update_handler.py @@ -28,7 +28,7 @@ def setUp(self): clear_singleton_instances(ProtocolUtil) @contextlib.contextmanager - def __get_agent_update_handler(self, test_data=None, autoupdate_frequency=0.001, autoupdate_enabled=True): + def __get_agent_update_handler(self, test_data=None, autoupdate_frequency=0.001, autoupdate_enabled=True, protocol_get_error=False): # Default to DATA_FILE of test_data parameter raises the pylint warning # W0102: Dangerous default value DATA_FILE (builtins.dict) as argument (dangerous-default-value) test_data = DATA_FILE if test_data is None else test_data @@ -37,9 +37,12 @@ def __get_agent_update_handler(self, test_data=None, autoupdate_frequency=0.001, def get_handler(url, **kwargs): if HttpRequestPredicates.is_agent_package_request(url): - agent_pkg = load_bin_data(self._get_agent_file_name(), self._agent_zip_dir) - protocol.mock_wire_data.call_counts['agentArtifact'] += 1 - return MockHttpResponse(status=httpclient.OK, 
body=agent_pkg) + if not protocol_get_error: + agent_pkg = load_bin_data(self._get_agent_file_name(), self._agent_zip_dir) + return MockHttpResponse(status=httpclient.OK, body=agent_pkg) + else: + return MockHttpResponse(status=httpclient.SERVICE_UNAVAILABLE) + return protocol.mock_wire_data.mock_http_get(url, **kwargs) def put_handler(url, *args, **_): @@ -59,6 +62,7 @@ def put_handler(url, *args, **_): agent_update_handler._protocol = protocol yield agent_update_handler, mock_telemetry + def __assert_agent_directories_available(self, versions): for version in versions: self.assertTrue(os.path.exists(self.agent_dir(version)), "Agent directory {0} not found".format(version)) @@ -174,6 +178,33 @@ def test_it_should_update_to_largest_version_if_requested_version_not_available( self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"]) self.assertIn("Agent update found, exiting current process", ustr(context.exception.reason)) + def test_it_should_not_download_manifest_again_if_last_attempted_download_time_not_elapsed(self): + self.prepare_agents(count=1) + data_file = DATA_FILE.copy() + data_file['ext_conf'] = "wire/ext_conf.xml" + with self.__get_agent_update_handler(test_data=data_file, autoupdate_frequency=10, protocol_get_error=True) as (agent_update_handler, _): + # making multiple agent update attempts + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + + mock_wire_data = agent_update_handler._protocol.mock_wire_data + self.assertEqual(1, mock_wire_data.call_counts['manifest_of_ga.xml'], "Agent manifest should not be downloaded again") + + def test_it_should_download_manifest_if_last_attempted_download_time_is_elapsed(self): + self.prepare_agents(count=1) + data_file = DATA_FILE.copy() + data_file['ext_conf'] = "wire/ext_conf.xml" + + 
with self.__get_agent_update_handler(test_data=data_file, autoupdate_frequency=0.00001, protocol_get_error=True) as (agent_update_handler, _): + # making multiple agent update attempts + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + + mock_wire_data = agent_update_handler._protocol.mock_wire_data + self.assertEqual(3, mock_wire_data.call_counts['manifest_of_ga.xml'], "Agent manifest should be downloaded in all attempts") + def test_it_should_not_agent_update_if_requested_version_is_same_as_current_version(self): data_file = DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" @@ -313,24 +344,7 @@ def test_it_should_report_update_status_with_error_on_download_fail(self): data_file = DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" - @contextlib.contextmanager - def mock_agent_update_handler(test_data): - with mock_wire_protocol(test_data) as protocol: - - def get_handler(url, **kwargs): - if HttpRequestPredicates.is_agent_package_request(url): - return MockHttpResponse(status=httpclient.SERVICE_UNAVAILABLE) - return protocol.mock_wire_data.mock_http_get(url, **kwargs) - - protocol.set_http_handlers(http_get_handler=get_handler) - - with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=True): - with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=0.001): - with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"): - agent_update_handler_local = get_agent_update_handler(protocol) - yield agent_update_handler_local - - with mock_agent_update_handler(test_data=data_file) as (agent_update_handler): + with self.__get_agent_update_handler(test_data=data_file, protocol_get_error=True) as (agent_update_handler, _): GAUpdateReportState.report_error_msg = "" 
agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) vm_agent_update_status = agent_update_handler.get_vmagent_update_status() @@ -343,23 +357,7 @@ def test_it_should_report_update_status_with_missing_requested_version_error(sel data_file = DATA_FILE.copy() data_file['ext_conf'] = "wire/ext_conf.xml" - @contextlib.contextmanager - def mock_agent_update_handler(test_data): - with mock_wire_protocol(test_data) as protocol: - def get_handler(url, **kwargs): - if HttpRequestPredicates.is_agent_package_request(url): - return MockHttpResponse(status=httpclient.SERVICE_UNAVAILABLE) - return protocol.mock_wire_data.mock_http_get(url, **kwargs) - - protocol.set_http_handlers(http_get_handler=get_handler) - - with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=True): - with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=0.001): - with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"): - agent_update_handler_local = get_agent_update_handler(protocol) - yield agent_update_handler_local - - with mock_agent_update_handler(test_data=data_file) as (agent_update_handler): + with self.__get_agent_update_handler(test_data=data_file, protocol_get_error=True) as (agent_update_handler, _): GAUpdateReportState.report_error_msg = "" agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) vm_agent_update_status = agent_update_handler.get_vmagent_update_status() diff --git a/tests/test_agent.py b/tests/test_agent.py index f5e91405a6..abbc090ba8 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -40,9 +40,10 @@ Debug.CgroupLogMetrics = False Debug.CgroupMonitorExpiryTime = 2022-03-31 Debug.CgroupMonitorExtensionName = Microsoft.Azure.Monitor.AzureMonitorLinuxAgent +Debug.DownloadNewAgents = True Debug.EnableAgentMemoryUsageCheck = False Debug.EnableFastTrack = True -Debug.EnableGAVersioning = False +Debug.EnableGAVersioning = True Debug.EtpCollectionPeriod = 
300 Debug.FirewallRulesLogPeriod = 86400 DetectScvmmEnv = False @@ -51,7 +52,6 @@ Extensions.Enabled = True Extensions.GoalStatePeriod = 6 Extensions.InitialGoalStatePeriod = 6 -GAUpdates.Enabled = True HttpProxy.Host = None HttpProxy.Port = None Lib.Dir = /var/lib/waagent diff --git a/tests_e2e/orchestrator/scripts/install-agent b/tests_e2e/orchestrator/scripts/install-agent index b494ac8e28..00200f3a11 100755 --- a/tests_e2e/orchestrator/scripts/install-agent +++ b/tests_e2e/orchestrator/scripts/install-agent @@ -115,7 +115,7 @@ echo "Agent's conf path: $waagent_conf_path" sed -i 's/AutoUpdate.Enabled=n/AutoUpdate.Enabled=y/g' "$waagent_conf_path" # By default GAUpdates flag set to True, so that agent go through update logic to look for new agents. # But in e2e tests this flag needs to be off in test version 9.9.9.9 to stop the agent updates, so that our scenarios run on 9.9.9.9. -sed -i '$a GAUpdates.Enabled=n' "$waagent_conf_path" +sed -i '$a Debug.DownloadNewAgents=n' "$waagent_conf_path" # # Restart the service From 0e1b9f519584c71f3f7286dbf79cd18822520d49 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Fri, 30 Jun 2023 13:02:34 -0700 Subject: [PATCH 040/240] ignore 75-persistent-net-generator.rules in e2e tests (#2862) * ignore 75-persistent-net-generator.rules in e2e tests * address comment * remove --- tests_e2e/tests/lib/agent_log.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests_e2e/tests/lib/agent_log.py b/tests_e2e/tests/lib/agent_log.py index 6094e033e7..c9ca258472 100644 --- a/tests_e2e/tests/lib/agent_log.py +++ b/tests_e2e/tests/lib/agent_log.py @@ -92,11 +92,6 @@ def get_errors(self) -> List[AgentLogRecord]: # # NOTE: This list was taken from the older agent tests and needs to be cleaned up. Feel free to un-comment rules as new tests are added. 
# - # # This warning is expected on SUSE 12 - # { - # 'message': r"WARNING EnvHandler ExtHandler Move rules file 75-persistent-net-generator.rules to /var/lib/waagent/75-persistent-net-generator.rules", - # 'if': lambda _: re.match(r"((sles15\.2)|suse12)\D*", DISTRO_NAME, flags=re.IGNORECASE) is not None - # }, # # The following message is expected to log an error if systemd is not enabled on it # { # 'message': r"Did not detect Systemd, unable to set wa(|linux)agent-network-setup.service", @@ -139,10 +134,11 @@ def get_errors(self) -> List[AgentLogRecord]: # and r.prefix == "Daemon" # }, # + # 2023-06-28T09:31:38.903835Z WARNING EnvHandler ExtHandler Move rules file 75-persistent-net-generator.rules to /var/lib/waagent/75-persistent-net-generator.rules # The environment thread performs this operation periodically # { - 'message': r"Move rules file 70-persistent-net.rules to /var/lib/waagent/70-persistent-net.rules", + 'message': r"Move rules file (70|75)-persistent.*.rules to /var/lib/waagent/(70|75)-persistent.*.rules", 'if': lambda r: r.level == "WARNING" }, # From a168e2580a65b24c35d9c3716471a6ed8fa84062 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 5 Jul 2023 12:52:58 -0700 Subject: [PATCH 041/240] Always publish artifacts and test results (#2865) Co-authored-by: narrieta --- tests_e2e/pipeline/pipeline.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests_e2e/pipeline/pipeline.yml b/tests_e2e/pipeline/pipeline.yml index 2cf0979575..59420dd88d 100644 --- a/tests_e2e/pipeline/pipeline.yml +++ b/tests_e2e/pipeline/pipeline.yml @@ -132,9 +132,11 @@ jobs: - publish: $(Build.ArtifactStagingDirectory) artifact: 'artifacts' displayName: 'Publish test artifacts' + condition: always() - task: PublishTestResults@2 displayName: 'Publish test results' + condition: always() inputs: testResultsFormat: 'JUnit' testResultsFiles: 'runbook_logs/agent.junit.xml' From 307e880a115e38d707cf0afc6d2adac828d8f0b3 Mon Sep 17 00:00:00 2001 From: maddieford 
<93676569+maddieford@users.noreply.github.com> Date: Thu, 6 Jul 2023 09:59:37 -0700 Subject: [PATCH 042/240] Add tests for extension workflow (#2843) * Update version to dummy 1.0.0.0' * Revert version change * Basic structure * Test must run in SCUS for test ext * Add GuestAgentDCRTest Extension id * Test stucture * Update test file name * test no location * Test location as southcentralus * Assert ext is installed * Try changing version for dcr test ext * Update expected message in instance view * try changing message to string * Limit images for ext workflow * Update classes after refactor * Update class name * Refactor tests * Rename extension_install to extension_workflow * Assert ext status * Assert operation sequence is expected * Remove logger reference * Pass ssh client * Update ssh * Add permission to run script * Correct permissions * Add execute permissions for helper script * Make scripts executable * Change args to string * Add required parameter * Add shebang for retart_agent * Fix arg format * Use restart utility * Run restart with sudo * Add enable scenario * Attempt to remove start_time * Only assert enable * Add delete scenario * Fix uninstall scenario * Add extension update scenario * Run assert scenario on update scenario * Fix reference to ext * Format args as str instead of arr * Update test args * Add test case for update without install * Fix delete * Keep changes * Save changes * Add special chars test case * Fix dcr_ext issue{ * Add validate no lag scenario * Fix testguid reference * Add additional log statements for debugging * Fix message to check before encoding * Encode setting name * Correctly check data * Make check data executable * Fix command args for special char test * Fix no lag time * Fix ssh client reference * Try message instead of text * Remove unused method * Start clean up * Continue code cleanup * Fix pylint errors * Fix pylint errors * Start refactor * Debug agent lag * Update lag logging * Fix assert_that for lag * 
Remove typo * Add readme for extension_workflow scenario * Reformat comment * Improve logging * Refactor assert scenario * Remove unused constants * Remove unusued parameter in assert scenario * Add logging * Improve logging * Improve logging * Fix soft assertions issue * Remove todo for delete polling * Remove unnecessary new line * removed unnecessary function * Make special chars log more readable * remove unnecessary log * Add version to add or update log * Remove unnecessary assert instance view * Add empty log line * Add update back to restart args to debug * Add update back to restart args to debug * Remove unused init * Remove test_suites from pipeline yml * Update location in test suite yml * Add comment for location restriction * Remove unused init and fix comments * Improve method header * Rename scripts * Remove print_function * Rename is_data_in_waagent_log * Add comments describing assert operation sequence script * add comments to scripts and type annotate assert operation sequence * Add GuestAgentDcrExtension source code to repo * Fix typing.dict error * Fix typing issue * Remove outdated comment * Add comments to extension_workflow.py * rename scripts to match test suite name * Ignore pylint warnings on test ext * Update pylint rc to ignore tests_e2e/GuestAgentDcrTestExtension * Update pylint rc to ignore tests_e2e/GuestAgentDcrTestExtension * disable all errors/warnings dcr test ext * disable all errors/warnings dcr test ext * Run workflow on debian * Revert to dcr config distros * Move enable increment to beginning of function * Fix gs completed regex * Remove unnessary files from dcr test ext dir --- .../GuestAgentDcrTest.py | 123 +++++ .../HandlerManifest.json | 14 + tests_e2e/GuestAgentDcrTestExtension/Makefile | 8 + .../Utils/HandlerUtil.py | 387 +++++++++++++++ .../Utils/LogUtil.py | 50 ++ .../Utils/ScriptUtil.py | 140 ++++++ .../Utils/WAAgentUtil.py | 140 ++++++ .../Utils/test/MockUtil.py | 44 ++ .../Utils/test/env.py | 24 + 
.../Utils/test/mock.sh | 23 + .../Utils/test/test_logutil.py | 35 ++ .../test/test_null_protected_settings.py | 48 ++ .../Utils/test/test_redacted_settings.py | 47 ++ .../Utils/test/test_scriptutil.py | 55 +++ .../GuestAgentDcrTestExtension/manifest.xml | 17 + .../GuestAgentDcrTestExtension/references | 2 + tests_e2e/orchestrator/runbook.yml | 2 +- tests_e2e/test_suites/agent_ext_workflow.yml | 11 + tests_e2e/tests/agent_ext_workflow/README.md | 45 ++ .../agent_ext_workflow/extension_workflow.py | 447 ++++++++++++++++++ tests_e2e/tests/lib/agent_log.py | 11 + tests_e2e/tests/lib/identifiers.py | 1 + ..._ext_workflow-assert_operation_sequence.py | 183 +++++++ ...nt_ext_workflow-check_data_in_agent_log.py | 49 ++ ...g_between_agent_start_and_gs_processing.py | 96 ++++ 25 files changed, 2001 insertions(+), 1 deletion(-) create mode 100644 tests_e2e/GuestAgentDcrTestExtension/GuestAgentDcrTest.py create mode 100644 tests_e2e/GuestAgentDcrTestExtension/HandlerManifest.json create mode 100644 tests_e2e/GuestAgentDcrTestExtension/Makefile create mode 100755 tests_e2e/GuestAgentDcrTestExtension/Utils/HandlerUtil.py create mode 100755 tests_e2e/GuestAgentDcrTestExtension/Utils/LogUtil.py create mode 100755 tests_e2e/GuestAgentDcrTestExtension/Utils/ScriptUtil.py create mode 100755 tests_e2e/GuestAgentDcrTestExtension/Utils/WAAgentUtil.py create mode 100755 tests_e2e/GuestAgentDcrTestExtension/Utils/test/MockUtil.py create mode 100755 tests_e2e/GuestAgentDcrTestExtension/Utils/test/env.py create mode 100755 tests_e2e/GuestAgentDcrTestExtension/Utils/test/mock.sh create mode 100755 tests_e2e/GuestAgentDcrTestExtension/Utils/test/test_logutil.py create mode 100755 tests_e2e/GuestAgentDcrTestExtension/Utils/test/test_null_protected_settings.py create mode 100644 tests_e2e/GuestAgentDcrTestExtension/Utils/test/test_redacted_settings.py create mode 100755 tests_e2e/GuestAgentDcrTestExtension/Utils/test/test_scriptutil.py create mode 100644 
tests_e2e/GuestAgentDcrTestExtension/manifest.xml create mode 100644 tests_e2e/GuestAgentDcrTestExtension/references create mode 100644 tests_e2e/test_suites/agent_ext_workflow.yml create mode 100644 tests_e2e/tests/agent_ext_workflow/README.md create mode 100644 tests_e2e/tests/agent_ext_workflow/extension_workflow.py create mode 100755 tests_e2e/tests/scripts/agent_ext_workflow-assert_operation_sequence.py create mode 100755 tests_e2e/tests/scripts/agent_ext_workflow-check_data_in_agent_log.py create mode 100755 tests_e2e/tests/scripts/agent_ext_workflow-validate_no_lag_between_agent_start_and_gs_processing.py diff --git a/tests_e2e/GuestAgentDcrTestExtension/GuestAgentDcrTest.py b/tests_e2e/GuestAgentDcrTestExtension/GuestAgentDcrTest.py new file mode 100644 index 0000000000..df6c1b5179 --- /dev/null +++ b/tests_e2e/GuestAgentDcrTestExtension/GuestAgentDcrTest.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python +# pylint: disable=all +from __future__ import print_function + +from Utils.WAAgentUtil import waagent +import Utils.HandlerUtil as Util +import sys +import re +import traceback +import os +import datetime + +ExtensionShortName = "GADcrTestExt" +OperationFileName = "operations-{0}.log" + + +def install(): + operation = "install" + status = "success" + msg = "Installed successfully" + + hutil = parse_context(operation) + hutil.log("Start to install.") + hutil.log(msg) + hutil.do_exit(0, operation, status, '0', msg) + + +def enable(): + # Global Variables definition + operation = "enable" + status = "success" + msg = "Enabled successfully." 
+ + # Operations.append(operation) + hutil = parse_context(operation) + hutil.log("Start to enable.") + public_settings = hutil.get_public_settings() + name = public_settings.get("name") + if name: + name = "Name: {0}".format(name) + hutil.log(name) + msg = "{0} {1}".format(msg, name) + print(name) + else: + hutil.error("The name in public settings is not provided.") + # msg = msg % ','.join(Operations) + hutil.log(msg) + hutil.do_exit(0, operation, status, '0', msg) + + +def disable(): + operation = "disable" + status = "success" + msg = "Disabled successfully." + + # Operations.append(operation) + hutil = parse_context(operation) + hutil.log("Start to disable.") + # msg % ','.join(Operations) + hutil.log(msg) + hutil.do_exit(0, operation, status, '0', msg) + + +def uninstall(): + operation = "uninstall" + status = "success" + msg = "Uninstalled successfully." + + # Operations.append(operation) + hutil = parse_context(operation) + hutil.log("Start to uninstall.") + # msg % ','.join(Operations) + hutil.log(msg) + hutil.do_exit(0, operation, status, '0', msg) + + +def update(): + operation = "update" + status = "success" + msg = "Updated successfully." + + # Operations.append(operation) + hutil = parse_context(operation) + hutil.log("Start to update.") + # msg % ','.join(Operations) + hutil.log(msg) + hutil.do_exit(0, operation, status, '0', msg) + + +def parse_context(operation): + hutil = Util.HandlerUtility(waagent.Log, waagent.Error) + hutil.do_parse_context(operation) + op_log = os.path.join(hutil.get_log_dir(), OperationFileName.format(hutil.get_extension_version())) + with open(op_log, 'a+') as oplog_handler: + oplog_handler.write("Date:{0}; Operation:{1}; SeqNo:{2}\n" + .format(datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"), + operation, hutil.get_seq_no())) + return hutil + + +def main(): + waagent.LoggerInit('/var/log/waagent.log', '/dev/stdout') + waagent.Log("%s started to handle." 
% (ExtensionShortName)) + + try: + for a in sys.argv[1:]: + if re.match("^([-/]*)(disable)", a): + disable() + elif re.match("^([-/]*)(uninstall)", a): + uninstall() + elif re.match("^([-/]*)(install)", a): + install() + elif re.match("^([-/]*)(enable)", a): + enable() + elif re.match("^([-/]*)(update)", a): + update() + except Exception as e: + err_msg = "Failed with error: {0}, {1}".format(e, traceback.format_exc()) + waagent.Error(err_msg) + + +if __name__ == '__main__': + main() diff --git a/tests_e2e/GuestAgentDcrTestExtension/HandlerManifest.json b/tests_e2e/GuestAgentDcrTestExtension/HandlerManifest.json new file mode 100644 index 0000000000..398aab8648 --- /dev/null +++ b/tests_e2e/GuestAgentDcrTestExtension/HandlerManifest.json @@ -0,0 +1,14 @@ +[{ + "name": "GuestAgentDcrTestExtension", + "version": 1.0, + "handlerManifest": { + "installCommand": "./GuestAgentDcrTest.py --install", + "uninstallCommand": "./GuestAgentDcrTest.py --uninstall", + "updateCommand": "./GuestAgentDcrTest.py --update", + "enableCommand": "./GuestAgentDcrTest.py --enable", + "disableCommand": "./GuestAgentDcrTest.py --disable", + "updateMode": "UpdateWithoutInstall", + "rebootAfterInstall": false, + "reportHeartbeat": false + } +}] diff --git a/tests_e2e/GuestAgentDcrTestExtension/Makefile b/tests_e2e/GuestAgentDcrTestExtension/Makefile new file mode 100644 index 0000000000..d766ef63ae --- /dev/null +++ b/tests_e2e/GuestAgentDcrTestExtension/Makefile @@ -0,0 +1,8 @@ +default: build + +build: + $(eval NAME = $(shell grep -Pom1 "(?<=)[^<]+" manifest.xml)) + $(eval VERSION = $(shell grep -Pom1 "(?<=)[^<]+" manifest.xml)) + + @echo "Building '$(NAME)-$(VERSION).zip' ..." 
+ zip -r9 $(NAME)-$(VERSION).zip * diff --git a/tests_e2e/GuestAgentDcrTestExtension/Utils/HandlerUtil.py b/tests_e2e/GuestAgentDcrTestExtension/Utils/HandlerUtil.py new file mode 100755 index 0000000000..56343f2e55 --- /dev/null +++ b/tests_e2e/GuestAgentDcrTestExtension/Utils/HandlerUtil.py @@ -0,0 +1,387 @@ +# +# Handler library for Linux IaaS +# +# Copyright 2014 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=all + +""" +JSON def: +HandlerEnvironment.json +[{ + "name": "ExampleHandlerLinux", + "seqNo": "seqNo", + "version": "1.0", + "handlerEnvironment": { + "logFolder": "", + "configFolder": "", + "statusFolder": "", + "heartbeatFile": "", + + } +}] + +Example ./config/1.settings +"{"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"1BE9A13AA1321C7C515EF109746998BAB6D86FD1","protectedSettings": 
+"MIIByAYJKoZIhvcNAQcDoIIBuTCCAbUCAQAxggFxMIIBbQIBADBVMEExPzA9BgoJkiaJk/IsZAEZFi9XaW5kb3dzIEF6dXJlIFNlcnZpY2UgTWFuYWdlbWVudCBmb3IgR+nhc6VHQTQpCiiV2zANBgkqhkiG9w0BAQEFAASCAQCKr09QKMGhwYe+O4/a8td+vpB4eTR+BQso84cV5KCAnD6iUIMcSYTrn9aveY6v6ykRLEw8GRKfri2d6tvVDggUrBqDwIgzejGTlCstcMJItWa8Je8gHZVSDfoN80AEOTws9Fp+wNXAbSuMJNb8EnpkpvigAWU2v6pGLEFvSKC0MCjDTkjpjqciGMcbe/r85RG3Zo21HLl0xNOpjDs/qqikc/ri43Y76E/Xv1vBSHEGMFprPy/Hwo3PqZCnulcbVzNnaXN3qi/kxV897xGMPPC3IrO7Nc++AT9qRLFI0841JLcLTlnoVG1okPzK9w6ttksDQmKBSHt3mfYV+skqs+EOMDsGCSqGSIb3DQEHATAUBggqhkiG9w0DBwQITgu0Nu3iFPuAGD6/QzKdtrnCI5425fIUy7LtpXJGmpWDUA==","publicSettings":{"port":"3000"}}}]}" + + +Example HeartBeat +{ +"version": 1.0, + "heartbeat" : { + "status": "ready", + "code": 0, + "Message": "Sample Handler running. Waiting for a new configuration from user." + } +} +Example Status Report: +[{"version":"1.0","timestampUTC":"2014-05-29T04:20:13Z","status":{"name":"Chef Extension Handler","operation":"chef-client-run","status":"success","code":0,"formattedMessage":{"lang":"en-US","message":"Chef-client run success"}}}] + +""" + +import os +import os.path +import sys +import imp +import base64 +import json +import time +import re + +from xml.etree import ElementTree +from os.path import join +from Utils.WAAgentUtil import waagent +from waagent import LoggerInit + +DateTimeFormat = "%Y-%m-%dT%H:%M:%SZ" + +MANIFEST_XML = "manifest.xml" + + +class HandlerContext: + def __init__(self, name): + self._name = name + self._version = '0.0' + self._config_dir = None + self._log_dir = None + self._log_file = None + self._status_dir = None + self._heartbeat_file = None + self._seq_no = -1 + self._status_file = None + self._settings_file = None + self._config = None + return + + +class HandlerUtility: + def __init__(self, log, error, s_name=None, l_name=None, extension_version=None, logFileName='extension.log', + console_logger=None, file_logger=None): + self._log = log + self._log_to_con = console_logger + self._log_to_file = 
file_logger + self._error = error + self._logFileName = logFileName + if s_name is None or l_name is None or extension_version is None: + (l_name, s_name, extension_version) = self._get_extension_info() + + self._short_name = s_name + self._extension_version = extension_version + self._log_prefix = '[%s-%s] ' % (l_name, extension_version) + + def get_extension_version(self): + return self._extension_version + + def _get_log_prefix(self): + return self._log_prefix + + def _get_extension_info(self): + if os.path.isfile(MANIFEST_XML): + return self._get_extension_info_manifest() + + ext_dir = os.path.basename(os.getcwd()) + (long_name, version) = ext_dir.split('-') + short_name = long_name.split('.')[-1] + + return long_name, short_name, version + + def _get_extension_info_manifest(self): + with open(MANIFEST_XML) as fh: + doc = ElementTree.parse(fh) + namespace = doc.find('{http://schemas.microsoft.com/windowsazure}ProviderNameSpace').text + short_name = doc.find('{http://schemas.microsoft.com/windowsazure}Type').text + version = doc.find('{http://schemas.microsoft.com/windowsazure}Version').text + + long_name = "%s.%s" % (namespace, short_name) + return (long_name, short_name, version) + + def _get_current_seq_no(self, config_folder): + seq_no = -1 + cur_seq_no = -1 + freshest_time = None + for subdir, dirs, files in os.walk(config_folder): + for file in files: + try: + cur_seq_no = int(os.path.basename(file).split('.')[0]) + if (freshest_time == None): + freshest_time = os.path.getmtime(join(config_folder, file)) + seq_no = cur_seq_no + else: + current_file_m_time = os.path.getmtime(join(config_folder, file)) + if (current_file_m_time > freshest_time): + freshest_time = current_file_m_time + seq_no = cur_seq_no + except ValueError: + continue + return seq_no + + def log(self, message): + self._log(self._get_log_prefix() + message) + + def log_to_console(self, message): + if self._log_to_con is not None: + self._log_to_con(self._get_log_prefix() + message) + else: + 
self.error("Unable to log to console, console log method not set") + + def log_to_file(self, message): + if self._log_to_file is not None: + self._log_to_file(self._get_log_prefix() + message) + else: + self.error("Unable to log to file, file log method not set") + + def error(self, message): + self._error(self._get_log_prefix() + message) + + @staticmethod + def redact_protected_settings(content): + redacted_tmp = re.sub('"protectedSettings":\s*"[^"]+=="', '"protectedSettings": "*** REDACTED ***"', content) + redacted = re.sub('"protectedSettingsCertThumbprint":\s*"[^"]+"', '"protectedSettingsCertThumbprint": "*** REDACTED ***"', redacted_tmp) + return redacted + + def _parse_config(self, ctxt): + config = None + try: + config = json.loads(ctxt) + except: + self.error('JSON exception decoding ' + HandlerUtility.redact_protected_settings(ctxt)) + + if config is None: + self.error("JSON error processing settings file:" + HandlerUtility.redact_protected_settings(ctxt)) + else: + handlerSettings = config['runtimeSettings'][0]['handlerSettings'] + if 'protectedSettings' in handlerSettings and \ + 'protectedSettingsCertThumbprint' in handlerSettings and \ + handlerSettings['protectedSettings'] is not None and \ + handlerSettings["protectedSettingsCertThumbprint"] is not None: + protectedSettings = handlerSettings['protectedSettings'] + thumb = handlerSettings['protectedSettingsCertThumbprint'] + cert = waagent.LibDir + '/' + thumb + '.crt' + pkey = waagent.LibDir + '/' + thumb + '.prv' + unencodedSettings = base64.standard_b64decode(protectedSettings) + openSSLcmd = "openssl smime -inform DER -decrypt -recip {0} -inkey {1}" + cleartxt = waagent.RunSendStdin(openSSLcmd.format(cert, pkey), unencodedSettings)[1] + if cleartxt is None: + self.error("OpenSSL decode error using thumbprint " + thumb) + self.do_exit(1, "Enable", 'error', '1', 'Failed to decrypt protectedSettings') + jctxt = '' + try: + jctxt = json.loads(cleartxt) + except: + self.error('JSON exception decoding 
' + HandlerUtility.redact_protected_settings(cleartxt)) + handlerSettings['protectedSettings']=jctxt + self.log('Config decoded correctly.') + return config + + def do_parse_context(self, operation): + _context = self.try_parse_context() + if not _context: + self.do_exit(1, operation, 'error', '1', operation + ' Failed') + return _context + + def try_parse_context(self): + self._context = HandlerContext(self._short_name) + handler_env = None + config = None + ctxt = None + code = 0 + # get the HandlerEnvironment.json. According to the extension handler spec, it is always in the ./ directory + self.log('cwd is ' + os.path.realpath(os.path.curdir)) + handler_env_file = './HandlerEnvironment.json' + if not os.path.isfile(handler_env_file): + self.error("Unable to locate " + handler_env_file) + return None + ctxt = waagent.GetFileContents(handler_env_file) + if ctxt == None: + self.error("Unable to read " + handler_env_file) + try: + handler_env = json.loads(ctxt) + except: + pass + if handler_env == None: + self.log("JSON error processing " + handler_env_file) + return None + if type(handler_env) == list: + handler_env = handler_env[0] + + self._context._name = handler_env['name'] + self._context._version = str(handler_env['version']) + self._context._config_dir = handler_env['handlerEnvironment']['configFolder'] + self._context._log_dir = handler_env['handlerEnvironment']['logFolder'] + + self._context._log_file = os.path.join(handler_env['handlerEnvironment']['logFolder'], self._logFileName) + self._change_log_file() + self._context._status_dir = handler_env['handlerEnvironment']['statusFolder'] + self._context._heartbeat_file = handler_env['handlerEnvironment']['heartbeatFile'] + self._context._seq_no = self._get_current_seq_no(self._context._config_dir) + if self._context._seq_no < 0: + self.error("Unable to locate a .settings file!") + return None + self._context._seq_no = str(self._context._seq_no) + self.log('sequence number is ' + self._context._seq_no) + 
self._context._status_file = os.path.join(self._context._status_dir, self._context._seq_no + '.status') + self._context._settings_file = os.path.join(self._context._config_dir, self._context._seq_no + '.settings') + self.log("setting file path is" + self._context._settings_file) + ctxt = None + ctxt = waagent.GetFileContents(self._context._settings_file) + if ctxt == None: + error_msg = 'Unable to read ' + self._context._settings_file + '. ' + self.error(error_msg) + return None + + self.log("JSON config: " + HandlerUtility.redact_protected_settings(ctxt)) + self._context._config = self._parse_config(ctxt) + return self._context + + def _change_log_file(self): + self.log("Change log file to " + self._context._log_file) + LoggerInit(self._context._log_file, '/dev/stdout') + self._log = waagent.Log + self._error = waagent.Error + + def set_verbose_log(self, verbose): + if (verbose == "1" or verbose == 1): + self.log("Enable verbose log") + LoggerInit(self._context._log_file, '/dev/stdout', verbose=True) + else: + self.log("Disable verbose log") + LoggerInit(self._context._log_file, '/dev/stdout', verbose=False) + + def is_seq_smaller(self): + return int(self._context._seq_no) <= self._get_most_recent_seq() + + def save_seq(self): + self._set_most_recent_seq(self._context._seq_no) + self.log("set most recent sequence number to " + self._context._seq_no) + + def exit_if_enabled(self, remove_protected_settings=False): + self.exit_if_seq_smaller(remove_protected_settings) + + def exit_if_seq_smaller(self, remove_protected_settings): + if(self.is_seq_smaller()): + self.log("Current sequence number, " + self._context._seq_no + ", is not greater than the sequnce number of the most recent executed configuration. 
Exiting...") + sys.exit(0) + self.save_seq() + + if remove_protected_settings: + self.scrub_settings_file() + + def _get_most_recent_seq(self): + if (os.path.isfile('mrseq')): + seq = waagent.GetFileContents('mrseq') + if (seq): + return int(seq) + + return -1 + + def is_current_config_seq_greater_inused(self): + return int(self._context._seq_no) > self._get_most_recent_seq() + + def get_inused_config_seq(self): + return self._get_most_recent_seq() + + def set_inused_config_seq(self, seq): + self._set_most_recent_seq(seq) + + def _set_most_recent_seq(self, seq): + waagent.SetFileContents('mrseq', str(seq)) + + def do_status_report(self, operation, status, status_code, message): + self.log("{0},{1},{2},{3}".format(operation, status, status_code, message)) + tstamp = time.strftime(DateTimeFormat, time.gmtime()) + stat = [{ + "version": self._context._version, + "timestampUTC": tstamp, + "status": { + "name": self._context._name, + "operation": operation, + "status": status, + "code": status_code, + "formattedMessage": { + "lang": "en-US", + "message": message + } + } + }] + stat_rept = json.dumps(stat) + if self._context._status_file: + tmp = "%s.tmp" % (self._context._status_file) + with open(tmp, 'w+') as f: + f.write(stat_rept) + os.rename(tmp, self._context._status_file) + + def do_heartbeat_report(self, heartbeat_file, status, code, message): + # heartbeat + health_report = '[{"version":"1.0","heartbeat":{"status":"' + status + '","code":"' + code + '","Message":"' + message + '"}}]' + if waagent.SetFileContents(heartbeat_file, health_report) == None: + self.error('Unable to wite heartbeat info to ' + heartbeat_file) + + def do_exit(self, exit_code, operation, status, code, message): + try: + self.do_status_report(operation, status, code, message) + except Exception as e: + self.log("Can't update status: " + str(e)) + sys.exit(exit_code) + + def get_name(self): + return self._context._name + + def get_seq_no(self): + return self._context._seq_no + + def 
get_log_dir(self): + return self._context._log_dir + + def get_handler_settings(self): + if (self._context._config != None): + return self._context._config['runtimeSettings'][0]['handlerSettings'] + return None + + def get_protected_settings(self): + if (self._context._config != None): + return self.get_handler_settings().get('protectedSettings') + return None + + def get_public_settings(self): + handlerSettings = self.get_handler_settings() + if (handlerSettings != None): + return self.get_handler_settings().get('publicSettings') + return None + + def scrub_settings_file(self): + content = waagent.GetFileContents(self._context._settings_file) + redacted = HandlerUtility.redact_protected_settings(content) + + waagent.SetFileContents(self._context._settings_file, redacted) \ No newline at end of file diff --git a/tests_e2e/GuestAgentDcrTestExtension/Utils/LogUtil.py b/tests_e2e/GuestAgentDcrTestExtension/Utils/LogUtil.py new file mode 100755 index 0000000000..71c200cec5 --- /dev/null +++ b/tests_e2e/GuestAgentDcrTestExtension/Utils/LogUtil.py @@ -0,0 +1,50 @@ +# Logging utilities +# +# Copyright 2014 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# pylint: disable=all + +import os +import os.path +import string +import sys + +OutputSize = 4 * 1024 + + +def tail(log_file, output_size = OutputSize): + pos = min(output_size, os.path.getsize(log_file)) + with open(log_file, "r") as log: + log.seek(0, os.SEEK_END) + log.seek(log.tell() - pos, os.SEEK_SET) + buf = log.read(output_size) + buf = filter(lambda x: x in string.printable, buf) + + # encoding works different for between interpreter version, we are keeping separate implementation to ensure + # backward compatibility + if sys.version_info[0] == 3: + buf = ''.join(list(buf)).encode('ascii', 'ignore').decode("ascii", "ignore") + elif sys.version_info[0] == 2: + buf = buf.decode("ascii", "ignore") + + return buf + + +def get_formatted_log(summary, stdout, stderr): + msg_format = ("{0}\n" + "---stdout---\n" + "{1}\n" + "---errout---\n" + "{2}\n") + return msg_format.format(summary, stdout, stderr) \ No newline at end of file diff --git a/tests_e2e/GuestAgentDcrTestExtension/Utils/ScriptUtil.py b/tests_e2e/GuestAgentDcrTestExtension/Utils/ScriptUtil.py new file mode 100755 index 0000000000..3987cc04cc --- /dev/null +++ b/tests_e2e/GuestAgentDcrTestExtension/Utils/ScriptUtil.py @@ -0,0 +1,140 @@ +# Script utilities +# +# Copyright 2014 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# pylint: disable=all + +import os +import os.path +import time +import subprocess +import traceback +import string +import shlex +import sys + +from Utils import LogUtil +from Utils.WAAgentUtil import waagent + +DefaultStdoutFile = "stdout" +DefaultErroutFile = "errout" + + +def run_command(hutil, args, cwd, operation, extension_short_name, version, exit_after_run=True, interval=30, + std_out_file_name=DefaultStdoutFile, std_err_file_name=DefaultErroutFile): + std_out_file = os.path.join(cwd, std_out_file_name) + err_out_file = os.path.join(cwd, std_err_file_name) + std_out = None + err_out = None + try: + std_out = open(std_out_file, "w") + err_out = open(err_out_file, "w") + start_time = time.time() + child = subprocess.Popen(args, + cwd=cwd, + stdout=std_out, + stderr=err_out) + time.sleep(1) + while child.poll() is None: + msg = "Command is running..." + msg_with_cmd_output = LogUtil.get_formatted_log(msg, LogUtil.tail(std_out_file), LogUtil.tail(err_out_file)) + msg_without_cmd_output = msg + " Stdout/Stderr omitted from output." + + hutil.log_to_file(msg_with_cmd_output) + hutil.log_to_console(msg_without_cmd_output) + hutil.do_status_report(operation, 'transitioning', '0', msg_without_cmd_output) + time.sleep(interval) + + exit_code = child.returncode + if child.returncode and child.returncode != 0: + msg = "Command returned an error." + msg_with_cmd_output = LogUtil.get_formatted_log(msg, LogUtil.tail(std_out_file), LogUtil.tail(err_out_file)) + msg_without_cmd_output = msg + " Stdout/Stderr omitted from output." + + hutil.error(msg_without_cmd_output) + waagent.AddExtensionEvent(name=extension_short_name, + op=operation, + isSuccess=False, + version=version, + message="(01302)" + msg_without_cmd_output) + else: + msg = "Command is finished." + msg_with_cmd_output = LogUtil.get_formatted_log(msg, LogUtil.tail(std_out_file), LogUtil.tail(err_out_file)) + msg_without_cmd_output = msg + " Stdout/Stderr omitted from output." 
+ + hutil.log_to_file(msg_with_cmd_output) + hutil.log_to_console(msg_without_cmd_output) + waagent.AddExtensionEvent(name=extension_short_name, + op=operation, + isSuccess=True, + version=version, + message="(01302)" + msg_without_cmd_output) + end_time = time.time() + waagent.AddExtensionEvent(name=extension_short_name, + op=operation, + isSuccess=True, + version=version, + message=("(01304)Command execution time: " + "{0}s").format(str(end_time - start_time))) + + log_or_exit(hutil, exit_after_run, exit_code, operation, msg_with_cmd_output) + except Exception as e: + error_msg = ("Failed to launch command with error: {0}," + "stacktrace: {1}").format(e, traceback.format_exc()) + hutil.error(error_msg) + waagent.AddExtensionEvent(name=extension_short_name, + op=operation, + isSuccess=False, + version=version, + message="(01101)" + error_msg) + exit_code = 1 + msg = 'Launch command failed: {0}'.format(e) + + log_or_exit(hutil, exit_after_run, exit_code, operation, msg) + finally: + if std_out: + std_out.close() + if err_out: + err_out.close() + return exit_code + + +# do_exit calls sys.exit which raises an exception so we do not call it from the finally block +def log_or_exit(hutil, exit_after_run, exit_code, operation, msg): + status = 'success' if exit_code == 0 else 'failed' + if exit_after_run: + hutil.do_exit(exit_code, operation, status, str(exit_code), msg) + else: + hutil.do_status_report(operation, status, str(exit_code), msg) + + +def parse_args(cmd): + cmd = filter(lambda x: x in string.printable, cmd) + + # encoding works different for between interpreter version, we are keeping separate implementation to ensure + # backward compatibility + if sys.version_info[0] == 3: + cmd = ''.join(list(cmd)).encode('ascii', 'ignore').decode("ascii", "ignore") + elif sys.version_info[0] == 2: + cmd = cmd.decode("ascii", "ignore") + + args = shlex.split(cmd) + # From python 2.6 to python 2.7.2, shlex.split output UCS-4 result like + # '\x00\x00a'. 
Temp workaround is to replace \x00 + for idx, val in enumerate(args): + if '\x00' in args[idx]: + args[idx] = args[idx].replace('\x00', '') + return args + + diff --git a/tests_e2e/GuestAgentDcrTestExtension/Utils/WAAgentUtil.py b/tests_e2e/GuestAgentDcrTestExtension/Utils/WAAgentUtil.py new file mode 100755 index 0000000000..41ef3bb11b --- /dev/null +++ b/tests_e2e/GuestAgentDcrTestExtension/Utils/WAAgentUtil.py @@ -0,0 +1,140 @@ +# Wrapper module for waagent +# +# waagent is not written as a module. This wrapper module is created +# to use the waagent code as a module. +# +# Copyright 2014 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# pylint: disable=all + +import imp +import os +import os.path + + +# +# The following code will search and load waagent code and expose +# it as a submodule of current module +# +def searchWAAgent(): + # if the extension ships waagent in its package to default to this version first + pkg_agent_path = os.path.join(os.getcwd(), 'waagent') + if os.path.isfile(pkg_agent_path): + return pkg_agent_path + + agentPath = '/usr/sbin/waagent' + if os.path.isfile(agentPath): + return agentPath + + user_paths = os.environ['PYTHONPATH'].split(os.pathsep) + for user_path in user_paths: + agentPath = os.path.join(user_path, 'waagent') + if os.path.isfile(agentPath): + return agentPath + return None + + +waagent = None +agentPath = searchWAAgent() +if agentPath: + waagent = imp.load_source('waagent', agentPath) +else: + raise Exception("Can't load waagent.") + +if not hasattr(waagent, "AddExtensionEvent"): + """ + If AddExtensionEvent is not defined, provide a dummy impl. + """ + + + def _AddExtensionEvent(*args, **kwargs): + pass + + + waagent.AddExtensionEvent = _AddExtensionEvent + +if not hasattr(waagent, "WALAEventOperation"): + class _WALAEventOperation: + HeartBeat = "HeartBeat" + Provision = "Provision" + Install = "Install" + UnIsntall = "UnInstall" + Disable = "Disable" + Enable = "Enable" + Download = "Download" + Upgrade = "Upgrade" + Update = "Update" + + + waagent.WALAEventOperation = _WALAEventOperation + +# Better deal with the silly waagent typo, in anticipation of a proper fix of the typo later on waagent +if not hasattr(waagent.WALAEventOperation, 'Uninstall'): + if hasattr(waagent.WALAEventOperation, 'UnIsntall'): + waagent.WALAEventOperation.Uninstall = waagent.WALAEventOperation.UnIsntall + else: # This shouldn't happen, but just in case... + waagent.WALAEventOperation.Uninstall = 'Uninstall' + + +def GetWaagentHttpProxyConfigString(): + """ + Get http_proxy and https_proxy from waagent config. + Username and password is not supported now. 
+ This code is adopted from /usr/sbin/waagent + """ + host = None + port = None + try: + waagent.Config = waagent.ConfigurationProvider( + None) # Use default waagent conf file (most likely /etc/waagent.conf) + + host = waagent.Config.get("HttpProxy.Host") + port = waagent.Config.get("HttpProxy.Port") + except Exception as e: + # waagent.ConfigurationProvider(None) will throw an exception on an old waagent + # Has to silently swallow because logging is not yet available here + # and we don't want to bring that in here. Also if the call fails, then there's + # no proxy config in waagent.conf anyway, so it's safe to silently swallow. + pass + + result = '' + if host is not None: + result = "http://" + host + if port is not None: + result += ":" + port + + return result + + +waagent.HttpProxyConfigString = GetWaagentHttpProxyConfigString() + +# end: waagent http proxy config stuff + +__ExtensionName__ = None + + +def InitExtensionEventLog(name): + global __ExtensionName__ + __ExtensionName__ = name + + +def AddExtensionEvent(name=__ExtensionName__, + op=waagent.WALAEventOperation.Enable, + isSuccess=False, + message=None): + if name is not None: + waagent.AddExtensionEvent(name=name, + op=op, + isSuccess=isSuccess, + message=message) diff --git a/tests_e2e/GuestAgentDcrTestExtension/Utils/test/MockUtil.py b/tests_e2e/GuestAgentDcrTestExtension/Utils/test/MockUtil.py new file mode 100755 index 0000000000..8c8c242712 --- /dev/null +++ b/tests_e2e/GuestAgentDcrTestExtension/Utils/test/MockUtil.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python +# +# Sample Extension +# +# Copyright 2014 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=all + +# TODO: These tests were copied as reference - they are not currently running + +class MockUtil(): + def __init__(self, test): + self.test = test + + def get_log_dir(self): + return "/tmp" + + def log(self, msg): + print(msg) + + def error(self, msg): + print(msg) + + def get_seq_no(self): + return "0" + + def do_status_report(self, operation, status, status_code, message): + self.test.assertNotEqual(None, message) + self.last = "do_status_report" + + def do_exit(self,exit_code,operation,status,code,message): + self.test.assertNotEqual(None, message) + self.last = "do_exit" diff --git a/tests_e2e/GuestAgentDcrTestExtension/Utils/test/env.py b/tests_e2e/GuestAgentDcrTestExtension/Utils/test/env.py new file mode 100755 index 0000000000..fa447fcc6e --- /dev/null +++ b/tests_e2e/GuestAgentDcrTestExtension/Utils/test/env.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python +# +# Sample Extension +# +# Copyright 2014 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import os + +#append installer directory to sys.path +root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.append(root) diff --git a/tests_e2e/GuestAgentDcrTestExtension/Utils/test/mock.sh b/tests_e2e/GuestAgentDcrTestExtension/Utils/test/mock.sh new file mode 100755 index 0000000000..da2fec539f --- /dev/null +++ b/tests_e2e/GuestAgentDcrTestExtension/Utils/test/mock.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# +# Copyright 2014 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +echo "Start..." +sleep 0.1 +echo "Running" +>&2 echo "Warning" +sleep 0.1 +echo "Finished" +exit $1 diff --git a/tests_e2e/GuestAgentDcrTestExtension/Utils/test/test_logutil.py b/tests_e2e/GuestAgentDcrTestExtension/Utils/test/test_logutil.py new file mode 100755 index 0000000000..163ad7a913 --- /dev/null +++ b/tests_e2e/GuestAgentDcrTestExtension/Utils/test/test_logutil.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python +# +# Copyright 2014 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=all + +# TODO: These tests were copied as reference - they are not currently running + +import unittest +import LogUtil as lu + + +class TestLogUtil(unittest.TestCase): + def test_tail(self): + with open("/tmp/testtail", "w+") as F: + F.write(u"abcdefghijklmnopqrstu\u6211vwxyz".encode("utf-8")) + tail = lu.tail("/tmp/testtail", 2) + self.assertEquals("yz", tail) + + tail = lu.tail("/tmp/testtail") + self.assertEquals("abcdefghijklmnopqrstuvwxyz", tail) + +if __name__ == '__main__': + unittest.main() diff --git a/tests_e2e/GuestAgentDcrTestExtension/Utils/test/test_null_protected_settings.py b/tests_e2e/GuestAgentDcrTestExtension/Utils/test/test_null_protected_settings.py new file mode 100755 index 0000000000..bbb6dbbd6b --- /dev/null +++ b/tests_e2e/GuestAgentDcrTestExtension/Utils/test/test_null_protected_settings.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python +# +# Sample Extension +# +# Copyright 2014 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# pylint: disable=all + +# TODO: These tests were copied as reference - they are not currently running + +import unittest +import HandlerUtil as Util + +def mock_log(*args, **kwargs): + pass + +class TestNullProtectedSettings(unittest.TestCase): + def test_null_protected_settings(self): + hutil = Util.HandlerUtility(mock_log, mock_log, "UnitTest", "HandlerUtil.UnitTest", "0.0.1") + config = hutil._parse_config(Settings) + handlerSettings = config['runtimeSettings'][0]['handlerSettings'] + self.assertEquals(handlerSettings["protectedSettings"], None) + +Settings="""\ +{ + "runtimeSettings":[{ + "handlerSettings":{ + "protectedSettingsCertThumbprint":null, + "protectedSettings":null, + "publicSettings":{} + } + }] +} +""" + +if __name__ == '__main__': + unittest.main() diff --git a/tests_e2e/GuestAgentDcrTestExtension/Utils/test/test_redacted_settings.py b/tests_e2e/GuestAgentDcrTestExtension/Utils/test/test_redacted_settings.py new file mode 100644 index 0000000000..d3ed63ba73 --- /dev/null +++ b/tests_e2e/GuestAgentDcrTestExtension/Utils/test/test_redacted_settings.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python +# +# Tests for redacted settings +# +# Copyright 2014 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# pylint: disable=all + +# TODO: These tests were copied as reference - they are not currently running + +import unittest +import Utils.HandlerUtil as Util + + +class TestRedactedProtectedSettings(unittest.TestCase): + + def test_redacted_protected_settings(self): + redacted = Util.HandlerUtility.redact_protected_settings(settings_original) + self.assertIn('"protectedSettings": "*** REDACTED ***"', redacted) + self.assertIn('"protectedSettingsCertThumbprint": "*** REDACTED ***"', redacted) + + +settings_original = """\ +{ + "runtimeSettings": [{ + "handlerSettings": { + "protectedSettingsCertThumbprint": "9310D2O49D7216D4A1CEDCE9D8A7CE5DBD7FB7BF", + "protectedSettings": "MIIC4AYJKoZIhvcNAQcWoIIB0TCDEc0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEB8f7DyzHLGjSDLnEWd4YeAwDQYJKoZIhvcNAQEBBQAEggEAiZj2gQtT4MpdTaEH8rUVFB/8Ucc8OxGFWu8VKbIdoHLKp1WcDb7Vlzv6fHLBIccgXGuR1XHTvtlD4QiKpSet341tPPug/R5ZtLSRz1pqtXZdrFcuuSxOa6ib/+la5ukdygcVwkEnmNSQaiipPKyqPH2JsuhmGCdXFiKwCSTrgGE6GyCBtaK9KOf48V/tYXHnDGrS9q5a1gRF5KVI2B26UYSO7V7pXjzYCd/Sp9yGj7Rw3Kqf9Lpix/sPuqWjV6e2XFlD3YxaHSeHVnLI/Bkz2E6Ri8yfPYus52r/mECXPL2YXqY9dGyrlKKIaD9AuzMyvvy1A74a9VBq7zxQQ4adEzBbBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECDyEf4mRrmWJgDhW4j2nRNTJU4yXxocQm/PhAr39Um7n0pgI2Cn28AabYtsHWjKqr8Al9LX6bKm8cnmnLjqTntphCw==", + "publicSettings": {} + } + }] +} +""" + +if __name__ == '__main__': + unittest.main() diff --git a/tests_e2e/GuestAgentDcrTestExtension/Utils/test/test_scriptutil.py b/tests_e2e/GuestAgentDcrTestExtension/Utils/test/test_scriptutil.py new file mode 100755 index 0000000000..4f84cefb21 --- /dev/null +++ b/tests_e2e/GuestAgentDcrTestExtension/Utils/test/test_scriptutil.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python +# +# Copyright 2014 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=all + +# TODO: These tests were copied as reference - they are not currently running + +import os +import os.path +import env +import ScriptUtil as su +import unittest +from MockUtil import MockUtil + +class TestScriptUtil(unittest.TestCase): + def test_parse_args(self): + print(__file__) + cmd = u'sh foo.bar.sh -af bar --foo=bar | more \u6211' + args = su.parse_args(cmd.encode('utf-8')) + self.assertNotEquals(None, args) + self.assertNotEquals(0, len(args)) + print(args) + + def test_run_command(self): + hutil = MockUtil(self) + test_script = "mock.sh" + os.chdir(os.path.join(env.root, "test")) + exit_code = su.run_command(hutil, ["sh", test_script, "0"], os.getcwd(), 'RunScript-0', 'TestExtension', '1.0', True, 0.1) + self.assertEquals(0, exit_code) + self.assertEquals("do_exit", hutil.last) + exit_code = su.run_command(hutil, ["sh", test_script, "75"], os.getcwd(), 'RunScript-1', 'TestExtension', '1.0', False, 0.1) + self.assertEquals(75, exit_code) + self.assertEquals("do_status_report", hutil.last) + + def test_log_or_exit(self): + hutil = MockUtil(self) + su.log_or_exit(hutil, True, 0, 'LogOrExit-0', 'Message1') + self.assertEquals("do_exit", hutil.last) + su.log_or_exit(hutil, False, 0, 'LogOrExit-1', 'Message2') + self.assertEquals("do_status_report", hutil.last) + +if __name__ == '__main__': + unittest.main() diff --git a/tests_e2e/GuestAgentDcrTestExtension/manifest.xml b/tests_e2e/GuestAgentDcrTestExtension/manifest.xml new file mode 100644 index 0000000000..a4b0c755f2 --- /dev/null +++ 
b/tests_e2e/GuestAgentDcrTestExtension/manifest.xml @@ -0,0 +1,17 @@ + + + Microsoft.Azure.TestExtensions + GuestAgentDcrTest + 1.4.1 + + VmRole + + Microsoft Azure Guest Agent test Extension for testing Linux Virtual Machines in DCR + true + https://github.com/larohra/GuestAgentDcrTestExtension/blob/master/LICENSE + http://www.microsoft.com/privacystatement/en-us/OnlineServices/Default.aspx + https://github.com/larohra/GuestAgentDcrTestExtension + true + Linux + Microsoft + diff --git a/tests_e2e/GuestAgentDcrTestExtension/references b/tests_e2e/GuestAgentDcrTestExtension/references new file mode 100644 index 0000000000..442153ec8c --- /dev/null +++ b/tests_e2e/GuestAgentDcrTestExtension/references @@ -0,0 +1,2 @@ +# TODO: Investigate the use of this file +Utils/ diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index a1370bd6e7..ca9f8dde46 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -51,7 +51,7 @@ variable: # # The test suites to execute - name: test_suites - value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips" + value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow" - name: cloud value: "AzureCloud" is_case_visible: true diff --git a/tests_e2e/test_suites/agent_ext_workflow.yml b/tests_e2e/test_suites/agent_ext_workflow.yml new file mode 100644 index 0000000000..1e965317ba --- /dev/null +++ b/tests_e2e/test_suites/agent_ext_workflow.yml @@ -0,0 +1,11 @@ +name: "AgentExtWorkflow" +tests: + - "agent_ext_workflow/extension_workflow.py" +images: + - "centos_79" + - "suse_12" + - "rhel_79" + - "ubuntu_1604" + - "ubuntu_1804" +# This test suite uses the DCR Test Extension, which is only published in South Central US +locations: "AzureCloud:southcentralus" diff --git a/tests_e2e/tests/agent_ext_workflow/README.md b/tests_e2e/tests/agent_ext_workflow/README.md new file mode 100644 index 
0000000000..a8d59fc151 --- /dev/null +++ b/tests_e2e/tests/agent_ext_workflow/README.md @@ -0,0 +1,45 @@ +# Agent Extension Worflow Test + +This scenario tests if the correct extension workflow sequence is being executed from the agent. + +### GuestAgentDcrTestExtension + +This is a test extension that exists for the sole purpose of testing the extension workflow of agent. This is currently deployed to SCUS only. + +All the extension does is prints the settings['name'] out to stdout. It is run everytime enable is called. + +Another important feature of this extension is that it maintains a `operations-.log` **for every operation that the agent executes on that extension**. We use this to confirm that the agent executed the correct sequence of operations. + +Sample operations-.log file snippet - +```text +Date:2019-07-30T21:54:03Z; Operation:install; SeqNo:0 +Date:2019-07-30T21:54:05Z; Operation:enable; SeqNo:0 +Date:2019-07-30T21:54:37Z; Operation:enable; SeqNo:1 +Date:2019-07-30T21:55:20Z; Operation:disable; SeqNo:1 +Date:2019-07-30T21:55:22Z; Operation:uninstall; SeqNo:1 +``` +The setting for this extension is of the format - +```json +{ + "name": String +} +``` +##### Repo link +https://github.com/larohra/GuestAgentDcrTestExtension + +##### Available Versions: +- 1.1.5 - Version with Basic functionalities as mentioned above +- 1.2.0 - Same functionalities as above with `"updateMode": "UpdateWithInstall"` in HandlerManifest.json to test update case +- 1.3.0 - Same functionalities as above with `"updateMode": "UpdateWithoutInstall"` in HandlerManifest.json to test update case + +### Test Sequence + +- Install the test extension on the VM +- Assert the extension status by checking if our Enable string matches the status message (We receive the status message by using the Azure SDK by polling for the VM instance view and parsing the extension status message) + +The Enable string of our test is of the following format (this is set in the `Settings` object when we 
call enable from the tests ) - +```text +[ExtensionName]-[Version], Count: [Enable-count] +``` +- Match the operation sequence as per the test and make sure they are in the correct chronological order +- Restart the agent and verify if the correct operation sequence is followed \ No newline at end of file diff --git a/tests_e2e/tests/agent_ext_workflow/extension_workflow.py b/tests_e2e/tests/agent_ext_workflow/extension_workflow.py new file mode 100644 index 0000000000..8c08ea7d3c --- /dev/null +++ b/tests_e2e/tests/agent_ext_workflow/extension_workflow.py @@ -0,0 +1,447 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from azure.mgmt.compute.models import VirtualMachineExtensionInstanceView +from assertpy import assert_that, soft_assertions +from datetime import datetime +from random import choice +import uuid + +from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test_context import AgentTestContext +from tests_e2e.tests.lib.identifiers import VmExtensionIds, VmExtensionIdentifier +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.ssh_client import SshClient +from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient + + +class ExtensionWorkflow(AgentTest): + """ + This scenario tests if the correct extension workflow sequence is being executed from the agent. 
It installs the + GuestAgentDcrTestExtension on the test VM and makes requests to install, enable, update, and delete the extension + from the VM. The GuestAgentDcrTestExtension maintains a local `operations-.log` for every operation that + the agent executes on that extension. We use this to confirm that the agent executed the correct sequence of + operations. + + Sample operations-.log file snippet - + Date:2019-07-30T21:54:03Z; Operation:install; SeqNo:0 + Date:2019-07-30T21:54:05Z; Operation:enable; SeqNo:0 + Date:2019-07-30T21:54:37Z; Operation:enable; SeqNo:1 + Date:2019-07-30T21:55:20Z; Operation:disable; SeqNo:1 + Date:2019-07-30T21:55:22Z; Operation:uninstall; SeqNo:1 + + The setting for the GuestAgentDcrTestExtension is of the format - + { + "name": String + } + + Test sequence - + - Install the test extension on the VM + - Assert the extension status by checking if our Enable string matches the status message + - The Enable string of our test is of the following format (this is set in the `Settings` object when we c + all enable from the tests): [ExtensionName]-[Version], Count: [Enable-count] + - Match the operation sequence as per the test and make sure they are in the correct chronological order + - Restart the agent and verify if the correct operation sequence is followed + """ + def __init__(self, context: AgentTestContext): + super().__init__(context) + self._ssh_client = SshClient( + ip_address=self._context.vm_ip_address, + username=self._context.username, + private_key_file=self._context.private_key_file) + + # This class represents the GuestAgentDcrTestExtension running on the test VM + class GuestAgentDcrTestExtension: + COUNT_KEY_NAME = "Count" + NAME_KEY_NAME = "name" + DATA_KEY_NAME = "data" + + def __init__(self, extension: VirtualMachineExtensionClient, ssh_client: SshClient, version: str): + self.extension = extension + self.name = "GuestAgentDcrTestExt" + self.version = version + self.expected_message = "" + self.enable_count = 0 + 
self.ssh_client = ssh_client + self.data = None + + def modify_ext_settings_and_enable(self, data=None): + self.enable_count += 1 + + # Settings follows the following format: [ExtensionName]-[Version], Count: [Enable-count] + setting_name = "%s-%s, %s: %s" % (self.name, self.version, self.COUNT_KEY_NAME, self.enable_count) + # We include data in the settings to test the special characters case. The settings with data follows the + # following format: [ExtensionName]-[Version], Count: [Enable-count], data: [data] + if data is not None: + setting_name = "{0}, {1}: {2}".format(setting_name, self.DATA_KEY_NAME, data) + + self.expected_message = setting_name + settings = {self.NAME_KEY_NAME: setting_name.encode('utf-8')} + + log.info("") + log.info("Add or update extension {0} , version {1}, settings {2}".format(self.extension, self.version, + settings)) + self.extension.enable(settings=settings, auto_upgrade_minor_version=False) + + def assert_instance_view(self, data=None): + log.info("") + + # If data is not None, we want to assert the instance view has the expected data + if data is None: + log.info("Assert instance view has expected message for test extension. Expected version: {0}, " + "Expected message: {1}".format(self.version, self.expected_message)) + self.extension.assert_instance_view(expected_version=self.version, + expected_message=self.expected_message) + else: + self.data = data + log.info("Assert instance view has expected data for test extension. 
Expected version: {0}, " + "Expected data: {1}".format(self.version, data)) + self.extension.assert_instance_view(expected_version=self.version, + assert_function=self.assert_data_in_instance_view) + + def assert_data_in_instance_view(self, instance_view: VirtualMachineExtensionInstanceView): + log.info("Asserting extension status ...") + status_message = instance_view.statuses[0].message + log.info("Status message: %s" % status_message) + + with soft_assertions(): + expected_ext_version = "%s-%s" % (self.name, self.version) + assert_that(expected_ext_version in status_message).described_as( + f"Specific extension version name should be in the InstanceView message ({expected_ext_version})").is_true() + + expected_count = "%s: %s" % (self.COUNT_KEY_NAME, self.enable_count) + assert_that(expected_count in status_message).described_as( + f"Expected count should be in the InstanceView message ({expected_count})").is_true() + + if self.data is not None: + expected_data = "{0}: {1}".format(self.DATA_KEY_NAME, self.data) + assert_that(expected_data in status_message).described_as( + f"Expected data should be in the InstanceView message ({expected_data})").is_true() + + def execute_assertion_script(self, file_name, args): + log.info("") + log.info("Running {0} remotely with arguments {1}".format(file_name, args)) + result = self.ssh_client.run_command(f"{file_name} {args}", use_sudo=True) + log.info(result) + log.info("Assertion completed successfully") + + def assert_scenario(self, file_name: str, command_args: str, assert_status: bool = False, restart_agent: list = None, data: str = None): + # Assert the extension status by checking if our Enable string matches the status message in the instance + # view + if assert_status: + self.assert_instance_view(data=data) + + # Remotely execute the assertion script + self.execute_assertion_script(file_name, command_args) + + # Restart the agent and test the status again if enabled (by checking the operations.log file in the VM) + 
# Restarting agent should just run enable again and rerun the same settings + if restart_agent is not None: + log.info("") + log.info("Restarting the agent...") + output = self.ssh_client.run_command("agent-service restart", use_sudo=True) + log.info("Restart completed:\n%s", output) + + for args in restart_agent: + self.execute_assertion_script('agent_ext_workflow-assert_operation_sequence.py', args) + + if assert_status: + self.assert_instance_view() + + def update_ext_version(self, extension: VirtualMachineExtensionClient, version: str): + self.extension = extension + self.version = version + + def run(self): + is_arm64: bool = self._ssh_client.get_architecture() == "aarch64" + + if is_arm64: + log.info("Skipping test case for %s, since it has not been published on ARM64", VmExtensionIds.GuestAgentDcrTestExtension) + else: + log.info("") + log.info("*******Verifying the extension install scenario*******") + + # Record the time we start the test + start_time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") + + # Create DcrTestExtension with version 1.1.5 + dcr_test_ext_id_1_1 = VmExtensionIdentifier( + VmExtensionIds.GuestAgentDcrTestExtension.publisher, + VmExtensionIds.GuestAgentDcrTestExtension.type, + "1.1" + ) + dcr_test_ext_client = VirtualMachineExtensionClient( + self._context.vm, + dcr_test_ext_id_1_1, + resource_name="GuestAgentDcrTestExt" + ) + dcr_ext = ExtensionWorkflow.GuestAgentDcrTestExtension( + extension=dcr_test_ext_client, + ssh_client=self._ssh_client, + version="1.1.5" + ) + + # Install test extension on the VM + dcr_ext.modify_ext_settings_and_enable() + + # command_args are the args we pass to the agent_ext_workflow-assert_operation_sequence.py file to verify + # the operation sequence for the current test + command_args = f"--start-time {start_time} " \ + f"normal_ops_sequence " \ + f"--version {dcr_ext.version} " \ + f"--ops install enable" + # restart_agentcommand_args are the args we pass to the 
agent_ext_workflow-assert_operation_sequence.py file + # to verify the operation sequence after restarting the agent. Restarting agent should just run enable again + # and rerun the same settings + restart_agent_command_args = [f"--start-time {start_time} " + f"normal_ops_sequence " + f"--version {dcr_ext.version} " + f"--ops install enable enable"] + + # Assert the operation sequence to confirm the agent executed the operations in the correct chronological + # order + dcr_ext.assert_scenario( + file_name='agent_ext_workflow-assert_operation_sequence.py', + command_args=command_args, + assert_status=True, + restart_agent=restart_agent_command_args + ) + + log.info("") + log.info("*******Verifying the extension enable scenario*******") + + # Record the time we start the test + start_time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") + + # Enable test extension on the VM + dcr_ext.modify_ext_settings_and_enable() + + command_args = f"--start-time {start_time} " \ + f"normal_ops_sequence " \ + f"--version {dcr_ext.version} " \ + f"--ops enable" + restart_agent_command_args = [f"--start-time {start_time} " + f"normal_ops_sequence " + f"--version {dcr_ext.version} " + f"--ops enable enable"] + + dcr_ext.assert_scenario( + file_name='agent_ext_workflow-assert_operation_sequence.py', + command_args=command_args, + assert_status=True, + restart_agent=restart_agent_command_args + ) + + log.info("") + log.info("*******Verifying the extension enable with special characters scenario*******") + + test_guid = str(uuid.uuid4()) + random_special_char_sentences = [ + "Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen Wolther spillede på xylofon.", + "Falsches Üben von Xylophonmusik quält jeden größeren Zwerg", + "Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich", + "Heizölrückstoßabdämpfung", + "Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ στὸ χρυσαφὶ ξέφωτο", + "Ξεσκεπάζω τὴν ψυχοφθόρα βδελυγμία", + "El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y frío, 
añoraba a su querido cachorro.", + "Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à côté de l'alcôve ovoïde, où les bûches" + ] + sentence = choice(random_special_char_sentences) + test_str = "{0}; Special chars: {1}".format(test_guid, sentence) + + # Enable test extension on the VM + dcr_ext.modify_ext_settings_and_enable(data=test_str) + + command_args = f"--data {test_guid}" + + # We first ensure that the stdout contains the special characters and then we check if the test_guid is + # logged atleast once in the agent log to ensure that there were no errors when handling special characters + # in the agent + dcr_ext.assert_scenario( + file_name='agent_ext_workflow-check_data_in_agent_log.py', + command_args=command_args, + assert_status=True, + data=test_guid + ) + + log.info("") + log.info("*******Verifying the extension uninstall scenario*******") + + # Record the time we start the test + start_time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") + + # Remove the test extension on the VM + log.info("Delete %s from VM", dcr_test_ext_client) + dcr_ext.extension.delete() + + command_args = f"--start-time {start_time} " \ + f"normal_ops_sequence " \ + f"--version {dcr_ext.version} " \ + f"--ops disable uninstall" + restart_agent_command_args = [f"--start-time {start_time} " + f"normal_ops_sequence " + f"--version {dcr_ext.version} " + f"--ops disable uninstall"] + + dcr_ext.assert_scenario( + file_name='agent_ext_workflow-assert_operation_sequence.py', + command_args=command_args, + restart_agent=restart_agent_command_args + ) + + log.info("") + log.info("*******Verifying the extension update with install scenario*******") + + # Record the time we start the test + start_time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") + + # Version 1.2.0 of the test extension has the same functionalities as 1.1.5 with + # "updateMode": "UpdateWithInstall" in HandlerManifest.json to test update case + new_version_update_mode_with_install = "1.2.0" + 
old_version = "1.1.5" + + # Create DcrTestExtension with version 1.1 and 1.2 + dcr_test_ext_id_1_2 = VmExtensionIdentifier( + VmExtensionIds.GuestAgentDcrTestExtension.publisher, + VmExtensionIds.GuestAgentDcrTestExtension.type, + "1.2" + ) + dcr_test_ext_client_1_2 = VirtualMachineExtensionClient( + self._context.vm, + dcr_test_ext_id_1_2, + resource_name="GuestAgentDcrTestExt" + ) + dcr_ext = ExtensionWorkflow.GuestAgentDcrTestExtension( + extension=dcr_test_ext_client, + ssh_client=self._ssh_client, + version=old_version + ) + + # Install test extension v1.1.5 on the VM and assert instance view + dcr_ext.modify_ext_settings_and_enable() + dcr_ext.assert_instance_view() + + # Update extension object & version to new version + dcr_ext.update_ext_version(dcr_test_ext_client_1_2, new_version_update_mode_with_install) + + # Install test extension v1.2.0 on the VM + dcr_ext.modify_ext_settings_and_enable() + + command_args = f"--start-time {start_time} " \ + f"update_sequence " \ + f"--old-version {old_version} " \ + f"--old-ver-ops disable uninstall " \ + f"--new-version {new_version_update_mode_with_install} " \ + f"--new-ver-ops update install enable " \ + f"--final-ops disable update uninstall install enable" + restart_agent_command_args = [ + f"--start-time {start_time} " + f"normal_ops_sequence " + f"--version {old_version} " + f"--ops disable uninstall", + f"--start-time {start_time} " + f"normal_ops_sequence " + f"--version {new_version_update_mode_with_install} " + f"--ops update install enable enable" + ] + + dcr_ext.assert_scenario( + file_name='agent_ext_workflow-assert_operation_sequence.py', + command_args=command_args, + assert_status=True, + restart_agent=restart_agent_command_args + ) + + log.info("") + log.info("Delete %s from VM", dcr_test_ext_client_1_2) + dcr_ext.extension.delete() + + log.info("") + log.info("*******Verifying the extension update without install scenario*******") + + # Record the time we start the test + start_time = 
datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") + + # Version 1.3.0 of the test extension has the same functionalities as 1.1.5 with + # "updateMode": "UpdateWithoutInstall" in HandlerManifest.json to test update case + new_version_update_mode_without_install = "1.3.0" + + # Create DcrTestExtension with version 1.1 and 1.3 + dcr_test_ext_id_1_3 = VmExtensionIdentifier( + VmExtensionIds.GuestAgentDcrTestExtension.publisher, + VmExtensionIds.GuestAgentDcrTestExtension.type, + "1.3") + dcr_test_ext_client_1_3 = VirtualMachineExtensionClient( + self._context.vm, + dcr_test_ext_id_1_3, + resource_name="GuestAgentDcrTestExt" + ) + dcr_ext = ExtensionWorkflow.GuestAgentDcrTestExtension( + extension=dcr_test_ext_client, + ssh_client=self._ssh_client, + version=old_version + ) + + # Install test extension v1.1.5 on the VM and assert instance view + dcr_ext.modify_ext_settings_and_enable() + dcr_ext.assert_instance_view() + + # Update extension object & version to new version + dcr_ext.update_ext_version(dcr_test_ext_client_1_3, new_version_update_mode_without_install) + + # Install test extension v1.3.0 on the VM + dcr_ext.modify_ext_settings_and_enable() + + command_args = f"--start-time {start_time} " \ + f"update_sequence " \ + f"--old-version {old_version} " \ + f"--old-ver-ops disable uninstall " \ + f"--new-version {new_version_update_mode_without_install} " \ + f"--new-ver-ops update enable " \ + f"--final-ops disable update uninstall enable" + restart_agent_command_args = [ + f"--start-time {start_time} " + f"normal_ops_sequence " + f"--version {old_version} " + f"--ops disable uninstall", + f"--start-time {start_time} " + f"normal_ops_sequence " + f"--version {new_version_update_mode_without_install} " + f"--ops update enable enable" + ] + + dcr_ext.assert_scenario( + file_name='agent_ext_workflow-assert_operation_sequence.py', + command_args=command_args, + assert_status=True, + restart_agent=restart_agent_command_args + ) + + log.info("") + 
log.info("*******Verifying no lag between agent start and gs processing*******") + + log.info("") + log.info("Running agent_ext_workflow-validate_no_lag_between_agent_start_and_gs_processing.py remotely...") + result = self._ssh_client.run_command("agent_ext_workflow-validate_no_lag_between_agent_start_and_gs_processing.py", use_sudo=True) + log.info(result) + log.info("Validation for no lag time between agent start and gs processing completed successfully") + + +if __name__ == "__main__": + ExtensionWorkflow.run_from_command_line() diff --git a/tests_e2e/tests/lib/agent_log.py b/tests_e2e/tests/lib/agent_log.py index c9ca258472..b5c4885209 100644 --- a/tests_e2e/tests/lib/agent_log.py +++ b/tests_e2e/tests/lib/agent_log.py @@ -358,6 +358,17 @@ def is_error(r: AgentLogRecord) -> bool: return errors + def agent_log_contains(self, data: str): + """ + This function looks for the specified test data string in the WALinuxAgent logs and returns if found or not. + :param data: The string to look for in the agent logs + :return: True if test data string found in the agent log and False if not. 
+ """ + for record in self.read(): + if data in record.text: + return True + return False + @staticmethod def _is_systemd(): # Taken from azurelinuxagent/common/osutil/systemd.py; repeated here because it is available only on agents >= 2.3 diff --git a/tests_e2e/tests/lib/identifiers.py b/tests_e2e/tests/lib/identifiers.py index 398ffd61cb..149d89ce3b 100644 --- a/tests_e2e/tests/lib/identifiers.py +++ b/tests_e2e/tests/lib/identifiers.py @@ -62,3 +62,4 @@ class VmExtensionIds(object): # New run command extension, with support for multi-config RunCommandHandler: VmExtensionIdentifier = VmExtensionIdentifier(publisher='Microsoft.CPlat.Core', ext_type='RunCommandHandlerLinux', version="1.0") VmAccess: VmExtensionIdentifier = VmExtensionIdentifier(publisher='Microsoft.OSTCExtensions', ext_type='VMAccessForLinux', version="1.0") + GuestAgentDcrTestExtension: VmExtensionIdentifier = VmExtensionIdentifier(publisher='Microsoft.Azure.TestExtensions.Edp', ext_type='GuestAgentDcrTest', version='1.0') diff --git a/tests_e2e/tests/scripts/agent_ext_workflow-assert_operation_sequence.py b/tests_e2e/tests/scripts/agent_ext_workflow-assert_operation_sequence.py new file mode 100755 index 0000000000..d01d27799d --- /dev/null +++ b/tests_e2e/tests/scripts/agent_ext_workflow-assert_operation_sequence.py @@ -0,0 +1,183 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# The DcrTestExtension maintains an `operations-.log` for every operation that the agent executes on that +# extension. This script asserts that the operations sequence in the log file matches the expected operations given as +# input to this script. We do this to confirm that the agent executed the correct sequence of operations. +# +# Sample operations-.log file snippet - +# Date:2019-07-30T21:54:03Z; Operation:install; SeqNo:0 +# Date:2019-07-30T21:54:05Z; Operation:enable; SeqNo:0 +# Date:2019-07-30T21:54:37Z; Operation:enable; SeqNo:1 +# Date:2019-07-30T21:55:20Z; Operation:disable; SeqNo:1 +# Date:2019-07-30T21:55:22Z; Operation:uninstall; SeqNo:1 +# +import argparse +import os +import sys +import time +from datetime import datetime +from typing import Any, Dict, List + +DELIMITER = ";" +OPS_FILE_DIR = "/var/log/azure/Microsoft.Azure.TestExtensions.Edp.GuestAgentDcrTest/" +OPS_FILE_PATTERN = ["operations-%s.log", "%s/operations-%s.log"] +MAX_RETRY = 5 +SLEEP_TIMER = 30 + + +def parse_ops_log(ops_version: str, input_ops: List[str], start_time: str): + # input_ops are the expected operations that we expect to see in the operations log file + ver = (ops_version,) + ops_file_name = None + for file_pat in OPS_FILE_PATTERN: + ops_file_name = os.path.join(OPS_FILE_DIR, file_pat % ver) + if not os.path.exists(ops_file_name): + ver = ver + (ops_version,) + ops_file_name = None + continue + break + + if not ops_file_name: + raise IOError("Operations File %s not found" % os.path.join(OPS_FILE_DIR, OPS_FILE_PATTERN[0] % ops_version)) + + ops = [] + with open(ops_file_name, 'r') as ops_log: + # we get the last len(input_ops) from the log file and ensure they match with the input_ops + # Example of a line in the log file - `Date:2019-07-30T21:54:03Z; Operation:install; SeqNo:0` + content = ops_log.readlines()[-len(input_ops):] + for op_log in content: + data = op_log.split(DELIMITER) + date = datetime.strptime(data[0].split("Date:")[1], "%Y-%m-%dT%H:%M:%SZ") + op = 
data[1].split("Operation:")[1] + seq_no = data[2].split("SeqNo:")[1].strip('\n') + + # We only capture the operations that > start_time of the test + if start_time > date: + continue + + ops.append({'date': date, 'op': op, 'seq_no': seq_no}) + return ops + + +def assert_ops_in_sequence(actual_ops: List[Dict[str, Any]], expected_ops: List[str]): + exit_code = 0 + if len(actual_ops) != len(expected_ops): + print("Operation sequence length doesn't match, exit code 2") + exit_code = 2 + + last_date = datetime(70, 1, 1) + for idx, val in enumerate(actual_ops): + if exit_code != 0: + break + + if val['date'] < last_date or val['op'] != expected_ops[idx]: + print("Operation sequence doesn't match, exit code 2") + exit_code = 2 + + last_date = val['date'] + + return exit_code + + +def check_update_sequence(args): + # old_ops_file_name = OPS_FILE_PATTERN % args.old_version + # new_ops_file_name = OPS_FILE_PATTERN % args.new_version + + actual_ops = parse_ops_log(args.old_version, args.old_ops, args.start_time) + actual_ops.extend(parse_ops_log(args.new_version, args.new_ops, args.start_time)) + actual_ops = sorted(actual_ops, key=lambda op: op['date']) + + exit_code = assert_ops_in_sequence(actual_ops, args.ops) + + return exit_code, actual_ops + + +def check_operation_sequence(args): + # ops_file_name = OPS_FILE_PATTERN % args.version + + actual_ops = parse_ops_log(args.version, args.ops, args.start_time) + exit_code = assert_ops_in_sequence(actual_ops, args.ops) + + return exit_code, actual_ops + + +def main(): + # There are 2 main ways you can call this file - normal_ops_sequence or update_sequence + parser = argparse.ArgumentParser() + cmd_parsers = parser.add_subparsers(help="sub-command help", dest="command") + + # We use start_time to make sure we're testing the correct test and not some other test + parser.add_argument("--start-time", dest='start_time', required=True) + + # Normal_ops_sequence gets the version of the ext and parses the corresponding operations file 
to get the operation + # sequence that were run on the extension + normal_ops_sequence_parser = cmd_parsers.add_parser("normal_ops_sequence", help="Test the normal operation sequence") + normal_ops_sequence_parser.add_argument('--version', dest='version') + normal_ops_sequence_parser.add_argument('--ops', nargs='*', dest='ops', default=argparse.SUPPRESS) + + # Update_sequence mode is used to check for the update scenario. We get the expected old operations, expected + # new operations and the final operation list and verify if the expected operations match the actual ones + update_sequence_parser = cmd_parsers.add_parser("update_sequence", help="Test the update operation sequence") + update_sequence_parser.add_argument("--old-version", dest="old_version") + update_sequence_parser.add_argument("--new-version", dest="new_version") + update_sequence_parser.add_argument("--old-ver-ops", nargs="*", dest="old_ops", default=argparse.SUPPRESS) + update_sequence_parser.add_argument("--new-ver-ops", nargs="*", dest="new_ops", default=argparse.SUPPRESS) + update_sequence_parser.add_argument("--final-ops", nargs="*", dest="ops", default=argparse.SUPPRESS) + + args, unknown = parser.parse_known_args() + + if unknown or len(unknown) > 0: + # Print any unknown arguments passed to this script and fix them with low priority + print("[Low Proiority][To-Fix] Found unknown args: %s" % ', '.join(unknown)) + + args.start_time = datetime.strptime(args.start_time, "%Y-%m-%dT%H:%M:%SZ") + + exit_code = 999 + actual_ops = [] + + for i in range(0, MAX_RETRY): + if args.command == "update_sequence": + exit_code, actual_ops = check_update_sequence(args) + elif args.command == "normal_ops_sequence": + exit_code, actual_ops = check_operation_sequence(args) + else: + print("No such command %s, exit code 5\n" % args.command) + exit_code, actual_ops = 5, [] + break + + if exit_code == 0: + break + + print("{0} test failed with exit code: {1}; Retry attempt: {2}; Retrying in {3} 
secs".format(args.command, + exit_code, i, + SLEEP_TIMER)) + time.sleep(SLEEP_TIMER) + + if exit_code != 0: + print("Expected Operations: %s" % ", ".join(args.ops)) + print("Actual Operations: %s" % + ','.join(["[%s, Date: %s]" % (op['op'], op['date'].strftime("%Y-%m-%dT%H:%M:%SZ")) for op in actual_ops])) + + print("Assertion completed, exiting with code: %s" % exit_code) + sys.exit(exit_code) + + +if __name__ == "__main__": + print("Asserting operations\n") + main() diff --git a/tests_e2e/tests/scripts/agent_ext_workflow-check_data_in_agent_log.py b/tests_e2e/tests/scripts/agent_ext_workflow-check_data_in_agent_log.py new file mode 100755 index 0000000000..867c9b67d9 --- /dev/null +++ b/tests_e2e/tests/scripts/agent_ext_workflow-check_data_in_agent_log.py @@ -0,0 +1,49 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Checks that the input data is found in the agent log +# +import argparse +import sys + +from pathlib import Path +from tests_e2e.tests.lib.agent_log import AgentLog + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--data", dest='data', required=True) + args, _ = parser.parse_known_args() + + print("Verifying data: {0} in waagent.log".format(args.data)) + found = False + + try: + found = AgentLog(Path('/var/log/waagent.log')).agent_log_contains(args.data) + if found: + print("Found data: {0} in agent log".format(args.data)) + else: + print("Did not find data: {0} in agent log".format(args.data)) + except Exception as e: + print("Error thrown when searching for test data in agent log: {0}".format(str(e))) + + sys.exit(0 if found else 1) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tests_e2e/tests/scripts/agent_ext_workflow-validate_no_lag_between_agent_start_and_gs_processing.py b/tests_e2e/tests/scripts/agent_ext_workflow-validate_no_lag_between_agent_start_and_gs_processing.py new file mode 100755 index 0000000000..7e75c87ef7 --- /dev/null +++ b/tests_e2e/tests/scripts/agent_ext_workflow-validate_no_lag_between_agent_start_and_gs_processing.py @@ -0,0 +1,96 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Asserts that goal state processing completed no more than 15 seconds after agent start +# +from datetime import timedelta +import re +import sys + +from pathlib import Path +from tests_e2e.tests.lib.agent_log import AgentLog + + +def main(): + success = True + + agent_started_time = [] + agent_msg = [] + time_diff_max_secs = 15 + + # Example: Agent WALinuxAgent-2.2.47.2 is running as the goal state agent + agent_started_regex = r"Azure Linux Agent \(Goal State Agent version [0-9.]+\)" + gs_completed_regex = r"ProcessExtensionsGoalState completed\s\[(?P[a-z_\d]{13,14})\s(?P\d+)\sms\]" + + verified_atleast_one_log_line = False + verified_atleast_one_agent_started_log_line = False + verified_atleast_one_gs_complete_log_line = False + + agent_log = AgentLog(Path('/var/log/waagent.log')) + + try: + for agent_record in agent_log.read(): + verified_atleast_one_log_line = True + + agent_started = re.match(agent_started_regex, agent_record.message) + verified_atleast_one_agent_started_log_line = verified_atleast_one_agent_started_log_line or agent_started + if agent_started: + agent_started_time.append(agent_record.timestamp) + agent_msg.append(agent_record.text) + + gs_complete = re.match(gs_completed_regex, agent_record.message) + verified_atleast_one_gs_complete_log_line = verified_atleast_one_gs_complete_log_line or gs_complete + if agent_started_time and gs_complete: + duration = gs_complete.group('duration') + diff = agent_record.timestamp - agent_started_time.pop() + # Reduce the duration it took to complete the Goalstate, essentially we should only care about how long + # the agent took after start/restart to start processing GS + diff -= timedelta(milliseconds=int(duration)) + agent_msg_line = agent_msg.pop() + if diff.seconds > time_diff_max_secs: + success = False + print("Found delay between agent start and GoalState Processing > {0}secs: " + "Messages: \n {1} {2}".format(time_diff_max_secs, agent_msg_line, agent_record.text)) + + except IOError as e: + 
print("Unable to validate no lag time: {0}".format(str(e))) + + if not verified_atleast_one_log_line: + success = False + print("Didn't parse a single log line, ensure the log_parser is working fine and verify log regex") + + if not verified_atleast_one_agent_started_log_line: + success = False + print("Didn't parse a single agent started log line, ensure the Regex is working fine: {0}" + .format(agent_started_regex)) + + if not verified_atleast_one_gs_complete_log_line: + success = False + print("Didn't parse a single GS completed log line, ensure the Regex is working fine: {0}" + .format(gs_completed_regex)) + + if agent_started_time or agent_msg: + success = False + print("Mismatch between number of agent start messages and number of GoalState Processing messages\n " + "Agent Start Messages: \n {0}".format('\n'.join(agent_msg))) + + sys.exit(0 if success else 1) + + +if __name__ == "__main__": + main() From 6f2a5ab0c108ba6a44e4bb7137a878f08ed98448 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Mon, 10 Jul 2023 12:18:06 -0700 Subject: [PATCH 043/240] Update agent_ext_workflow.yml to skip China and Gov clouds (#2872) * Update agent_ext_workflow.yml to skip China and Gov clouds * Update tests_e2e/test_suites/agent_ext_workflow.yml --- tests_e2e/test_suites/agent_ext_workflow.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests_e2e/test_suites/agent_ext_workflow.yml b/tests_e2e/test_suites/agent_ext_workflow.yml index 1e965317ba..2d506c00d8 100644 --- a/tests_e2e/test_suites/agent_ext_workflow.yml +++ b/tests_e2e/test_suites/agent_ext_workflow.yml @@ -7,5 +7,8 @@ images: - "rhel_79" - "ubuntu_1604" - "ubuntu_1804" -# This test suite uses the DCR Test Extension, which is only published in South Central US +# This test suite uses the DCR Test Extension, which is only published in southcentralus region in public cloud locations: "AzureCloud:southcentralus" +skip_on_clouds: + - "AzureChinaCloud" 
+ - "AzureUSGovernment" From 2a761ecc80f1293522cc33e62fd0b3772ed43d42 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 11 Jul 2023 15:29:27 -0700 Subject: [PATCH 044/240] fix daemon version (#2874) --- azurelinuxagent/common/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/common/version.py b/azurelinuxagent/common/version.py index 08c01b5ceb..c056425432 100644 --- a/azurelinuxagent/common/version.py +++ b/azurelinuxagent/common/version.py @@ -57,7 +57,7 @@ def get_daemon_version(): # The agent process which execute the extensions can have different version(after upgrades) and importing version from that process may provide wrong version for daemon. # so launching new process with sys.executable python provides the correct version for daemon which preinstalled in the image. try: - cmd = ["{0}".format(sys.executable), "-c", "\'from azurelinuxagent.common.version import AGENT_VERSION; print(AGENT_VERSION)\'"] + cmd = ["{0}".format(sys.executable), "-c", "from azurelinuxagent.common.version import AGENT_VERSION; print(AGENT_VERSION)"] version = shellutil.run_command(cmd) return FlexibleVersion(version) except Exception as e: # Make the best effort to get the daemon version, but don't fail the update if we can't. 
So default to 2.2.53 as env variable is not set < 2.2.53 From eb6795788182ba9e68b3db68dab6114223c02f8f Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Tue, 11 Jul 2023 16:21:26 -0700 Subject: [PATCH 045/240] Wait for extension goal state processing before checking for lag in log (#2873) * Update version to dummy 1.0.0.0' * Revert version change * Add sleep time to allow goal state processing to complete before lag check * Add retry logic to gs processing lag check * Clean up retry logic * Add back empty line * Fix timestamp parsing issue * Fix timestamp parsing issue * Fix timestamp parsing issue * Do 3 retries{ --- tests_e2e/tests/lib/agent_log.py | 11 ++ ...g_between_agent_start_and_gs_processing.py | 141 ++++++++++-------- 2 files changed, 92 insertions(+), 60 deletions(-) diff --git a/tests_e2e/tests/lib/agent_log.py b/tests_e2e/tests/lib/agent_log.py index b5c4885209..452c5552a0 100644 --- a/tests_e2e/tests/lib/agent_log.py +++ b/tests_e2e/tests/lib/agent_log.py @@ -64,6 +64,17 @@ def from_dictionary(dictionary: Dict[str, str]): @property def timestamp(self) -> datetime: + # Extension logs may follow different timestamp formats + # 2023/07/10 20:50:13.459260 + ext_timestamp_regex_1 = r"\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}[.\d]+" + # 2023/07/10 20:50:13 + ext_timestamp_regex_2 = r"\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}" + + if re.match(ext_timestamp_regex_1, self.when): + return datetime.strptime(self.when, u'%Y/%m/%d %H:%M:%S.%f') + elif re.match(ext_timestamp_regex_2, self.when): + return datetime.strptime(self.when, u'%Y/%m/%d %H:%M:%S') + # Logs from agent follow this format: 2023-07-10T20:50:13.038599Z return datetime.strptime(self.when, u'%Y-%m-%dT%H:%M:%S.%fZ') diff --git a/tests_e2e/tests/scripts/agent_ext_workflow-validate_no_lag_between_agent_start_and_gs_processing.py b/tests_e2e/tests/scripts/agent_ext_workflow-validate_no_lag_between_agent_start_and_gs_processing.py index 7e75c87ef7..0b492d8153 100755 --- 
a/tests_e2e/tests/scripts/agent_ext_workflow-validate_no_lag_between_agent_start_and_gs_processing.py +++ b/tests_e2e/tests/scripts/agent_ext_workflow-validate_no_lag_between_agent_start_and_gs_processing.py @@ -21,6 +21,7 @@ from datetime import timedelta import re import sys +import time from pathlib import Path from tests_e2e.tests.lib.agent_log import AgentLog @@ -28,66 +29,86 @@ def main(): success = True - - agent_started_time = [] - agent_msg = [] - time_diff_max_secs = 15 - - # Example: Agent WALinuxAgent-2.2.47.2 is running as the goal state agent - agent_started_regex = r"Azure Linux Agent \(Goal State Agent version [0-9.]+\)" - gs_completed_regex = r"ProcessExtensionsGoalState completed\s\[(?P[a-z_\d]{13,14})\s(?P\d+)\sms\]" - - verified_atleast_one_log_line = False - verified_atleast_one_agent_started_log_line = False - verified_atleast_one_gs_complete_log_line = False - - agent_log = AgentLog(Path('/var/log/waagent.log')) - - try: - for agent_record in agent_log.read(): - verified_atleast_one_log_line = True - - agent_started = re.match(agent_started_regex, agent_record.message) - verified_atleast_one_agent_started_log_line = verified_atleast_one_agent_started_log_line or agent_started - if agent_started: - agent_started_time.append(agent_record.timestamp) - agent_msg.append(agent_record.text) - - gs_complete = re.match(gs_completed_regex, agent_record.message) - verified_atleast_one_gs_complete_log_line = verified_atleast_one_gs_complete_log_line or gs_complete - if agent_started_time and gs_complete: - duration = gs_complete.group('duration') - diff = agent_record.timestamp - agent_started_time.pop() - # Reduce the duration it took to complete the Goalstate, essentially we should only care about how long - # the agent took after start/restart to start processing GS - diff -= timedelta(milliseconds=int(duration)) - agent_msg_line = agent_msg.pop() - if diff.seconds > time_diff_max_secs: - success = False - print("Found delay between agent start and 
GoalState Processing > {0}secs: " - "Messages: \n {1} {2}".format(time_diff_max_secs, agent_msg_line, agent_record.text)) - - except IOError as e: - print("Unable to validate no lag time: {0}".format(str(e))) - - if not verified_atleast_one_log_line: - success = False - print("Didn't parse a single log line, ensure the log_parser is working fine and verify log regex") - - if not verified_atleast_one_agent_started_log_line: - success = False - print("Didn't parse a single agent started log line, ensure the Regex is working fine: {0}" - .format(agent_started_regex)) - - if not verified_atleast_one_gs_complete_log_line: - success = False - print("Didn't parse a single GS completed log line, ensure the Regex is working fine: {0}" - .format(gs_completed_regex)) - - if agent_started_time or agent_msg: - success = False - print("Mismatch between number of agent start messages and number of GoalState Processing messages\n " - "Agent Start Messages: \n {0}".format('\n'.join(agent_msg))) + needs_retry = True + retry = 3 + + while retry >= 0 and needs_retry: + success = True + needs_retry = False + + agent_started_time = [] + agent_msg = [] + time_diff_max_secs = 15 + last_agent_log_timestamp = None + + # Example: Agent WALinuxAgent-2.2.47.2 is running as the goal state agent + agent_started_regex = r"Azure Linux Agent \(Goal State Agent version [0-9.]+\)" + gs_completed_regex = r"ProcessExtensionsGoalState completed\s\[(?P[a-z_\d]{13,14})\s(?P\d+)\sms\]" + + verified_atleast_one_log_line = False + verified_atleast_one_agent_started_log_line = False + verified_atleast_one_gs_complete_log_line = False + + agent_log = AgentLog(Path('/var/log/waagent.log')) + + try: + for agent_record in agent_log.read(): + last_agent_log_timestamp = agent_record.timestamp + verified_atleast_one_log_line = True + + agent_started = re.match(agent_started_regex, agent_record.message) + verified_atleast_one_agent_started_log_line = verified_atleast_one_agent_started_log_line or agent_started + if 
agent_started: + agent_started_time.append(agent_record.timestamp) + agent_msg.append(agent_record.text) + + gs_complete = re.match(gs_completed_regex, agent_record.message) + verified_atleast_one_gs_complete_log_line = verified_atleast_one_gs_complete_log_line or gs_complete + if agent_started_time and gs_complete: + duration = gs_complete.group('duration') + diff = agent_record.timestamp - agent_started_time.pop() + # Reduce the duration it took to complete the Goalstate, essentially we should only care about how long + # the agent took after start/restart to start processing GS + diff -= timedelta(milliseconds=int(duration)) + agent_msg_line = agent_msg.pop() + if diff.seconds > time_diff_max_secs: + success = False + print("Found delay between agent start and GoalState Processing > {0}secs: " + "Messages: \n {1} {2}".format(time_diff_max_secs, agent_msg_line, agent_record.text)) + + except IOError as e: + print("Unable to validate no lag time: {0}".format(str(e))) + + if not verified_atleast_one_log_line: + success = False + print("Didn't parse a single log line, ensure the log_parser is working fine and verify log regex") + + if not verified_atleast_one_agent_started_log_line: + success = False + print("Didn't parse a single agent started log line, ensure the Regex is working fine: {0}" + .format(agent_started_regex)) + + if not verified_atleast_one_gs_complete_log_line: + success = False + print("Didn't parse a single GS completed log line, ensure the Regex is working fine: {0}" + .format(gs_completed_regex)) + + if agent_started_time or agent_msg: + # If agent_started_time or agent_msg is not empty, there is a mismatch in the number of agent start messages + # and GoalState Processing messages + # If another check hasn't already failed, and the last parsed log is less than 15 seconds after the + # mismatched agent start log, we should retry after sleeping for 5s to give the agent time to finish + # GoalState processing + if success and 
last_agent_log_timestamp < (agent_started_time[-1] + timedelta(seconds=15)): + needs_retry = True + print("Sleeping for 5 seconds to allow goal state processing to complete...") + time.sleep(5) + else: + success = False + print("Mismatch between number of agent start messages and number of GoalState Processing messages\n " + "Agent Start Messages: \n {0}".format('\n'.join(agent_msg))) + + retry -= 1 sys.exit(0 if success else 1) From 6862673d19347548445d7c7fa3699bbc2e38ffd7 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Tue, 18 Jul 2023 10:57:35 -0700 Subject: [PATCH 046/240] Extract tarball with xvf during setup (#2880) In a pipeline run we saw the following error when extracting the tarball on the test node: Adding v to extract the contents with verbose --- tests_e2e/orchestrator/lib/agent_test_suite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index caa499b341..06a5acde42 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -351,7 +351,7 @@ def _setup_node(self, install_test_agent: bool) -> None: # Extract the tarball and execute the install scripts # log.info('Installing tools on the test node') - command = f"tar xf {target_path/tarball_path.name} && ~/bin/install-tools" + command = f"tar xvf {target_path/tarball_path.name} && ~/bin/install-tools" log.info("Remote command [%s] completed:\n%s", command, self.context.ssh_client.run_command(command)) if self.context.is_vhd: From 7bff7b75d8ce1dbb802c4f2e6cd2173b3ef9159c Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 18 Jul 2023 16:53:44 -0700 Subject: [PATCH 047/240] enable agent update in daily run (#2878) --- tests_e2e/orchestrator/runbook.yml | 2 +- .../tests/agent_publish/agent_publish.py | 2 +- 
tests_e2e/tests/agent_update/rsm_update.py | 50 +++++++++++-------- 3 files changed, 31 insertions(+), 23 deletions(-) diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index ca9f8dde46..7e19205747 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -51,7 +51,7 @@ variable: # # The test suites to execute - name: test_suites - value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow" + value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_update" - name: cloud value: "AzureCloud" is_case_visible: true diff --git a/tests_e2e/tests/agent_publish/agent_publish.py b/tests_e2e/tests/agent_publish/agent_publish.py index d476414414..397ecd0e33 100644 --- a/tests_e2e/tests/agent_publish/agent_publish.py +++ b/tests_e2e/tests/agent_publish/agent_publish.py @@ -57,7 +57,7 @@ def _get_agent_info(self) -> None: def _prepare_agent(self) -> None: log.info("Modifying agent update related config flags") - output = self._ssh_client.run_command("update-waagent-conf GAUpdates.Enabled=y AutoUpdate.GAFamily=Test", use_sudo=True) + output = self._ssh_client.run_command("update-waagent-conf Debug.DownloadNewAgents=y AutoUpdate.GAFamily=Test", use_sudo=True) log.info('Updated agent-update related config flags \n%s', output) def _check_update(self) -> None: diff --git a/tests_e2e/tests/agent_update/rsm_update.py b/tests_e2e/tests/agent_update/rsm_update.py index a6a41ec3d0..eeb287f33b 100644 --- a/tests_e2e/tests/agent_update/rsm_update.py +++ b/tests_e2e/tests/agent_update/rsm_update.py @@ -59,6 +59,12 @@ def get_ignore_error_rules(self) -> List[Dict[str, Any]]: # { 'message': r"Agent WALinuxAgent-9.9.9.9 is permanently blacklisted" + }, + # We don't allow downgrades below the daemon version + # 2023-07-11T02:28:21.249836Z WARNING ExtHandler ExtHandler [AgentUpdateError] The Agent received a 
request to downgrade to version 1.4.0.0, but downgrading to a version less than the Agent installed on the image (1.4.0.1) is not supported. Skipping downgrade. + # + { + 'message': r"downgrading to a version less than the Agent installed on the image.* is not supported" } ] @@ -71,7 +77,7 @@ def run(self) -> None: log.info("*******Verifying the Agent Downgrade scenario*******") stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) log.info("Current agent version running on the vm before update is \n%s", stdout) - downgrade_version: str = "1.3.0.0" + downgrade_version: str = "1.5.0.0" log.info("Attempting downgrade version %s", downgrade_version) self._request_rsm_update(downgrade_version) self._check_rsm_gs(downgrade_version) @@ -85,7 +91,7 @@ def run(self) -> None: log.info("*******Verifying the Agent Upgrade scenario*******") stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) log.info("Current agent version running on the vm before update is \n%s", stdout) - upgrade_version: str = "1.3.1.0" + upgrade_version: str = "1.5.1.0" log.info("Attempting upgrade version %s", upgrade_version) self._request_rsm_update(upgrade_version) self._check_rsm_gs(upgrade_version) @@ -96,31 +102,31 @@ def run(self) -> None: log.info("*******Verifying the no version update scenario*******") stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) log.info("Current agent version running on the vm before update is \n%s", stdout) - version: str = "1.3.1.0" + version: str = "1.5.1.0" log.info("Attempting update version same as current version %s", upgrade_version) self._request_rsm_update(version) self._check_rsm_gs(version) self._verify_guest_agent_update(version) self._verify_agent_reported_update_status(version) - # disabled until the new daemon version logic is released in test versions - # # verify requested version below daemon version - # log.info("*******Verifying requested version below daemon version 
scenario*******") - # # changing back to 1.3.1.0 from 1.0.0.0 as there is no pkg below than 1.0.0.0 available in PIR, Otherwise we will get pkg not found error - # self._prepare_agent("1.3.0.1", update_config=False) - # stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) - # log.info("Current agent version running on the vm before update \n%s", stdout) - # version: str = "1.3.0.0" - # log.info("Attempting requested version %s", version) - # self._request_rsm_update(version) - # self._check_rsm_gs(version) - # self._verify_no_guest_agent_update(stdout) - # self._verify_agent_reported_update_status(version) + # verify requested version below daemon version + log.info("*******Verifying requested version below daemon version scenario*******") + # changing daemon version to 1.5.0.1 from 1.0.0.0 as there is no pkg below than 1.0.0.0 available in PIR, Otherwise we will get pkg not found error + self._prepare_agent("1.5.0.1", update_config=False) + stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) + log.info("Current agent version running on the vm before update is \n%s", stdout) + version: str = "1.5.0.0" + log.info("Attempting requested version %s", version) + self._request_rsm_update(version) + self._check_rsm_gs(version) + self._verify_no_guest_agent_update(version) + self._verify_agent_reported_update_status(version) def _check_rsm_gs(self, requested_version: str) -> None: # This checks if RSM GS available to the agent after we mock the rsm update request + log.info('Verifying latest GS includes requested version available to the agent') output = self._ssh_client.run_command(f"agent_update-wait_for_rsm_gs.py --version {requested_version}", use_sudo=True) - log.info('Verifying requested version GS available to the agent \n%s', output) + log.info('Verified latest GS includes requested version available to the agent. 
\n%s', output) def _prepare_agent(self, daemon_version="1.0.0.0", update_config=True) -> None: """ @@ -133,7 +139,7 @@ def _prepare_agent(self, daemon_version="1.0.0.0", update_config=True) -> None: log.info('Updated agent installed version \n%s', output) if update_config: log.info('Modifying agent update config flags') - output = self._ssh_client.run_command("update-waagent-conf GAUpdates.Enabled=y AutoUpdate.GAFamily=Test", use_sudo=True) + output = self._ssh_client.run_command("update-waagent-conf Debug.DownloadNewAgents=y AutoUpdate.GAFamily=Test", use_sudo=True) log.info('updated agent update required config \n%s', output) @staticmethod @@ -208,12 +214,14 @@ def _check_agent_version(requested_version: str) -> bool: stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) log.info(f"Verified agent updated to requested version. Current agent version running:\n {stdout}") - def _verify_no_guest_agent_update(self, previous_agent: str) -> None: + def _verify_no_guest_agent_update(self, version: str) -> None: """ - verify current agent version is same as previous after update attempt + verify current agent version is not updated to requested version """ + log.info("Verifying no update happened to agent") current_agent: str = self._ssh_client.run_command("waagent-version", use_sudo=True) - assert_that(current_agent).is_equal_to(previous_agent).described_as(f"Agent version changed.\n Previous Agent {previous_agent} \n Current agent {current_agent}") + assert_that(current_agent).does_not_contain(version).described_as(f"Agent version changed.\n Current agent {current_agent}") + log.info("Verified agent was not updated to requested version") def _verify_agent_reported_supported_feature_flag(self): """ From 5dec9922fd1a9b78d30505e0a634b0ef9e93bd43 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Tue, 25 Jul 2023 06:08:23 -0700 Subject: [PATCH 048/240] Create Network Security Group for test VMs (#2882) * Create Network Security Group for test VMs * 
error handling --------- Co-authored-by: narrieta --- .../lib/update_arm_template_hook.py | 42 +++-- .../test_suites/agent_not_provisioned.yml | 2 +- .../test_suites/no_outbound_connections.yml | 2 +- .../disable_agent_provisioning.py | 89 +++++++++++ .../tests/agent_not_provisioned/template.py | 86 ---------- .../tests/lib/add_network_security_group.py | 150 ++++++++++++++++++ tests_e2e/tests/lib/retry.py | 34 ++-- tests_e2e/tests/lib/update_arm_template.py | 53 +++++++ .../deny_outbound_connections.py | 58 +++++++ .../tests/no_outbound_connections/template.py | 95 ----------- 10 files changed, 407 insertions(+), 204 deletions(-) create mode 100755 tests_e2e/tests/agent_not_provisioned/disable_agent_provisioning.py delete mode 100755 tests_e2e/tests/agent_not_provisioned/template.py create mode 100644 tests_e2e/tests/lib/add_network_security_group.py create mode 100644 tests_e2e/tests/lib/update_arm_template.py create mode 100755 tests_e2e/tests/no_outbound_connections/deny_outbound_connections.py delete mode 100755 tests_e2e/tests/no_outbound_connections/template.py diff --git a/tests_e2e/orchestrator/lib/update_arm_template_hook.py b/tests_e2e/orchestrator/lib/update_arm_template_hook.py index c1c94f5522..2ff910a9a7 100644 --- a/tests_e2e/orchestrator/lib/update_arm_template_hook.py +++ b/tests_e2e/orchestrator/lib/update_arm_template_hook.py @@ -15,10 +15,11 @@ # limitations under the License. 
# -import importlib +import importlib.util import logging + from pathlib import Path -from typing import Any, Callable +from typing import Any # Disable those warnings, since 'lisa' is an external, non-standard, dependency # E0401: Unable to import 'lisa.*' (import-error) @@ -29,6 +30,8 @@ # pylint: enable=E0401 import tests_e2e +from tests_e2e.tests.lib.add_network_security_group import AddNetworkSecurityGroup +from tests_e2e.tests.lib.update_arm_template import UpdateArmTemplate class UpdateArmTemplateHook: @@ -37,30 +40,45 @@ class UpdateArmTemplateHook: """ @hookimpl def azure_update_arm_template(self, template: Any, environment: Environment) -> None: + log: logging.Logger = logging.getLogger("lisa") + + # + # Add the network security group for the test VM. This group includes a rule allowing SSH access from the current machine. + # + log.info("******** Waagent: Adding network security rule to the ARM template") + AddNetworkSecurityGroup().update(template) + + # + # Apply any template customizations provided by the tests. 
+ # azure_runbook: AzurePlatformSchema = environment.platform.runbook.get_extended_runbook(AzurePlatformSchema) vm_tags = azure_runbook.vm_tags - templates = vm_tags.get("templates") - if templates is not None: - log: logging.Logger = logging.getLogger("lisa") - log.info("******** Waagent: Applying custom templates '%s' to environment '%s'", templates, environment.name) + # The "templates" tag is a comma-separated list of the template customizations provided by the tests + test_templates = vm_tags.get("templates") + if test_templates is not None: + log.info("******** Waagent: Applying custom templates '%s' to environment '%s'", test_templates, environment.name) - for t in templates.split(","): + for t in test_templates.split(","): update_arm_template = self._get_update_arm_template(t) - update_arm_template(template) + update_arm_template().update(template) _SOURCE_CODE_ROOT: Path = Path(tests_e2e.__path__[0]) @staticmethod - def _get_update_arm_template(template_path: str) -> Callable: - source_file: Path = UpdateArmTemplateHook._SOURCE_CODE_ROOT/"tests"/template_path + def _get_update_arm_template(test_template: str) -> UpdateArmTemplate: + """ + Returns the UpdateArmTemplate class that implements the template customization for the test. + """ + source_file: Path = UpdateArmTemplateHook._SOURCE_CODE_ROOT/"tests"/test_template spec = importlib.util.spec_from_file_location(f"tests_e2e.tests.templates.{source_file.name}", str(source_file)) module = importlib.util.module_from_spec(spec) spec.loader.exec_module(module) - matches = [v for v in module.__dict__.values() if callable(v) and v.__name__ == "update_arm_template"] + # find all the classes in the module that are subclasses of UpdateArmTemplate but are not UpdateArmTemplate itself. 
+ matches = [v for v in module.__dict__.values() if isinstance(v, type) and issubclass(v, UpdateArmTemplate) and v != UpdateArmTemplate] if len(matches) != 1: - raise Exception(f"Could not find update_arm_template in {source_file}") + raise Exception(f"Error in {source_file}: template files must contain exactly one class derived from UpdateArmTemplate)") return matches[0] diff --git a/tests_e2e/test_suites/agent_not_provisioned.yml b/tests_e2e/test_suites/agent_not_provisioned.yml index 07dd5ccce2..7c85353f02 100644 --- a/tests_e2e/test_suites/agent_not_provisioned.yml +++ b/tests_e2e/test_suites/agent_not_provisioned.yml @@ -6,7 +6,7 @@ name: "AgentNotProvisioned" tests: - "agent_not_provisioned/agent_not_provisioned.py" images: "random(endorsed)" -template: "agent_not_provisioned/template.py" +template: "agent_not_provisioned/disable_agent_provisioning.py" owns_vm: true install_test_agent: false diff --git a/tests_e2e/test_suites/no_outbound_connections.yml b/tests_e2e/test_suites/no_outbound_connections.yml index 2e2e2d1a84..b256b51461 100644 --- a/tests_e2e/test_suites/no_outbound_connections.yml +++ b/tests_e2e/test_suites/no_outbound_connections.yml @@ -16,5 +16,5 @@ tests: - "agent_bvt/vm_access.py" - "no_outbound_connections/check_fallback_to_hgap.py" images: "random(endorsed)" -template: "no_outbound_connections/template.py" +template: "no_outbound_connections/deny_outbound_connections.py" owns_vm: true diff --git a/tests_e2e/tests/agent_not_provisioned/disable_agent_provisioning.py b/tests_e2e/tests/agent_not_provisioned/disable_agent_provisioning.py new file mode 100755 index 0000000000..8de9e55967 --- /dev/null +++ b/tests_e2e/tests/agent_not_provisioned/disable_agent_provisioning.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from typing import Any, Dict + +from tests_e2e.tests.lib.update_arm_template import UpdateArmTemplate + + +class DenyOutboundConnections(UpdateArmTemplate): + """ + Updates the ARM template to set osProfile.linuxConfiguration.provisionVMAgent to false. + """ + def update(self, template: Dict[str, Any]) -> None: + # + # NOTE: LISA's template uses this function to generate the value for osProfile.linuxConfiguration. The function is + # under the 'lisa' namespace. + # + # "getLinuxConfiguration": { + # "parameters": [ + # { + # "name": "keyPath", + # "type": "string" + # }, + # { + # "name": "publicKeyData", + # "type": "string" + # } + # ], + # "output": { + # "type": "object", + # "value": { + # "disablePasswordAuthentication": true, + # "ssh": { + # "publicKeys": [ + # { + # "path": "[parameters('keyPath')]", + # "keyData": "[parameters('publicKeyData')]" + # } + # ] + # }, + # "provisionVMAgent": true + # } + # } + # } + # + # The code below sets template['functions'][i]['members']['getLinuxConfiguration']['output']['value']['provisionVMAgent'] to True, + # where template['functions'][i] is the 'lisa' namespace. 
+ # + functions = template.get("functions") + if functions is None: + raise Exception('Cannot find "functions" in the LISA template.') + for namespace in functions: + name = namespace.get("namespace") + if name is None: + raise Exception(f'Cannot find "namespace" in the LISA template: {namespace}') + if name == "lisa": + members = namespace.get('members') + if members is None: + raise Exception(f'Cannot find the members of the lisa namespace in the LISA template: {namespace}') + get_linux_configuration = members.get('getLinuxConfiguration') + if get_linux_configuration is None: + raise Exception(f'Cannot find the "getLinuxConfiguration" function the lisa namespace in the LISA template: {namespace}') + output = get_linux_configuration.get('output') + if output is None: + raise Exception(f'Cannot find the "output" of the getLinuxConfiguration function in the LISA template: {get_linux_configuration}') + value = output.get('value') + if value is None: + raise Exception(f"Cannot find the output's value of the getLinuxConfiguration function in the LISA template: {get_linux_configuration}") + value['provisionVMAgent'] = False + break + else: + raise Exception(f'Cannot find the "lisa" namespace in the LISA template: {functions}') + diff --git a/tests_e2e/tests/agent_not_provisioned/template.py b/tests_e2e/tests/agent_not_provisioned/template.py deleted file mode 100755 index c45b69f295..0000000000 --- a/tests_e2e/tests/agent_not_provisioned/template.py +++ /dev/null @@ -1,86 +0,0 @@ -#!/usr/bin/env python3 - -# Microsoft Azure Linux Agent -# -# Copyright 2018 Microsoft Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from typing import Any - - -def update_arm_template(template: Any) -> None: - """ - Customizes the ARM template to set osProfile.linuxConfiguration.provisionVMAgent to false. - """ - # - # NOTE: LISA's template uses this function to generate the value for osProfile.linuxConfiguration. The function is - # under the 'lisa' namespace. - # - # "getLinuxConfiguration": { - # "parameters": [ - # { - # "name": "keyPath", - # "type": "string" - # }, - # { - # "name": "publicKeyData", - # "type": "string" - # } - # ], - # "output": { - # "type": "object", - # "value": { - # "disablePasswordAuthentication": true, - # "ssh": { - # "publicKeys": [ - # { - # "path": "[parameters('keyPath')]", - # "keyData": "[parameters('publicKeyData')]" - # } - # ] - # }, - # "provisionVMAgent": true - # } - # } - # } - # - # The code below sets template['functions'][i]['members']['getLinuxConfiguration']['output']['value']['provisionVMAgent'] to True, - # where template['functions'][i] is the 'lisa' namespace. 
- # - functions = template.get("functions") - if functions is None: - raise Exception('Cannot find "functions" in the LISA template.') - for namespace in functions: - name = namespace.get("namespace") - if name is None: - raise Exception(f'Cannot find "namespace" in the LISA template: {namespace}') - if name == "lisa": - members = namespace.get('members') - if members is None: - raise Exception(f'Cannot find the members of the lisa namespace in the LISA template: {namespace}') - get_linux_configuration = members.get('getLinuxConfiguration') - if get_linux_configuration is None: - raise Exception(f'Cannot find the "getLinuxConfiguration" function the lisa namespace in the LISA template: {namespace}') - output = get_linux_configuration.get('output') - if output is None: - raise Exception(f'Cannot find the "output" of the getLinuxConfiguration function in the LISA template: {get_linux_configuration}') - value = output.get('value') - if value is None: - raise Exception(f"Cannot find the output's value of the getLinuxConfiguration function in the LISA template: {get_linux_configuration}") - value['provisionVMAgent'] = False - break - else: - raise Exception(f'Cannot find the "lisa" namespace in the LISA template: {functions}') - diff --git a/tests_e2e/tests/lib/add_network_security_group.py b/tests_e2e/tests/lib/add_network_security_group.py new file mode 100644 index 0000000000..2c65e27f9e --- /dev/null +++ b/tests_e2e/tests/lib/add_network_security_group.py @@ -0,0 +1,150 @@ +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import json +import http.client + +from typing import Any, Dict, List + +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.retry import retry +from tests_e2e.tests.lib.update_arm_template import UpdateArmTemplate + +# Name of the security group added by this class +NETWORK_SECURITY_GROUP: str = "waagent-nsg" + + +class AddNetworkSecurityGroup(UpdateArmTemplate): + """ + Updates the ARM template to add a network security group allowing SSH access from the current machine. + """ + def update(self, template: Dict[str, Any]) -> None: + resources: List[Dict[str, Any]] = template["resources"] + + # Append the NSG to the list of resources + network_security_group = json.loads(f"""{{ + "type": "Microsoft.Network/networkSecurityGroups", + "name": "{NETWORK_SECURITY_GROUP}", + "location": "[parameters('location')]", + "apiVersion": "2020-05-01", + "properties": {{ + "securityRules": [] + }} + }}""") + resources.append(network_security_group) + + # Add the SSH rule, but if anything fails just go ahead without it + try: + network_security_group["properties"]["securityRules"].append(json.loads(f"""{{ + "name": "waagent-ssh", + "properties": {{ + "description": "Allows inbound SSH connections from the orchestrator machine.", + "protocol": "Tcp", + "sourcePortRange": "*", + "destinationPortRange": "22", + "sourceAddressPrefix": "{self._my_ip_address}", + "destinationAddressPrefix": "*", + "access": "Allow", + "priority": 100, + "direction": "Inbound" + }} + }}""")) + except Exception as e: + log.warning("******** Waagent: Failed to create Allow 
security rule for SSH, skipping rule: %s", e) + + + # + # Add reference to the NSG to the properties of the subnets. + # + # The subnets are a copy property of the virtual network in LISA's ARM template: + # + # { + # "condition": "[empty(parameters('virtual_network_resource_group'))]", + # "apiVersion": "2020-05-01", + # "type": "Microsoft.Network/virtualNetworks", + # "name": "[parameters('virtual_network_name')]", + # "location": "[parameters('location')]", + # "properties": { + # "addressSpace": { + # "addressPrefixes": [ + # "10.0.0.0/16" + # ] + # }, + # "copy": [ + # { + # "name": "subnets", + # "count": "[parameters('subnet_count')]", + # "input": { + # "name": "[concat(parameters('subnet_prefix'), copyIndex('subnets'))]", + # "properties": { + # "addressPrefix": "[concat('10.0.', copyIndex('subnets'), '.0/24')]" + # } + # } + # } + # ] + # } + # } + # + network_resource = self._get_resource(resources, "Microsoft.Network/virtualNetworks") + + # Add a dependency on the NSG + nsg_reference = f"[resourceId('Microsoft.Network/networkSecurityGroups', '{NETWORK_SECURITY_GROUP}')]" + network_resource_dependencies = network_resource.get("dependsOn") + if network_resource_dependencies is None: + network_resource["dependsOn"] = [nsg_reference] + else: + network_resource_dependencies.append(nsg_reference) + + subnets_copy = network_resource["properties"].get("copy") if network_resource.get("properties") is not None else None + if subnets_copy is None: + raise Exception("Cannot find the copy property of the virtual network in the ARM template") + + subnets = [i for i in subnets_copy if "name" in i and i["name"] == 'subnets'] + if len(subnets) == 0: + raise Exception("Cannot find the subnets of the virtual network in the ARM template") + + subnets_input = subnets[0].get("input") + if subnets_input is None: + raise Exception("Cannot find the input property of the subnets in the ARM template") + + nsg_reference = json.loads(f"""{{ + "networkSecurityGroup": {{ + "id": 
"[resourceId('Microsoft.Network/networkSecurityGroups', '{NETWORK_SECURITY_GROUP}')]" + }} + }}""") + + subnets_properties = subnets_input.get("properties") + if subnets_properties is None: + subnets_input["properties"] = nsg_reference + else: + subnets_properties.update(nsg_reference) + + @property + def _my_ip_address(self) -> str: + """ + Gets the IP address of the current machine. + """ + if self.__my_ip_address is None: + def get_my_address(): + connection = http.client.HTTPSConnection("ifconfig.io") + connection.request("GET", "/ip") + response = connection.getresponse() + return response.read().decode().strip() + self.__my_ip_address = retry(get_my_address, attempts=3, delay=10) + return self.__my_ip_address + + __my_ip_address: str = None diff --git a/tests_e2e/tests/lib/retry.py b/tests_e2e/tests/lib/retry.py index 79a209ba44..eec6e08a0b 100644 --- a/tests_e2e/tests/lib/retry.py +++ b/tests_e2e/tests/lib/retry.py @@ -57,21 +57,37 @@ def retry_ssh_run(operation: Callable[[], Any], attempts: int, attempt_delay: in time.sleep(attempt_delay) -def retry_if_false(operation: Callable[[], bool], attempts: int = 5, duration: int = 30) -> bool: +def retry_if_false(operation: Callable[[], bool], attempts: int = 5, delay: int = 30) -> bool: """ This method attempts the given operation retrying a few times (after a short delay) Note: Method used for operations which are return True or False """ - found: bool = False - while attempts > 0 and not found: + success: bool = False + while attempts > 0 and not success: attempts -= 1 try: - found = operation() - except Exception: + success = operation() + except Exception as e: + log.warning("Error in operation: %s", e) + if attempts == 0: + raise + if not success: + log.info("Current operation failed, retrying in %s secs.", delay) + time.sleep(delay) + return success + + +def retry(operation: Callable[[], Any], attempts: int = 5, delay: int = 30) -> Any: + """ + This method attempts the given operation retrying a few times on 
exceptions. Returns the value returned by the operation. + """ + while attempts > 0: + attempts -= 1 + try: + return operation() + except Exception as e: if attempts == 0: raise - if not found: - log.info(f"Current execution didn't find it, retrying in {duration} secs.") - time.sleep(duration) - return found + log.warning("Error in operation, retrying in %s secs: %s", delay, e) + time.sleep(delay) diff --git a/tests_e2e/tests/lib/update_arm_template.py b/tests_e2e/tests/lib/update_arm_template.py new file mode 100644 index 0000000000..9637525f35 --- /dev/null +++ b/tests_e2e/tests/lib/update_arm_template.py @@ -0,0 +1,53 @@ +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from abc import ABC, abstractmethod +from typing import Any, Dict, List + + +class UpdateArmTemplate(ABC): + + @abstractmethod + def update(self, template: Dict[str, Any]) -> None: + """ + Derived classes implement this method to customize the ARM template used to create the test VMs. The 'template' parameter is a dictionary + created from the template's JSON document, as parsed by json.loads(). + + The original JSON document is currently at https://github.com/microsoft/lisa/blob/main/lisa/sut_orchestrator/azure/arm_template.json + """ + + @staticmethod + def _get_resource(resources: List[Dict[str, Any]], type_name: str) -> Any: + """ + Returns the first resource of the specified type in the given 'resources' list. 
+ """ + for item in resources: + if item["type"] == type_name: + return item + raise Exception(f"Cannot find a resource of type {type_name} in the ARM template") + + @staticmethod + def _get_resource_by_name(resources: List[Dict[str, Any]], resource_name: str, type_name: str) -> Any: + """ + Returns the first resource of the specified type and name in the given 'resources' list. + """ + for item in resources: + if item["type"] == type_name and item["name"] == resource_name: + return item + raise Exception(f"Cannot find a resource {resource_name} of type {type_name} in the ARM template") + + diff --git a/tests_e2e/tests/no_outbound_connections/deny_outbound_connections.py b/tests_e2e/tests/no_outbound_connections/deny_outbound_connections.py new file mode 100755 index 0000000000..114999d5b4 --- /dev/null +++ b/tests_e2e/tests/no_outbound_connections/deny_outbound_connections.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import json + +from typing import Any, Dict + +from tests_e2e.tests.lib.add_network_security_group import NETWORK_SECURITY_GROUP +from tests_e2e.tests.lib.update_arm_template import UpdateArmTemplate + + +class DenyOutboundConnections(UpdateArmTemplate): + """ + Updates the ARM template to add a security rule that denies all outbound connections. 
+ """ + def update(self, template: Dict[str, Any]) -> None: + resources = template["resources"] + nsg = self._get_resource_by_name(resources, NETWORK_SECURITY_GROUP, "Microsoft.Network/networkSecurityGroups") + properties = nsg.get("properties") + + if properties is None: + raise Exception("Cannot find the properties of the Network Security Group in the ARM template") + + security_rules = properties.get("securityRules") + if security_rules is None: + raise Exception("Cannot find the security rules of the Network Security Group in the ARM template") + + security_rules.append(json.loads("""{ + "name": "waagent-no-outbound", + "properties": { + "description": "Denies all outbound connections.", + "protocol": "*", + "sourcePortRange": "*", + "destinationPortRange": "*", + "sourceAddressPrefix": "*", + "destinationAddressPrefix": "Internet", + "access": "Deny", + "priority": 200, + "direction": "Outbound" + } + }""")) + diff --git a/tests_e2e/tests/no_outbound_connections/template.py b/tests_e2e/tests/no_outbound_connections/template.py deleted file mode 100755 index 8a7421f00e..0000000000 --- a/tests_e2e/tests/no_outbound_connections/template.py +++ /dev/null @@ -1,95 +0,0 @@ -#!/usr/bin/env python3 - -# Microsoft Azure Linux Agent -# -# Copyright 2018 Microsoft Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import json -from typing import Any - - -def update_arm_template(template: Any) -> None: - """ - Updates the ARM template to add a network security group that denies all outbound connections. - """ - resources = template["resources"] - - # Append the NSG to the list of resources - resources.append(json.loads(""" - { - "type": "Microsoft.Network/networkSecurityGroups", - "name": "no-outbound-connections", - "location": "[parameters('location')]", - "apiVersion": "2020-05-01", - "properties": { - "securityRules": [ - { - "name": "ssh_rule", - "properties": { - "description": "Allows inbound SSH connections.", - "protocol": "Tcp", - "sourcePortRange": "*", - "destinationPortRange": "22", - "sourceAddressPrefix": "*", - "destinationAddressPrefix": "*", - "access": "Allow", - "priority": 110, - "direction": "Inbound" - } - }, - { - "name": "outbound_rule", - "properties": { - "description": "Denies all outbound connections.", - "protocol": "*", - "sourcePortRange": "*", - "destinationPortRange": "*", - "sourceAddressPrefix": "*", - "destinationAddressPrefix": "Internet", - "access": "Deny", - "priority": 200, - "direction": "Outbound" - } - } - ] - } - } - """)) - - # Add a dependency of the deployment on the NSG - deployment_resource = _get_resource(resources, "Microsoft.Resources/deployments") - deployment_resource["dependsOn"].append("[resourceId('Microsoft.Network/networkSecurityGroups', 'no-outbound-connections')]") - - # Add reference to the NSG to the properties of the network interface - template_resources = deployment_resource["properties"]["template"]["resources"] - network_interface_resource = _get_resource(template_resources, "Microsoft.Network/networkInterfaces") - network_interface_resource["properties"].update(json.loads( - """ - { - "networkSecurityGroup": { - "id": "[resourceId('Microsoft.Network/networkSecurityGroups', 'no-outbound-connections')]" - } - } - """)) - - -def _get_resource(resources: Any, type_name: str) -> Any: - for item in 
resources: - if item["type"] == type_name: - return item - raise Exception(f"Cannot find a resource of type {type_name} in the ARM template") - - From bde5b4069dc7aec5c12c8e96cff90260b363566f Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Wed, 26 Jul 2023 17:21:04 -0700 Subject: [PATCH 049/240] don't allow downgrades for self-update (#2881) * don't allow downgrades for self-update * address comments * update comment * add logger --- azurelinuxagent/ga/agent_update_handler.py | 18 ++++++++++++++++++ tests/ga/test_agent_update_handler.py | 8 ++++++++ 2 files changed, 26 insertions(+) diff --git a/azurelinuxagent/ga/agent_update_handler.py b/azurelinuxagent/ga/agent_update_handler.py index 47a8fa27dd..212ca1f945 100644 --- a/azurelinuxagent/ga/agent_update_handler.py +++ b/azurelinuxagent/ga/agent_update_handler.py @@ -58,7 +58,12 @@ def __should_update_agent(self, requested_version): largest version update(self-update): update is allowed once per (as specified in the conf.get_hotfix_upgrade_frequency() or conf.get_normal_upgrade_frequency()) return false when we don't allow updates. + Note: Downgrades are not allowed for self-update. """ + + if not self.__check_if_downgrade_is_requested_and_allowed(requested_version): + return False + now = datetime.datetime.now() if self._is_requested_version_update: @@ -261,6 +266,19 @@ def __get_all_agents_on_disk(): path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME)) return [GuestAgent.from_installed_agent(path=agent_dir) for agent_dir in glob.iglob(path) if os.path.isdir(agent_dir)] + def __check_if_downgrade_is_requested_and_allowed(self, requested_version): + """ + Don't allow downgrades for self-update version + Note: The intention of this check is to keep the original behavior of self-update as it is. 
+ """ + if not self._is_requested_version_update: + if requested_version < CURRENT_VERSION: + msg = "Downgrade requested in the GoalState, but downgrades are not supported for self-update version:{0}, " \ + "skipping agent update".format(requested_version) + self.__log_event(LogLevel.INFO, msg) + return False + return True + @staticmethod def __log_event(level, msg, success=True): if level == LogLevel.INFO: diff --git a/tests/ga/test_agent_update_handler.py b/tests/ga/test_agent_update_handler.py index 62dfd6488d..35ec714264 100644 --- a/tests/ga/test_agent_update_handler.py +++ b/tests/ga/test_agent_update_handler.py @@ -148,6 +148,14 @@ def test_it_should_update_to_largest_version_if_time_window_elapsed(self): self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"]) self.assertIn("Agent update found, exiting current process", ustr(context.exception.reason)) + def test_it_should_not_allow_update_if_largest_version_below_current_version(self): + self.prepare_agents(count=1) + data_file = DATA_FILE.copy() + data_file["ga_manifest"] = "wire/ga_manifest_no_upgrade.xml" + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, _): + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)]) + def test_it_should_not_agent_update_if_last_attempted_update_time_not_elapsed(self): self.prepare_agents(count=1) data_file = DATA_FILE.copy() From 0ce136b4e8ab63d99ba10122cf82f21dd9d95562 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Thu, 27 Jul 2023 15:08:53 -0700 Subject: [PATCH 050/240] Supress telemetry failures from check agent log (#2887) Co-authored-by: narrieta --- tests_e2e/tests/lib/agent_log.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tests_e2e/tests/lib/agent_log.py b/tests_e2e/tests/lib/agent_log.py index 452c5552a0..1926c0d4db 
100644 --- a/tests_e2e/tests/lib/agent_log.py +++ b/tests_e2e/tests/lib/agent_log.py @@ -77,6 +77,9 @@ def timestamp(self) -> datetime: # Logs from agent follow this format: 2023-07-10T20:50:13.038599Z return datetime.strptime(self.when, u'%Y-%m-%dT%H:%M:%S.%fZ') + def __str__(self): + return self.text + class AgentLog(object): """ @@ -304,12 +307,21 @@ def get_errors(self) -> List[AgentLogRecord]: 'message': r"SendHostPluginHeartbeat:.*ResourceGoneError.*410", 'if': lambda r: r.level == "WARNING" and self._increment_counter("SendHostPluginHeartbeat-ResourceGoneError-410") < 2 # ignore unless there are 2 or more instances }, + # # 2023-01-18T02:58:25.589492Z ERROR SendTelemetryHandler ExtHandler Event: name=WALinuxAgent, op=ReportEventErrors, message=DroppedEventsCount: 1 # Reasons (first 5 errors): [ProtocolError] [Wireserver Exception] [ProtocolError] [Wireserver Failed] URI http://168.63.129.16/machine?comp=telemetrydata [HTTP Failed] Status Code 400: Traceback (most recent call last): # { - 'message': r"(?s)SendTelemetryHandler.*http://168.63.129.16/machine\?comp=telemetrydata.*Status Code 400", - 'if': lambda _: self._increment_counter("SendTelemetryHandler-telemetrydata-Status Code 400") < 2 # ignore unless there are 2 or more instances + 'message': r"(?s)\[ProtocolError\].*http://168.63.129.16/machine\?comp=telemetrydata.*Status Code 400", + 'if': lambda r: r.thread == 'SendTelemetryHandler' and self._increment_counter("SendTelemetryHandler-telemetrydata-Status Code 400") < 2 # ignore unless there are 2 or more instances + }, + # + # 2023-07-26T22:05:42.841692Z ERROR SendTelemetryHandler ExtHandler Event: name=WALinuxAgent, op=ReportEventErrors, message=DroppedEventsCount: 1 + # Reasons (first 5 errors): [ProtocolError] Failed to send events:[ResourceGoneError] [HTTP Failed] [410: Gone] b'\n\n ResourceNotAvailable\n The resource requested is no longer available. Please refresh your cache.\n
\n
': Traceback (most recent call last): + # + { + 'message': r"(?s)\[ProtocolError\].*Failed to send events.*\[410: Gone\]", + 'if': lambda r: r.thread == 'SendTelemetryHandler' and self._increment_counter("SendTelemetryHandler-telemetrydata-Status Code 410") < 2 # ignore unless there are 2 or more instances }, # # Ignore these errors in flatcar: From b837c30a73446748ee20d52f930c59423c54a0e0 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Thu, 27 Jul 2023 15:46:12 -0700 Subject: [PATCH 051/240] Install assertpy on test VMs (#2886) * Install assertpy on test VMs * set versions --------- Co-authored-by: narrieta --- tests_e2e/orchestrator/docker/Dockerfile | 23 ++++++-- .../orchestrator/lib/agent_test_suite.py | 4 +- tests_e2e/orchestrator/scripts/prepare-pypy | 56 +++++++++++++++++++ 3 files changed, 76 insertions(+), 7 deletions(-) create mode 100755 tests_e2e/orchestrator/scripts/prepare-pypy diff --git a/tests_e2e/orchestrator/docker/Dockerfile b/tests_e2e/orchestrator/docker/Dockerfile index 2d709c7913..bbd460e6e7 100644 --- a/tests_e2e/orchestrator/docker/Dockerfile +++ b/tests_e2e/orchestrator/docker/Dockerfile @@ -68,11 +68,16 @@ RUN \ export PATH="$HOME/.local/bin:$PATH" && \ \ # \ - # Install LISA \ + # Install LISA. 
\ + # \ + # (note that we use a specific commit, which is the version of LISA that has been verified to work with our \ + # tests; when taking a new LISA version, make sure to verify that the tests work OK before pushing the \ + # Docker image to our registry) \ # \ cd $HOME && \ git clone https://github.com/microsoft/lisa.git && \ cd lisa && \ + git checkout a030c5e6a0695db77dbf5bd52a45d07cbbf00087 && \ \ python3 -m pip install --upgrade pip && \ python3 -m pip install --editable .[azure,libvirt] --config-settings editable_mode=compat && \ @@ -80,15 +85,23 @@ RUN \ # \ # Install additional test dependencies \ # \ + # (note that we update azure-mgmt-compute to 29.1.0 - LISA installs 26.1; this is needed in order to access \ + # osProfile.linuxConfiguration.enableVMAgentPlatformUpdates in the VM model - that property is used by some \ + # tests, such as Agent versioning) \ + # \ python3 -m pip install distro msrestazure pytz && \ - python3 -m pip install azure-mgmt-compute --upgrade && \ + python3 -m pip install azure-mgmt-compute==29.1.0 --upgrade && \ \ # \ # Download Pypy to a known location, from which it will be installed to the test VMs. 
\ # \ - mkdir $HOME/bin && \ - wget https://downloads.python.org/pypy/pypy3.7-v7.3.5-linux64.tar.bz2 -O /tmp/pypy3.7-x64.tar.bz2 && \ - wget https://downloads.python.org/pypy/pypy3.7-v7.3.5-aarch64.tar.bz2 -O /tmp/pypy3.7-arm64.tar.bz2 && \ + wget https://dcrdata.blob.core.windows.net/python/pypy3.7-x64.tar.bz2 -O /tmp/pypy3.7-x64.tar.bz2 && \ + wget https://dcrdata.blob.core.windows.net/python/pypy3.7-arm64.tar.bz2 -O /tmp/pypy3.7-arm64.tar.bz2 && \ + \ + # \ + # Install pudb, which can be useful to debug issues in the image \ + # \ + python3 -m pip install pudb && \ \ # \ # The setup for the tests depends on a few paths; add those to the profile \ diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index 06a5acde42..6f9eda020f 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -294,10 +294,10 @@ def _setup_node(self, install_test_agent: bool) -> None: # if self.context.ssh_client.get_architecture() == "aarch64": pypy_path = Path("/tmp/pypy3.7-arm64.tar.bz2") - pypy_download = "https://downloads.python.org/pypy/pypy3.7-v7.3.5-aarch64.tar.bz2" + pypy_download = "https://dcrdata.blob.core.windows.net/python/pypy3.7-arm64.tar.bz2" else: pypy_path = Path("/tmp/pypy3.7-x64.tar.bz2") - pypy_download = "https://downloads.python.org/pypy/pypy3.7-v7.3.5-linux64.tar.bz2" + pypy_download = "https://dcrdata.blob.core.windows.net/python/pypy3.7-x64.tar.bz2" if pypy_path.exists(): log.info("Found Pypy at %s", pypy_path) else: diff --git a/tests_e2e/orchestrator/scripts/prepare-pypy b/tests_e2e/orchestrator/scripts/prepare-pypy new file mode 100755 index 0000000000..fe469c914e --- /dev/null +++ b/tests_e2e/orchestrator/scripts/prepare-pypy @@ -0,0 +1,56 @@ +#!/usr/bin/env bash + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in 
compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This script is used to prepare a tarball containing Pypy with the assert-py module pre-installed. +# It needs to be run on x64 and arm64 VMs and the resulting tarballs need to be uploaded to storage, +# from where they are downloaded and installed to the test VMs (see wiki for detail). +# + +set -euo pipefail + +cd /tmp +rm -rf pypy3.7-* + +arch=$(uname -m) +printf "Preparing Pypy for architecture %s...\n" $arch + +printf "\n*** Downloading Pypy...\n" +if [[ $arch == "aarch64" ]]; then + tarball="pypy3.7-arm64.tar.bz2" + wget https://downloads.python.org/pypy/pypy3.7-v7.3.5-aarch64.tar.bz2 -O $tarball +else + tarball="pypy3.7-x64.tar.bz2" + wget https://downloads.python.org/pypy/pypy3.7-v7.3.5-linux64.tar.bz2 -O $tarball +fi + +printf "\n*** Installing assertpy...\n" +tar xf $tarball +./pypy3.7-v7.3.5-*/bin/pypy -m ensurepip +./pypy3.7-v7.3.5-*/bin/pypy -mpip install assertpy + +printf "\n*** Creating new tarball for Pypy...\n" +# remove the cache files created when Pypy, and set the owner to 0/0, in order to match the original tarball +find pypy3.7-v7.3.5-* -name '*.pyc' -exec rm {} \; +mv -v $tarball "$tarball.original" +tar cf $tarball --bzip2 --owner 0:0 --group 0:0 pypy3.7-v7.3.5-* +rm -rf pypy3.7-v7.3.5-* + +printf "\nPypy is ready at %s\n" "$(pwd)/$tarball" + From 09ffc8f1f035520cd7920646a1dc830d758e502b Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Fri, 28 Jul 2023 12:48:10 -0700 Subject: [PATCH 052/240] Add sample remote tests (#2888) * Add sample remote tests * add pass * review feedback --------- 
Co-authored-by: narrieta --- .../orchestrator/lib/agent_test_suite.py | 20 ++++++-- tests_e2e/test_suites/fail.yml | 8 ++-- tests_e2e/test_suites/pass.yml | 3 +- tests_e2e/tests/lib/agent_test.py | 30 ++++++++++++ tests_e2e/tests/lib/logging.py | 4 +- tests_e2e/tests/lib/remote_test.py | 48 +++++++++++++++++++ tests_e2e/tests/samples/error_remote_test.py | 32 +++++++++++++ tests_e2e/tests/{ => samples}/error_test.py | 2 +- tests_e2e/tests/samples/fail_remote_test.py | 32 +++++++++++++ tests_e2e/tests/{ => samples}/fail_test.py | 2 +- tests_e2e/tests/samples/pass_remote_test.py | 32 +++++++++++++ tests_e2e/tests/{ => samples}/pass_test.py | 0 .../scripts/samples-error_remote_test.py | 36 ++++++++++++++ .../tests/scripts/samples-fail_remote_test.py | 37 ++++++++++++++ .../tests/scripts/samples-pass_remote_test.py | 36 ++++++++++++++ 15 files changed, 311 insertions(+), 11 deletions(-) create mode 100644 tests_e2e/tests/lib/remote_test.py create mode 100755 tests_e2e/tests/samples/error_remote_test.py rename tests_e2e/tests/{ => samples}/error_test.py (92%) create mode 100755 tests_e2e/tests/samples/fail_remote_test.py rename tests_e2e/tests/{ => samples}/fail_test.py (96%) create mode 100755 tests_e2e/tests/samples/pass_remote_test.py rename tests_e2e/tests/{ => samples}/pass_test.py (100%) create mode 100755 tests_e2e/tests/scripts/samples-error_remote_test.py create mode 100755 tests_e2e/tests/scripts/samples-fail_remote_test.py create mode 100755 tests_e2e/tests/scripts/samples-pass_remote_test.py diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index 6f9eda020f..49e104d8db 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -46,7 +46,7 @@ from azurelinuxagent.common.version import AGENT_VERSION from tests_e2e.orchestrator.lib.agent_test_loader import TestSuiteInfo from tests_e2e.tests.lib.agent_log import AgentLog -from 
tests_e2e.tests.lib.agent_test import TestSkipped +from tests_e2e.tests.lib.agent_test import TestSkipped, RemoteTestError from tests_e2e.tests.lib.agent_test_context import AgentTestContext from tests_e2e.tests.lib.identifiers import VmIdentifier from tests_e2e.tests.lib.logging import log @@ -531,17 +531,29 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool: TestStatus.FAILED, test_start_time, message=str(e)) + except RemoteTestError as e: + test_success = False + summary.append(f"[Failed] {test.name}") + message = f"UNEXPECTED ERROR IN [{e.command}] {e.stderr}\n{e.stdout}" + log.error("******** [Failed] %s: %s", test.name, message) + self.context.lisa_log.error("******** [Failed] %s", test_full_name) + self._report_test_result( + suite_full_name, + test.name, + TestStatus.FAILED, + test_start_time, + message=str(message)) except: # pylint: disable=bare-except test_success = False summary.append(f"[Error] {test.name}") - log.exception("UNHANDLED EXCEPTION IN %s", test.name) - self.context.lisa_log.exception("UNHANDLED EXCEPTION IN %s", test_full_name) + log.exception("UNEXPECTED ERROR IN %s", test.name) + self.context.lisa_log.exception("UNEXPECTED ERROR IN %s", test_full_name) self._report_test_result( suite_full_name, test.name, TestStatus.FAILED, test_start_time, - message="Unhandled exception.", + message="Unexpected error.", add_exception_stack_trace=True) log.info("") diff --git a/tests_e2e/test_suites/fail.yml b/tests_e2e/test_suites/fail.yml index 6cd3b01aff..ae38db062a 100644 --- a/tests_e2e/test_suites/fail.yml +++ b/tests_e2e/test_suites/fail.yml @@ -1,5 +1,7 @@ name: "Fail" tests: - - "fail_test.py" - - "error_test.py" -images: "ubuntu_1804" + - "samples/fail_test.py" + - "samples/fail_remote_test.py" + - "samples/error_test.py" + - "samples/error_remote_test.py" +images: "ubuntu_2004" diff --git a/tests_e2e/test_suites/pass.yml b/tests_e2e/test_suites/pass.yml index 40b0e60b46..b80db63f56 100644 --- a/tests_e2e/test_suites/pass.yml +++ 
b/tests_e2e/test_suites/pass.yml @@ -1,4 +1,5 @@ name: "Pass" tests: - - "pass_test.py" + - "samples/pass_test.py" + - "samples/pass_remote_test.py" images: "ubuntu_2004" diff --git a/tests_e2e/tests/lib/agent_test.py b/tests_e2e/tests/lib/agent_test.py index 22f865a6f3..2eac007afd 100644 --- a/tests_e2e/tests/lib/agent_test.py +++ b/tests_e2e/tests/lib/agent_test.py @@ -20,10 +20,14 @@ import sys from abc import ABC, abstractmethod +from assertpy import fail from typing import Any, Dict, List from tests_e2e.tests.lib.agent_test_context import AgentTestContext from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.remote_test import FAIL_EXIT_CODE +from tests_e2e.tests.lib.shell import CommandError +from tests_e2e.tests.lib.ssh_client import ATTEMPTS, ATTEMPT_DELAY, SshClient class TestSkipped(Exception): @@ -33,6 +37,12 @@ class TestSkipped(Exception): """ +class RemoteTestError(CommandError): + """ + Raised when a remote test fails with an unexpected error. + """ + + class AgentTest(ABC): """ Defines the interface for agent tests, which are simply constructed from an AgentTestContext and expose a single method, @@ -59,8 +69,28 @@ def run_from_command_line(cls): cls(AgentTestContext.from_args()).run() except SystemExit: # Bad arguments pass + except AssertionError as e: + log.error("%s", e) + sys.exit(1) except: # pylint: disable=bare-except log.exception("Test failed") sys.exit(1) sys.exit(0) + + def _run_remote_test(self, command: str, use_sudo: bool = False, attempts: int = ATTEMPTS, attempt_delay: int = ATTEMPT_DELAY) -> None: + """ + Derived classes can use this method to execute a remote test (a test that runs over SSH). 
+ """ + try: + ssh_client: SshClient = self._context.create_ssh_client() + output = ssh_client.run_command(command=command, use_sudo=use_sudo, attempts=attempts, attempt_delay=attempt_delay) + log.info("*** PASSED: [%s]\n%s", command, self._indent(output)) + except CommandError as error: + if error.exit_code == FAIL_EXIT_CODE: + fail(f"[{command}] {error.stderr}{self._indent(error.stdout)}") + raise RemoteTestError(command=error.command, exit_code=error.exit_code, stdout=self._indent(error.stdout), stderr=error.stderr) + + @staticmethod + def _indent(text: str, indent: str = " " * 8): + return "\n".join(f"{indent}{line}" for line in text.splitlines()) diff --git a/tests_e2e/tests/lib/logging.py b/tests_e2e/tests/lib/logging.py index ff636b63de..a6cf6566b1 100644 --- a/tests_e2e/tests/lib/logging.py +++ b/tests_e2e/tests/lib/logging.py @@ -20,6 +20,8 @@ # for logging. # import contextlib +import sys + from logging import FileHandler, Formatter, Handler, Logger, StreamHandler, INFO from pathlib import Path from threading import current_thread @@ -46,7 +48,7 @@ class _AgentLoggingHandler(Handler): def __init__(self): super().__init__() self.formatter: Formatter = Formatter('%(asctime)s.%(msecs)03d [%(levelname)s] %(message)s', datefmt="%Y-%m-%dT%H:%M:%SZ") - self.default_handler = StreamHandler() + self.default_handler = StreamHandler(sys.stdout) self.default_handler.setFormatter(self.formatter) self.per_thread_handlers: Dict[int, FileHandler] = {} diff --git a/tests_e2e/tests/lib/remote_test.py b/tests_e2e/tests/lib/remote_test.py new file mode 100644 index 0000000000..c5bf979f01 --- /dev/null +++ b/tests_e2e/tests/lib/remote_test.py @@ -0,0 +1,48 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import sys + +from typing import Callable + +from tests_e2e.tests.lib.logging import log + +SUCCESS_EXIT_CODE = 0 +FAIL_EXIT_CODE = 100 +ERROR_EXIT_CODE = 200 + + +def run_remote_test(test_method: Callable[[], int]) -> None: + """ + Helper function to run a remote test; implements coding conventions for remote tests, e.g. error message goes + to stderr, test log goes to stdout, etc. + """ + try: + test_method() + log.info("*** PASSED") + except AssertionError as e: + print(f"{e}", file=sys.stderr) + log.error("%s", e) + sys.exit(FAIL_EXIT_CODE) + except Exception as e: + print(f"UNEXPECTED ERROR: {e}", file=sys.stderr) + log.exception("*** UNEXPECTED ERROR") + sys.exit(ERROR_EXIT_CODE) + + sys.exit(SUCCESS_EXIT_CODE) + diff --git a/tests_e2e/tests/samples/error_remote_test.py b/tests_e2e/tests/samples/error_remote_test.py new file mode 100755 index 0000000000..29612f4246 --- /dev/null +++ b/tests_e2e/tests/samples/error_remote_test.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +from tests_e2e.tests.lib.agent_test import AgentTest + + +class ErrorRemoteTest(AgentTest): + """ + A trivial remote test that fails + """ + def run(self): + self._run_remote_test("samples-error_remote_test.py") + + +if __name__ == "__main__": + ErrorRemoteTest.run_from_command_line() diff --git a/tests_e2e/tests/error_test.py b/tests_e2e/tests/samples/error_test.py similarity index 92% rename from tests_e2e/tests/error_test.py rename to tests_e2e/tests/samples/error_test.py index cf369f7d39..4c24080687 100755 --- a/tests_e2e/tests/error_test.py +++ b/tests_e2e/tests/samples/error_test.py @@ -25,7 +25,7 @@ class ErrorTest(AgentTest): A trivial test that errors out """ def run(self): - raise Exception("* ERROR *") + raise Exception("* TEST ERROR *") # simulate an unexpected error if __name__ == "__main__": diff --git a/tests_e2e/tests/samples/fail_remote_test.py b/tests_e2e/tests/samples/fail_remote_test.py new file mode 100755 index 0000000000..f0a50495ad --- /dev/null +++ b/tests_e2e/tests/samples/fail_remote_test.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from tests_e2e.tests.lib.agent_test import AgentTest + + +class FailRemoteTest(AgentTest): + """ + A trivial remote test that fails + """ + def run(self): + self._run_remote_test("samples-fail_remote_test.py") + + +if __name__ == "__main__": + FailRemoteTest.run_from_command_line() diff --git a/tests_e2e/tests/fail_test.py b/tests_e2e/tests/samples/fail_test.py similarity index 96% rename from tests_e2e/tests/fail_test.py rename to tests_e2e/tests/samples/fail_test.py index e96b5bcf7e..fcebd99183 100755 --- a/tests_e2e/tests/fail_test.py +++ b/tests_e2e/tests/samples/fail_test.py @@ -26,7 +26,7 @@ class FailTest(AgentTest): A trivial test that fails """ def run(self): - fail("* FAILED *") + fail("* TEST FAILED *") if __name__ == "__main__": diff --git a/tests_e2e/tests/samples/pass_remote_test.py b/tests_e2e/tests/samples/pass_remote_test.py new file mode 100755 index 0000000000..94e0cb604c --- /dev/null +++ b/tests_e2e/tests/samples/pass_remote_test.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from tests_e2e.tests.lib.agent_test import AgentTest + + +class PassRemoteTest(AgentTest): + """ + A trivial remote test that succeeds + """ + def run(self): + self._run_remote_test("samples-pass_remote_test.py") + + +if __name__ == "__main__": + PassRemoteTest.run_from_command_line() diff --git a/tests_e2e/tests/pass_test.py b/tests_e2e/tests/samples/pass_test.py similarity index 100% rename from tests_e2e/tests/pass_test.py rename to tests_e2e/tests/samples/pass_test.py diff --git a/tests_e2e/tests/scripts/samples-error_remote_test.py b/tests_e2e/tests/scripts/samples-error_remote_test.py new file mode 100755 index 0000000000..fd7c3810f4 --- /dev/null +++ b/tests_e2e/tests/scripts/samples-error_remote_test.py @@ -0,0 +1,36 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# +# A sample remote test that simulates an unexpected error +# + +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.remote_test import run_remote_test + + +def main(): + log.info("Setting up test") + log.info("Doing some operation") + log.warning("Something went wrong, but the test can continue") + log.info("Doing some other operation") + raise Exception("Something went wrong") # simulate an unexpected error + + +run_remote_test(main) diff --git a/tests_e2e/tests/scripts/samples-fail_remote_test.py b/tests_e2e/tests/scripts/samples-fail_remote_test.py new file mode 100755 index 0000000000..2e2cbae691 --- /dev/null +++ b/tests_e2e/tests/scripts/samples-fail_remote_test.py @@ -0,0 +1,37 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# +# A sample remote test that fails +# + +from assertpy import fail +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.remote_test import run_remote_test + + +def main(): + log.info("Setting up test") + log.info("Doing some operation") + log.warning("Something went wrong, but the test can continue") + log.info("Doing some other operation") + fail("Verification of the operation failed") + + +run_remote_test(main) diff --git a/tests_e2e/tests/scripts/samples-pass_remote_test.py b/tests_e2e/tests/scripts/samples-pass_remote_test.py new file mode 100755 index 0000000000..1c65f53326 --- /dev/null +++ b/tests_e2e/tests/scripts/samples-pass_remote_test.py @@ -0,0 +1,36 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# +# A sample remote test that passes +# + +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.remote_test import run_remote_test + + +def main(): + log.info("Setting up test") + log.info("Doing some operation") + log.warning("Something went wrong, but the test can continue") + log.info("Doing some other operation") + log.info("All verifications succeeded") + + +run_remote_test(main) From 0e99ee12775486a6867ef1093dd50546e00b19f2 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 8 Aug 2023 15:07:24 -0700 Subject: [PATCH 053/240] Enable Extensions.Enabled in tests (#2892) * enable Extensions.Enabled * address comment * address comment * use script * improve msg * improve msg --- tests_e2e/orchestrator/scripts/install-agent | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests_e2e/orchestrator/scripts/install-agent b/tests_e2e/orchestrator/scripts/install-agent index 00200f3a11..7c80f6caf6 100755 --- a/tests_e2e/orchestrator/scripts/install-agent +++ b/tests_e2e/orchestrator/scripts/install-agent @@ -116,6 +116,20 @@ sed -i 's/AutoUpdate.Enabled=n/AutoUpdate.Enabled=y/g' "$waagent_conf_path" # By default GAUpdates flag set to True, so that agent go through update logic to look for new agents. # But in e2e tests this flag needs to be off in test version 9.9.9.9 to stop the agent updates, so that our scenarios run on 9.9.9.9. 
sed -i '$a Debug.DownloadNewAgents=n' "$waagent_conf_path" +# Logging and exiting tests if Extensions.Enabled flag is disabled for other distros than debian +if grep -q "Extensions.Enabled=n" $waagent_conf_path; then + pypy_get_distro=$(pypy3 -c 'from azurelinuxagent.common.version import get_distro; print(get_distro())') + python_get_distro=$($python -c 'from azurelinuxagent.common.version import get_distro; print(get_distro())') + # As we know debian distros disable extensions by default, so we need to enable them to verify agent extension scenarios + # If rest of the distros disable extensions, then we exit the test setup to fail the test. + if [[ $pypy_get_distro == *"debian"* ]] || [[ $python_get_distro == *"debian"* ]]; then + echo "Extensions.Enabled flag is disabled and this is expected in debian distro, so enabling it" + update-waagent-conf Extensions.Enabled=y + else + echo "Extensions.Enabled flag is disabled which is unexpected in this distro, so exiting test setup to fail the test" + exit 1 + fi +fi # # Restart the service From 4a1d3de2ee091d508e91521053d90d915ac8f5e4 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Fri, 11 Aug 2023 07:04:41 -0700 Subject: [PATCH 054/240] Reorganize file structure of unit tests (#2894) * Reorganize file structure of unit tests * remove duplicate * add init * mocks --------- Co-authored-by: narrieta --- tests/common/dhcp/test_dhcp.py | 2 +- tests/common/osutil/test_alpine.py | 2 +- tests/common/osutil/test_arch.py | 2 +- tests/common/osutil/test_bigip.py | 2 +- tests/common/osutil/test_clearlinux.py | 2 +- tests/common/osutil/test_coreos.py | 2 +- tests/common/osutil/test_default.py | 4 +- tests/common/osutil/test_default_osutil.py | 2 +- tests/common/osutil/test_factory.py | 2 +- tests/common/osutil/test_freebsd.py | 2 +- tests/common/osutil/test_nsbsd.py | 2 +- tests/common/osutil/test_openbsd.py | 2 +- tests/common/osutil/test_openwrt.py | 2 +- tests/common/osutil/test_photonos.py | 2 +- 
tests/common/osutil/test_redhat.py | 2 +- tests/common/osutil/test_suse.py | 2 +- tests/common/osutil/test_ubuntu.py | 2 +- tests/{distro => common/protocol}/__init__.py | 0 .../protocol/test_datacontract.py | 0 ...sions_goal_state_from_extensions_config.py | 18 +-- ..._extensions_goal_state_from_vm_settings.py | 20 +-- .../{ => common}/protocol/test_goal_state.py | 46 +++--- .../protocol/test_healthservice.py | 4 +- .../{ => common}/protocol/test_hostplugin.py | 20 +-- .../protocol/test_image_info_matcher.py | 0 tests/{ => common}/protocol/test_imds.py | 4 +- .../test_metadata_server_migration_util.py | 2 +- .../protocol/test_protocol_util.py | 2 +- tests/{ => common}/protocol/test_wire.py | 86 +++++----- tests/common/test_agent_supported_feature.py | 2 +- tests/common/test_cgroupapi.py | 6 +- tests/common/test_cgroupconfigurator.py | 8 +- tests/common/test_cgroups.py | 2 +- tests/common/test_cgroupstelemetry.py | 2 +- tests/common/test_conf.py | 2 +- tests/common/test_errorstate.py | 2 +- tests/common/test_event.py | 16 +- tests/common/test_logcollector.py | 2 +- tests/common/test_logger.py | 2 +- tests/common/test_persist_firewall_rules.py | 2 +- tests/common/test_singletonperthread.py | 2 +- tests/common/test_telemetryevent.py | 2 +- tests/common/test_version.py | 2 +- tests/{protocol => common/utils}/__init__.py | 0 tests/{ => common}/utils/test_archive.py | 2 +- tests/{ => common}/utils/test_crypt_util.py | 2 +- .../utils/test_extension_process_util.py | 2 +- tests/{ => common}/utils/test_file_util.py | 2 +- .../utils/test_flexible_version.py | 0 tests/{ => common}/utils/test_network_util.py | 2 +- tests/{ => common}/utils/test_passwords.txt | 0 tests/{ => common}/utils/test_rest_util.py | 2 +- tests/{ => common}/utils/test_shell_util.py | 4 +- tests/{ => common}/utils/test_text_util.py | 2 +- tests/daemon/test_daemon.py | 2 +- tests/daemon/test_resourcedisk.py | 122 ++++++++++++++- tests/{distro => daemon}/test_scvmm.py | 2 +- 
tests/distro/test_resourceDisk.py | 148 ------------------ tests/ga/test_agent_update_handler.py | 8 +- tests/ga/test_collect_logs.py | 8 +- tests/ga/test_collect_telemetry_events.py | 4 +- tests/ga/test_env.py | 2 +- tests/ga/test_extension.py | 138 ++++++++-------- tests/ga/test_exthandlers.py | 8 +- .../ga/test_exthandlers_download_extension.py | 8 +- .../ga/test_exthandlers_exthandlerinstance.py | 2 +- tests/ga/test_guestagent.py | 12 +- tests/ga/test_monitor.py | 8 +- tests/ga/test_multi_config_extension.py | 10 +- tests/ga/test_periodic_operation.py | 2 +- tests/ga/test_remoteaccess.py | 10 +- tests/ga/test_remoteaccess_handler.py | 6 +- tests/ga/test_report_status.py | 18 +-- tests/ga/test_send_telemetry_events.py | 10 +- tests/ga/test_update.py | 48 +++--- tests/{utils => lib}/__init__.py | 0 tests/{utils => lib}/cgroups_tools.py | 0 tests/{utils => lib}/event_logger_tools.py | 10 +- tests/{ga => lib}/extension_emulator.py | 10 +- .../http_request_predicates.py} | 0 tests/{utils => lib}/miscellaneous_tools.py | 0 .../mock_cgroup_environment.py | 5 +- tests/{common => lib}/mock_command.py | 0 tests/{common => lib}/mock_environment.py | 2 +- .../mocks.py => lib/mock_update_handler.py} | 2 +- .../mocks.py => lib/mock_wire_protocol.py} | 10 +- tests/{ => lib}/tools.py | 4 +- .../wire_protocol_data.py} | 4 +- tests/pa/test_deprovision.py | 2 +- tests/pa/test_provision.py | 2 +- tests/test_agent.py | 2 +- 91 files changed, 456 insertions(+), 481 deletions(-) rename tests/{distro => common/protocol}/__init__.py (100%) rename tests/{ => common}/protocol/test_datacontract.py (100%) rename tests/{ => common}/protocol/test_extensions_goal_state_from_extensions_config.py (85%) rename tests/{ => common}/protocol/test_extensions_goal_state_from_vm_settings.py (93%) rename tests/{ => common}/protocol/test_goal_state.py (93%) rename tests/{ => common}/protocol/test_healthservice.py (99%) rename tests/{ => common}/protocol/test_hostplugin.py (98%) rename tests/{ => 
common}/protocol/test_image_info_matcher.py (100%) rename tests/{ => common}/protocol/test_imds.py (99%) rename tests/{ => common}/protocol/test_metadata_server_migration_util.py (99%) rename tests/{ => common}/protocol/test_protocol_util.py (99%) rename tests/{ => common}/protocol/test_wire.py (94%) rename tests/{protocol => common/utils}/__init__.py (100%) rename tests/{ => common}/utils/test_archive.py (99%) rename tests/{ => common}/utils/test_crypt_util.py (96%) rename tests/{ => common}/utils/test_extension_process_util.py (99%) rename tests/{ => common}/utils/test_file_util.py (99%) rename tests/{ => common}/utils/test_flexible_version.py (100%) rename tests/{ => common}/utils/test_network_util.py (99%) rename tests/{ => common}/utils/test_passwords.txt (100%) rename tests/{ => common}/utils/test_rest_util.py (99%) rename tests/{ => common}/utils/test_shell_util.py (99%) rename tests/{ => common}/utils/test_text_util.py (99%) rename tests/{distro => daemon}/test_scvmm.py (98%) delete mode 100644 tests/distro/test_resourceDisk.py rename tests/{utils => lib}/__init__.py (100%) rename tests/{utils => lib}/cgroups_tools.py (100%) rename tests/{utils => lib}/event_logger_tools.py (89%) rename tests/{ga => lib}/extension_emulator.py (98%) rename tests/{protocol/HttpRequestPredicates.py => lib/http_request_predicates.py} (100%) rename tests/{utils => lib}/miscellaneous_tools.py (100%) rename tests/{common => lib}/mock_cgroup_environment.py (97%) rename tests/{common => lib}/mock_command.py (100%) rename tests/{common => lib}/mock_environment.py (99%) rename tests/{ga/mocks.py => lib/mock_update_handler.py} (99%) rename tests/{protocol/mocks.py => lib/mock_wire_protocol.py} (96%) rename tests/{ => lib}/tools.py (99%) rename tests/{protocol/mockwiredata.py => lib/wire_protocol_data.py} (99%) diff --git a/tests/common/dhcp/test_dhcp.py b/tests/common/dhcp/test_dhcp.py index b4eece5c2e..dda28985ae 100644 --- a/tests/common/dhcp/test_dhcp.py +++ 
b/tests/common/dhcp/test_dhcp.py @@ -18,7 +18,7 @@ import mock import azurelinuxagent.common.dhcp as dhcp import azurelinuxagent.common.osutil.default as osutil -from tests.tools import AgentTestCase, open_patch, patch +from tests.lib.tools import AgentTestCase, open_patch, patch class TestDHCP(AgentTestCase): diff --git a/tests/common/osutil/test_alpine.py b/tests/common/osutil/test_alpine.py index d2eb361148..ec669cf3eb 100644 --- a/tests/common/osutil/test_alpine.py +++ b/tests/common/osutil/test_alpine.py @@ -17,7 +17,7 @@ import unittest from azurelinuxagent.common.osutil.alpine import AlpineOSUtil -from tests.tools import AgentTestCase +from tests.lib.tools import AgentTestCase from .test_default import osutil_get_dhcp_pid_should_return_a_list_of_pids diff --git a/tests/common/osutil/test_arch.py b/tests/common/osutil/test_arch.py index 1133eae270..67ada5e547 100644 --- a/tests/common/osutil/test_arch.py +++ b/tests/common/osutil/test_arch.py @@ -17,7 +17,7 @@ import unittest from azurelinuxagent.common.osutil.arch import ArchUtil -from tests.tools import AgentTestCase +from tests.lib.tools import AgentTestCase from .test_default import osutil_get_dhcp_pid_should_return_a_list_of_pids diff --git a/tests/common/osutil/test_bigip.py b/tests/common/osutil/test_bigip.py index 421d4d920a..7312f3ae57 100644 --- a/tests/common/osutil/test_bigip.py +++ b/tests/common/osutil/test_bigip.py @@ -26,7 +26,7 @@ import azurelinuxagent.common.utils.shellutil as shellutil from azurelinuxagent.common.exception import OSUtilError from azurelinuxagent.common.osutil.bigip import BigIpOSUtil -from tests.tools import AgentTestCase, patch +from tests.lib.tools import AgentTestCase, patch from .test_default import osutil_get_dhcp_pid_should_return_a_list_of_pids diff --git a/tests/common/osutil/test_clearlinux.py b/tests/common/osutil/test_clearlinux.py index a7d75722fa..4824c95511 100644 --- a/tests/common/osutil/test_clearlinux.py +++ b/tests/common/osutil/test_clearlinux.py @@ 
-17,7 +17,7 @@ import unittest from azurelinuxagent.common.osutil.clearlinux import ClearLinuxUtil -from tests.tools import AgentTestCase +from tests.lib.tools import AgentTestCase from .test_default import osutil_get_dhcp_pid_should_return_a_list_of_pids diff --git a/tests/common/osutil/test_coreos.py b/tests/common/osutil/test_coreos.py index dca06fa155..36e3987389 100644 --- a/tests/common/osutil/test_coreos.py +++ b/tests/common/osutil/test_coreos.py @@ -17,7 +17,7 @@ import unittest from azurelinuxagent.common.osutil.coreos import CoreOSUtil -from tests.tools import AgentTestCase +from tests.lib.tools import AgentTestCase from .test_default import osutil_get_dhcp_pid_should_return_a_list_of_pids diff --git a/tests/common/osutil/test_default.py b/tests/common/osutil/test_default.py index ab4fa5c999..42a7050522 100644 --- a/tests/common/osutil/test_default.py +++ b/tests/common/osutil/test_default.py @@ -34,8 +34,8 @@ from azurelinuxagent.common.utils import fileutil from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.utils.networkutil import AddFirewallRules -from tests.common.mock_environment import MockEnvironment -from tests.tools import AgentTestCase, patch, open_patch, load_data, data_dir, is_python_version_26_or_34, skip_if_predicate_true +from tests.lib.mock_environment import MockEnvironment +from tests.lib.tools import AgentTestCase, patch, open_patch, load_data, data_dir, is_python_version_26_or_34, skip_if_predicate_true actual_get_proc_net_route = 'azurelinuxagent.common.osutil.default.DefaultOSUtil._get_proc_net_route' diff --git a/tests/common/osutil/test_default_osutil.py b/tests/common/osutil/test_default_osutil.py index 070f1d653c..1b94dd5ca8 100644 --- a/tests/common/osutil/test_default_osutil.py +++ b/tests/common/osutil/test_default_osutil.py @@ -16,7 +16,7 @@ # from azurelinuxagent.common.osutil.default import DefaultOSUtil, shellutil # pylint: disable=unused-import -from tests.tools import 
AgentTestCase, patch # pylint: disable=unused-import +from tests.lib.tools import AgentTestCase, patch # pylint: disable=unused-import class DefaultOsUtilTestCase(AgentTestCase): diff --git a/tests/common/osutil/test_factory.py b/tests/common/osutil/test_factory.py index 7bd729c3b3..fbe5e24d3d 100644 --- a/tests/common/osutil/test_factory.py +++ b/tests/common/osutil/test_factory.py @@ -34,7 +34,7 @@ from azurelinuxagent.common.osutil.suse import SUSEOSUtil, SUSE11OSUtil from azurelinuxagent.common.osutil.ubuntu import UbuntuOSUtil, Ubuntu12OSUtil, Ubuntu14OSUtil, \ UbuntuSnappyOSUtil, Ubuntu16OSUtil, Ubuntu18OSUtil -from tests.tools import AgentTestCase, patch +from tests.lib.tools import AgentTestCase, patch class TestOsUtilFactory(AgentTestCase): diff --git a/tests/common/osutil/test_freebsd.py b/tests/common/osutil/test_freebsd.py index 3851836013..0236b47190 100644 --- a/tests/common/osutil/test_freebsd.py +++ b/tests/common/osutil/test_freebsd.py @@ -20,7 +20,7 @@ import azurelinuxagent.common.utils.shellutil as shellutil from azurelinuxagent.common.osutil.freebsd import FreeBSDOSUtil from azurelinuxagent.common.utils import textutil -from tests.tools import AgentTestCase, patch +from tests.lib.tools import AgentTestCase, patch from .test_default import osutil_get_dhcp_pid_should_return_a_list_of_pids diff --git a/tests/common/osutil/test_nsbsd.py b/tests/common/osutil/test_nsbsd.py index 4e97f7444b..37d79e61ad 100644 --- a/tests/common/osutil/test_nsbsd.py +++ b/tests/common/osutil/test_nsbsd.py @@ -19,7 +19,7 @@ from azurelinuxagent.common.osutil.nsbsd import NSBSDOSUtil from azurelinuxagent.common.utils.fileutil import read_file -from tests.tools import AgentTestCase, patch +from tests.lib.tools import AgentTestCase, patch class TestNSBSDOSUtil(AgentTestCase): diff --git a/tests/common/osutil/test_openbsd.py b/tests/common/osutil/test_openbsd.py index e82a1d8e49..666e4efabe 100644 --- a/tests/common/osutil/test_openbsd.py +++ 
b/tests/common/osutil/test_openbsd.py @@ -17,7 +17,7 @@ import unittest from azurelinuxagent.common.osutil.openbsd import OpenBSDOSUtil -from tests.tools import AgentTestCase +from tests.lib.tools import AgentTestCase from .test_default import osutil_get_dhcp_pid_should_return_a_list_of_pids diff --git a/tests/common/osutil/test_openwrt.py b/tests/common/osutil/test_openwrt.py index 05620ff4d0..e204cae1fb 100644 --- a/tests/common/osutil/test_openwrt.py +++ b/tests/common/osutil/test_openwrt.py @@ -17,7 +17,7 @@ import unittest from azurelinuxagent.common.osutil.openwrt import OpenWRTOSUtil -from tests.tools import AgentTestCase +from tests.lib.tools import AgentTestCase from .test_default import osutil_get_dhcp_pid_should_return_a_list_of_pids diff --git a/tests/common/osutil/test_photonos.py b/tests/common/osutil/test_photonos.py index f63e7c2f9c..506025e2eb 100644 --- a/tests/common/osutil/test_photonos.py +++ b/tests/common/osutil/test_photonos.py @@ -18,7 +18,7 @@ import unittest from azurelinuxagent.common.osutil.photonos import PhotonOSUtil -from tests.tools import AgentTestCase +from tests.lib.tools import AgentTestCase from .test_default import osutil_get_dhcp_pid_should_return_a_list_of_pids diff --git a/tests/common/osutil/test_redhat.py b/tests/common/osutil/test_redhat.py index dfd5e07a85..3c4787fc15 100644 --- a/tests/common/osutil/test_redhat.py +++ b/tests/common/osutil/test_redhat.py @@ -17,7 +17,7 @@ import unittest from azurelinuxagent.common.osutil.redhat import Redhat6xOSUtil -from tests.tools import AgentTestCase +from tests.lib.tools import AgentTestCase from .test_default import osutil_get_dhcp_pid_should_return_a_list_of_pids diff --git a/tests/common/osutil/test_suse.py b/tests/common/osutil/test_suse.py index 8fd6141bea..1e752ca72e 100644 --- a/tests/common/osutil/test_suse.py +++ b/tests/common/osutil/test_suse.py @@ -17,7 +17,7 @@ import unittest from azurelinuxagent.common.osutil.suse import SUSE11OSUtil -from tests.tools import 
AgentTestCase +from tests.lib.tools import AgentTestCase from .test_default import osutil_get_dhcp_pid_should_return_a_list_of_pids diff --git a/tests/common/osutil/test_ubuntu.py b/tests/common/osutil/test_ubuntu.py index f386fb7c76..24ce7b7f6b 100644 --- a/tests/common/osutil/test_ubuntu.py +++ b/tests/common/osutil/test_ubuntu.py @@ -18,7 +18,7 @@ import unittest from azurelinuxagent.common.osutil.ubuntu import Ubuntu12OSUtil, Ubuntu18OSUtil -from tests.tools import AgentTestCase +from tests.lib.tools import AgentTestCase from .test_default import osutil_get_dhcp_pid_should_return_a_list_of_pids diff --git a/tests/distro/__init__.py b/tests/common/protocol/__init__.py similarity index 100% rename from tests/distro/__init__.py rename to tests/common/protocol/__init__.py diff --git a/tests/protocol/test_datacontract.py b/tests/common/protocol/test_datacontract.py similarity index 100% rename from tests/protocol/test_datacontract.py rename to tests/common/protocol/test_datacontract.py diff --git a/tests/protocol/test_extensions_goal_state_from_extensions_config.py b/tests/common/protocol/test_extensions_goal_state_from_extensions_config.py similarity index 85% rename from tests/protocol/test_extensions_goal_state_from_extensions_config.py rename to tests/common/protocol/test_extensions_goal_state_from_extensions_config.py index 5af0aa288f..61380a46f7 100644 --- a/tests/protocol/test_extensions_goal_state_from_extensions_config.py +++ b/tests/common/protocol/test_extensions_goal_state_from_extensions_config.py @@ -2,20 +2,20 @@ # Licensed under the Apache License. 
from azurelinuxagent.common.AgentGlobals import AgentGlobals from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateChannel -from tests.protocol.mocks import mockwiredata, mock_wire_protocol -from tests.tools import AgentTestCase +from tests.lib.mock_wire_protocol import wire_protocol_data, mock_wire_protocol +from tests.lib.tools import AgentTestCase class ExtensionsGoalStateFromExtensionsConfigTestCase(AgentTestCase): def test_it_should_parse_in_vm_metadata(self): - with mock_wire_protocol(mockwiredata.DATA_FILE_IN_VM_META_DATA) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_IN_VM_META_DATA) as protocol: extensions_goal_state = protocol.get_goal_state().extensions_goal_state self.assertEqual("555e551c-600e-4fb4-90ba-8ab8ec28eccc", extensions_goal_state.activity_id, "Incorrect activity Id") self.assertEqual("400de90b-522e-491f-9d89-ec944661f531", extensions_goal_state.correlation_id, "Incorrect correlation Id") self.assertEqual('2020-11-09T17:48:50.412125Z', extensions_goal_state.created_on_timestamp, "Incorrect GS Creation time") def test_it_should_use_default_values_when_in_vm_metadata_is_missing(self): - data_file = mockwiredata.DATA_FILE.copy() + data_file = wire_protocol_data.DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf-no_gs_metadata.xml" with mock_wire_protocol(data_file) as protocol: extensions_goal_state = protocol.get_goal_state().extensions_goal_state @@ -24,14 +24,14 @@ def test_it_should_use_default_values_when_in_vm_metadata_is_missing(self): self.assertEqual('1900-01-01T00:00:00.000000Z', extensions_goal_state.created_on_timestamp, "Incorrect GS Creation time") def test_it_should_use_default_values_when_in_vm_metadata_is_invalid(self): - with mock_wire_protocol(mockwiredata.DATA_FILE_INVALID_VM_META_DATA) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_INVALID_VM_META_DATA) as protocol: extensions_goal_state = protocol.get_goal_state().extensions_goal_state 
self.assertEqual(AgentGlobals.GUID_ZERO, extensions_goal_state.activity_id, "Incorrect activity Id") self.assertEqual(AgentGlobals.GUID_ZERO, extensions_goal_state.correlation_id, "Incorrect correlation Id") self.assertEqual('1900-01-01T00:00:00.000000Z', extensions_goal_state.created_on_timestamp, "Incorrect GS Creation time") def test_it_should_parse_missing_status_upload_blob_as_none(self): - data_file = mockwiredata.DATA_FILE.copy() + data_file = wire_protocol_data.DATA_FILE.copy() data_file["ext_conf"] = "hostgaplugin/ext_conf-no_status_upload_blob.xml" with mock_wire_protocol(data_file) as protocol: extensions_goal_state = protocol.get_goal_state().extensions_goal_state @@ -40,14 +40,14 @@ def test_it_should_parse_missing_status_upload_blob_as_none(self): self.assertEqual("BlockBlob", extensions_goal_state.status_upload_blob_type, "Expected status upload blob to be Block") def test_it_should_default_to_block_blob_when_the_status_blob_type_is_not_valid(self): - data_file = mockwiredata.DATA_FILE.copy() + data_file = wire_protocol_data.DATA_FILE.copy() data_file["ext_conf"] = "hostgaplugin/ext_conf-invalid_blob_type.xml" with mock_wire_protocol(data_file) as protocol: extensions_goal_state = protocol.get_goal_state().extensions_goal_state self.assertEqual("BlockBlob", extensions_goal_state.status_upload_blob_type, 'Expected BlockBlob for an invalid statusBlobType') def test_it_should_parse_empty_depends_on_as_dependency_level_0(self): - data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() + data_file = wire_protocol_data.DATA_FILE_VM_SETTINGS.copy() data_file["vm_settings"] = "hostgaplugin/vm_settings-empty_depends_on.json" data_file["ext_conf"] = "hostgaplugin/ext_conf-empty_depends_on.xml" with mock_wire_protocol(data_file) as protocol: @@ -56,7 +56,7 @@ def test_it_should_parse_empty_depends_on_as_dependency_level_0(self): self.assertEqual(0, extensions[0].settings[0].dependencyLevel, "Incorrect dependencyLevel") def 
test_its_source_channel_should_be_wire_server(self): - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: extensions_goal_state = protocol.get_goal_state().extensions_goal_state self.assertEqual(GoalStateChannel.WireServer, extensions_goal_state.channel, "The channel is incorrect") diff --git a/tests/protocol/test_extensions_goal_state_from_vm_settings.py b/tests/common/protocol/test_extensions_goal_state_from_vm_settings.py similarity index 93% rename from tests/protocol/test_extensions_goal_state_from_vm_settings.py rename to tests/common/protocol/test_extensions_goal_state_from_vm_settings.py index 1100b05bf9..bea1063f70 100644 --- a/tests/protocol/test_extensions_goal_state_from_vm_settings.py +++ b/tests/common/protocol/test_extensions_goal_state_from_vm_settings.py @@ -5,13 +5,13 @@ from azurelinuxagent.common.protocol.goal_state import GoalState from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateChannel from azurelinuxagent.common.protocol.extensions_goal_state_from_vm_settings import _CaseFoldedDict -from tests.protocol.mocks import mockwiredata, mock_wire_protocol -from tests.tools import AgentTestCase +from tests.lib.mock_wire_protocol import wire_protocol_data, mock_wire_protocol +from tests.lib.tools import AgentTestCase class ExtensionsGoalStateFromVmSettingsTestCase(AgentTestCase): def test_it_should_parse_vm_settings(self): - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_VM_SETTINGS) as protocol: extensions_goal_state = protocol.get_goal_state().extensions_goal_state def assert_property(name, value): @@ -49,13 +49,13 @@ def assert_property(name, value): self.assertEqual(1, extensions_goal_state.extensions[3].settings[1].dependencyLevel, "Incorrect dependency level (multi-config)") def test_it_should_parse_requested_version_properly(self): - with 
mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_VM_SETTINGS) as protocol: goal_state = GoalState(protocol.client) families = goal_state.extensions_goal_state.agent_families for family in families: self.assertEqual(family.requested_version_string, "0.0.0.0", "Version should be None") - data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() + data_file = wire_protocol_data.DATA_FILE_VM_SETTINGS.copy() data_file["vm_settings"] = "hostgaplugin/vm_settings-requested_version.json" with mock_wire_protocol(data_file) as protocol: protocol.mock_wire_data.set_etag(888) @@ -65,7 +65,7 @@ def test_it_should_parse_requested_version_properly(self): self.assertEqual(family.requested_version_string, "9.9.9.9", "Version should be 9.9.9.9") def test_it_should_parse_missing_status_upload_blob_as_none(self): - data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() + data_file = wire_protocol_data.DATA_FILE_VM_SETTINGS.copy() data_file["vm_settings"] = "hostgaplugin/vm_settings-no_status_upload_blob.json" with mock_wire_protocol(data_file) as protocol: extensions_goal_state = protocol.get_goal_state().extensions_goal_state @@ -74,7 +74,7 @@ def test_it_should_parse_missing_status_upload_blob_as_none(self): self.assertEqual("BlockBlob", extensions_goal_state.status_upload_blob_type, "Expected status upload blob to be Block") def test_it_should_parse_missing_agent_manifests_as_empty(self): - data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() + data_file = wire_protocol_data.DATA_FILE_VM_SETTINGS.copy() data_file["vm_settings"] = "hostgaplugin/vm_settings-no_manifests.json" with mock_wire_protocol(data_file) as protocol: extensions_goal_state = protocol.get_goal_state().extensions_goal_state @@ -82,7 +82,7 @@ def test_it_should_parse_missing_agent_manifests_as_empty(self): self.assertListEqual([], extensions_goal_state.agent_families[0].uris, "Expected an empty list of agent manifests") def 
test_it_should_parse_missing_extension_manifests_as_empty(self): - data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() + data_file = wire_protocol_data.DATA_FILE_VM_SETTINGS.copy() data_file["vm_settings"] = "hostgaplugin/vm_settings-no_manifests.json" with mock_wire_protocol(data_file) as protocol: extensions_goal_state = protocol.get_goal_state().extensions_goal_state @@ -98,7 +98,7 @@ def test_it_should_parse_missing_extension_manifests_as_empty(self): extensions_goal_state.extensions[2].manifest_uris, "Incorrect list of manifests for {0}".format(extensions_goal_state.extensions[2])) def test_it_should_default_to_block_blob_when_the_status_blob_type_is_not_valid(self): - data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() + data_file = wire_protocol_data.DATA_FILE_VM_SETTINGS.copy() data_file["vm_settings"] = "hostgaplugin/vm_settings-invalid_blob_type.json" with mock_wire_protocol(data_file) as protocol: extensions_goal_state = protocol.get_goal_state().extensions_goal_state @@ -106,7 +106,7 @@ def test_it_should_default_to_block_blob_when_the_status_blob_type_is_not_valid( self.assertEqual("BlockBlob", extensions_goal_state.status_upload_blob_type, 'Expected BlockBlob for an invalid statusBlobType') def test_its_source_channel_should_be_host_ga_plugin(self): - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_VM_SETTINGS) as protocol: extensions_goal_state = protocol.get_goal_state().extensions_goal_state self.assertEqual(GoalStateChannel.HostGAPlugin, extensions_goal_state.channel, "The channel is incorrect") diff --git a/tests/protocol/test_goal_state.py b/tests/common/protocol/test_goal_state.py similarity index 93% rename from tests/protocol/test_goal_state.py rename to tests/common/protocol/test_goal_state.py index 61653b2af6..3f00a62d8b 100644 --- a/tests/protocol/test_goal_state.py +++ b/tests/common/protocol/test_goal_state.py @@ -19,15 +19,15 @@ from 
azurelinuxagent.common.exception import ProtocolError from azurelinuxagent.common.utils import fileutil from azurelinuxagent.common.utils.archive import ARCHIVE_DIRECTORY_NAME -from tests.protocol.mocks import mock_wire_protocol, MockHttpResponse -from tests.protocol import mockwiredata -from tests.protocol.HttpRequestPredicates import HttpRequestPredicates -from tests.tools import AgentTestCase, patch, load_data +from tests.lib.mock_wire_protocol import mock_wire_protocol, MockHttpResponse +from tests.lib import wire_protocol_data +from tests.lib.http_request_predicates import HttpRequestPredicates +from tests.lib.tools import AgentTestCase, patch, load_data class GoalStateTestCase(AgentTestCase, HttpRequestPredicates): def test_it_should_use_vm_settings_by_default(self): - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_VM_SETTINGS) as protocol: protocol.mock_wire_data.set_etag(888) extensions_goal_state = GoalState(protocol.client).extensions_goal_state self.assertTrue( @@ -41,7 +41,7 @@ def _assert_is_extensions_goal_state_from_extensions_config(self, extensions_goa def test_it_should_use_extensions_config_when_fast_track_is_disabled(self): with patch("azurelinuxagent.common.conf.get_enable_fast_track", return_value=False): - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_VM_SETTINGS) as protocol: self._assert_is_extensions_goal_state_from_extensions_config(GoalState(protocol.client).extensions_goal_state) def test_it_should_use_extensions_config_when_fast_track_is_not_supported(self): @@ -50,11 +50,11 @@ def http_get_handler(url, *_, **__): return MockHttpResponse(httpclient.NOT_FOUND) return None - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS, http_get_handler=http_get_handler) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_VM_SETTINGS, 
http_get_handler=http_get_handler) as protocol: self._assert_is_extensions_goal_state_from_extensions_config(GoalState(protocol.client).extensions_goal_state) def test_it_should_use_extensions_config_when_the_host_ga_plugin_version_is_not_supported(self): - data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() + data_file = wire_protocol_data.DATA_FILE_VM_SETTINGS.copy() data_file["vm_settings"] = "hostgaplugin/vm_settings-unsupported_version.json" with mock_wire_protocol(data_file) as protocol: @@ -63,7 +63,7 @@ def test_it_should_use_extensions_config_when_the_host_ga_plugin_version_is_not_ def test_it_should_retry_get_vm_settings_on_resource_gone_error(self): # Requests to the hostgaplugin incude the Container ID and the RoleConfigName as headers; when the hostgaplugin returns GONE (HTTP status 410) the agent # needs to get a new goal state and retry the request with updated values for the Container ID and RoleConfigName headers. - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_VM_SETTINGS) as protocol: # Do not mock the vmSettings request at the level of azurelinuxagent.common.utils.restutil.http_request. The GONE status is handled # in the internal _http_request, which we mock below. 
protocol.do_not_mock = lambda method, url: method == "GET" and self.is_host_plugin_vm_settings_request(url) @@ -89,8 +89,8 @@ def http_get_vm_settings(_method, _host, _relative_url, _timeout, **kwargs): self.assertEqual("GET_VM_SETTINGS_TEST_ROLE_CONFIG_NAME", request_headers[1][hostplugin._HEADER_HOST_CONFIG_NAME], "The retry request did not include the expected header for the RoleConfigName") def test_fetch_goal_state_should_raise_on_incomplete_goal_state(self): - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: - protocol.mock_wire_data.data_files = mockwiredata.DATA_FILE_NOOP_GS + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: + protocol.mock_wire_data.data_files = wire_protocol_data.DATA_FILE_NOOP_GS protocol.mock_wire_data.reload() protocol.mock_wire_data.set_incarnation(2) @@ -101,14 +101,14 @@ def test_fetch_goal_state_should_raise_on_incomplete_goal_state(self): def test_fetching_the_goal_state_should_save_the_shared_config(self): # SharedConfig.xml is used by other components (Azsec and Singularity/HPC Infiniband); verify that we do not delete it - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_VM_SETTINGS) as protocol: _ = GoalState(protocol.client) shared_config = os.path.join(conf.get_lib_dir(), 'SharedConfig.xml') self.assertTrue(os.path.exists(shared_config), "{0} should have been created".format(shared_config)) def test_fetching_the_goal_state_should_save_the_goal_state_to_the_history_directory(self): - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_VM_SETTINGS) as protocol: protocol.mock_wire_data.set_incarnation(999) protocol.mock_wire_data.set_etag(888) @@ -132,7 +132,7 @@ def _assert_directory_contents(self, directory, expected_files): self.assertEqual(expected_files, actual_files, "The expected files were not saved to {0}".format(directory)) def 
test_update_should_create_new_history_subdirectories(self): - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_VM_SETTINGS) as protocol: protocol.mock_wire_data.set_incarnation(123) protocol.mock_wire_data.set_etag(654) @@ -160,7 +160,7 @@ def http_get_handler(url, *_, **__): self._find_history_subdirectory("234-987"), ["VmSettings.json", "Certificates.json"]) def test_it_should_redact_the_protected_settings_when_saving_to_the_history_directory(self): - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_VM_SETTINGS) as protocol: protocol.mock_wire_data.set_incarnation(888) protocol.mock_wire_data.set_etag(888) @@ -195,11 +195,11 @@ def test_it_should_redact_the_protected_settings_when_saving_to_the_history_dire "Could not find the expected number of redacted settings in {0}.\nExpected {1}.\n{2}".format(file_name, len(protected_settings), file_contents)) def test_it_should_save_vm_settings_on_parse_errors(self): - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_VM_SETTINGS) as protocol: invalid_vm_settings_file = "hostgaplugin/vm_settings-parse_error.json" - data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() + data_file = wire_protocol_data.DATA_FILE_VM_SETTINGS.copy() data_file["vm_settings"] = invalid_vm_settings_file - protocol.mock_wire_data = mockwiredata.WireProtocolData(data_file) + protocol.mock_wire_data = wire_protocol_data.WireProtocolData(data_file) with self.assertRaises(ProtocolError): # the parsing error will cause an exception _ = GoalState(protocol.client) @@ -228,7 +228,7 @@ def _create_protocol_ws_and_hgap_in_sync(): Creates a mock protocol in which the HostGAPlugin and the WireServer are in sync, both of them returning the same Fabric goal state. 
""" - data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() + data_file = wire_protocol_data.DATA_FILE_VM_SETTINGS.copy() with mock_wire_protocol(data_file) as protocol: timestamp = datetime.datetime.utcnow() @@ -372,7 +372,7 @@ def http_get_handler(url, *_, **__): self.assertTrue(goal_state.extensions_goal_state.is_outdated, "The updated goal state should be marked as outdated") def test_it_should_raise_when_the_tenant_certificate_is_missing(self): - data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() + data_file = wire_protocol_data.DATA_FILE_VM_SETTINGS.copy() with mock_wire_protocol(data_file) as protocol: data_file["vm_settings"] = "hostgaplugin/vm_settings-missing_cert.json" @@ -386,7 +386,7 @@ def test_it_should_raise_when_the_tenant_certificate_is_missing(self): self.assertIn(expected_message, str(context.exception)) def test_it_should_download_certs_on_a_new_fast_track_goal_state(self): - data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() + data_file = wire_protocol_data.DATA_FILE_VM_SETTINGS.copy() with mock_wire_protocol(data_file) as protocol: goal_state = GoalState(protocol.client) @@ -410,7 +410,7 @@ def test_it_should_download_certs_on_a_new_fast_track_goal_state(self): self.assertTrue(os.path.isfile(crt_path)) def test_it_should_download_certs_on_a_new_fabric_goal_state(self): - data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() + data_file = wire_protocol_data.DATA_FILE_VM_SETTINGS.copy() with mock_wire_protocol(data_file) as protocol: protocol.mock_wire_data.set_vm_settings_source(GoalStateSource.Fabric) @@ -457,7 +457,7 @@ def http_get_handler(url, *_, **__): return None http_get_handler.certificate_requests = 0 - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_VM_SETTINGS) as protocol: protocol.set_http_handlers(http_get_handler=http_get_handler) protocol.mock_wire_data.reset_call_counts() diff --git a/tests/protocol/test_healthservice.py 
b/tests/common/protocol/test_healthservice.py similarity index 99% rename from tests/protocol/test_healthservice.py rename to tests/common/protocol/test_healthservice.py index cb523a78f8..d9ba17755d 100644 --- a/tests/protocol/test_healthservice.py +++ b/tests/common/protocol/test_healthservice.py @@ -18,8 +18,8 @@ from azurelinuxagent.common.exception import HttpError from azurelinuxagent.common.protocol.healthservice import Observation, HealthService from azurelinuxagent.common.utils import restutil -from tests.protocol.test_hostplugin import MockResponse -from tests.tools import AgentTestCase, patch +from tests.common.protocol.test_hostplugin import MockResponse +from tests.lib.tools import AgentTestCase, patch class TestHealthService(AgentTestCase): diff --git a/tests/protocol/test_hostplugin.py b/tests/common/protocol/test_hostplugin.py similarity index 98% rename from tests/protocol/test_hostplugin.py rename to tests/common/protocol/test_hostplugin.py index 47e6871bea..4c97c73fd9 100644 --- a/tests/protocol/test_hostplugin.py +++ b/tests/common/protocol/test_hostplugin.py @@ -34,10 +34,10 @@ from azurelinuxagent.common.protocol.goal_state import GoalState from azurelinuxagent.common.utils import restutil from azurelinuxagent.common.version import AGENT_VERSION, AGENT_NAME -from tests.protocol.mocks import mock_wire_protocol, mockwiredata, MockHttpResponse -from tests.protocol.HttpRequestPredicates import HttpRequestPredicates -from tests.protocol.mockwiredata import DATA_FILE, DATA_FILE_NO_EXT -from tests.tools import AgentTestCase, PY_VERSION_MAJOR, Mock, patch +from tests.lib.mock_wire_protocol import mock_wire_protocol, wire_protocol_data, MockHttpResponse +from tests.lib.http_request_predicates import HttpRequestPredicates +from tests.lib.wire_protocol_data import DATA_FILE, DATA_FILE_NO_EXT +from tests.lib.tools import AgentTestCase, PY_VERSION_MAJOR, Mock, patch hostplugin_status_url = "http://168.63.129.16:32526/status" @@ -852,7 +852,7 @@ def 
http_get_handler(url, *_, **__): return MockHttpResponse(httpclient.INTERNAL_SERVER_ERROR, body="TEST ERROR") return None - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_VM_SETTINGS) as protocol: protocol.set_http_handlers(http_get_handler=http_get_handler) with self.assertRaisesRegexCM(ProtocolError, r'GET vmSettings \[correlation ID: .* eTag: .*\]: \[HTTP Failed\] \[500: None].*TEST ERROR.*'): protocol.client.get_host_plugin().fetch_vm_settings() @@ -875,7 +875,7 @@ def http_get_handler(url, *_, **__): return mock_response return None - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS, http_get_handler=http_get_handler) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_VM_SETTINGS, http_get_handler=http_get_handler) as protocol: mock_response = MockHttpResponse(httpclient.INTERNAL_SERVER_ERROR) self._fetch_vm_settings_ignoring_errors(protocol) @@ -913,7 +913,7 @@ def http_get_handler(url, *_, **__): self.assertEqual(expected, summary, "The count of errors is incorrect") def test_it_should_limit_the_number_of_errors_it_reports(self): - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_VM_SETTINGS) as protocol: def http_get_handler(url, *_, **__): if self.is_host_plugin_vm_settings_request(url): return MockHttpResponse(httpclient.BAD_GATEWAY) # HostGAPlugin returns 502 for internal errors @@ -941,7 +941,7 @@ def get_telemetry_messages(): self.assertEqual(1, len(telemetry_messages), "Expected additional errors to be reported to telemetry in the next period (got: {0})".format(telemetry_messages)) def test_it_should_stop_issuing_vm_settings_requests_when_api_is_not_supported(self): - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_VM_SETTINGS) as protocol: def http_get_handler(url, *_, **__): if 
self.is_host_plugin_vm_settings_request(url): return MockHttpResponse(httpclient.NOT_FOUND) # HostGAPlugin returns 404 if the API is not supported @@ -969,7 +969,7 @@ def http_get_handler(url, *_, **__): return MockHttpResponse(httpclient.NOT_FOUND) # HostGAPlugin returns 404 if the API is not supported return None - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_VM_SETTINGS) as protocol: host_ga_plugin = protocol.client.get_host_plugin() # Do an initial call to ensure the API is supported @@ -984,7 +984,7 @@ def http_get_handler(url, *_, **__): self.assertEqual(vm_settings.created_on_timestamp, cm.exception.timestamp) def test_it_should_save_the_timestamp_of_the_most_recent_fast_track_goal_state(self): - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_VM_SETTINGS) as protocol: host_ga_plugin = protocol.client.get_host_plugin() vm_settings, _ = host_ga_plugin.fetch_vm_settings() diff --git a/tests/protocol/test_image_info_matcher.py b/tests/common/protocol/test_image_info_matcher.py similarity index 100% rename from tests/protocol/test_image_info_matcher.py rename to tests/common/protocol/test_image_info_matcher.py diff --git a/tests/protocol/test_imds.py b/tests/common/protocol/test_imds.py similarity index 99% rename from tests/protocol/test_imds.py rename to tests/common/protocol/test_imds.py index 1f8e428c1f..efc705ffab 100644 --- a/tests/protocol/test_imds.py +++ b/tests/common/protocol/test_imds.py @@ -26,8 +26,8 @@ from azurelinuxagent.common.exception import HttpError, ResourceGoneError from azurelinuxagent.common.future import ustr, httpclient from azurelinuxagent.common.utils import restutil -from tests.protocol.mocks import MockHttpResponse -from tests.tools import AgentTestCase, data_dir, MagicMock, Mock, patch +from tests.lib.mock_wire_protocol import MockHttpResponse +from tests.lib.tools 
import AgentTestCase, data_dir, MagicMock, Mock, patch def get_mock_compute_response(): diff --git a/tests/protocol/test_metadata_server_migration_util.py b/tests/common/protocol/test_metadata_server_migration_util.py similarity index 99% rename from tests/protocol/test_metadata_server_migration_util.py rename to tests/common/protocol/test_metadata_server_migration_util.py index 5950b43f17..70ef053335 100644 --- a/tests/protocol/test_metadata_server_migration_util.py +++ b/tests/common/protocol/test_metadata_server_migration_util.py @@ -27,7 +27,7 @@ _LEGACY_METADATA_SERVER_P7B_FILE_NAME, \ _KNOWN_METADATASERVER_IP from azurelinuxagent.common.utils.restutil import KNOWN_WIRESERVER_IP -from tests.tools import AgentTestCase, patch, MagicMock +from tests.lib.tools import AgentTestCase, patch, MagicMock class TestMetadataServerMigrationUtil(AgentTestCase): @patch('azurelinuxagent.common.conf.get_lib_dir') diff --git a/tests/protocol/test_protocol_util.py b/tests/common/protocol/test_protocol_util.py similarity index 99% rename from tests/protocol/test_protocol_util.py rename to tests/common/protocol/test_protocol_util.py index 3529e95d42..06683cef25 100644 --- a/tests/protocol/test_protocol_util.py +++ b/tests/common/protocol/test_protocol_util.py @@ -30,7 +30,7 @@ from azurelinuxagent.common.protocol.util import get_protocol_util, ProtocolUtil, PROTOCOL_FILE_NAME, \ WIRE_PROTOCOL_NAME, ENDPOINT_FILE_NAME from azurelinuxagent.common.utils.restutil import KNOWN_WIRESERVER_IP -from tests.tools import AgentTestCase, MagicMock, Mock, patch, clear_singleton_instances +from tests.lib.tools import AgentTestCase, MagicMock, Mock, patch, clear_singleton_instances @patch("time.sleep") diff --git a/tests/protocol/test_wire.py b/tests/common/protocol/test_wire.py similarity index 94% rename from tests/protocol/test_wire.py rename to tests/common/protocol/test_wire.py index cc73b0fb45..9ce8339e94 100644 --- a/tests/protocol/test_wire.py +++ b/tests/common/protocol/test_wire.py @@ 
-40,12 +40,12 @@ from azurelinuxagent.common.version import CURRENT_VERSION, DISTRO_NAME, DISTRO_VERSION from azurelinuxagent.ga.exthandlers import get_exthandlers_handler from tests.ga.test_monitor import random_generator -from tests.protocol import mockwiredata -from tests.protocol.mocks import mock_wire_protocol, MockHttpResponse -from tests.protocol.HttpRequestPredicates import HttpRequestPredicates -from tests.protocol.mockwiredata import DATA_FILE_NO_EXT, DATA_FILE -from tests.protocol.mockwiredata import WireProtocolData -from tests.tools import patch, AgentTestCase, load_bin_data +from tests.lib import wire_protocol_data +from tests.lib.mock_wire_protocol import mock_wire_protocol, MockHttpResponse +from tests.lib.http_request_predicates import HttpRequestPredicates +from tests.lib.wire_protocol_data import DATA_FILE_NO_EXT, DATA_FILE +from tests.lib.wire_protocol_data import WireProtocolData +from tests.lib.tools import patch, AgentTestCase, load_bin_data data_with_bom = b'\xef\xbb\xbfhehe' testurl = 'http://foo' @@ -120,37 +120,37 @@ def _yield_events(): def test_getters(self, *args): """Normal case""" - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE) self._test_getters(test_data, True, *args) def test_getters_no_ext(self, *args): """Provision with agent is not checked""" - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_NO_EXT) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_NO_EXT) self._test_getters(test_data, True, *args) def test_getters_ext_no_settings(self, *args): """Extensions without any settings""" - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_EXT_NO_SETTINGS) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_EXT_NO_SETTINGS) self._test_getters(test_data, True, *args) def test_getters_ext_no_public(self, *args): """Extensions without any public 
settings""" - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_EXT_NO_PUBLIC) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_EXT_NO_PUBLIC) self._test_getters(test_data, True, *args) def test_getters_ext_no_cert_format(self, *args): """Certificate format not specified""" - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_NO_CERT_FORMAT) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_NO_CERT_FORMAT) self._test_getters(test_data, True, *args) def test_getters_ext_cert_format_not_pfx(self, *args): """Certificate format is not Pkcs7BlobWithPfxContents specified""" - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_CERT_FORMAT_NOT_PFX) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_CERT_FORMAT_NOT_PFX) self._test_getters(test_data, False, *args) @patch("azurelinuxagent.common.protocol.healthservice.HealthService.report_host_plugin_extension_artifact") def test_getters_with_stale_goal_state(self, patch_report, *args): - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE) test_data.emulate_stale_goal_state = True self._test_getters(test_data, True, *args) @@ -202,7 +202,7 @@ def test_call_storage_kwargs(self, *args): # pylint: disable=unused-argument self.assertTrue(c == (True if i != 3 else False)) def test_status_blob_parsing(self, *args): # pylint: disable=unused-argument - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: extensions_goal_state = protocol.get_goal_state().extensions_goal_state self.assertIsInstance(extensions_goal_state, ExtensionsGoalStateFromExtensionsConfig) self.assertEqual(extensions_goal_state.status_upload_blob, @@ -212,7 +212,7 @@ def test_status_blob_parsing(self, *args): # pylint: disable=unused-argument 
self.assertEqual(protocol.get_goal_state().extensions_goal_state.status_upload_blob_type, u'BlockBlob') def test_get_host_ga_plugin(self, *args): # pylint: disable=unused-argument - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: host_plugin = protocol.client.get_host_plugin() goal_state = protocol.client.get_goal_state() self.assertEqual(goal_state.container_id, host_plugin.container_id) @@ -223,7 +223,7 @@ def http_put_handler(url, *_, **__): # pylint: disable=inconsistent-return-stat if protocol.get_endpoint() in url and url.endswith('/status'): return MockHttpResponse(200) - with mock_wire_protocol(mockwiredata.DATA_FILE, http_put_handler=http_put_handler) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE, http_put_handler=http_put_handler) as protocol: HostPluginProtocol.is_default_channel = False protocol.client.status_blob.vm_status = VMStatus(message="Ready", status="Ready") @@ -254,14 +254,14 @@ def test_upload_status_blob_reports_prepare_error(self, *_): self.assertEqual(1, mock_prepare.call_count) def test_get_in_vm_artifacts_profile_blob_not_available(self, *_): - data_file = mockwiredata.DATA_FILE.copy() + data_file = wire_protocol_data.DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_in_vm_empty_artifacts_profile.xml" with mock_wire_protocol(data_file) as protocol: self.assertFalse(protocol.get_goal_state().extensions_goal_state.on_hold) def test_it_should_set_on_hold_to_false_when_the_in_vm_artifacts_profile_is_not_valid(self, *_): - with mock_wire_protocol(mockwiredata.DATA_FILE_IN_VM_ARTIFACTS_PROFILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_IN_VM_ARTIFACTS_PROFILE) as protocol: extensions_on_hold = protocol.get_goal_state().extensions_goal_state.on_hold self.assertTrue(extensions_on_hold, "Extensions should be on hold in the test data") @@ -485,7 +485,7 @@ def 
test_get_ext_conf_without_extensions_should_retrieve_vmagent_manifests_info( # Basic test for extensions_goal_state when extensions are not present in the config. The test verifies that # extensions_goal_state fetches the correct data by comparing the returned data with the test data provided the # mock_wire_protocol. - with mock_wire_protocol(mockwiredata.DATA_FILE_NO_EXT) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_NO_EXT) as protocol: extensions_goal_state = protocol.get_goal_state().extensions_goal_state ext_handlers_names = [ext_handler.name for ext_handler in extensions_goal_state.extensions] @@ -500,7 +500,7 @@ def test_get_ext_conf_without_extensions_should_retrieve_vmagent_manifests_info( def test_get_ext_conf_with_extensions_should_retrieve_ext_handlers_and_vmagent_manifests_info(self): # Basic test for extensions_goal_state when extensions are present in the config. The test verifies that extensions_goal_state # fetches the correct data by comparing the returned data with the test data provided the mock_wire_protocol. 
- with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: extensions_goal_state = protocol.get_goal_state().extensions_goal_state ext_handlers_names = [ext_handler.name for ext_handler in extensions_goal_state.extensions] @@ -527,7 +527,7 @@ def http_get_handler(url, *_, **__): return MockHttpResponse(200, body=load_bin_data("ga/fake_extension.zip")) return None - with mock_wire_protocol(mockwiredata.DATA_FILE, http_get_handler=http_get_handler) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE, http_get_handler=http_get_handler) as protocol: protocol.client.download_zip_package("extension package", [extension_url], target_file, target_directory, use_verify_header=False) self.assertTrue(os.path.exists(target_directory), "The extension package was not downloaded") @@ -545,7 +545,7 @@ def http_get_handler(url, *_, **__): self.fail('The host channel should not have been used') return None - with mock_wire_protocol(mockwiredata.DATA_FILE, http_get_handler=http_get_handler) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE, http_get_handler=http_get_handler) as protocol: HostPluginProtocol.is_default_channel = False protocol.client.download_zip_package("extension package", [extension_url], target_file, target_directory, use_verify_header=False) @@ -568,7 +568,7 @@ def http_get_handler(url, *_, **kwargs): return MockHttpResponse(200, body=load_bin_data("ga/fake_extension.zip")) return None - with mock_wire_protocol(mockwiredata.DATA_FILE, http_get_handler=http_get_handler) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE, http_get_handler=http_get_handler) as protocol: HostPluginProtocol.is_default_channel = False protocol.client.download_zip_package("extension package", [extension_url], target_file, target_directory, use_verify_header=False) @@ -599,7 +599,7 @@ def http_get_handler(url, *_, **kwargs): return None 
http_get_handler.goal_state_requests = 0 - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: HostPluginProtocol.is_default_channel = False try: @@ -633,7 +633,7 @@ def http_get_handler(url, *_, **kwargs): protocol.track_url(url) # keep track of goal state requests return None - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: HostPluginProtocol.is_default_channel = False # initialization of the host plugin triggers a request for the goal state; do it here before we start tracking those requests. @@ -661,7 +661,7 @@ def http_get_handler(url, *_, **kwargs): return MockHttpResponse(status=200, body=b"NOT A ZIP") return None - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: protocol.set_http_handlers(http_get_handler=http_get_handler) with self.assertRaises(ExtensionDownloadError): @@ -681,7 +681,7 @@ def http_get_handler(url, *_, **__): self.fail('The Host GA Plugin should not have been invoked') return None - with mock_wire_protocol(mockwiredata.DATA_FILE, http_get_handler=http_get_handler) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE, http_get_handler=http_get_handler) as protocol: HostPluginProtocol.is_default_channel = False manifest = protocol.client.fetch_manifest("test", [manifest_url], use_verify_header=False) @@ -703,7 +703,7 @@ def http_get_handler(url, *_, **kwargs): return MockHttpResponse(200, body=manifest_xml.encode('utf-8')) return None - with mock_wire_protocol(mockwiredata.DATA_FILE, http_get_handler=http_get_handler) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE, http_get_handler=http_get_handler) as protocol: HostPluginProtocol.is_default_channel = False try: @@ -736,7 +736,7 @@ def http_get_handler(url, *_, **kwargs): return None 
http_get_handler.goal_state_requests = 0 - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: HostPluginProtocol.is_default_channel = False try: @@ -768,7 +768,7 @@ def http_get_handler(url, *_, **kwargs): return None # Everything fails. Goal state should have been updated and host channel should not have been set as default. - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: HostPluginProtocol.is_default_channel = False # initialization of the host plugin triggers a request for the goal state; do it here before we start @@ -796,7 +796,7 @@ def http_get_handler(url, *_, **__): protocol.track_url(url) return None - with mock_wire_protocol(mockwiredata.DATA_FILE_IN_VM_ARTIFACTS_PROFILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_IN_VM_ARTIFACTS_PROFILE) as protocol: protocol.set_http_handlers(http_get_handler=http_get_handler) HostPluginProtocol.is_default_channel = False @@ -814,7 +814,7 @@ def http_get_handler(url, *_, **kwargs): protocol.track_url(url) return None - with mock_wire_protocol(mockwiredata.DATA_FILE_IN_VM_ARTIFACTS_PROFILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_IN_VM_ARTIFACTS_PROFILE) as protocol: protocol.set_http_handlers(http_get_handler=http_get_handler) HostPluginProtocol.is_default_channel = False @@ -843,7 +843,7 @@ def http_get_handler(url, *_, **kwargs): return None http_get_handler.host_plugin_calls = 0 - with mock_wire_protocol(mockwiredata.DATA_FILE_IN_VM_ARTIFACTS_PROFILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_IN_VM_ARTIFACTS_PROFILE) as protocol: HostPluginProtocol.is_default_channel = False try: @@ -876,7 +876,7 @@ def http_get_handler(url, *_, **kwargs): return None http_get_handler.host_plugin_calls = 0 - with mock_wire_protocol(mockwiredata.DATA_FILE_IN_VM_ARTIFACTS_PROFILE) as protocol: + 
with mock_wire_protocol(wire_protocol_data.DATA_FILE_IN_VM_ARTIFACTS_PROFILE) as protocol: HostPluginProtocol.is_default_channel = False # initialization of the host plugin triggers a request for the goal state; do it here before we start tracking those requests. @@ -917,7 +917,7 @@ def host_func(*_): return direct_func, host_func def test_download_using_appropriate_channel_should_not_invoke_secondary_when_primary_channel_succeeds(self): - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: # Scenario #1: Direct channel default HostPluginProtocol.is_default_channel = False @@ -943,7 +943,7 @@ def test_download_using_appropriate_channel_should_not_invoke_secondary_when_pri self.assertTrue(HostPluginProtocol.is_default_channel) def test_download_using_appropriate_channel_should_not_change_default_channel_if_none_succeeds(self): - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: # Scenario #1: Direct channel is default HostPluginProtocol.is_default_channel = False direct_func, host_func = self._set_and_fail_helper_channel_functions(fail_direct=True, fail_host=True) @@ -969,7 +969,7 @@ def test_download_using_appropriate_channel_should_not_change_default_channel_if self.assertTrue(HostPluginProtocol.is_default_channel) def test_download_using_appropriate_channel_should_change_default_channel_when_secondary_succeeds(self): - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: # Scenario #1: Direct channel is default HostPluginProtocol.is_default_channel = False direct_func, host_func = self._set_and_fail_helper_channel_functions(fail_direct=True, fail_host=False) @@ -1015,7 +1015,7 @@ class UpdateGoalStateTestCase(HttpRequestPredicates, AgentTestCase): """ def 
test_it_should_update_the_goal_state_and_the_host_plugin_when_the_incarnation_changes(self): - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: protocol.client.get_host_plugin() # if the incarnation changes the behavior is the same for forced and non-forced updates @@ -1072,7 +1072,7 @@ def test_it_should_update_the_goal_state_and_the_host_plugin_when_the_incarnatio self.assertEqual(protocol.client.get_host_plugin().role_config_name, new_role_config_name) def test_non_forced_update_should_not_update_the_goal_state_but_should_update_the_host_plugin_when_the_incarnation_does_not_change(self): - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: protocol.client.get_host_plugin() # The container id, role config name and shared config can change without the incarnation changing; capture the initial @@ -1096,7 +1096,7 @@ def test_non_forced_update_should_not_update_the_goal_state_but_should_update_th self.assertEqual(protocol.client.get_host_plugin().role_config_name, new_role_config_name) def test_forced_update_should_update_the_goal_state_and_the_host_plugin_when_the_incarnation_does_not_change(self): - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: protocol.client.get_host_plugin() # The container id, role config name and shared config can change without the incarnation changing @@ -1119,7 +1119,7 @@ def test_forced_update_should_update_the_goal_state_and_the_host_plugin_when_the self.assertEqual(protocol.client.get_host_plugin().role_config_name, new_role_config_name) def test_reset_should_init_provided_goal_state_properties(self): - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: 
protocol.client.reset_goal_state(goal_state_properties=GoalStateProperties.All & ~GoalStateProperties.Certificates) with self.assertRaises(ProtocolError) as context: @@ -1129,7 +1129,7 @@ def test_reset_should_init_provided_goal_state_properties(self): self.assertIn(expected_message, str(context.exception)) def test_reset_should_init_the_goal_state(self): - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: new_container_id = str(uuid.uuid4()) new_role_config_name = str(uuid.uuid4()) protocol.mock_wire_data.set_container_id(new_container_id) @@ -1146,7 +1146,7 @@ class UpdateHostPluginFromGoalStateTestCase(AgentTestCase): Tests for WireClient.update_host_plugin_from_goal_state() """ def test_it_should_update_the_host_plugin_with_or_without_incarnation_changes(self): - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: protocol.client.get_host_plugin() # the behavior should be the same whether the incarnation changes or not diff --git a/tests/common/test_agent_supported_feature.py b/tests/common/test_agent_supported_feature.py index c2d3b1981e..6a49dd8875 100644 --- a/tests/common/test_agent_supported_feature.py +++ b/tests/common/test_agent_supported_feature.py @@ -18,7 +18,7 @@ from azurelinuxagent.common.agent_supported_feature import SupportedFeatureNames, \ get_agent_supported_features_list_for_crp, get_supported_feature_by_name, \ get_agent_supported_features_list_for_extensions -from tests.tools import AgentTestCase, patch +from tests.lib.tools import AgentTestCase, patch class TestAgentSupportedFeature(AgentTestCase): diff --git a/tests/common/test_cgroupapi.py b/tests/common/test_cgroupapi.py index a31d57d722..a7b47b44f6 100644 --- a/tests/common/test_cgroupapi.py +++ b/tests/common/test_cgroupapi.py @@ -26,9 +26,9 @@ from azurelinuxagent.common.cgroupstelemetry import CGroupsTelemetry from 
azurelinuxagent.common.osutil import systemd from azurelinuxagent.common.utils import fileutil -from tests.common.mock_cgroup_environment import mock_cgroup_environment -from tests.tools import AgentTestCase, patch, mock_sleep -from tests.utils.cgroups_tools import CGroupsTools +from tests.lib.mock_cgroup_environment import mock_cgroup_environment +from tests.lib.tools import AgentTestCase, patch, mock_sleep +from tests.lib.cgroups_tools import CGroupsTools class _MockedFileSystemTestCase(AgentTestCase): def setUp(self): diff --git a/tests/common/test_cgroupconfigurator.py b/tests/common/test_cgroupconfigurator.py index 7e2dc45b44..ebf7bac0e0 100644 --- a/tests/common/test_cgroupconfigurator.py +++ b/tests/common/test_cgroupconfigurator.py @@ -37,10 +37,10 @@ AgentMemoryExceededException from azurelinuxagent.common.future import ustr from azurelinuxagent.common.utils import shellutil, fileutil -from tests.common.mock_environment import MockCommand -from tests.common.mock_cgroup_environment import mock_cgroup_environment, UnitFilePaths -from tests.tools import AgentTestCase, patch, mock_sleep, i_am_root, data_dir, is_python_version_26_or_34, skip_if_predicate_true -from tests.utils.miscellaneous_tools import format_processes, wait_for +from tests.lib.mock_environment import MockCommand +from tests.lib.mock_cgroup_environment import mock_cgroup_environment, UnitFilePaths +from tests.lib.tools import AgentTestCase, patch, mock_sleep, i_am_root, data_dir, is_python_version_26_or_34, skip_if_predicate_true +from tests.lib.miscellaneous_tools import format_processes, wait_for class CGroupConfiguratorSystemdTestCase(AgentTestCase): diff --git a/tests/common/test_cgroups.py b/tests/common/test_cgroups.py index 7f549e5b8d..58e179d82a 100644 --- a/tests/common/test_cgroups.py +++ b/tests/common/test_cgroups.py @@ -26,7 +26,7 @@ from azurelinuxagent.common.exception import CGroupsException from azurelinuxagent.common.osutil import get_osutil from azurelinuxagent.common.utils 
import fileutil -from tests.tools import AgentTestCase, patch, data_dir +from tests.lib.tools import AgentTestCase, patch, data_dir def consume_cpu_time(): diff --git a/tests/common/test_cgroupstelemetry.py b/tests/common/test_cgroupstelemetry.py index fe1ff299a3..e9b163437b 100644 --- a/tests/common/test_cgroupstelemetry.py +++ b/tests/common/test_cgroupstelemetry.py @@ -22,7 +22,7 @@ from azurelinuxagent.common.cgroup import CpuCgroup, MemoryCgroup from azurelinuxagent.common.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.common.utils import fileutil -from tests.tools import AgentTestCase, data_dir, patch +from tests.lib.tools import AgentTestCase, data_dir, patch def raise_ioerror(*_): diff --git a/tests/common/test_conf.py b/tests/common/test_conf.py index ebc57ffed9..972b289a79 100644 --- a/tests/common/test_conf.py +++ b/tests/common/test_conf.py @@ -19,7 +19,7 @@ import azurelinuxagent.common.conf as conf from azurelinuxagent.common.utils import fileutil -from tests.tools import AgentTestCase, data_dir +from tests.lib.tools import AgentTestCase, data_dir class TestConf(AgentTestCase): diff --git a/tests/common/test_errorstate.py b/tests/common/test_errorstate.py index 263d95ed7d..c51682b700 100644 --- a/tests/common/test_errorstate.py +++ b/tests/common/test_errorstate.py @@ -2,7 +2,7 @@ from datetime import timedelta, datetime from azurelinuxagent.common.errorstate import ErrorState -from tests.tools import Mock, patch +from tests.lib.tools import Mock, patch class TestErrorState(unittest.TestCase): diff --git a/tests/common/test_event.py b/tests/common/test_event.py index de5ad7353a..28f2e3860d 100644 --- a/tests/common/test_event.py +++ b/tests/common/test_event.py @@ -41,11 +41,11 @@ GuestAgentExtensionEventsSchema, GuestAgentPerfCounterEventsSchema from azurelinuxagent.common.version import CURRENT_AGENT, CURRENT_VERSION, AGENT_EXECUTION_MODE from azurelinuxagent.ga.collect_telemetry_events import _CollectAndEnqueueEvents -from 
tests.protocol import mockwiredata -from tests.protocol.mocks import mock_wire_protocol, MockHttpResponse -from tests.protocol.HttpRequestPredicates import HttpRequestPredicates -from tests.tools import AgentTestCase, data_dir, load_data, patch, skip_if_predicate_true, is_python_version_26_or_34 -from tests.utils.event_logger_tools import EventLoggerTools +from tests.lib import wire_protocol_data +from tests.lib.mock_wire_protocol import mock_wire_protocol, MockHttpResponse +from tests.lib.http_request_predicates import HttpRequestPredicates +from tests.lib.tools import AgentTestCase, data_dir, load_data, patch, skip_if_predicate_true, is_python_version_26_or_34 +from tests.lib.event_logger_tools import EventLoggerTools class TestEvent(HttpRequestPredicates, AgentTestCase): @@ -155,7 +155,7 @@ def create_event_and_return_container_id(): # pylint: disable=inconsistent-retu self.fail("Could not find Contained ID on event") - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: contained_id = create_event_and_return_container_id() # The expect value comes from DATA_FILE self.assertEqual(contained_id, 'c6d5526c-5ac2-4200-b6e2-56f2b70c5ab2', "Incorrect container ID") @@ -787,7 +787,7 @@ def http_post_handler(url, body, **__): return None http_post_handler.request_body = None - with mock_wire_protocol(mockwiredata.DATA_FILE, http_post_handler=http_post_handler) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE, http_post_handler=http_post_handler) as protocol: event_file_path = self._create_test_event_file("event_with_callstack.waagent.tld") expected_message = get_event_message_from_event_file(event_file_path) @@ -807,7 +807,7 @@ def http_post_handler(url, body, **__): return None http_post_handler.request_body = None - with mock_wire_protocol(mockwiredata.DATA_FILE, http_post_handler=http_post_handler) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE, 
http_post_handler=http_post_handler) as protocol: test_messages = [ 'Non-English message - 此文字不是英文的', "Ξεσκεπάζω τὴν ψυχοφθόρα βδελυγμία", diff --git a/tests/common/test_logcollector.py b/tests/common/test_logcollector.py index 521e0f23ed..1ef2474c34 100644 --- a/tests/common/test_logcollector.py +++ b/tests/common/test_logcollector.py @@ -25,7 +25,7 @@ from azurelinuxagent.common.logcollector import LogCollector from azurelinuxagent.common.utils import fileutil from azurelinuxagent.common.utils.fileutil import rm_dirs, mkdir, rm_files -from tests.tools import AgentTestCase, is_python_version_26, patch, skip_if_predicate_true, data_dir +from tests.lib.tools import AgentTestCase, is_python_version_26, patch, skip_if_predicate_true, data_dir SMALL_FILE_SIZE = 1 * 1024 * 1024 # 1 MB LARGE_FILE_SIZE = 5 * 1024 * 1024 # 5 MB diff --git a/tests/common/test_logger.py b/tests/common/test_logger.py index a10ea47c6a..d792eb8577 100644 --- a/tests/common/test_logger.py +++ b/tests/common/test_logger.py @@ -24,7 +24,7 @@ import azurelinuxagent.common.logger as logger from azurelinuxagent.common.utils import fileutil -from tests.tools import AgentTestCase, MagicMock, patch, skip_if_predicate_true +from tests.lib.tools import AgentTestCase, MagicMock, patch, skip_if_predicate_true _MSG_INFO = "This is our test info logging message {0} {1}" _MSG_WARN = "This is our test warn logging message {0} {1}" diff --git a/tests/common/test_persist_firewall_rules.py b/tests/common/test_persist_firewall_rules.py index 307c8536e7..da414c952d 100644 --- a/tests/common/test_persist_firewall_rules.py +++ b/tests/common/test_persist_firewall_rules.py @@ -28,7 +28,7 @@ from azurelinuxagent.common.persist_firewall_rules import PersistFirewallRulesHandler from azurelinuxagent.common.utils import fileutil, shellutil from azurelinuxagent.common.utils.networkutil import AddFirewallRules, FirewallCmdDirectCommands -from tests.tools import AgentTestCase, MagicMock, patch +from tests.lib.tools import 
AgentTestCase, MagicMock, patch class TestPersistFirewallRulesHandler(AgentTestCase): diff --git a/tests/common/test_singletonperthread.py b/tests/common/test_singletonperthread.py index 39d8c99178..7b1972635e 100644 --- a/tests/common/test_singletonperthread.py +++ b/tests/common/test_singletonperthread.py @@ -3,7 +3,7 @@ from threading import Thread, currentThread from azurelinuxagent.common.singletonperthread import SingletonPerThread -from tests.tools import AgentTestCase, clear_singleton_instances +from tests.lib.tools import AgentTestCase, clear_singleton_instances class TestClassToTestSingletonPerThread(SingletonPerThread): diff --git a/tests/common/test_telemetryevent.py b/tests/common/test_telemetryevent.py index e23ab68446..ce232dab0b 100644 --- a/tests/common/test_telemetryevent.py +++ b/tests/common/test_telemetryevent.py @@ -16,7 +16,7 @@ # from azurelinuxagent.common.telemetryevent import TelemetryEvent, TelemetryEventParam, GuestAgentExtensionEventsSchema, \ CommonTelemetryEventSchema -from tests.tools import AgentTestCase +from tests.lib.tools import AgentTestCase def get_test_event(name="DummyExtension", op="Unknown", is_success=True, duration=0, version="foo", evt_type="", is_internal=False, diff --git a/tests/common/test_version.py b/tests/common/test_version.py index 625c0bcfc3..bdc07c0046 100644 --- a/tests/common/test_version.py +++ b/tests/common/test_version.py @@ -30,7 +30,7 @@ get_f5_platform, get_distro, get_lis_version, PY_VERSION_MAJOR, \ PY_VERSION_MINOR, get_daemon_version, set_daemon_version, __DAEMON_VERSION_ENV_VARIABLE as DAEMON_VERSION_ENV_VARIABLE from azurelinuxagent.common.utils.flexible_version import FlexibleVersion -from tests.tools import AgentTestCase, open_patch, patch +from tests.lib.tools import AgentTestCase, open_patch, patch def freebsd_system(): diff --git a/tests/protocol/__init__.py b/tests/common/utils/__init__.py similarity index 100% rename from tests/protocol/__init__.py rename to 
tests/common/utils/__init__.py diff --git a/tests/utils/test_archive.py b/tests/common/utils/test_archive.py similarity index 99% rename from tests/utils/test_archive.py rename to tests/common/utils/test_archive.py index 54766862f8..e65fef1e70 100644 --- a/tests/utils/test_archive.py +++ b/tests/common/utils/test_archive.py @@ -9,7 +9,7 @@ from azurelinuxagent.common import conf from azurelinuxagent.common.utils import fileutil, timeutil from azurelinuxagent.common.utils.archive import GoalStateHistory, StateArchiver, _MAX_ARCHIVED_STATES, ARCHIVE_DIRECTORY_NAME -from tests.tools import AgentTestCase, patch +from tests.lib.tools import AgentTestCase, patch debug = False if os.environ.get('DEBUG') == '1': diff --git a/tests/utils/test_crypt_util.py b/tests/common/utils/test_crypt_util.py similarity index 96% rename from tests/utils/test_crypt_util.py rename to tests/common/utils/test_crypt_util.py index 4c8ab2e375..c724c246c8 100644 --- a/tests/utils/test_crypt_util.py +++ b/tests/common/utils/test_crypt_util.py @@ -21,7 +21,7 @@ import azurelinuxagent.common.conf as conf from azurelinuxagent.common.exception import CryptError from azurelinuxagent.common.utils.cryptutil import CryptUtil -from tests.tools import AgentTestCase, data_dir, load_data, is_python_version_26, skip_if_predicate_true +from tests.lib.tools import AgentTestCase, data_dir, load_data, is_python_version_26, skip_if_predicate_true class TestCryptoUtilOperations(AgentTestCase): diff --git a/tests/utils/test_extension_process_util.py b/tests/common/utils/test_extension_process_util.py similarity index 99% rename from tests/utils/test_extension_process_util.py rename to tests/common/utils/test_extension_process_util.py index a74c4ff733..7f2d4aadf6 100644 --- a/tests/utils/test_extension_process_util.py +++ b/tests/common/utils/test_extension_process_util.py @@ -24,7 +24,7 @@ from azurelinuxagent.common.future import ustr from azurelinuxagent.common.utils.extensionprocessutil import 
format_stdout_stderr, read_output, \ wait_for_process_completion_or_timeout, handle_process_completion -from tests.tools import AgentTestCase, patch, data_dir +from tests.lib.tools import AgentTestCase, patch, data_dir class TestProcessUtils(AgentTestCase): diff --git a/tests/utils/test_file_util.py b/tests/common/utils/test_file_util.py similarity index 99% rename from tests/utils/test_file_util.py rename to tests/common/utils/test_file_util.py index 2dfa3bf966..f1514e5d04 100644 --- a/tests/utils/test_file_util.py +++ b/tests/common/utils/test_file_util.py @@ -27,7 +27,7 @@ import azurelinuxagent.common.utils.fileutil as fileutil from azurelinuxagent.common.future import ustr -from tests.tools import AgentTestCase, patch +from tests.lib.tools import AgentTestCase, patch class TestFileOperations(AgentTestCase): diff --git a/tests/utils/test_flexible_version.py b/tests/common/utils/test_flexible_version.py similarity index 100% rename from tests/utils/test_flexible_version.py rename to tests/common/utils/test_flexible_version.py diff --git a/tests/utils/test_network_util.py b/tests/common/utils/test_network_util.py similarity index 99% rename from tests/utils/test_network_util.py rename to tests/common/utils/test_network_util.py index 4c3f5d0144..e08f1aab3e 100644 --- a/tests/utils/test_network_util.py +++ b/tests/common/utils/test_network_util.py @@ -19,7 +19,7 @@ from mock.mock import patch import azurelinuxagent.common.utils.networkutil as networkutil -from tests.tools import AgentTestCase +from tests.lib.tools import AgentTestCase class TestNetworkOperations(AgentTestCase): diff --git a/tests/utils/test_passwords.txt b/tests/common/utils/test_passwords.txt similarity index 100% rename from tests/utils/test_passwords.txt rename to tests/common/utils/test_passwords.txt diff --git a/tests/utils/test_rest_util.py b/tests/common/utils/test_rest_util.py similarity index 99% rename from tests/utils/test_rest_util.py rename to tests/common/utils/test_rest_util.py index 
a0b00f6cd9..efcebb082f 100644 --- a/tests/utils/test_rest_util.py +++ b/tests/common/utils/test_rest_util.py @@ -22,7 +22,7 @@ import azurelinuxagent.common.utils.restutil as restutil from azurelinuxagent.common.utils.restutil import HTTP_USER_AGENT from azurelinuxagent.common.future import httpclient, ustr -from tests.tools import AgentTestCase, call, Mock, MagicMock, patch +from tests.lib.tools import AgentTestCase, call, Mock, MagicMock, patch class TestIOErrorCounter(AgentTestCase): diff --git a/tests/utils/test_shell_util.py b/tests/common/utils/test_shell_util.py similarity index 99% rename from tests/utils/test_shell_util.py rename to tests/common/utils/test_shell_util.py index 83082bf7e7..3c6afc60e6 100644 --- a/tests/utils/test_shell_util.py +++ b/tests/common/utils/test_shell_util.py @@ -24,8 +24,8 @@ from azurelinuxagent.common.future import ustr import azurelinuxagent.common.utils.shellutil as shellutil -from tests.tools import AgentTestCase, patch -from tests.utils.miscellaneous_tools import wait_for, format_processes +from tests.lib.tools import AgentTestCase, patch +from tests.lib.miscellaneous_tools import wait_for, format_processes class ShellQuoteTestCase(AgentTestCase): diff --git a/tests/utils/test_text_util.py b/tests/common/utils/test_text_util.py similarity index 99% rename from tests/utils/test_text_util.py rename to tests/common/utils/test_text_util.py index ff129c40be..5029cfb921 100644 --- a/tests/utils/test_text_util.py +++ b/tests/common/utils/test_text_util.py @@ -22,7 +22,7 @@ import azurelinuxagent.common.utils.textutil as textutil from azurelinuxagent.common.future import ustr -from tests.tools import AgentTestCase +from tests.lib.tools import AgentTestCase class TestTextUtil(AgentTestCase): diff --git a/tests/daemon/test_daemon.py b/tests/daemon/test_daemon.py index b5a75902ba..4b34ddec73 100644 --- a/tests/daemon/test_daemon.py +++ b/tests/daemon/test_daemon.py @@ -22,7 +22,7 @@ import azurelinuxagent.common.conf as conf from 
azurelinuxagent.daemon.main import OPENSSL_FIPS_ENVIRONMENT, get_daemon_handler from azurelinuxagent.pa.provision.default import ProvisionHandler -from tests.tools import AgentTestCase, Mock, patch +from tests.lib.tools import AgentTestCase, Mock, patch class MockDaemonCall(object): diff --git a/tests/daemon/test_resourcedisk.py b/tests/daemon/test_resourcedisk.py index 301ac695e4..0927414424 100644 --- a/tests/daemon/test_resourcedisk.py +++ b/tests/daemon/test_resourcedisk.py @@ -15,10 +15,15 @@ # Requires Python 2.6+ and Openssl 1.0+ # +import os +import stat +import sys import unittest -from tests.tools import AgentTestCase, patch, DEFAULT +from tests.lib.tools import AgentTestCase, patch, DEFAULT +from azurelinuxagent.daemon.resourcedisk import get_resourcedisk_handler from azurelinuxagent.daemon.resourcedisk.default import ResourceDiskHandler +from azurelinuxagent.common.utils import shellutil class TestResourceDisk(AgentTestCase): @@ -80,6 +85,121 @@ def run_side_effect(*args, **kwargs): # pylint: disable=unused-argument size_mb=size_mb ) + def test_mkfile(self): + # setup + test_file = os.path.join(self.tmp_dir, 'test_file') + file_size = 1024 * 128 + if os.path.exists(test_file): + os.remove(test_file) + + # execute + get_resourcedisk_handler().mkfile(test_file, file_size) + + # assert + assert os.path.exists(test_file) + + # only the owner should have access + mode = os.stat(test_file).st_mode & ( + stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO) + assert mode == stat.S_IRUSR | stat.S_IWUSR + + # cleanup + os.remove(test_file) + + def test_mkfile_dd_fallback(self): + with patch.object(shellutil, "run") as run_patch: + # setup + run_patch.return_value = 1 + test_file = os.path.join(self.tmp_dir, 'test_file') + file_size = 1024 * 128 + + # execute + if sys.version_info >= (3, 3): + with patch("os.posix_fallocate", + side_effect=Exception('failure')): + get_resourcedisk_handler().mkfile(test_file, file_size) + else: + get_resourcedisk_handler().mkfile(test_file, 
file_size) + + # assert + assert run_patch.call_count > 1 + assert "fallocate" in run_patch.call_args_list[0][0][0] + assert "dd if" in run_patch.call_args_list[-1][0][0] + + def test_mkfile_xfs_fs(self): + # setup + test_file = os.path.join(self.tmp_dir, 'test_file') + file_size = 1024 * 128 + if os.path.exists(test_file): + os.remove(test_file) + + # execute + resource_disk_handler = get_resourcedisk_handler() + resource_disk_handler.fs = 'xfs' + + with patch.object(shellutil, "run") as run_patch: + resource_disk_handler.mkfile(test_file, file_size) + + # assert + if sys.version_info >= (3, 3): + with patch("os.posix_fallocate") as posix_fallocate: + self.assertEqual(0, posix_fallocate.call_count) + + assert run_patch.call_count == 1 + assert "dd if" in run_patch.call_args_list[0][0][0] + + def test_change_partition_type(self): + resource_handler = get_resourcedisk_handler() + # test when sfdisk --part-type does not exist + with patch.object(shellutil, "run_get_output", + side_effect=[[1, ''], [0, '']]) as run_patch: + resource_handler.change_partition_type( + suppress_message=True, option_str='') + + # assert + assert run_patch.call_count == 2 + assert "sfdisk --part-type" in run_patch.call_args_list[0][0][0] + assert "sfdisk -c" in run_patch.call_args_list[1][0][0] + + # test when sfdisk --part-type exists + with patch.object(shellutil, "run_get_output", + side_effect=[[0, '']]) as run_patch: + resource_handler.change_partition_type( + suppress_message=True, option_str='') + + # assert + assert run_patch.call_count == 1 + assert "sfdisk --part-type" in run_patch.call_args_list[0][0][0] + + def test_check_existing_swap_file(self): + test_file = os.path.join(self.tmp_dir, 'test_swap_file') + file_size = 1024 * 128 + if os.path.exists(test_file): + os.remove(test_file) + + with open(test_file, "wb") as file: # pylint: disable=redefined-builtin + file.write(bytearray(file_size)) + + os.chmod(test_file, stat.S_ISUID | stat.S_ISGID | stat.S_IRUSR | + stat.S_IWUSR | 
stat.S_IRWXG | stat.S_IRWXO) # 0o6677 + + def swap_on(_): # mimic the output of "swapon -s" + return [ + "Filename Type Size Used Priority", + "{0} partition 16498684 0 -2".format(test_file) + ] + + with patch.object(shellutil, "run_get_output", side_effect=swap_on): + get_resourcedisk_handler().check_existing_swap_file( + test_file, test_file, file_size) + + # it should remove access from group, others + mode = os.stat(test_file).st_mode & (stat.S_ISUID | stat.S_ISGID | + stat.S_IRWXU | stat.S_IWUSR | stat.S_IRWXG | stat.S_IRWXO) # 0o6777 + assert mode == stat.S_ISUID | stat.S_ISGID | stat.S_IRUSR | stat.S_IWUSR # 0o6600 + + os.remove(test_file) + if __name__ == '__main__': unittest.main() diff --git a/tests/distro/test_scvmm.py b/tests/daemon/test_scvmm.py similarity index 98% rename from tests/distro/test_scvmm.py rename to tests/daemon/test_scvmm.py index 109a960522..275f3f6e37 100644 --- a/tests/distro/test_scvmm.py +++ b/tests/daemon/test_scvmm.py @@ -26,7 +26,7 @@ from azurelinuxagent.common import conf from azurelinuxagent.common.osutil.default import DefaultOSUtil from azurelinuxagent.common.utils import fileutil -from tests.tools import AgentTestCase, Mock, patch +from tests.lib.tools import AgentTestCase, Mock, patch class TestSCVMM(AgentTestCase): diff --git a/tests/distro/test_resourceDisk.py b/tests/distro/test_resourceDisk.py deleted file mode 100644 index 04acd39152..0000000000 --- a/tests/distro/test_resourceDisk.py +++ /dev/null @@ -1,148 +0,0 @@ -# Copyright 2018 Microsoft Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# -# Requires Python 2.6+ and Openssl 1.0+ -# -# Implements parts of RFC 2131, 1541, 1497 and -# http://msdn.microsoft.com/en-us/library/cc227282%28PROT.10%29.aspx -# http://msdn.microsoft.com/en-us/library/cc227259%28PROT.13%29.aspx - -import os -import stat -import sys -import unittest -from azurelinuxagent.common.utils import shellutil -from azurelinuxagent.daemon.resourcedisk import get_resourcedisk_handler -from tests.tools import AgentTestCase, patch - - -class TestResourceDisk(AgentTestCase): - def test_mkfile(self): - # setup - test_file = os.path.join(self.tmp_dir, 'test_file') - file_size = 1024 * 128 - if os.path.exists(test_file): - os.remove(test_file) - - # execute - get_resourcedisk_handler().mkfile(test_file, file_size) - - # assert - assert os.path.exists(test_file) - - # only the owner should have access - mode = os.stat(test_file).st_mode & ( - stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO) - assert mode == stat.S_IRUSR | stat.S_IWUSR - - # cleanup - os.remove(test_file) - - def test_mkfile_dd_fallback(self): - with patch.object(shellutil, "run") as run_patch: - # setup - run_patch.return_value = 1 - test_file = os.path.join(self.tmp_dir, 'test_file') - file_size = 1024 * 128 - - # execute - if sys.version_info >= (3, 3): - with patch("os.posix_fallocate", - side_effect=Exception('failure')): - get_resourcedisk_handler().mkfile(test_file, file_size) - else: - get_resourcedisk_handler().mkfile(test_file, file_size) - - # assert - assert run_patch.call_count > 1 - assert "fallocate" in run_patch.call_args_list[0][0][0] - assert "dd if" in run_patch.call_args_list[-1][0][0] - - def test_mkfile_xfs_fs(self): - # setup - test_file = os.path.join(self.tmp_dir, 'test_file') - file_size = 1024 * 128 - if os.path.exists(test_file): - os.remove(test_file) - - # execute - resource_disk_handler = get_resourcedisk_handler() - resource_disk_handler.fs = 'xfs' 
- - with patch.object(shellutil, "run") as run_patch: - resource_disk_handler.mkfile(test_file, file_size) - - # assert - if sys.version_info >= (3, 3): - with patch("os.posix_fallocate") as posix_fallocate: - self.assertEqual(0, posix_fallocate.call_count) - - assert run_patch.call_count == 1 - assert "dd if" in run_patch.call_args_list[0][0][0] - - def test_change_partition_type(self): - resource_handler = get_resourcedisk_handler() - # test when sfdisk --part-type does not exist - with patch.object(shellutil, "run_get_output", - side_effect=[[1, ''], [0, '']]) as run_patch: - resource_handler.change_partition_type( - suppress_message=True, option_str='') - - # assert - assert run_patch.call_count == 2 - assert "sfdisk --part-type" in run_patch.call_args_list[0][0][0] - assert "sfdisk -c" in run_patch.call_args_list[1][0][0] - - # test when sfdisk --part-type exists - with patch.object(shellutil, "run_get_output", - side_effect=[[0, '']]) as run_patch: - resource_handler.change_partition_type( - suppress_message=True, option_str='') - - # assert - assert run_patch.call_count == 1 - assert "sfdisk --part-type" in run_patch.call_args_list[0][0][0] - - def test_check_existing_swap_file(self): - test_file = os.path.join(self.tmp_dir, 'test_swap_file') - file_size = 1024 * 128 - if os.path.exists(test_file): - os.remove(test_file) - - with open(test_file, "wb") as file: # pylint: disable=redefined-builtin - file.write(bytearray(file_size)) - - os.chmod(test_file, stat.S_ISUID | stat.S_ISGID | stat.S_IRUSR | - stat.S_IWUSR | stat.S_IRWXG | stat.S_IRWXO) # 0o6677 - - def swap_on(_): # mimic the output of "swapon -s" - return [ - "Filename Type Size Used Priority", - "{0} partition 16498684 0 -2".format(test_file) - ] - - with patch.object(shellutil, "run_get_output", side_effect=swap_on): - get_resourcedisk_handler().check_existing_swap_file( - test_file, test_file, file_size) - - # it should remove access from group, others - mode = os.stat(test_file).st_mode & 
(stat.S_ISUID | stat.S_ISGID | - stat.S_IRWXU | stat.S_IWUSR | stat.S_IRWXG | stat.S_IRWXO) # 0o6777 - assert mode == stat.S_ISUID | stat.S_ISGID | stat.S_IRUSR | stat.S_IWUSR # 0o6600 - - os.remove(test_file) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/ga/test_agent_update_handler.py b/tests/ga/test_agent_update_handler.py index 35ec714264..a78471df08 100644 --- a/tests/ga/test_agent_update_handler.py +++ b/tests/ga/test_agent_update_handler.py @@ -13,10 +13,10 @@ from azurelinuxagent.ga.agent_update_handler import get_agent_update_handler from azurelinuxagent.ga.guestagent import GAUpdateReportState from tests.ga.test_update import UpdateTestCase -from tests.protocol.HttpRequestPredicates import HttpRequestPredicates -from tests.protocol.mocks import mock_wire_protocol, MockHttpResponse -from tests.protocol.mockwiredata import DATA_FILE -from tests.tools import clear_singleton_instances, load_bin_data, patch +from tests.lib.http_request_predicates import HttpRequestPredicates +from tests.lib.mock_wire_protocol import mock_wire_protocol, MockHttpResponse +from tests.lib.wire_protocol_data import DATA_FILE +from tests.lib.tools import clear_singleton_instances, load_bin_data, patch class TestAgentUpdate(UpdateTestCase): diff --git a/tests/ga/test_collect_logs.py b/tests/ga/test_collect_logs.py index 14593726d6..fa0add0153 100644 --- a/tests/ga/test_collect_logs.py +++ b/tests/ga/test_collect_logs.py @@ -25,10 +25,10 @@ from azurelinuxagent.common.utils import fileutil from azurelinuxagent.ga.collect_logs import get_collect_logs_handler, is_log_collection_allowed, \ get_log_collector_monitor_handler -from tests.protocol.mocks import mock_wire_protocol, MockHttpResponse -from tests.protocol.HttpRequestPredicates import HttpRequestPredicates -from tests.protocol.mockwiredata import DATA_FILE -from tests.tools import Mock, MagicMock, patch, AgentTestCase, clear_singleton_instances, skip_if_predicate_true, \ +from tests.lib.mock_wire_protocol 
import mock_wire_protocol, MockHttpResponse +from tests.lib.http_request_predicates import HttpRequestPredicates +from tests.lib.wire_protocol_data import DATA_FILE +from tests.lib.tools import Mock, MagicMock, patch, AgentTestCase, clear_singleton_instances, skip_if_predicate_true, \ is_python_version_26, data_dir diff --git a/tests/ga/test_collect_telemetry_events.py b/tests/ga/test_collect_telemetry_events.py index bdd763effb..509af2ceff 100644 --- a/tests/ga/test_collect_telemetry_events.py +++ b/tests/ga/test_collect_telemetry_events.py @@ -36,8 +36,8 @@ CommonTelemetryEventSchema from azurelinuxagent.common.utils import fileutil from azurelinuxagent.ga.collect_telemetry_events import ExtensionEventSchema, _ProcessExtensionEvents -from tests.protocol.HttpRequestPredicates import HttpRequestPredicates -from tests.tools import AgentTestCase, clear_singleton_instances, data_dir +from tests.lib.http_request_predicates import HttpRequestPredicates +from tests.lib.tools import AgentTestCase, clear_singleton_instances, data_dir class TestExtensionTelemetryHandler(AgentTestCase, HttpRequestPredicates): diff --git a/tests/ga/test_env.py b/tests/ga/test_env.py index aa4b74ab1c..29ca6fec1e 100644 --- a/tests/ga/test_env.py +++ b/tests/ga/test_env.py @@ -19,7 +19,7 @@ from azurelinuxagent.common.osutil import get_osutil from azurelinuxagent.common.osutil.default import DefaultOSUtil, shellutil from azurelinuxagent.ga.env import MonitorDhcpClientRestart -from tests.tools import AgentTestCase, patch +from tests.lib.tools import AgentTestCase, patch class MonitorDhcpClientRestartTestCase(AgentTestCase): diff --git a/tests/ga/test_extension.py b/tests/ga/test_extension.py index 76dde881d2..201868fc68 100644 --- a/tests/ga/test_extension.py +++ b/tests/ga/test_extension.py @@ -50,12 +50,12 @@ get_exthandlers_handler, ExtCommandEnvVariable, HandlerManifest, NOT_RUN, \ ExtensionStatusValue, HANDLER_COMPLETE_NAME_PATTERN, HandlerEnvironment, GoalStateStatus -from tests.protocol 
import mockwiredata -from tests.protocol.mocks import mock_wire_protocol, MockHttpResponse -from tests.protocol.HttpRequestPredicates import HttpRequestPredicates -from tests.protocol.mockwiredata import DATA_FILE, DATA_FILE_EXT_ADDITIONAL_LOCATIONS -from tests.tools import AgentTestCase, data_dir, MagicMock, Mock, patch, mock_sleep -from tests.ga.extension_emulator import Actions, ExtensionCommandNames, extension_emulator, \ +from tests.lib import wire_protocol_data +from tests.lib.mock_wire_protocol import mock_wire_protocol, MockHttpResponse +from tests.lib.http_request_predicates import HttpRequestPredicates +from tests.lib.wire_protocol_data import DATA_FILE, DATA_FILE_EXT_ADDITIONAL_LOCATIONS +from tests.lib.tools import AgentTestCase, data_dir, MagicMock, Mock, patch, mock_sleep +from tests.lib.extension_emulator import Actions, ExtensionCommandNames, extension_emulator, \ enable_invocations, generate_put_handler # Mocking the original sleep to reduce test execution time @@ -137,7 +137,7 @@ def mock_http_put(url, *args, **_): yield exthandlers_handler, protocol, no_of_extensions def test_cleanup_leaves_installed_extensions(self): - with self._setup_test_env(mockwiredata.DATA_FILE_MULTIPLE_EXT) as (exthandlers_handler, protocol, no_of_exts): + with self._setup_test_env(wire_protocol_data.DATA_FILE_MULTIPLE_EXT) as (exthandlers_handler, protocol, no_of_exts): exthandlers_handler.run() exthandlers_handler.report_ext_handlers_status() @@ -147,7 +147,7 @@ def test_cleanup_leaves_installed_extensions(self): version="1.0.0") def test_cleanup_removes_uninstalled_extensions(self): - with self._setup_test_env(mockwiredata.DATA_FILE_MULTIPLE_EXT) as (exthandlers_handler, protocol, no_of_exts): + with self._setup_test_env(wire_protocol_data.DATA_FILE_MULTIPLE_EXT) as (exthandlers_handler, protocol, no_of_exts): exthandlers_handler.run() exthandlers_handler.report_ext_handlers_status() self._assert_ext_handler_status(protocol.aggregate_status, "Ready", 
expected_ext_handler_count=no_of_exts, @@ -167,7 +167,7 @@ def test_cleanup_removes_uninstalled_extensions(self): self.assertEqual(0, TestExtensionCleanup._count_extension_directories(), "All extension directories should be removed") def test_cleanup_removes_orphaned_packages(self): - data_file = mockwiredata.DATA_FILE_NO_EXT.copy() + data_file = wire_protocol_data.DATA_FILE_NO_EXT.copy() data_file["ext_conf"] = "wire/ext_conf_no_extensions-no_status_blob.xml" no_of_orphaned_packages = 5 @@ -197,7 +197,7 @@ def test_cleanup_leaves_failed_extensions(self): def mock_fail_popen(*args, **kwargs): # pylint: disable=unused-argument return original_popen("fail_this_command", **kwargs) - with self._setup_test_env(mockwiredata.DATA_FILE_EXT_SINGLE) as (exthandlers_handler, protocol, no_of_exts): + with self._setup_test_env(wire_protocol_data.DATA_FILE_EXT_SINGLE) as (exthandlers_handler, protocol, no_of_exts): with patch("azurelinuxagent.common.cgroupapi.subprocess.Popen", mock_fail_popen): exthandlers_handler.run() exthandlers_handler.report_ext_handlers_status() @@ -235,7 +235,7 @@ def assert_extension_seq_no(expected_seq_no): self.assertEqual(expected_seq_no, handler_status['runtimeSettingsStatus']['sequenceNumber'], "Sequence number mismatch") - with self._setup_test_env(mockwiredata.DATA_FILE_MULTIPLE_EXT) as (exthandlers_handler, protocol, orig_no_of_exts): + with self._setup_test_env(wire_protocol_data.DATA_FILE_MULTIPLE_EXT) as (exthandlers_handler, protocol, orig_no_of_exts): # Run 1 - GS has no required features and contains 5 extensions exthandlers_handler.run() exthandlers_handler.report_ext_handlers_status() @@ -249,7 +249,7 @@ def assert_extension_seq_no(expected_seq_no): # Run 2 - Change the GS to one with Required features not supported by the agent # This ExtensionConfig has 1 extension - ExampleHandlerLinuxWithRequiredFeatures - protocol.mock_wire_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_REQUIRED_FEATURES) + protocol.mock_wire_data = 
wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_REQUIRED_FEATURES) protocol.mock_wire_data.set_incarnation(2) protocol.mock_wire_data.set_extensions_config_sequence_number(random.randint(10, 100)) protocol.client.update_goal_state() @@ -272,7 +272,7 @@ def assert_extension_seq_no(expected_seq_no): # Run 3 - Run a GS with no Required Features and ensure we execute all extensions properly # This ExtensionConfig has 1 extension - OSTCExtensions.ExampleHandlerLinux - protocol.mock_wire_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE) + protocol.mock_wire_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE) protocol.mock_wire_data.set_incarnation(3) extension_seq_no = random.randint(10, 100) protocol.mock_wire_data.set_extensions_config_sequence_number(extension_seq_no) @@ -495,7 +495,7 @@ def _set_up_update_test_and_update_gs(self, patch_command, *args): :param args: Any additional args passed to the function, needed for creating a mock for handler and protocol :return: test_data, exthandlers_handler, protocol """ - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_EXT_SINGLE) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_EXT_SINGLE) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=no-value-for-parameter # Ensure initial install and enable is successful @@ -524,7 +524,7 @@ def _create_extension_handlers_handler(protocol): def test_ext_handler(self, *args): # Test enable scenario. 
- test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=no-value-for-parameter exthandlers_handler.run() @@ -612,7 +612,7 @@ def _assert_handler_status_and_manifest_download_count(protocol, test_data, mani self.assertEqual(test_data.call_counts['manifest.xml'], manifest_count, "We should have downloaded extension manifest {0} times".format(manifest_count)) - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=no-value-for-parameter exthandlers_handler.run() exthandlers_handler.report_ext_handlers_status() @@ -634,7 +634,7 @@ def test_it_should_fail_handler_on_bad_extension_config_and_report_error(self, m for bad_config_file_path in os.listdir(invalid_config_dir): bad_conf = DATA_FILE.copy() bad_conf["ext_conf"] = os.path.join(invalid_config_dir, bad_config_file_path) - test_data = mockwiredata.WireProtocolData(bad_conf) + test_data = wire_protocol_data.WireProtocolData(bad_conf) exthandlers_handler, protocol = self._create_mock(test_data, mock_get, mock_crypt_util, *args) with patch('azurelinuxagent.ga.exthandlers.add_event') as patch_add_event: @@ -651,7 +651,7 @@ def test_it_should_process_valid_extensions_if_present(self, mock_get, mock_cryp bad_conf = DATA_FILE.copy() bad_conf["ext_conf"] = os.path.join("wire", "ext_conf_invalid_and_valid_handlers.xml") - test_data = mockwiredata.WireProtocolData(bad_conf) + test_data = wire_protocol_data.WireProtocolData(bad_conf) exthandlers_handler, protocol = self._create_mock(test_data, mock_get, mock_crypt_util, *args) exthandlers_handler.run() @@ -675,7 +675,7 @@ def test_it_should_process_valid_extensions_if_present(self, mock_get, mock_cryp def 
test_it_should_ignore_case_when_parsing_plugin_settings(self, mock_get, mock_crypt_util, *args): - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_CASE_MISMATCH_EXT) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_CASE_MISMATCH_EXT) exthandlers_handler, protocol = self._create_mock(test_data, mock_get, mock_crypt_util, *args) exthandlers_handler.run() @@ -704,7 +704,7 @@ def test_it_should_ignore_case_when_parsing_plugin_settings(self, mock_get, mock self.assertEqual(0, len(expected_ext_handlers), "All handlers not reported") def test_ext_handler_no_settings(self, *args): - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_EXT_NO_SETTINGS) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_EXT_NO_SETTINGS) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=no-value-for-parameter test_ext = extension_emulator(name="OSTCExtensions.ExampleHandlerLinux") @@ -734,7 +734,7 @@ def test_ext_handler_no_settings(self, *args): ) def test_ext_handler_no_public_settings(self, *args): - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_EXT_NO_PUBLIC) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_EXT_NO_PUBLIC) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=no-value-for-parameter exthandlers_handler.run() @@ -742,7 +742,7 @@ def test_ext_handler_no_public_settings(self, *args): self._assert_handler_status(protocol.report_vm_status, "Ready", 1, "1.0.0") def test_ext_handler_no_ext(self, *args): - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_NO_EXT) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_NO_EXT) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=no-value-for-parameter # Assert no extension handler status @@ -752,7 +752,7 @@ def test_ext_handler_no_ext(self, *args): def 
test_ext_handler_sequencing(self, *args): # Test enable scenario. - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_EXT_SEQUENCING) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_EXT_SEQUENCING) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=no-value-for-parameter dep_ext_level_2 = extension_emulator(name="OSTCExtensions.ExampleHandlerLinux") @@ -881,7 +881,7 @@ def test_it_should_process_sequencing_properly_even_if_no_settings_for_dependent self, mock_get, mock_crypt, *args): test_data_file = DATA_FILE.copy() test_data_file["ext_conf"] = "wire/ext_conf_dependencies_with_empty_settings.xml" - test_data = mockwiredata.WireProtocolData(test_data_file) + test_data = wire_protocol_data.WireProtocolData(test_data_file) exthandlers_handler, protocol = self._create_mock(test_data, mock_get, mock_crypt, *args) ext_1 = extension_emulator(name="OSTCExtensions.ExampleHandlerLinux") @@ -910,7 +910,7 @@ def test_it_should_process_sequencing_properly_even_if_no_settings_for_dependent ) def test_ext_handler_sequencing_should_fail_if_handler_failed(self, mock_get, mock_crypt, *args): - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_EXT_SEQUENCING) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_EXT_SEQUENCING) exthandlers_handler, protocol = self._create_mock(test_data, mock_get, mock_crypt, *args) original_popen = subprocess.Popen @@ -986,7 +986,7 @@ def mock_fail_extension_commands(args, **kwargs): ) def test_ext_handler_sequencing_default_dependency_level(self, *args): - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=unused-variable,no-value-for-parameter exthandlers_handler.run() exthandlers_handler.report_ext_handlers_status() @@ -995,7 +995,7 @@ def 
test_ext_handler_sequencing_default_dependency_level(self, *args): self.assertEqual(exthandlers_handler.ext_handlers[0].settings[0].dependencyLevel, 0) def test_ext_handler_sequencing_invalid_dependency_level(self, *args): - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_EXT_SEQUENCING) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_EXT_SEQUENCING) test_data.set_incarnation(2) test_data.set_extensions_config_sequence_number(1) test_data.ext_conf = test_data.ext_conf.replace("dependencyLevel=\"1\"", @@ -1012,7 +1012,7 @@ def test_ext_handler_sequencing_invalid_dependency_level(self, *args): def test_ext_handler_rollingupgrade(self, *args): # Test enable scenario. - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_EXT_ROLLINGUPGRADE) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_EXT_ROLLINGUPGRADE) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=no-value-for-parameter exthandlers_handler.run() @@ -1123,7 +1123,7 @@ def test_it_should_create_extension_events_dir_and_set_handler_environment_only_ with patch("azurelinuxagent.common.agent_supported_feature._ETPFeature.is_supported", enable_extensions): # Create new object for each run to force re-installation of extensions as we # only create handler_environment on installation - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_MULTIPLE_EXT) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_MULTIPLE_EXT) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=no-value-for-parameter exthandlers_handler.run() @@ -1154,7 +1154,7 @@ def test_it_should_create_extension_events_dir_and_set_handler_environment_only_ shutil.rmtree(tmp_lib_dir, ignore_errors=True) def test_it_should_not_delete_extension_events_directory_on_extension_uninstall(self, *args): - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE) 
+ test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=no-value-for-parameter with patch("azurelinuxagent.common.agent_supported_feature._ETPFeature.is_supported", True): @@ -1178,7 +1178,7 @@ def test_it_should_not_delete_extension_events_directory_on_extension_uninstall( self.assertTrue(os.path.exists(ehi.get_extension_events_dir()), "Events directory should still exist") def test_it_should_uninstall_unregistered_extensions_properly(self, *args): - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=no-value-for-parameter exthandlers_handler.run() exthandlers_handler.report_ext_handlers_status() @@ -1205,7 +1205,7 @@ def test_it_should_uninstall_unregistered_extensions_properly(self, *args): @patch('azurelinuxagent.common.errorstate.ErrorState.is_triggered') @patch('azurelinuxagent.ga.exthandlers.add_event') def test_ext_handler_report_status_permanent(self, mock_add_event, mock_error_state, *args): - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=no-value-for-parameter protocol.report_vm_status = Mock(side_effect=ProtocolError) @@ -1221,7 +1221,7 @@ def test_ext_handler_report_status_permanent(self, mock_add_event, mock_error_st @patch('azurelinuxagent.ga.exthandlers.add_event') def test_ext_handler_report_status_resource_gone(self, mock_add_event, *args): - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: 
disable=no-value-for-parameter protocol.report_vm_status = Mock(side_effect=ResourceGoneError) @@ -1236,7 +1236,7 @@ def test_ext_handler_report_status_resource_gone(self, mock_add_event, *args): @patch('azurelinuxagent.common.errorstate.ErrorState.is_triggered') @patch('azurelinuxagent.ga.exthandlers.add_event') def test_ext_handler_download_failure_permanent_ProtocolError(self, mock_add_event, mock_error_state, *args): - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=no-value-for-parameter protocol.get_goal_state().fetch_extension_manifest = Mock(side_effect=ProtocolError) @@ -1254,7 +1254,7 @@ def test_ext_handler_download_failure_permanent_ProtocolError(self, mock_add_eve @patch('azurelinuxagent.ga.exthandlers.fileutil') def test_ext_handler_io_error(self, mock_fileutil, *args): - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=unused-variable,no-value-for-parameter mock_fileutil.write_file.return_value = IOError("Mock IO Error") @@ -1277,7 +1277,7 @@ def _assert_ext_status(self, vm_agent_status, expected_status, self.assertIn(expected_msg, ext_status.message) def test_it_should_initialise_and_use_command_execution_log_for_extensions(self, mock_get, mock_crypt_util, *args): - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE) exthandlers_handler, protocol = self._create_mock(test_data, mock_get, mock_crypt_util, *args) exthandlers_handler.run() exthandlers_handler.report_ext_handlers_status() @@ -1290,7 +1290,7 @@ def test_it_should_initialise_and_use_command_execution_log_for_extensions(self, 
self.assertGreater(os.path.getsize(command_execution_log), 0, "The file should not be empty") def test_ext_handler_no_reporting_status(self, *args): - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=no-value-for-parameter exthandlers_handler.run() exthandlers_handler.report_ext_handlers_status() @@ -1317,7 +1317,7 @@ def test_wait_for_handler_completion_no_status(self, mock_http_get, mock_crypt_u Expected to retry and eventually report failure for all dependent extensions. """ exthandlers_handler, protocol = self._create_mock( - mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_EXT_SEQUENCING), mock_http_get, mock_crypt_util, *args) + wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_EXT_SEQUENCING), mock_http_get, mock_crypt_util, *args) original_popen = subprocess.Popen @@ -1371,7 +1371,7 @@ def mock_popen(cmd, *_, **kwargs): aks_test_mock = DATA_FILE.copy() aks_test_mock["ext_conf"] = "wire/ext_conf_aks_extension.xml" - exthandlers_handler, protocol = self._create_mock(mockwiredata.WireProtocolData(aks_test_mock), + exthandlers_handler, protocol = self._create_mock(wire_protocol_data.WireProtocolData(aks_test_mock), mock_http_get, mock_crypt_util, *args) with patch('azurelinuxagent.common.cgroupapi.subprocess.Popen', side_effect=mock_popen): @@ -1405,7 +1405,7 @@ def test_it_should_include_part_of_status_in_ext_handler_message(self, mock_http debugging. 
""" exthandlers_handler, protocol = self._create_mock( - mockwiredata.WireProtocolData(mockwiredata.DATA_FILE), mock_http_get, mock_crypt_util, *args) + wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE), mock_http_get, mock_crypt_util, *args) original_popen = subprocess.Popen @@ -1440,7 +1440,7 @@ def test_wait_for_handler_completion_success_status(self, mock_http_get, mock_cr Testing depends-on scenario on a successful case. Expected to report the status for both extensions properly. """ exthandlers_handler, protocol = self._create_mock( - mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_EXT_SEQUENCING), mock_http_get, mock_crypt_util, *args) + wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_EXT_SEQUENCING), mock_http_get, mock_crypt_util, *args) exthandlers_handler.run() exthandlers_handler.report_ext_handlers_status() @@ -1463,7 +1463,7 @@ def test_wait_for_handler_completion_error_status(self, mock_http_get, mock_cryp Expected to return False. """ exthandlers_handler, protocol = self._create_mock( - mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_EXT_SEQUENCING), mock_http_get, mock_crypt_util, *args) + wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_EXT_SEQUENCING), mock_http_get, mock_crypt_util, *args) original_popen = subprocess.Popen @@ -1491,7 +1491,7 @@ def test_get_ext_handling_status(self, *args): Testing get_ext_handling_status() function with various cases and verifying against the expected values """ - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=unused-variable,no-value-for-parameter handler_name = "Handler" @@ -1534,7 +1534,7 @@ def test_is_ext_handling_complete(self, *args): Testing is_ext_handling_complete() with various input and verifying against the expected output values. 
""" - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=unused-variable,no-value-for-parameter handler_name = "Handler" @@ -1572,18 +1572,18 @@ def test_ext_handler_version_decide_autoupgrade_internalversion(self, *args): config_version = '1.3.0' decision_version = '1.3.0' if autoupgrade: - datafile = mockwiredata.DATA_FILE_EXT_AUTOUPGRADE_INTERNALVERSION + datafile = wire_protocol_data.DATA_FILE_EXT_AUTOUPGRADE_INTERNALVERSION else: - datafile = mockwiredata.DATA_FILE_EXT_INTERNALVERSION + datafile = wire_protocol_data.DATA_FILE_EXT_INTERNALVERSION else: config_version = '1.0.0' decision_version = '1.0.0' if autoupgrade: - datafile = mockwiredata.DATA_FILE_EXT_AUTOUPGRADE + datafile = wire_protocol_data.DATA_FILE_EXT_AUTOUPGRADE else: - datafile = mockwiredata.DATA_FILE + datafile = wire_protocol_data.DATA_FILE - _, protocol = self._create_mock(mockwiredata.WireProtocolData(datafile), *args) # pylint: disable=no-value-for-parameter + _, protocol = self._create_mock(wire_protocol_data.WireProtocolData(datafile), *args) # pylint: disable=no-value-for-parameter ext_handlers = protocol.get_goal_state().extensions_goal_state.extensions self.assertEqual(1, len(ext_handlers)) ext_handler = ext_handlers[0] @@ -1616,7 +1616,7 @@ def test_ext_handler_version_decide_between_minor_versions(self, *args): (None, '4.1', '4.1.0.0'), ] - _, protocol = self._create_mock(mockwiredata.WireProtocolData(mockwiredata.DATA_FILE), *args) # pylint: disable=no-value-for-parameter + _, protocol = self._create_mock(wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE), *args) # pylint: disable=no-value-for-parameter version_uri = 'http://mock-goal-state/Microsoft.OSTCExtensions_ExampleHandlerLinux_asiaeast_manifest.xml' for (installed_version, config_version, expected_version) in cases: @@ -1635,7 
+1635,7 @@ def test_ext_handler_version_decide_between_minor_versions(self, *args): @patch('azurelinuxagent.common.conf.get_extensions_enabled', return_value=False) def test_extensions_disabled(self, _, *args): # test status is reported for no extensions - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_NO_EXT) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_NO_EXT) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=no-value-for-parameter exthandlers_handler.run() @@ -1644,7 +1644,7 @@ def test_extensions_disabled(self, _, *args): self._assert_no_handler_status(protocol.report_vm_status) # test status is reported, but extensions are not processed - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=no-value-for-parameter exthandlers_handler.run() exthandlers_handler.report_ext_handlers_status() @@ -1653,7 +1653,7 @@ def test_extensions_disabled(self, _, *args): def test_extensions_deleted(self, *args): # Ensure initial enable is successful - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_EXT_DELETION) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_EXT_DELETION) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=no-value-for-parameter exthandlers_handler.run() @@ -1682,7 +1682,7 @@ def test_install_failure(self, patch_get_install_command, patch_install, *args): """ When extension install fails, the operation should not be retried. 
""" - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_EXT_SINGLE) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_EXT_SINGLE) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=no-value-for-parameter # Ensure initial install is unsuccessful @@ -1700,7 +1700,7 @@ def test_install_failure_check_exception_handling(self, patch_get_install_comman """ When extension install fails, the operation should be reported to our telemetry service. """ - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_EXT_SINGLE) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_EXT_SINGLE) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=no-value-for-parameter # Ensure install is unsuccessful @@ -1717,7 +1717,7 @@ def test_enable_failure_check_exception_handling(self, patch_get_enable_command, """ When extension enable fails, the operation should be reported. """ - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_EXT_SINGLE) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_EXT_SINGLE) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=no-value-for-parameter # Ensure initial install is successful, but enable fails @@ -1737,7 +1737,7 @@ def test_disable_failure_with_exception_handling(self, patch_get_disable_command When extension disable fails, the operation should be reported. 
""" # Ensure initial install and enable is successful, but disable fails - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_EXT_SINGLE) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_EXT_SINGLE) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=no-value-for-parameter patch_get_disable_command.call_count = 0 patch_get_disable_command.return_value = "exit 1" @@ -1768,7 +1768,7 @@ def test_uninstall_failure(self, patch_get_uninstall_command, *args): When extension uninstall fails, the operation should not be retried. """ # Ensure initial install and enable is successful, but uninstall fails - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_EXT_SINGLE) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_EXT_SINGLE) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=no-value-for-parameter patch_get_uninstall_command.call_count = 0 patch_get_uninstall_command.return_value = "exit 1" @@ -2131,7 +2131,7 @@ def test_uninstall_rc_env_var_should_report_not_run_for_non_update_calls_to_exth self._assert_ext_status(protocol.report_vm_status, "success", 0) def test_ext_path_and_version_env_variables_set_for_ever_operation(self, *args): - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_EXT_SINGLE) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_EXT_SINGLE) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=no-value-for-parameter with patch.object(CGroupConfigurator.get_instance(), "start_extension_command") as patch_start_cmd: @@ -2150,7 +2150,7 @@ def test_ext_path_and_version_env_variables_set_for_ever_operation(self, *args): @patch("azurelinuxagent.common.cgroupconfigurator.handle_process_completion", side_effect="Process Successful") def test_ext_sequence_no_should_be_set_for_every_command_call(self, _, *args): - test_data = 
mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_MULTIPLE_EXT) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_MULTIPLE_EXT) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=no-value-for-parameter with patch("subprocess.Popen") as patch_popen: @@ -2205,7 +2205,7 @@ def test_ext_sequence_no_should_be_set_from_within_extension(self, *args): os.mkdir(base_dir) self.create_script(os.path.join(base_dir, test_file_name), test_file) - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_EXT_SINGLE) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_EXT_SINGLE) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=unused-variable,no-value-for-parameter expected_seq_no = 0 @@ -2297,7 +2297,7 @@ def test_correct_exit_code_should_be_set_on_uninstall_cmd_failure(self, *args): self.assertIn("%s=%s" % (ExtCommandEnvVariable.UninstallReturnCode, exit_code), enable_kwargs['message']) def test_it_should_persist_goal_state_aggregate_status_until_new_incarnation(self, mock_get, mock_crypt_util, *args): - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE) exthandlers_handler, protocol = self._create_mock(test_data, mock_get, mock_crypt_util, *args) exthandlers_handler.run() @@ -2325,7 +2325,7 @@ def test_it_should_persist_goal_state_aggregate_status_until_new_incarnation(sel self.assertEqual(new_gs_aggregate_status.in_svd_seq_no, "2", "Incorrect seq no") def test_it_should_parse_required_features_properly(self, mock_get, mock_crypt_util, *args): - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_REQUIRED_FEATURES) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_REQUIRED_FEATURES) _, protocol = self._create_mock(test_data, mock_get, mock_crypt_util, *args) required_features = 
protocol.get_goal_state().extensions_goal_state.required_features @@ -2334,7 +2334,7 @@ def test_it_should_parse_required_features_properly(self, mock_get, mock_crypt_u self.assertEqual(feature, "TestRequiredFeature{0}".format(i+1), "Name mismatch") def test_it_should_fail_goal_state_if_required_features_not_supported(self, mock_get, mock_crypt_util, *args): - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE_REQUIRED_FEATURES) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_REQUIRED_FEATURES) exthandlers_handler, protocol = self._create_mock(test_data, mock_get, mock_crypt_util, *args) exthandlers_handler.run() @@ -2355,7 +2355,7 @@ def test_it_should_fail_goal_state_if_required_features_not_supported(self, mock class TestExtensionSequencing(AgentTestCase): def _create_mock(self, mock_http_get, MockCryptUtil): - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE) + test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE) # Mock protocol to return test data mock_http_get.side_effect = test_data.mock_http_get @@ -3211,7 +3211,7 @@ def tearDown(self): @patch('time.gmtime', MagicMock(return_value=time.gmtime(0))) @patch("azurelinuxagent.common.version.get_daemon_version", return_value=FlexibleVersion("0.0.0.0")) def test_ext_handler_reporting_status_file(self, _): - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: def mock_http_put(url, *args, **_): if HttpRequestPredicates.is_host_plugin_status_request(url): @@ -3346,7 +3346,7 @@ def http_get_handler(url, *_, **kwargs): mock_in_vm_artifacts_profile_response = MockHttpResponse(200, body='{ "onHold": false }'.encode('utf-8')) - with mock_wire_protocol(mockwiredata.DATA_FILE_IN_VM_ARTIFACTS_PROFILE, http_get_handler=http_get_handler) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_IN_VM_ARTIFACTS_PROFILE, http_get_handler=http_get_handler) 
as protocol: protocol.report_vm_status = MagicMock() exthandlers_handler = get_exthandlers_handler(protocol) @@ -3388,7 +3388,7 @@ def http_get_handler(url, *_, **kwargs): def test_it_should_process_extensions_appropriately_on_artifact_hold(self): with patch('time.sleep', side_effect=lambda _: mock_sleep(0.001)): with patch("azurelinuxagent.common.conf.get_enable_overprovisioning", return_value=True): - with mock_wire_protocol(mockwiredata.DATA_FILE_IN_VM_ARTIFACTS_PROFILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_IN_VM_ARTIFACTS_PROFILE) as protocol: protocol.report_vm_status = MagicMock() exthandlers_handler = get_exthandlers_handler(protocol) # @@ -3427,7 +3427,7 @@ def test_it_should_redact_access_tokens_in_extension_output(self): TWO:HTTPS://bar.blob.core.com/foo/bar/foo.txt? TWO https://bar.com/foo?uid=2018&sr=b THREE''' - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: exthandlers_handler = get_exthandlers_handler(protocol) original_popen = subprocess.Popen diff --git a/tests/ga/test_exthandlers.py b/tests/ga/test_exthandlers.py index 67b0771779..10303ce7a1 100644 --- a/tests/ga/test_exthandlers.py +++ b/tests/ga/test_exthandlers.py @@ -32,8 +32,8 @@ read_output from azurelinuxagent.ga.exthandlers import parse_ext_status, ExtHandlerInstance, ExtCommandEnvVariable, \ ExtensionStatusError, _DEFAULT_SEQ_NO, get_exthandlers_handler, ExtHandlerState -from tests.protocol.mocks import mock_wire_protocol, mockwiredata -from tests.tools import AgentTestCase, patch, mock_sleep, clear_singleton_instances +from tests.lib.mock_wire_protocol import mock_wire_protocol, wire_protocol_data +from tests.lib.tools import AgentTestCase, patch, mock_sleep, clear_singleton_instances class TestExtHandlers(AgentTestCase): @@ -247,7 +247,7 @@ def test_extension_sequence_number(self): expected_sequence_number=-1) def 
test_it_should_report_error_if_plugin_settings_version_mismatch(self): - with mock_wire_protocol(mockwiredata.DATA_FILE_PLUGIN_SETTINGS_MISMATCH) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_PLUGIN_SETTINGS_MISMATCH) as protocol: with patch("azurelinuxagent.common.protocol.extensions_goal_state_from_extensions_config.add_event") as mock_add_event: # Forcing update of GoalState to allow the ExtConfig to report an event protocol.mock_wire_data.set_incarnation(2) @@ -292,7 +292,7 @@ def heartbeat_with_message(): return {'code': 0, 'formattedMessage': {'lang': 'en-US', 'message': 'This is a heartbeat message'}, 'status': 'ready'} - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: with patch("azurelinuxagent.common.protocol.wire.WireProtocol.report_vm_status", return_value=None): with patch("azurelinuxagent.ga.exthandlers.ExtHandlerInstance.collect_heartbeat", side_effect=heartbeat_with_message): diff --git a/tests/ga/test_exthandlers_download_extension.py b/tests/ga/test_exthandlers_download_extension.py index 556254fa3b..b3ed96a89a 100644 --- a/tests/ga/test_exthandlers_download_extension.py +++ b/tests/ga/test_exthandlers_download_extension.py @@ -10,9 +10,9 @@ from azurelinuxagent.common.protocol.restapi import Extension, ExtHandlerPackage from azurelinuxagent.common.protocol.wire import WireProtocol from azurelinuxagent.ga.exthandlers import ExtHandlerInstance, ExtHandlerState -from tests.protocol import mockwiredata -from tests.protocol.mocks import mock_wire_protocol -from tests.tools import AgentTestCase, patch, Mock +from tests.lib import wire_protocol_data +from tests.lib.mock_wire_protocol import mock_wire_protocol +from tests.lib.tools import AgentTestCase, patch, Mock class DownloadExtensionTestCase(AgentTestCase): @@ -42,7 +42,7 @@ def setUp(self): protocol.client.get_artifact_request = Mock(return_value=(None, None)) # create a dummy goal state, since 
downloads are done via the GoalState class - with mock_wire_protocol(mockwiredata.DATA_FILE) as p: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as p: goal_state = p.get_goal_state() goal_state._wire_client = protocol.client protocol.client._goal_state = goal_state diff --git a/tests/ga/test_exthandlers_exthandlerinstance.py b/tests/ga/test_exthandlers_exthandlerinstance.py index 6295d68d27..846bb89e92 100644 --- a/tests/ga/test_exthandlers_exthandlerinstance.py +++ b/tests/ga/test_exthandlers_exthandlerinstance.py @@ -7,7 +7,7 @@ from azurelinuxagent.common.protocol.restapi import Extension, ExtHandlerPackage from azurelinuxagent.ga.exthandlers import ExtHandlerInstance -from tests.tools import AgentTestCase, patch +from tests.lib.tools import AgentTestCase, patch class ExtHandlerInstanceTestCase(AgentTestCase): diff --git a/tests/ga/test_guestagent.py b/tests/ga/test_guestagent.py index 81e248bb04..a127341d15 100644 --- a/tests/ga/test_guestagent.py +++ b/tests/ga/test_guestagent.py @@ -9,9 +9,9 @@ from azurelinuxagent.common.protocol.restapi import ExtHandlerPackage from azurelinuxagent.common.version import AGENT_NAME from tests.ga.test_update import UpdateTestCase, EMPTY_MANIFEST, WITH_ERROR, NO_ERROR -from tests.protocol import mockwiredata -from tests.protocol.mocks import MockHttpResponse, mock_wire_protocol -from tests.tools import load_bin_data, patch +from tests.lib import wire_protocol_data +from tests.lib.mock_wire_protocol import MockHttpResponse, mock_wire_protocol +from tests.lib.tools import load_bin_data, patch class TestGuestAgent(UpdateTestCase): @@ -155,7 +155,7 @@ def http_get_handler(uri, *_, **__): pkg = ExtHandlerPackage(version=str(self._get_agent_version())) pkg.uris.append(agent_uri) - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: protocol.set_http_handlers(http_get_handler=http_get_handler) agent = GuestAgent.from_agent_package(pkg, protocol, 
False) @@ -176,7 +176,7 @@ def http_get_handler(uri, *_, **__): pkg = ExtHandlerPackage(version=str(self._get_agent_version())) pkg.uris.append(agent_uri) - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: protocol.set_http_handlers(http_get_handler=http_get_handler) with patch("azurelinuxagent.ga.guestagent.add_event") as add_event: agent = GuestAgent.from_agent_package(pkg, protocol, False) @@ -201,7 +201,7 @@ def http_get_handler(uri, *_, **__): pkg = ExtHandlerPackage(version="9.9.9.9") pkg.uris.append(agent_uri) - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: protocol.set_http_handlers(http_get_handler=http_get_handler) agent = GuestAgent.from_agent_package(pkg, protocol, False) diff --git a/tests/ga/test_monitor.py b/tests/ga/test_monitor.py index 5853b23eff..9b11d81114 100644 --- a/tests/ga/test_monitor.py +++ b/tests/ga/test_monitor.py @@ -30,10 +30,10 @@ from azurelinuxagent.ga.monitor import get_monitor_handler, PeriodicOperation, SendImdsHeartbeat, \ ResetPeriodicLogMessages, SendHostPluginHeartbeat, PollResourceUsage, \ ReportNetworkErrors, ReportNetworkConfigurationChanges, PollSystemWideResourceUsage -from tests.protocol.mocks import mock_wire_protocol, MockHttpResponse -from tests.protocol.HttpRequestPredicates import HttpRequestPredicates -from tests.protocol.mockwiredata import DATA_FILE -from tests.tools import Mock, MagicMock, patch, AgentTestCase, clear_singleton_instances +from tests.lib.mock_wire_protocol import mock_wire_protocol, MockHttpResponse +from tests.lib.http_request_predicates import HttpRequestPredicates +from tests.lib.wire_protocol_data import DATA_FILE +from tests.lib.tools import Mock, MagicMock, patch, AgentTestCase, clear_singleton_instances def random_generator(size=6, chars=string.ascii_uppercase + string.digits + string.ascii_lowercase): diff --git 
a/tests/ga/test_multi_config_extension.py b/tests/ga/test_multi_config_extension.py index 365052f5d4..66b366fd0a 100644 --- a/tests/ga/test_multi_config_extension.py +++ b/tests/ga/test_multi_config_extension.py @@ -13,12 +13,12 @@ from azurelinuxagent.common.utils import fileutil from azurelinuxagent.ga.exthandlers import get_exthandlers_handler, ExtensionStatusValue, ExtCommandEnvVariable, \ GoalStateStatus, ExtHandlerInstance -from tests.ga.extension_emulator import enable_invocations, extension_emulator, ExtensionCommandNames, Actions, \ +from tests.lib.extension_emulator import enable_invocations, extension_emulator, ExtensionCommandNames, Actions, \ extract_extension_info_from_command -from tests.protocol.mocks import mock_wire_protocol, MockHttpResponse -from tests.protocol.HttpRequestPredicates import HttpRequestPredicates -from tests.protocol.mockwiredata import DATA_FILE, WireProtocolData -from tests.tools import AgentTestCase, mock_sleep, patch +from tests.lib.mock_wire_protocol import mock_wire_protocol, MockHttpResponse +from tests.lib.http_request_predicates import HttpRequestPredicates +from tests.lib.wire_protocol_data import DATA_FILE, WireProtocolData +from tests.lib.tools import AgentTestCase, mock_sleep, patch class TestMultiConfigExtensionsConfigParsing(AgentTestCase): diff --git a/tests/ga/test_periodic_operation.py b/tests/ga/test_periodic_operation.py index 8fd8d32dc8..65980a1470 100644 --- a/tests/ga/test_periodic_operation.py +++ b/tests/ga/test_periodic_operation.py @@ -17,7 +17,7 @@ import datetime import time from azurelinuxagent.ga.monitor import PeriodicOperation -from tests.tools import AgentTestCase, patch, PropertyMock +from tests.lib.tools import AgentTestCase, patch, PropertyMock class TestPeriodicOperation(AgentTestCase): diff --git a/tests/ga/test_remoteaccess.py b/tests/ga/test_remoteaccess.py index 069931a157..f0e2ff2665 100644 --- a/tests/ga/test_remoteaccess.py +++ b/tests/ga/test_remoteaccess.py @@ -17,9 +17,9 @@ import 
xml from azurelinuxagent.common.protocol.goal_state import GoalState, RemoteAccess # pylint: disable=unused-import -from tests.tools import AgentTestCase, load_data, patch, Mock # pylint: disable=unused-import -from tests.protocol import mockwiredata -from tests.protocol.mocks import mock_wire_protocol +from tests.lib.tools import AgentTestCase, load_data, patch, Mock # pylint: disable=unused-import +from tests.lib import wire_protocol_data +from tests.lib.mock_wire_protocol import mock_wire_protocol class TestRemoteAccess(AgentTestCase): @@ -34,7 +34,7 @@ def test_parse_remote_access(self): self.assertEqual("2019-01-01", remote_access.user_list.users[0].expiration, "Expiration does not match.") def test_goal_state_with_no_remote_access(self): - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: self.assertIsNone(protocol.client.get_remote_access()) def test_parse_two_remote_access_accounts(self): @@ -75,7 +75,7 @@ def test_parse_zero_remote_access_accounts(self): self.assertEqual(0, len(remote_access.user_list.users), "User count does not match.") def test_update_remote_access_conf_remote_access(self): - with mock_wire_protocol(mockwiredata.DATA_FILE_REMOTE_ACCESS) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_REMOTE_ACCESS) as protocol: self.assertIsNotNone(protocol.client.get_remote_access()) self.assertEqual(1, len(protocol.client.get_remote_access().user_list.users)) self.assertEqual('testAccount', protocol.client.get_remote_access().user_list.users[0].name) diff --git a/tests/ga/test_remoteaccess_handler.py b/tests/ga/test_remoteaccess_handler.py index 37187702e3..d4f1579260 100644 --- a/tests/ga/test_remoteaccess_handler.py +++ b/tests/ga/test_remoteaccess_handler.py @@ -22,9 +22,9 @@ from azurelinuxagent.common.protocol.util import ProtocolUtil from azurelinuxagent.common.protocol.wire import WireProtocol from azurelinuxagent.ga.remoteaccess import 
RemoteAccessHandler -from tests.tools import AgentTestCase, load_data, patch, clear_singleton_instances -from tests.protocol.mocks import mock_wire_protocol -from tests.protocol.mockwiredata import DATA_FILE, DATA_FILE_REMOTE_ACCESS +from tests.lib.tools import AgentTestCase, load_data, patch, clear_singleton_instances +from tests.lib.mock_wire_protocol import mock_wire_protocol +from tests.lib.wire_protocol_data import DATA_FILE, DATA_FILE_REMOTE_ACCESS class MockOSUtil(DefaultOSUtil): diff --git a/tests/ga/test_report_status.py b/tests/ga/test_report_status.py index 1dcfe33edc..370bcb60f8 100644 --- a/tests/ga/test_report_status.py +++ b/tests/ga/test_report_status.py @@ -7,11 +7,11 @@ from azurelinuxagent.ga.agent_update_handler import get_agent_update_handler from azurelinuxagent.ga.exthandlers import ExtHandlersHandler from azurelinuxagent.ga.update import get_update_handler -from tests.ga.mocks import mock_update_handler -from tests.protocol.mocks import mock_wire_protocol, MockHttpResponse -from tests.tools import AgentTestCase, patch -from tests.protocol import mockwiredata -from tests.protocol.HttpRequestPredicates import HttpRequestPredicates +from tests.lib.mock_update_handler import mock_update_handler +from tests.lib.mock_wire_protocol import mock_wire_protocol, MockHttpResponse +from tests.lib.tools import AgentTestCase, patch +from tests.lib import wire_protocol_data +from tests.lib.http_request_predicates import HttpRequestPredicates class ReportStatusTestCase(AgentTestCase): @@ -32,7 +32,7 @@ def http_get_handler(url, *_, **__): def on_new_iteration(iteration): fail_goal_state_request[0] = iteration == 2 - with mock_wire_protocol(mockwiredata.DATA_FILE, http_get_handler=http_get_handler) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE, http_get_handler=http_get_handler) as protocol: exthandlers_handler = ExtHandlersHandler(protocol) with patch.object(exthandlers_handler, "run", wraps=exthandlers_handler.run) as 
exthandlers_handler_run: with mock_update_handler(protocol, iterations=2, on_new_iteration=on_new_iteration, exthandlers_handler=exthandlers_handler) as update_handler: @@ -75,7 +75,7 @@ def remove_timestamps(x): self.assertEqual(first_status, second_status) def test_report_status_should_log_errors_only_once_per_goal_state(self): - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=False): # skip agent update with patch("azurelinuxagent.ga.update.logger.warn") as logger_warn: with patch("azurelinuxagent.common.version.get_daemon_version", return_value=FlexibleVersion("2.2.53")): @@ -101,7 +101,7 @@ def test_report_status_should_log_errors_only_once_per_goal_state(self): self.assertEqual(2, len(get_warnings()), "UpdateHandler._report_status() should continue reporting errors after a new goal state") def test_update_handler_should_add_fast_track_to_supported_features_when_it_is_supported(self): - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_VM_SETTINGS) as protocol: self._test_supported_features_includes_fast_track(protocol, True) def test_update_handler_should_not_add_fast_track_to_supported_features_when_it_is_not_supported(self): @@ -110,7 +110,7 @@ def http_get_handler(url, *_, **__): return MockHttpResponse(status=404) return None - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS, http_get_handler=http_get_handler) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE_VM_SETTINGS, http_get_handler=http_get_handler) as protocol: self._test_supported_features_includes_fast_track(protocol, False) def _test_supported_features_includes_fast_track(self, protocol, expected): diff --git a/tests/ga/test_send_telemetry_events.py b/tests/ga/test_send_telemetry_events.py index 005a07b09c..c9e04a38ca 100644 --- 
a/tests/ga/test_send_telemetry_events.py +++ b/tests/ga/test_send_telemetry_events.py @@ -42,11 +42,11 @@ from azurelinuxagent.ga.collect_telemetry_events import _CollectAndEnqueueEvents from azurelinuxagent.ga.send_telemetry_events import get_send_telemetry_events_handler from tests.ga.test_monitor import random_generator -from tests.protocol.mocks import MockHttpResponse, mock_wire_protocol -from tests.protocol.HttpRequestPredicates import HttpRequestPredicates -from tests.protocol.mockwiredata import DATA_FILE -from tests.tools import AgentTestCase, clear_singleton_instances, mock_sleep -from tests.utils.event_logger_tools import EventLoggerTools +from tests.lib.mock_wire_protocol import MockHttpResponse, mock_wire_protocol +from tests.lib.http_request_predicates import HttpRequestPredicates +from tests.lib.wire_protocol_data import DATA_FILE +from tests.lib.tools import AgentTestCase, clear_singleton_instances, mock_sleep +from tests.lib.event_logger_tools import EventLoggerTools class TestSendTelemetryEventsHandler(AgentTestCase, HttpRequestPredicates): diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index e342fec7d3..ef2e3e66e5 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -49,13 +49,13 @@ get_update_handler, ORPHAN_POLL_INTERVAL, AGENT_PARTITION_FILE, ORPHAN_WAIT_INTERVAL, \ CHILD_LAUNCH_RESTART_MAX, CHILD_HEALTH_INTERVAL, GOAL_STATE_PERIOD_EXTENSIONS_DISABLED, UpdateHandler, \ READONLY_FILE_GLOBS, ExtensionsSummary -from tests.ga.mocks import mock_update_handler -from tests.protocol.mocks import mock_wire_protocol, MockHttpResponse -from tests.protocol.mockwiredata import DATA_FILE, DATA_FILE_MULTIPLE_EXT, DATA_FILE_VM_SETTINGS -from tests.tools import AgentTestCase, AgentTestCaseWithGetVmSizeMock, data_dir, DEFAULT, patch, load_bin_data, Mock, MagicMock, \ +from tests.lib.mock_update_handler import mock_update_handler +from tests.lib.mock_wire_protocol import mock_wire_protocol, MockHttpResponse +from 
tests.lib.wire_protocol_data import DATA_FILE, DATA_FILE_MULTIPLE_EXT, DATA_FILE_VM_SETTINGS +from tests.lib.tools import AgentTestCase, AgentTestCaseWithGetVmSizeMock, data_dir, DEFAULT, patch, load_bin_data, Mock, MagicMock, \ clear_singleton_instances, is_python_version_26_or_34, skip_if_predicate_true -from tests.protocol import mockwiredata -from tests.protocol.HttpRequestPredicates import HttpRequestPredicates +from tests.lib import wire_protocol_data +from tests.lib.http_request_predicates import HttpRequestPredicates NO_ERROR = { @@ -1504,7 +1504,7 @@ def __assert_ga_version_in_status(self, aggregate_status, version=str(CURRENT_VE "Guest Agent should be reported as Ready") def test_it_should_upgrade_agent_on_process_start_if_auto_upgrade_enabled(self): - data_file = mockwiredata.DATA_FILE.copy() + data_file = wire_protocol_data.DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" with self.__get_update_handler(test_data=data_file, iterations=10) as (update_handler, mock_telemetry): update_handler.run(debug=True) @@ -1543,7 +1543,7 @@ def test_it_should_not_auto_upgrade_if_auto_update_disabled(self): "New agent directory should not be found") def test_it_should_download_only_requested_version_if_available(self): - data_file = mockwiredata.DATA_FILE.copy() + data_file = wire_protocol_data.DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): update_handler.run(debug=True) @@ -1553,7 +1553,7 @@ def test_it_should_download_only_requested_version_if_available(self): self.__assert_agent_directories_exist_and_others_dont_exist(versions=["9.9.9.10"]) def test_it_should_download_largest_version_if_ga_versioning_disabled(self): - data_file = mockwiredata.DATA_FILE.copy() + data_file = wire_protocol_data.DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" with 
self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): with patch.object(conf, "get_enable_ga_versioning", return_value=False): @@ -1564,7 +1564,7 @@ def test_it_should_download_largest_version_if_ga_versioning_disabled(self): self.__assert_agent_directories_exist_and_others_dont_exist(versions=["99999.0.0.0"]) def test_it_should_cleanup_all_agents_except_requested_version_and_current_version(self): - data_file = mockwiredata.DATA_FILE.copy() + data_file = wire_protocol_data.DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" # Set the test environment by adding 20 random agents to the agent directory @@ -1580,7 +1580,7 @@ def test_it_should_cleanup_all_agents_except_requested_version_and_current_versi def test_it_should_not_update_if_requested_version_not_found_in_manifest(self): self.prepare_agents(1) - data_file = mockwiredata.DATA_FILE.copy() + data_file = wire_protocol_data.DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version_missing_in_manifest.xml" with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): update_handler.run(debug=True) @@ -1628,7 +1628,7 @@ def reload_conf(url, protocol): reload_conf.call_count = 0 reload_conf.incarnation = 2 - data_file = mockwiredata.DATA_FILE.copy() + data_file = wire_protocol_data.DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf) as (update_handler, mock_telemetry): update_handler._protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) @@ -1673,7 +1673,7 @@ def reload_conf(url, protocol): reload_conf.call_count = 0 reload_conf.incarnation = 2 - data_file = mockwiredata.DATA_FILE.copy() + data_file = wire_protocol_data.DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" with 
self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf) as (update_handler, mock_telemetry): update_handler._protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) @@ -1712,7 +1712,7 @@ def reload_conf(url, protocol): reload_conf.call_count = 0 reload_conf.incarnation = 2 - data_file = mockwiredata.DATA_FILE.copy() + data_file = wire_protocol_data.DATA_FILE.copy() # This is to fail the agent update at first attempt so that agent doesn't go through update data_file["ga_manifest"] = "wire/ga_manifest_no_uris.xml" with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf, @@ -1752,7 +1752,7 @@ def reload_conf(url, protocol): reload_conf.call_count = 0 reload_conf.incarnation = 2 - data_file = mockwiredata.DATA_FILE.copy() + data_file = wire_protocol_data.DATA_FILE.copy() data_file["ga_manifest"] = "wire/ga_manifest_no_uris.xml" with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf, hotfix_frequency=0.001, normal_frequency=0.001) as (update_handler, mock_telemetry): @@ -1765,7 +1765,7 @@ def reload_conf(url, protocol): self.__assert_agent_directories_exist_and_others_dont_exist(versions=["99999.0.0.0", str(CURRENT_VERSION)]) def test_it_should_not_download_anything_if_requested_version_is_current_version(self): - data_file = mockwiredata.DATA_FILE.copy() + data_file = wire_protocol_data.DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" # Set the test environment by adding 20 random agents to the agent directory @@ -1803,7 +1803,7 @@ def reload_conf(url, protocol): reload_conf.call_count = 0 - data_file = mockwiredata.DATA_FILE.copy() + data_file = wire_protocol_data.DATA_FILE.copy() data_file['ga_manifest'] = "wire/ga_manifest_no_upgrade.xml" with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf) as (update_handler, 
mock_telemetry): update_handler._protocol.mock_wire_data.set_ga_manifest_version_version(str(CURRENT_VERSION)) @@ -1824,7 +1824,7 @@ def test_it_should_mark_current_agent_as_bad_version_on_downgrade(self): "The current agent should not be blacklisted") downgraded_version = "2.5.0" - data_file = mockwiredata.DATA_FILE.copy() + data_file = wire_protocol_data.DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): update_handler._protocol.mock_wire_data.set_extension_config_requested_version(downgraded_version) @@ -2047,11 +2047,11 @@ class TryUpdateGoalStateTestCase(HttpRequestPredicates, AgentTestCase): """ def test_it_should_return_true_on_success(self): update_handler = get_update_handler() - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: self.assertTrue(update_handler._try_update_goal_state(protocol), "try_update_goal_state should have succeeded") def test_it_should_return_false_on_failure(self): - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: def http_get_handler(url, *_, **__): if self.is_goal_state_request(url): return HttpError('Exception to fake an error retrieving the goal state') @@ -2063,7 +2063,7 @@ def http_get_handler(url, *_, **__): def test_it_should_update_the_goal_state(self): update_handler = get_update_handler() - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: protocol.mock_wire_data.set_incarnation(12345) # the first goal state should produce an update @@ -2080,7 +2080,7 @@ def test_it_should_update_the_goal_state(self): self.assertEqual(update_handler._goal_state.incarnation, '6789', "The goal state was not updated (received unexpected incarnation)") def 
test_it_should_log_errors_only_when_the_error_state_changes(self): - with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: def http_get_handler(url, *_, **__): if self.is_goal_state_request(url): if fail_goal_state_request: @@ -2259,7 +2259,7 @@ def _prepare_fast_track_goal_state(): invokes HostPluginProtocol.fetch_vm_settings() to save the Fast Track status to disk """ # Do a query for the vmSettings; this would retrieve a FastTrack goal state and keep track of its timestamp - mock_wire_data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() + mock_wire_data_file = wire_protocol_data.DATA_FILE_VM_SETTINGS.copy() with mock_wire_protocol(mock_wire_data_file) as protocol: protocol.mock_wire_data.set_etag("0123456789") _ = protocol.client.get_host_plugin().fetch_vm_settings() @@ -2359,7 +2359,7 @@ class HeartbeatTestCase(AgentTestCase): @patch("azurelinuxagent.ga.update.add_event") def test_telemetry_heartbeat_creates_event(self, patch_add_event, patch_info, *_): - with mock_wire_protocol(mockwiredata.DATA_FILE) as mock_protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as mock_protocol: update_handler = get_update_handler() update_handler.last_telemetry_heartbeat = datetime.utcnow() - timedelta(hours=1) diff --git a/tests/utils/__init__.py b/tests/lib/__init__.py similarity index 100% rename from tests/utils/__init__.py rename to tests/lib/__init__.py diff --git a/tests/utils/cgroups_tools.py b/tests/lib/cgroups_tools.py similarity index 100% rename from tests/utils/cgroups_tools.py rename to tests/lib/cgroups_tools.py diff --git a/tests/utils/event_logger_tools.py b/tests/lib/event_logger_tools.py similarity index 89% rename from tests/utils/event_logger_tools.py rename to tests/lib/event_logger_tools.py index 626d71d9ef..5150cebd54 100644 --- a/tests/utils/event_logger_tools.py +++ b/tests/lib/event_logger_tools.py @@ -19,9 +19,9 @@ import platform import 
azurelinuxagent.common.event as event from azurelinuxagent.common.version import DISTRO_NAME, DISTRO_VERSION, DISTRO_CODE_NAME -import tests.tools as tools -from tests.protocol import mockwiredata -from tests.protocol.mocks import mock_wire_protocol +import tests.lib.tools as tools +from tests.lib import wire_protocol_data +from tests.lib.mock_wire_protocol import mock_wire_protocol class EventLoggerTools(object): @@ -37,7 +37,7 @@ class EventLoggerTools(object): def initialize_event_logger(event_dir): """ Initializes the event logger using mock data for the common parameters; the goal state fields are taken - from mockwiredata.DATA_FILE and the IMDS fields from mock_imds_data. + from wire_protocol_data.DATA_FILE and the IMDS fields from mock_imds_data. """ if not os.path.exists(event_dir): os.mkdir(event_dir) @@ -53,7 +53,7 @@ def initialize_event_logger(event_dir): mock_imds_client = tools.Mock() mock_imds_client.get_compute = tools.Mock(return_value=mock_imds_info) - with mock_wire_protocol(mockwiredata.DATA_FILE) as mock_protocol: + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as mock_protocol: with tools.patch("azurelinuxagent.common.event.get_imds_client", return_value=mock_imds_client): event.initialize_event_logger_vminfo_common_parameters(mock_protocol) diff --git a/tests/ga/extension_emulator.py b/tests/lib/extension_emulator.py similarity index 98% rename from tests/ga/extension_emulator.py rename to tests/lib/extension_emulator.py index dafd365dff..5b4f69d142 100644 --- a/tests/ga/extension_emulator.py +++ b/tests/lib/extension_emulator.py @@ -27,10 +27,10 @@ from azurelinuxagent.common.utils import fileutil from azurelinuxagent.ga.exthandlers import ExtHandlerInstance, ExtCommandEnvVariable -from tests.tools import Mock, patch -from tests.protocol.mockwiredata import WireProtocolData -from tests.protocol.mocks import MockHttpResponse -from tests.protocol.HttpRequestPredicates import HttpRequestPredicates +from tests.lib.tools import Mock, 
patch +from tests.lib.wire_protocol_data import WireProtocolData +from tests.lib.mock_wire_protocol import MockHttpResponse +from tests.lib.http_request_predicates import HttpRequestPredicates class ExtensionCommandNames(object): @@ -107,7 +107,7 @@ def enable_invocations(*emulators): def generate_put_handler(*emulators): """ Create a HTTP handler to store status blobs for each provided emulator. - For use with tests.protocol.mocks.mock_wire_protocol. + For use with tests.lib.mocks.mock_wire_protocol. """ def mock_put_handler(url, *args, **_): diff --git a/tests/protocol/HttpRequestPredicates.py b/tests/lib/http_request_predicates.py similarity index 100% rename from tests/protocol/HttpRequestPredicates.py rename to tests/lib/http_request_predicates.py diff --git a/tests/utils/miscellaneous_tools.py b/tests/lib/miscellaneous_tools.py similarity index 100% rename from tests/utils/miscellaneous_tools.py rename to tests/lib/miscellaneous_tools.py diff --git a/tests/common/mock_cgroup_environment.py b/tests/lib/mock_cgroup_environment.py similarity index 97% rename from tests/common/mock_cgroup_environment.py rename to tests/lib/mock_cgroup_environment.py index e38471060e..408e1c15cc 100644 --- a/tests/common/mock_cgroup_environment.py +++ b/tests/lib/mock_cgroup_environment.py @@ -17,8 +17,8 @@ # import contextlib import os -from tests.tools import patch, data_dir -from tests.common.mock_environment import MockEnvironment, MockCommand +from tests.lib.tools import patch, data_dir +from tests.lib.mock_environment import MockEnvironment, MockCommand _MOCKED_COMMANDS = [ MockCommand(r"^systemctl --version$", @@ -104,6 +104,7 @@ class UnitFilePaths: extension_service_memory_accounting = '/lib/systemd/system/extension.service.d/13-MemoryAccounting.conf' extension_service_memory_limit = '/lib/systemd/system/extension.service.d/14-MemoryLimit.conf' + @contextlib.contextmanager def mock_cgroup_environment(tmp_dir): """ diff --git a/tests/common/mock_command.py 
b/tests/lib/mock_command.py similarity index 100% rename from tests/common/mock_command.py rename to tests/lib/mock_command.py diff --git a/tests/common/mock_environment.py b/tests/lib/mock_environment.py similarity index 99% rename from tests/common/mock_environment.py rename to tests/lib/mock_environment.py index bedce0900d..8f5682cf8e 100644 --- a/tests/common/mock_environment.py +++ b/tests/lib/mock_environment.py @@ -22,7 +22,7 @@ from azurelinuxagent.common.future import ustr from azurelinuxagent.common.utils import fileutil -from tests.tools import patch, patch_builtin +from tests.lib.tools import patch, patch_builtin class MockCommand: diff --git a/tests/ga/mocks.py b/tests/lib/mock_update_handler.py similarity index 99% rename from tests/ga/mocks.py rename to tests/lib/mock_update_handler.py index 588825f780..f0b311abe2 100644 --- a/tests/ga/mocks.py +++ b/tests/lib/mock_update_handler.py @@ -23,7 +23,7 @@ from azurelinuxagent.ga.exthandlers import ExtHandlersHandler from azurelinuxagent.ga.remoteaccess import RemoteAccessHandler from azurelinuxagent.ga.update import UpdateHandler, get_update_handler -from tests.tools import patch, Mock, mock_sleep +from tests.lib.tools import patch, Mock, mock_sleep @contextlib.contextmanager diff --git a/tests/protocol/mocks.py b/tests/lib/mock_wire_protocol.py similarity index 96% rename from tests/protocol/mocks.py rename to tests/lib/mock_wire_protocol.py index b74138888b..4e3521fef2 100644 --- a/tests/protocol/mocks.py +++ b/tests/lib/mock_wire_protocol.py @@ -17,18 +17,18 @@ import contextlib from azurelinuxagent.common.protocol.wire import WireProtocol from azurelinuxagent.common.utils import restutil -from tests.tools import patch -from tests.protocol import mockwiredata +from tests.lib.tools import patch +from tests.lib import wire_protocol_data @contextlib.contextmanager def mock_wire_protocol(mock_wire_data_file, http_get_handler=None, http_post_handler=None, http_put_handler=None, do_not_mock=lambda method, 
url: False, fail_on_unknown_request=True): """ Creates a WireProtocol object that handles requests to the WireServer, the Host GA Plugin, and some requests to storage (requests that provide mock data - in mockwiredata.py). + in wire_protocol_data.py). The data returned by those requests is read from the files specified by 'mock_wire_data_file' (which must follow the structure of the data - files defined in tests/protocol/mockwiredata.py). + files defined in tests/protocol/wire_protocol_data.py). The caller can also provide handler functions for specific HTTP methods using the http_*_handler arguments. The return value of the handler function is interpreted similarly to the "return_value" argument of patch(): if it is an exception the exception is raised or, if it is @@ -135,7 +135,7 @@ def stop(): # create the protocol object # protocol = WireProtocol(restutil.KNOWN_WIRESERVER_IP) - protocol.mock_wire_data = mockwiredata.WireProtocolData(mock_wire_data_file) + protocol.mock_wire_data = wire_protocol_data.WireProtocolData(mock_wire_data_file) protocol.start = start protocol.stop = stop protocol.track_url = lambda url: tracked_urls.append(url) # pylint: disable=unnecessary-lambda diff --git a/tests/tools.py b/tests/lib/tools.py similarity index 99% rename from tests/tools.py rename to tests/lib/tools.py index 85d460d374..008be8552a 100644 --- a/tests/tools.py +++ b/tests/lib/tools.py @@ -38,6 +38,8 @@ from azurelinuxagent.common.utils import fileutil from azurelinuxagent.common.version import PY_VERSION_MAJOR +import tests + try: from unittest.mock import Mock, patch, MagicMock, ANY, DEFAULT, call, PropertyMock # pylint: disable=unused-import @@ -46,7 +48,7 @@ except ImportError: from mock import Mock, patch, MagicMock, ANY, DEFAULT, call, PropertyMock -test_dir = os.path.dirname(os.path.abspath(__file__)) +test_dir = tests.__path__[0] data_dir = os.path.join(test_dir, "data") debug = False diff --git a/tests/protocol/mockwiredata.py b/tests/lib/wire_protocol_data.py 
similarity index 99% rename from tests/protocol/mockwiredata.py rename to tests/lib/wire_protocol_data.py index c3beabf566..2bc18e34f1 100644 --- a/tests/protocol/mockwiredata.py +++ b/tests/lib/wire_protocol_data.py @@ -21,8 +21,8 @@ from azurelinuxagent.common.utils import timeutil from azurelinuxagent.common.utils.textutil import parse_doc, find, findall -from tests.protocol.HttpRequestPredicates import HttpRequestPredicates -from tests.tools import load_bin_data, load_data, MagicMock, Mock +from tests.lib.http_request_predicates import HttpRequestPredicates +from tests.lib.tools import load_bin_data, load_data, MagicMock, Mock from azurelinuxagent.common.protocol.imds import IMDS_ENDPOINT from azurelinuxagent.common.exception import HttpError, ResourceGoneError from azurelinuxagent.common.future import httpclient diff --git a/tests/pa/test_deprovision.py b/tests/pa/test_deprovision.py index 8680366a30..9970a249e4 100644 --- a/tests/pa/test_deprovision.py +++ b/tests/pa/test_deprovision.py @@ -23,7 +23,7 @@ from azurelinuxagent.pa.deprovision import get_deprovision_handler from azurelinuxagent.pa.deprovision.default import DeprovisionHandler -from tests.tools import AgentTestCase, distros, Mock, patch +from tests.lib.tools import AgentTestCase, distros, Mock, patch class TestDeprovision(AgentTestCase): diff --git a/tests/pa/test_provision.py b/tests/pa/test_provision.py index 59de0e97b3..66c525dff4 100644 --- a/tests/pa/test_provision.py +++ b/tests/pa/test_provision.py @@ -28,7 +28,7 @@ from azurelinuxagent.pa.provision.cloudinit import CloudInitProvisionHandler from azurelinuxagent.pa.provision.default import ProvisionHandler from azurelinuxagent.common.utils import fileutil -from tests.tools import AgentTestCase, distros, load_data, MagicMock, Mock, patch +from tests.lib.tools import AgentTestCase, distros, load_data, MagicMock, Mock, patch class TestProvision(AgentTestCase): diff --git a/tests/test_agent.py b/tests/test_agent.py index abbc090ba8..14053de41f 
100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -22,7 +22,7 @@ from azurelinuxagent.common.cgroupapi import SystemdCgroupsApi from azurelinuxagent.common.utils import fileutil from azurelinuxagent.ga.collect_logs import CollectLogsHandler -from tests.tools import AgentTestCase, data_dir, Mock, patch +from tests.lib.tools import AgentTestCase, data_dir, Mock, patch EXPECTED_CONFIGURATION = \ """AutoUpdate.Enabled = True From 57e74305a010906af306083231b1c3be963067cf Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Mon, 14 Aug 2023 16:50:23 -0700 Subject: [PATCH 055/240] Report useful message when extension processing is disabled (#2895) * Update version to dummy 1.0.0.0' * Revert version change * Fail GS fast in case of extensions disabled * Update extensions_disabled scenario to look for GS failed instead of timeout when extensions are disabled * Update to separate onHold and extensions enabled * Report ext disabled error in handler status * Try using GoalStateUnknownFailure * Fix indentation error * Try failing ext handler and checking logs * Report ext processing error * Attempt to fail fast * Fix param name * Init error * Try to reuse current code * Try to reuse current code * Clean code * Update scenario tests * Add ext status file to fail fast * Fail fast test * Report error when ext disabled * Update timeout to 20 mins * Re enable ext for debugging * Re enable ext for debugging * Log agent status update * Create ext status file with error code * Create ext status file with error code * We should report handler status even if not installed in case of extensions disabled * Clean up code change * Update tests for extensions disabled * Update test comment * Update test * Remove unused line * Remove ununsed timeout * Test failing case * Remove old case * Remove unused import * Test multiconfig ext * Add multi-config test case * Clean up test * Improve logging * Fix dir for testfile * Remove ignore error 
rules * Remove ununsed imports * Set handler status to not ready explicitly * Use OS Util to get agent conf path --- azurelinuxagent/ga/exthandlers.py | 37 +++++-- tests/ga/test_extension.py | 14 ++- .../extensions_disabled.py | 102 +++++++++++++----- 3 files changed, 117 insertions(+), 36 deletions(-) diff --git a/azurelinuxagent/ga/exthandlers.py b/azurelinuxagent/ga/exthandlers.py index 0aa4ed93d4..465c3caba9 100644 --- a/azurelinuxagent/ga/exthandlers.py +++ b/azurelinuxagent/ga/exthandlers.py @@ -33,6 +33,7 @@ from azurelinuxagent.common import conf from azurelinuxagent.common import logger +from azurelinuxagent.common.osutil import get_osutil from azurelinuxagent.common.utils import fileutil from azurelinuxagent.common import version from azurelinuxagent.common.agent_supported_feature import get_agent_supported_features_list_for_extensions, \ @@ -299,7 +300,7 @@ def run(self): # we make a deep copy of the extensions, since changes are made to self.ext_handlers while processing the extensions self.ext_handlers = copy.deepcopy(egs.extensions) - if not self._extension_processing_allowed(): + if self._extensions_on_hold(): return utc_start = datetime.datetime.utcnow() @@ -433,17 +434,15 @@ def _cleanup_outdated_handlers(self): except OSError as e: logger.warn("Failed to remove extension package {0}: {1}".format(pkg, e.strerror)) - def _extension_processing_allowed(self): - if not conf.get_extensions_enabled(): - logger.verbose("Extension handling is disabled") - return False - + def _extensions_on_hold(self): if conf.get_enable_overprovisioning(): if self.protocol.get_goal_state().extensions_goal_state.on_hold: - logger.info("Extension handling is on hold") - return False + msg = "Extension handling is on hold" + logger.info(msg) + add_event(op=WALAEventOperation.ExtensionProcessing, message=msg) + return True - return True + return False @staticmethod def __get_dependency_level(tup): @@ -478,10 +477,26 @@ def handle_ext_handlers(self, goal_state_id): 
max_dep_level = self.__get_dependency_level(all_extensions[-1]) if any(all_extensions) else 0 depends_on_err_msg = None + extensions_enabled = conf.get_extensions_enabled() for extension, ext_handler in all_extensions: handler_i = ExtHandlerInstance(ext_handler, self.protocol, extension=extension) + # In case of extensions disabled, we skip processing extensions. But CRP is still waiting for some status + # back for the skipped extensions. In order to propagate the status back to CRP, we will report status back + # here with an error message. + if not extensions_enabled: + agent_conf_file_path = get_osutil().agent_conf_file_path + msg = "Extension will not be processed since extension processing is disabled. To enable extension " \ + "processing, set Extensions.Enabled=y in '{0}'".format(agent_conf_file_path) + handler_i.set_handler_status(status=ExtHandlerStatusValue.not_ready, message=msg, code=-1) + handler_i.create_status_file_if_not_exist(extension, + status=ExtensionStatusValue.error, + code=-1, + operation=handler_i.operation, + message=msg) + continue + # In case of depends-on errors, we skip processing extensions if there was an error processing dependent extensions. # But CRP is still waiting for some status back for the skipped extensions. In order to propagate the status back to CRP, # we will report status back here with the relevant error message for each of the dependent extension. @@ -991,7 +1006,9 @@ def report_ext_handler_status(self, vm_status, ext_handler, goal_state_changed): # For MultiConfig, we need to report status per extension even for Handler level failures. 
# If we have HandlerStatus for a MultiConfig handler and GS is requesting for it, we would report status per # extension even if HandlerState == NotInstalled (Sample scenario: ExtensionsGoalStateError, DecideVersionError, etc) - if handler_state != ExtHandlerState.NotInstalled or ext_handler.supports_multi_config: + # We also need to report extension status for an uninstalled handler if extensions are disabled because CRP + # waits for extension runtime status before failing the extension operation. + if handler_state != ExtHandlerState.NotInstalled or ext_handler.supports_multi_config or not conf.get_extensions_enabled(): # Since we require reading the Manifest for reading the heartbeat, this would fail if HandlerManifest not found. # Only try to read heartbeat if HandlerState != NotInstalled. diff --git a/tests/ga/test_extension.py b/tests/ga/test_extension.py index 201868fc68..ff7f170060 100644 --- a/tests/ga/test_extension.py +++ b/tests/ga/test_extension.py @@ -1649,7 +1649,19 @@ def test_extensions_disabled(self, _, *args): exthandlers_handler.run() exthandlers_handler.report_ext_handlers_status() - self._assert_no_handler_status(protocol.report_vm_status) + report_vm_status = protocol.report_vm_status + self.assertTrue(report_vm_status.called) + args, kw = report_vm_status.call_args # pylint: disable=unused-variable + vm_status = args[0] + self.assertEqual(1, len(vm_status.vmAgent.extensionHandlers)) + exthandler = vm_status.vmAgent.extensionHandlers[0] + self.assertEqual(-1, exthandler.code) + self.assertEqual('NotReady', exthandler.status) + self.assertEqual("Extension will not be processed since extension processing is disabled. To enable extension processing, set Extensions.Enabled=y in '/etc/waagent.conf'", exthandler.message) + ext_status = exthandler.extension_status + self.assertEqual(-1, ext_status.code) + self.assertEqual('error', ext_status.status) + self.assertEqual("Extension will not be processed since extension processing is disabled. 
To enable extension processing, set Extensions.Enabled=y in '/etc/waagent.conf'", ext_status.message) def test_extensions_deleted(self, *args): # Ensure initial enable is successful diff --git a/tests_e2e/tests/extensions_disabled/extensions_disabled.py b/tests_e2e/tests/extensions_disabled/extensions_disabled.py index 66cafcfc1c..27c62427ab 100755 --- a/tests_e2e/tests/extensions_disabled/extensions_disabled.py +++ b/tests_e2e/tests/extensions_disabled/extensions_disabled.py @@ -24,9 +24,10 @@ import datetime import pytz +import uuid from assertpy import assert_that, fail -from typing import Any, Dict, List +from typing import Any from azure.mgmt.compute.models import VirtualMachineInstanceView @@ -39,39 +40,77 @@ class ExtensionsDisabled(AgentTest): + class TestCase: + def __init__(self, extension: VirtualMachineExtensionClient, settings: Any): + self.extension = extension + self.settings = settings + def run(self): ssh_client: SshClient = self._context.create_ssh_client() # Disable extension processing on the test VM + log.info("") log.info("Disabling extension processing on the test VM [%s]", self._context.vm.name) output = ssh_client.run_command("update-waagent-conf Extensions.Enabled=n", use_sudo=True) log.info("Disable completed:\n%s", output) - - # From now on, extensions will time out; set the timeout to the minimum allowed(15 minutes) - log.info("Setting the extension timeout to 15 minutes") - vm: VirtualMachineClient = VirtualMachineClient(self._context.vm) - - vm.update({"extensionsTimeBudget": "PT15M"}) - disabled_timestamp: datetime.datetime = datetime.datetime.utcnow() - datetime.timedelta(minutes=60) - # - # Validate that the agent is not processing extensions by attempting to run CustomScript - # - log.info("Executing CustomScript; it should time out after 15 min or so.") - custom_script = VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.CustomScript, resource_name="CustomScript") - try: - 
custom_script.enable(settings={'commandToExecute': "date"}, force_update=True, timeout=20 * 60) - fail("CustomScript should have timed out") - except Exception as error: - assert_that("VMExtensionProvisioningTimeout" in str(error)) \ - .described_as(f"Expected a VMExtensionProvisioningTimeout: {error}") \ - .is_true() - log.info("CustomScript timed out as expected") + # Prepare test cases + unique = str(uuid.uuid4()) + test_file = f"waagent-test.{unique}" + test_cases = [ + ExtensionsDisabled.TestCase( + VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.CustomScript, + resource_name="CustomScript"), + {'commandToExecute': f"echo '{unique}' > /tmp/{test_file}"} + ), + ExtensionsDisabled.TestCase( + VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.RunCommandHandler, + resource_name="RunCommandHandler"), + {'source': {'script': f"echo '{unique}' > /tmp/{test_file}"}} + ) + ] + + for t in test_cases: + log.info("") + log.info("Test case: %s", t.extension) + # + # Validate that the agent is not processing extensions by attempting to enable extension & checking that + # provisioning fails fast + # + log.info( + "Executing {0}; the agent should report a VMExtensionProvisioningError without processing the extension" + .format(t.extension.__str__())) + + try: + t.extension.enable(settings=t.settings, force_update=True, timeout=6 * 60) + fail("The agent should have reported an error processing the goal state") + except Exception as error: + assert_that("VMExtensionProvisioningError" in str(error)) \ + .described_as(f"Expected a VMExtensionProvisioningError error, but actual error was: {error}") \ + .is_true() + assert_that("Extension will not be processed since extension processing is disabled" in str(error)) \ + .described_as( + f"Error message should communicate that extension will not be processed, but actual error " + f"was: {error}").is_true() + log.info("Goal state processing for {0} failed as expected".format(t.extension.__str__())) + + # + 
# Validate the agent did not process the extension by checking it did not execute the extension settings + # + output = ssh_client.run_command("dir /tmp", use_sudo=True) + assert_that(output) \ + .described_as( + f"Contents of '/tmp' on test VM contains {test_file}. Contents: {output}. \n This indicates " + f"{t.extension.__str__()} was unexpectedly processed") \ + .does_not_contain(f"{test_file}") + log.info("The agent did not process the extension settings for {0} as expected".format(t.extension.__str__())) # # Validate that the agent continued reporting status even if it is not processing extensions # + vm: VirtualMachineClient = VirtualMachineClient(self._context.vm) + log.info("") instance_view: VirtualMachineInstanceView = vm.get_instance_view() log.info("Instance view of VM Agent:\n%s", instance_view.vm_agent.serialize()) assert_that(instance_view.vm_agent.statuses).described_as("The VM agent should have exactly 1 status").is_length(1) @@ -82,10 +121,23 @@ def run(self): .is_greater_than(pytz.utc.localize(disabled_timestamp)) log.info("The VM Agent reported status after extensions were disabled, as expected.") - def get_ignore_error_rules(self) -> List[Dict[str, Any]]: - return [ - {'message': 'No handler status found for Microsoft.Azure.Extensions.CustomScript'}, - ] + # + # Validate that the agent processes extensions after re-enabling extension processing + # + log.info("") + log.info("Enabling extension processing on the test VM [%s]", self._context.vm.name) + output = ssh_client.run_command("update-waagent-conf Extensions.Enabled=y", use_sudo=True) + log.info("Enable completed:\n%s", output) + + for t in test_cases: + try: + log.info("") + log.info("Executing {0}; the agent should process the extension".format(t.extension.__str__())) + t.extension.enable(settings=t.settings, force_update=True, timeout=15 * 60) + log.info("Goal state processing for {0} succeeded as expected".format(t.extension.__str__())) + except Exception as error: + fail(f"Unexpected 
error while processing {t.extension.__str__()} after re-enabling extension " + f"processing: {error}") if __name__ == "__main__": From a1fb057de6ea18dea46334b8969106e69c67e702 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Tue, 15 Aug 2023 09:10:00 -0700 Subject: [PATCH 056/240] Retry tar operations after 'Unexpected EOF in archive' during node setup (#2891) * Update version to dummy 1.0.0.0' * Revert version change * Capture output of the copy commands during setup * Add verbose to copy command * Update typing for copy to node methods * Print contents of tar before extracting * Print contents of tar before extracting * Print contents of tar before extracting * Print contents of tar before extracting * Retry copying tarball if contents on test node do not match * Revert copy method def * Revert copy method def * Catch EOF error * Retry tar operations if we see failure * Revert target_path * Remove accidental copy of exception * Remove blank line * tar cvf and copy commands overwrite --- .../orchestrator/lib/agent_test_suite.py | 89 ++++++++++++------- 1 file changed, 56 insertions(+), 33 deletions(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index 49e104d8db..eb90e4edcc 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -52,7 +52,7 @@ from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.logging import set_current_thread_log from tests_e2e.tests.lib.agent_log import AgentLogRecord -from tests_e2e.tests.lib.shell import run_command +from tests_e2e.tests.lib.shell import run_command, CommandError from tests_e2e.tests.lib.ssh_client import SshClient @@ -304,29 +304,6 @@ def _setup_node(self, install_test_agent: bool) -> None: log.info("Downloading %s to %s", pypy_download, pypy_path) run_command(["wget", pypy_download, "-O", pypy_path]) - # - # Create a tarball with the files 
we need to copy to the test node. The tarball includes two directories: - # - # * bin - Executables file (Bash and Python scripts) - # * lib - Library files (Python modules) - # - # After extracting the tarball on the test node, 'bin' will be added to PATH and PYTHONPATH will be set to 'lib'. - # - # Note that executables are placed directly under 'bin', while the path for Python modules is preserved under 'lib. - # - tarball_path: Path = Path("/tmp/waagent.tar") - log.info("Creating %s with the files need on the test node", tarball_path) - log.info("Adding orchestrator/scripts") - command = "cd {0} ; tar cvf {1} --transform='s,^,bin/,' *".format(self.context.test_source_directory/"orchestrator"/"scripts", str(tarball_path)) - log.info("%s\n%s", command, run_command(command, shell=True)) - log.info("Adding tests/scripts") - command = "cd {0} ; tar rvf {1} --transform='s,^,bin/,' *".format(self.context.test_source_directory/"tests"/"scripts", str(tarball_path)) - log.info("%s\n%s", command, run_command(command, shell=True)) - log.info("Adding tests/lib") - command = "cd {0} ; tar rvf {1} --transform='s,^,lib/,' --exclude=__pycache__ tests_e2e/tests/lib".format(self.context.test_source_directory.parent, str(tarball_path)) - log.info("%s\n%s", command, run_command(command, shell=True)) - log.info("Contents of %s:\n\n%s", tarball_path, run_command(['tar', 'tvf', str(tarball_path)])) - # # Cleanup the test node (useful for developer runs) # @@ -335,24 +312,70 @@ def _setup_node(self, install_test_agent: bool) -> None: self.context.ssh_client.run_command("rm -rvf ~/{bin,lib,tmp}", use_sudo=True) # - # Copy the tarball, Pypy and the test Agent to the test node + # Copy Pypy and the test Agent to the test node # target_path = Path("~")/"tmp" self.context.ssh_client.run_command(f"mkdir {target_path}") - log.info("Copying %s to %s:%s", tarball_path, self.context.node.name, target_path) - self.context.ssh_client.copy_to_node(tarball_path, target_path) log.info("Copying %s to 
%s:%s", pypy_path, self.context.node.name, target_path) self.context.ssh_client.copy_to_node(pypy_path, target_path) agent_package_path: Path = self._get_agent_package_path() log.info("Copying %s to %s:%s", agent_package_path, self.context.node.name, target_path) self.context.ssh_client.copy_to_node(agent_package_path, target_path) - # - # Extract the tarball and execute the install scripts - # - log.info('Installing tools on the test node') - command = f"tar xvf {target_path/tarball_path.name} && ~/bin/install-tools" - log.info("Remote command [%s] completed:\n%s", command, self.context.ssh_client.run_command(command)) + # tar commands sometimes fail with 'tar: Unexpected EOF in archive' error. Retry tarball creation, copy, and + # extraction if we hit this error + tar_retries = 3 + while tar_retries > 0: + try: + # + # Create a tarball with the files we need to copy to the test node. The tarball includes two directories: + # + # * bin - Executables file (Bash and Python scripts) + # * lib - Library files (Python modules) + # + # After extracting the tarball on the test node, 'bin' will be added to PATH and PYTHONPATH will be set to 'lib'. + # + # Note that executables are placed directly under 'bin', while the path for Python modules is preserved under 'lib. 
+ # + tarball_path: Path = Path("/tmp/waagent.tar") + log.info("Creating %s with the files need on the test node", tarball_path) + log.info("Adding orchestrator/scripts") + command = "cd {0} ; tar cvf {1} --transform='s,^,bin/,' *".format(self.context.test_source_directory/"orchestrator"/"scripts", str(tarball_path)) + log.info("%s\n%s", command, run_command(command, shell=True)) + log.info("Adding tests/scripts") + command = "cd {0} ; tar rvf {1} --transform='s,^,bin/,' *".format(self.context.test_source_directory/"tests"/"scripts", str(tarball_path)) + log.info("%s\n%s", command, run_command(command, shell=True)) + log.info("Adding tests/lib") + command = "cd {0} ; tar rvf {1} --transform='s,^,lib/,' --exclude=__pycache__ tests_e2e/tests/lib".format(self.context.test_source_directory.parent, str(tarball_path)) + log.info("%s\n%s", command, run_command(command, shell=True)) + log.info("Contents of %s:\n\n%s", tarball_path, run_command(['tar', 'tvf', str(tarball_path)])) + + # + # Copy the tarball to the test node + # + log.info("Copying %s to %s:%s", tarball_path, self.context.node.name, target_path) + self.context.ssh_client.copy_to_node(tarball_path, target_path) + + # + # Extract the tarball and execute the install scripts + # + log.info('Installing tools on the test node') + command = f"tar xvf {target_path/tarball_path.name} && ~/bin/install-tools" + log.info("Remote command [%s] completed:\n%s", command, self.context.ssh_client.run_command(command)) + + # Tarball creation and extraction was successful - no need to retry + tar_retries = 0 + + except CommandError as error: + if "tar: Unexpected EOF in archive" in error.stderr: + tar_retries -= 1 + # Log the error with traceback to see which tar operation failed + log.info(f"Tarball creation or extraction failed: \n{error}") + # Retry tar operations + if tar_retries > 0: + log.info("Retrying tarball creation and extraction...") + else: + raise Exception(f"Unexpected error when creating or extracting tarball 
during node setup: {error}") if self.context.is_vhd: log.info("Using a VHD; will not install the Test Agent.") From fd2a181844e4bc880c60f777e55065d8fcaab3c2 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Wed, 16 Aug 2023 09:36:11 -0700 Subject: [PATCH 057/240] Add log and telemetry event for extension disabled (#2897) * Update version to dummy 1.0.0.0' * Revert version change * Add logs and telemetry for processing extensions when extensions disabled * Reformat string --- azurelinuxagent/ga/exthandlers.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/azurelinuxagent/ga/exthandlers.py b/azurelinuxagent/ga/exthandlers.py index 465c3caba9..35fa44c076 100644 --- a/azurelinuxagent/ga/exthandlers.py +++ b/azurelinuxagent/ga/exthandlers.py @@ -489,6 +489,10 @@ def handle_ext_handlers(self, goal_state_id): agent_conf_file_path = get_osutil().agent_conf_file_path msg = "Extension will not be processed since extension processing is disabled. 
To enable extension " \ "processing, set Extensions.Enabled=y in '{0}'".format(agent_conf_file_path) + ext_full_name = handler_i.get_extension_full_name(extension) + logger.info('') + logger.info("{0}: {1}".format(ext_full_name, msg)) + add_event(op=WALAEventOperation.ExtensionProcessing, message="{0}: {1}".format(ext_full_name, msg)) handler_i.set_handler_status(status=ExtHandlerStatusValue.not_ready, message=msg, code=-1) handler_i.create_status_file_if_not_exist(extension, status=ExtensionStatusValue.error, From 98f3ac4aba56cfe691961b8005e9e204ca5c4f24 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Mon, 21 Aug 2023 11:14:45 -0700 Subject: [PATCH 058/240] Agent status scenario (#2875) * Update version to dummy 1.0.0.0' * Revert version change * Create files for agent status scenario * Add agent status test logic * fix pylint error * Add comment for retry * Mark failures as exceptions * Improve messages in logs * Improve comments * Update comments * Check that agent status updates without processing additional goal states 3 times * Remove unused agent status exception * Update comment * Clean up comments, logs, and imports * Exception should inherit from baseexception * Import datetime * Import datetime * Import timedelta * instance view time is already formatted * Increse status update time * Increse status update time * Increse status update time * Increase timeout * Update comments and timeoutS * Allow retry if agent status timestamp isn't updated after 30s * Remove unused import * Update time value in comment * address PR comments * Check if properties are None * Make types & errors more readable * Re-use vm_agent variable * Add comment for dot operator --- tests_e2e/orchestrator/runbook.yml | 2 +- tests_e2e/test_suites/agent_status.yml | 9 + tests_e2e/tests/agent_status/agent_status.py | 195 ++++++++++++++++++ .../agent_status-get_last_gs_processed.py | 47 +++++ 4 files changed, 252 insertions(+), 1 deletion(-) 
create mode 100644 tests_e2e/test_suites/agent_status.yml create mode 100644 tests_e2e/tests/agent_status/agent_status.py create mode 100755 tests_e2e/tests/scripts/agent_status-get_last_gs_processed.py diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index 7e19205747..cec2c1a58b 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -51,7 +51,7 @@ variable: # # The test suites to execute - name: test_suites - value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_update" + value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_update, agent_status" - name: cloud value: "AzureCloud" is_case_visible: true diff --git a/tests_e2e/test_suites/agent_status.yml b/tests_e2e/test_suites/agent_status.yml new file mode 100644 index 0000000000..86acfe05e1 --- /dev/null +++ b/tests_e2e/test_suites/agent_status.yml @@ -0,0 +1,9 @@ +# +# This scenario validates the agent status is updated without any goal state changes +# +name: "AgentStatus" +tests: + - "agent_status/agent_status.py" +images: + - "endorsed" + - "endorsed-arm64" diff --git a/tests_e2e/tests/agent_status/agent_status.py b/tests_e2e/tests/agent_status/agent_status.py new file mode 100644 index 0000000000..b9caef8f32 --- /dev/null +++ b/tests_e2e/tests/agent_status/agent_status.py @@ -0,0 +1,195 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#
# Validates the agent status is updated without processing additional goal states (aside from the first goal state
# from fabric)
#

from azure.mgmt.compute.models import VirtualMachineInstanceView, InstanceViewStatus, VirtualMachineAgentInstanceView
from assertpy import assert_that
from datetime import datetime, timedelta
from time import sleep
import json

from tests_e2e.tests.lib.agent_test import AgentTest
from tests_e2e.tests.lib.agent_test_context import AgentTestContext
from tests_e2e.tests.lib.logging import log
from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient


class RetryableAgentStatusException(Exception):
    """
    Raised when the agent status in the instance view is not (yet) valid; callers catch this by name and retry
    until validation succeeds or the test times out.

    NOTE: this previously derived from BaseException; application errors should derive from Exception so they
    are not confused with interpreter-exit exceptions (SystemExit, KeyboardInterrupt). All catches in this file
    name the class explicitly, so the change is behavior-compatible.
    """


class AgentStatus(AgentTest):
    """
    Validates that the agent status is updated periodically without the agent processing any additional goal
    states (aside from the initial goal state from fabric).
    """
    def __init__(self, context: AgentTestContext):
        super().__init__(context)
        # SSH client used to read the last processed goal state from the agent log on the test VM
        self._ssh_client = self._context.create_ssh_client()

    def validate_instance_view_vmagent_status(self, instance_view: VirtualMachineInstanceView):
        """
        Checks the first vm_agent status entry in the instance view; raises RetryableAgentStatusException if its
        'message', 'display_status', or 'time' properties are missing or report an unresponsive/not-ready agent.
        """
        status: InstanceViewStatus = instance_view.vm_agent.statuses[0]

        # Validate message field
        if status.message is None:
            raise RetryableAgentStatusException("Agent status is invalid: 'message' property in instance view is None")
        elif 'unresponsive' in status.message:
            raise RetryableAgentStatusException("Agent status is invalid: Instance view shows unresponsive agent")

        # Validate display status field
        if status.display_status is None:
            raise RetryableAgentStatusException("Agent status is invalid: 'display_status' property in instance view is None")
        elif 'Not Ready' in status.display_status:
            raise RetryableAgentStatusException("Agent status is invalid: Instance view shows agent status is not ready")

        # Validate time field
        if status.time is None:
            raise RetryableAgentStatusException("Agent status is invalid: 'time' property in instance view is None")

    def validate_instance_view_vmagent(self, instance_view: VirtualMachineInstanceView):
        """
        Checks that instance view has vm_agent.statuses and vm_agent.vm_agent_version properties which report the Guest
        Agent as running and Ready:

        "vm_agent": {
            "extension_handlers": [],
            "vm_agent_version": "9.9.9.9",
            "statuses": [
                {
                    "level": "Info",
                    "time": "2023-08-11T09:13:01.000Z",
                    "message": "Guest Agent is running",
                    "code": "ProvisioningState/succeeded",
                    "display_status": "Ready"
                }
            ]
        }
        """
        # Using dot operator for properties here because azure.mgmt.compute.models has classes for InstanceViewStatus
        # and VirtualMachineAgentInstanceView. All the properties we validate are attributes of these classes and
        # initialized to None
        if instance_view.vm_agent is None:
            raise RetryableAgentStatusException("Agent status is invalid: 'vm_agent' property in instance view is None")

        # Validate vm_agent_version field
        vm_agent: VirtualMachineAgentInstanceView = instance_view.vm_agent
        if vm_agent.vm_agent_version is None:
            raise RetryableAgentStatusException("Agent status is invalid: 'vm_agent_version' property in instance view is None")
        elif 'Unknown' in vm_agent.vm_agent_version:
            raise RetryableAgentStatusException("Agent status is invalid: Instance view shows agent version is unknown")

        # Validate statuses field
        if vm_agent.statuses is None:
            raise RetryableAgentStatusException("Agent status is invalid: 'statuses' property in instance view is None")
        elif len(instance_view.vm_agent.statuses) < 1:
            raise RetryableAgentStatusException("Agent status is invalid: Instance view is missing an agent status entry")
        else:
            self.validate_instance_view_vmagent_status(instance_view=instance_view)

        log.info("Instance view has valid agent status, agent version: {0}, status: {1}"
                 .format(vm_agent.vm_agent_version, vm_agent.statuses[0].display_status))

    def check_status_updated(self, status_timestamp: datetime, prev_status_timestamp: datetime, gs_processed_log: str, prev_gs_processed_log: str):
        """
        Checks that the agent status timestamp advanced since the previous iteration while the last processed
        goal state stayed the same. Raises RetryableAgentStatusException if the timestamp did not advance, and a
        plain Exception if an additional goal state was processed.
        """
        log.info("")
        log.info("Check that the agent status updated without processing any additional goal states...")

        # If prev_ variables are not updated, then this is the first reported agent status
        if prev_status_timestamp is not None and prev_gs_processed_log is not None:
            # The agent status timestamp should be greater than the prev timestamp
            if status_timestamp > prev_status_timestamp:
                log.info(
                    "Current agent status timestamp {0} is greater than previous status timestamp {1}"
                    .format(status_timestamp, prev_status_timestamp))
            else:
                raise RetryableAgentStatusException("Agent status failed to update: Current agent status timestamp {0} "
                                                    "is not greater than previous status timestamp {1}"
                                                    .format(status_timestamp, prev_status_timestamp))

            # The last goal state processed in the agent log should be the same as before
            if prev_gs_processed_log == gs_processed_log:
                # BUG FIX: the original chained a second, spurious .format(status_timestamp, prev_status_timestamp)
                # onto the already-formatted message; that second call was dead code at best and raised
                # IndexError/KeyError whenever the goal state log line happened to contain '{' or '}'.
                log.info(
                    "The last processed goal state is the same as the last processed goal state in the last agent "
                    "status update: \n{0}".format(gs_processed_log))
            else:
                raise Exception("Agent status failed to update without additional goal state: The agent processed an "
                                "additional goal state since the last agent status update. \n{0}"
                                "".format(gs_processed_log))

        log.info("")
        log.info("The agent status successfully updated without additional goal states")

    def run(self):
        log.info("")
        # The loop below requires exactly 2 successful status updates; the original message said "3 times",
        # which did not match the implementation.
        log.info("*******Verifying the agent status updates 2 times*******")

        vm = VirtualMachineClient(self._context.vm)

        timeout = datetime.now() + timedelta(minutes=6)
        instance_view_exception = None
        status_updated = 0
        prev_status_timestamp = None
        prev_gs_processed_log = None

        # Retry validating agent status updates 2 times with timeout of 6 minutes
        while datetime.now() <= timeout and status_updated < 2:
            instance_view = vm.get_instance_view()
            log.info("")
            log.info(
                "Check instance view to validate that the Guest Agent reports valid status...")
            log.info("Instance view of VM is:\n%s", json.dumps(instance_view.serialize(), indent=2))

            try:
                # Validate the guest agent reports valid status
                self.validate_instance_view_vmagent(instance_view)

                status_timestamp = instance_view.vm_agent.statuses[0].time
                gs_processed_log = self._ssh_client.run_command(
                    "agent_status-get_last_gs_processed.py", use_sudo=True)

                self.check_status_updated(status_timestamp, prev_status_timestamp, gs_processed_log, prev_gs_processed_log)

                # Update variables with timestamps for this update
                status_updated += 1
                prev_status_timestamp = status_timestamp
                prev_gs_processed_log = gs_processed_log

                # Sleep 30s to allow agent status to update before we check again
                sleep(30)

            except RetryableAgentStatusException as e:
                instance_view_exception = str(e)
                log.info("")
                log.info(instance_view_exception)
                log.info("Waiting 30s before retry...")
                sleep(30)

        # If status_updated is 0, we know the agent status in the instance view was never valid
        log.info("")
        assert_that(status_updated > 0).described_as(
            "Timeout has expired, instance view has invalid agent status: {0}".format(
                instance_view_exception)).is_true()

        # Fail the test if we weren't able to validate the agent status updated 2 times
        # (the original comment said "3 times", which did not match the assertion below)
        assert_that(status_updated == 2).described_as(
            "Timeout has expired, the agent status failed to update 2 times").is_true()


if __name__ == "__main__":
    AgentStatus.run_from_command_line()
+# +# Writes the last goal state processed line in the log to stdout +# +import re +import sys + +from tests_e2e.tests.lib.agent_log import AgentLog + + +def main(): + gs_completed_regex = r"ProcessExtensionsGoalState completed\s\[[a-z_\d]{13,14}\s\d+\sms\]" + last_gs_processed = None + agent_log = AgentLog() + + try: + for agent_record in agent_log.read(): + gs_complete = re.match(gs_completed_regex, agent_record.message) + + if gs_complete is not None: + last_gs_processed = agent_record.text + + except IOError as e: + print("Unable to get last goal state processed: {0}".format(str(e))) + + print(last_gs_processed) + sys.exit(0) + + +if __name__ == "__main__": + main() From 14f6124c5e685099fa54884eb9a26ac1f9acb313 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Mon, 21 Aug 2023 14:21:43 -0700 Subject: [PATCH 059/240] multi config scenario (#2898) * Update version to dummy 1.0.0.0' * Revert version change * multi config scenario bare bones * multi config scenario bare bones * Stash * Add multi config test * Run on arm64 * RCv2 is not supported on arm64 * Test should own VM * Add single config ext to test * Add single config ext to test * Do not fail test if there are unexpected extensions on the vm * Update comment for accuracy * Make resource name parameter optional * Clean up code --- tests_e2e/orchestrator/runbook.yml | 2 +- tests_e2e/test_suites/multi_config_ext.yml | 9 + .../lib/virtual_machine_extension_client.py | 4 +- .../multi_config_ext/multi_config_ext.py | 154 ++++++++++++++++++ 4 files changed, 166 insertions(+), 3 deletions(-) create mode 100644 tests_e2e/test_suites/multi_config_ext.yml create mode 100644 tests_e2e/tests/multi_config_ext/multi_config_ext.py diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index cec2c1a58b..080d7f2c2f 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -51,7 +51,7 @@ variable: # # The test suites 
to execute - name: test_suites - value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_update, agent_status" + value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_update, agent_status, multi_config_ext" - name: cloud value: "AzureCloud" is_case_visible: true diff --git a/tests_e2e/test_suites/multi_config_ext.yml b/tests_e2e/test_suites/multi_config_ext.yml new file mode 100644 index 0000000000..24bdaa7366 --- /dev/null +++ b/tests_e2e/test_suites/multi_config_ext.yml @@ -0,0 +1,9 @@ +# +# Multi-config extensions are no longer supported but there are still customers running RCv2 and we don't want to break +# them. This test suite is used to verify that the agent processes RCv2 (a multi-config extension) as expected. +# +name: "MultiConfigExt" +tests: + - "multi_config_ext/multi_config_ext.py" +images: + - "endorsed" diff --git a/tests_e2e/tests/lib/virtual_machine_extension_client.py b/tests_e2e/tests/lib/virtual_machine_extension_client.py index d94226e6ea..6697d594a6 100644 --- a/tests_e2e/tests/lib/virtual_machine_extension_client.py +++ b/tests_e2e/tests/lib/virtual_machine_extension_client.py @@ -40,11 +40,11 @@ class VirtualMachineExtensionClient(AzureClient): """ Client for operations virtual machine extensions. 
""" - def __init__(self, vm: VmIdentifier, extension: VmExtensionIdentifier, resource_name: str): + def __init__(self, vm: VmIdentifier, extension: VmExtensionIdentifier, resource_name: str = None): super().__init__() self._vm: VmIdentifier = vm self._identifier = extension - self._resource_name = resource_name + self._resource_name = resource_name or extension.type cloud: Cloud = AZURE_CLOUDS[vm.cloud] credential: DefaultAzureCredential = DefaultAzureCredential(authority=cloud.endpoints.active_directory) self._compute_client: ComputeManagementClient = ComputeManagementClient( diff --git a/tests_e2e/tests/multi_config_ext/multi_config_ext.py b/tests_e2e/tests/multi_config_ext/multi_config_ext.py new file mode 100644 index 0000000000..a42ca8900c --- /dev/null +++ b/tests_e2e/tests/multi_config_ext/multi_config_ext.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This test adds multiple instances of RCv2 and verifies that the extensions are processed and deleted as expected. 
#

import uuid
from typing import Dict, Callable, Any

from assertpy import fail
from azure.mgmt.compute.models import VirtualMachineInstanceView

from tests_e2e.tests.lib.agent_test import AgentTest
from tests_e2e.tests.lib.identifiers import VmExtensionIds
from tests_e2e.tests.lib.logging import log
from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient
from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient


class MultiConfigExt(AgentTest):
    """
    Adds multiple instances of RCv2 (a multi-config extension) and a single-config extension (CSE) to the test
    VM and verifies that each instance is enabled, reports its assigned guid in its status message, and is
    deleted as expected.
    """

    class TestCase:
        """
        Pairs an extension client with a settings factory. Each case gets a fresh unique guid; the extension's
        settings echo that guid so the status message can later be matched back to this case.
        """
        def __init__(self, extension: VirtualMachineExtensionClient, get_settings: Callable[[str], Dict[str, str]]):
            self.extension = extension
            self.get_settings = get_settings
            self.test_guid: str = str(uuid.uuid4())

    def enable_and_assert_test_cases(self, cases_to_enable: Dict[str, TestCase], cases_to_assert: Dict[str, TestCase], delete_extensions: bool = False):
        """
        Enables every extension in cases_to_enable, then checks that every extension in cases_to_assert reports
        its assigned guid in its status message; optionally deletes all asserted extensions afterwards.
        """
        for resource_name, test_case in cases_to_enable.items():
            log.info("")
            log.info("Adding {0} to the test VM. guid={1}".format(resource_name, test_case.test_guid))
            test_case.extension.enable(settings=test_case.get_settings(test_case.test_guid))
            test_case.extension.assert_instance_view()

        log.info("")
        log.info("Check that each extension has the expected guid in its status message...")
        for resource_name, test_case in cases_to_assert.items():
            log.info("")
            log.info("Checking {0} has expected status message with {1}".format(resource_name, test_case.test_guid))
            test_case.extension.assert_instance_view(expected_message=f"{test_case.test_guid}")

        # Delete each extension on the VM
        if delete_extensions:
            log.info("")
            log.info("Delete each extension...")
            self.delete_extensions(cases_to_assert)

    def delete_extensions(self, test_cases: Dict[str, TestCase]):
        """
        Deletes every extension in test_cases, then verifies via the VM instance view that none of them is still
        present on the VM. Extensions not listed in test_cases are ignored (the VM may have others installed).
        """
        for resource_name, test_case in test_cases.items():
            log.info("")
            log.info("Deleting {0} from the test VM".format(resource_name))
            test_case.extension.delete()

        log.info("")
        vm: VirtualMachineClient = VirtualMachineClient(self._context.vm)
        instance_view: VirtualMachineInstanceView = vm.get_instance_view()
        if instance_view.extensions is not None:
            for ext in instance_view.extensions:
                if ext.name in test_cases.keys():
                    fail("Extension was not deleted: \n{0}".format(ext))
        log.info("")
        log.info("All extensions were successfully deleted.")

    def run(self):
        # Create 3 different RCv2 extensions and a single config extension (CSE) and assign each a unique guid. Each
        # extension will have settings that echo its assigned guid. We will use this guid to verify the extension
        # statuses later.
        mc_settings: Callable[[Any], Dict[str, Dict[str, str]]] = lambda s: {
            "source": {"script": f"echo {s}"}}
        sc_settings: Callable[[Any], Dict[str, str]] = lambda s: {'commandToExecute': f"echo {s}"}

        test_cases: Dict[str, MultiConfigExt.TestCase] = {
            "MCExt1": MultiConfigExt.TestCase(
                VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.RunCommandHandler,
                                              resource_name="MCExt1"), mc_settings),
            "MCExt2": MultiConfigExt.TestCase(
                VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.RunCommandHandler,
                                              resource_name="MCExt2"), mc_settings),
            "MCExt3": MultiConfigExt.TestCase(
                VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.RunCommandHandler,
                                              resource_name="MCExt3"), mc_settings),
            "CSE": MultiConfigExt.TestCase(
                VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.CustomScript), sc_settings)
        }

        # Add each extension to the VM and validate the instance view has succeeded status with its assigned guid in the
        # status message
        log.info("")
        log.info("Add CSE and 3 instances of RCv2 to the VM. Each instance will echo a unique guid...")
        self.enable_and_assert_test_cases(cases_to_enable=test_cases, cases_to_assert=test_cases)

        # Update MCExt3 and CSE with new guids and add a new instance of RCv2 to the VM
        updated_test_cases: Dict[str, MultiConfigExt.TestCase] = {
            "MCExt3": MultiConfigExt.TestCase(
                VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.RunCommandHandler,
                                              resource_name="MCExt3"), mc_settings),
            "MCExt4": MultiConfigExt.TestCase(
                VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.RunCommandHandler,
                                              resource_name="MCExt4"), mc_settings),
            "CSE": MultiConfigExt.TestCase(
                VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.CustomScript), sc_settings)
        }
        test_cases.update(updated_test_cases)

        # Enable only the updated extensions, verify every extension has the correct test guid is in status message, and
        # remove all extensions from the test vm
        log.info("")
        log.info("Update MCExt3 and CSE with new guids and add a new instance of RCv2 to the VM...")
        self.enable_and_assert_test_cases(cases_to_enable=updated_test_cases, cases_to_assert=test_cases,
                                          delete_extensions=True)

        # Enable, verify, and remove only multi config extensions
        log.info("")
        log.info("Add only multi-config extensions to the VM...")
        mc_test_cases: Dict[str, MultiConfigExt.TestCase] = {
            "MCExt5": MultiConfigExt.TestCase(
                VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.RunCommandHandler,
                                              resource_name="MCExt5"), mc_settings),
            "MCExt6": MultiConfigExt.TestCase(
                VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.RunCommandHandler,
                                              resource_name="MCExt6"), mc_settings)
        }
        self.enable_and_assert_test_cases(cases_to_enable=mc_test_cases, cases_to_assert=mc_test_cases,
                                          delete_extensions=True)

        # Enable, verify, and delete only single config extensions
        log.info("")
        log.info("Add only single-config extension to the VM...")
        sc_test_cases: Dict[str, MultiConfigExt.TestCase] = {
            "CSE": MultiConfigExt.TestCase(
                VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.CustomScript), sc_settings)
        }
        self.enable_and_assert_test_cases(cases_to_enable=sc_test_cases, cases_to_assert=sc_test_cases,
                                          delete_extensions=True)


if __name__ == "__main__":
    MultiConfigExt.run_from_command_line()
tests_e2e/tests/scripts/ext_cgroups-check_cgroups_extensions.py diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index 080d7f2c2f..5b18c83f14 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -51,7 +51,7 @@ variable: # # The test suites to execute - name: test_suites - value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_update, agent_status, multi_config_ext" + value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_update, agent_status, multi_config_ext, agent_cgroups, ext_cgroups" - name: cloud value: "AzureCloud" is_case_visible: true diff --git a/tests_e2e/test_suites/agent_cgroups.yml b/tests_e2e/test_suites/agent_cgroups.yml new file mode 100644 index 0000000000..239f37e32a --- /dev/null +++ b/tests_e2e/test_suites/agent_cgroups.yml @@ -0,0 +1,7 @@ +# +# The test suite verify the agent running in expected cgroups and also, checks agent tracking the cgroups for polling resource metrics. +# +name: "AgentCgroups" +tests: + - "agent_cgroups/agent_cgroups.py" +images: "cgroups-endorsed" \ No newline at end of file diff --git a/tests_e2e/test_suites/ext_cgroups.yml b/tests_e2e/test_suites/ext_cgroups.yml new file mode 100644 index 0000000000..5b3e017f52 --- /dev/null +++ b/tests_e2e/test_suites/ext_cgroups.yml @@ -0,0 +1,10 @@ +# +# The test suite installs the few extensions and +# verify those extensions are running in expected cgroups and also, checks agent tracking those cgroups for polling resource metrics. +# +name: "ExtCgroups" +tests: + - "ext_cgroups/ext_cgroups.py" +images: "cgroups-endorsed" +# The DCR test extension installs sample service, so this test suite uses it to test services cgroups but this is only published in southcentralus region in public cloud. 
# ---- tests_e2e/tests/agent_cgroups/agent_cgroups.py ----

from tests_e2e.tests.lib.agent_test import AgentTest
from tests_e2e.tests.lib.agent_test_context import AgentTestContext
from tests_e2e.tests.lib.logging import log


class AgentCgroups(AgentTest):
    """
    This test verifies that the agent is running in the expected cgroups.
    """

    def __init__(self, context: AgentTestContext):
        super().__init__(context)
        # SSH client for the test VM; presumably consumed by the base class when running remote tests -- TODO confirm
        self._ssh_client = self._context.create_ssh_client()

    def run(self):
        """
        Runs the remote script that validates the agent's cgroup assignment on the test VM.
        """
        log.info("=====Validating agent cgroups=====")
        self._run_remote_test("agent_cgroups-check_cgroups_agent.py")
        log.info("Successfully Verified that agent present in correct cgroups")


if __name__ == "__main__":
    AgentCgroups.run_from_command_line()


# ---- tests_e2e/tests/ext_cgroups/ext_cgroups.py ----

from tests_e2e.tests.ext_cgroups.install_extensions import InstallExtensions
from tests_e2e.tests.lib.agent_test import AgentTest
from tests_e2e.tests.lib.agent_test_context import AgentTestContext
from tests_e2e.tests.lib.logging import log


class ExtCgroups(AgentTest):
    """
    This test verifies that the installed extensions are assigned correctly to their cgroups.
    """

    def __init__(self, context: AgentTestContext):
        super().__init__(context)
        # SSH client for the test VM; presumably consumed by the base class when running remote tests -- TODO confirm
        self._ssh_client = self._context.create_ssh_client()

    def run(self):
        """
        Installs a set of extensions, then runs the remote script (with sudo) that validates their cgroup assignment.
        """
        log.info("=====Installing extensions to validate ext cgroups scenario")
        InstallExtensions(self._context).run()
        log.info("=====Executing remote script check_cgroups_extensions.py to validate extension cgroups")
        self._run_remote_test("ext_cgroups-check_cgroups_extensions.py", use_sudo=True)
        log.info("Successfully verified that extensions present in correct cgroup")


if __name__ == "__main__":
    ExtCgroups.run_from_command_line()


# ---- tests_e2e/tests/ext_cgroups/install_extensions.py ----

from datetime import datetime, timedelta
from pathlib import Path

from tests_e2e.tests.lib.agent_test_context import AgentTestContext
from tests_e2e.tests.lib.identifiers import VmExtensionIds
from tests_e2e.tests.lib.logging import log
from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient


class InstallExtensions:
    """
    This test installs the multiple extensions in order to verify extensions cgroups in the next test.
    """

    def __init__(self, context: AgentTestContext):
        self._context = context
        # SSH client used to update the agent's configuration on the test VM
        self._ssh_client = self._context.create_ssh_client()

    def run(self):
        """Installs each of the extensions whose cgroup assignment is validated by the ext_cgroups test."""
        self._prepare_agent()
        # Install the GATest extension to test service cgroups
        self._install_gatest_extension()
        # Install the Azure Monitor Agent to test long running process cgroup
        self._install_ama()
        # Install the VM Access extension to test sample extension
        self._install_vmaccess()
        # Install the CSE extension to test extension cgroup
        self._install_cse()

    def _prepare_agent(self):
        """Extends the agent's cgroup monitoring deadline so it tracks the azuremonitoragent service cgroups."""
        log.info("=====Executing update-waagent-conf remote script to update monitoring deadline flag for tracking azuremonitoragent service")
        future_date = datetime.utcnow() + timedelta(days=2)
        expiry_time = future_date.date().strftime("%Y-%m-%d")
        # Agent needs extension info and its services info in the handlermanifest.xml to monitor and limit the resource usage.
        # As part of pilot testing, agent hardcoded the azuremonitoragent service name to monitor it for some time in production without need of a manifest update from the extension side.
        # So that they can get a sense of resource usage for their extensions. This we did for a few months and now we no longer monitor it in production.
        # But we are changing the config flag expiry time to a future date in this test, so that the test agent will start tracking the cgroups used by the service.
        result = self._ssh_client.run_command(f"update-waagent-conf Debug.CgroupMonitorExpiryTime={expiry_time}", use_sudo=True)
        log.info(result)
        log.info("Updated agent cgroups config(CgroupMonitorExpiryTime)")

    def _install_ama(self):
        """Installs the Azure Monitor Agent extension (long-running process cgroup case)."""
        ama_extension = VirtualMachineExtensionClient(
            self._context.vm, VmExtensionIds.AzureMonitorLinuxAgent,
            resource_name="AMAAgent")
        log.info("Installing %s", ama_extension)
        ama_extension.enable()
        ama_extension.assert_instance_view()

    def _install_vmaccess(self):
        """Installs the VM Access extension (sample extension cgroup case)."""
        # fetch the public key
        public_key_file: Path = Path(self._context.private_key_file).with_suffix(".pub")
        with public_key_file.open() as f:
            public_key = f.read()
        # Invoke the extension
        vm_access = VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.VmAccess, resource_name="VmAccess")
        log.info("Installing %s", vm_access)
        vm_access.enable(
            protected_settings={
                'username': self._context.username,
                'ssh_key': public_key,
                'reset_ssh': 'false'
            }
        )
        vm_access.assert_instance_view()

    def _install_gatest_extension(self):
        """Installs the GATest extension (service cgroup case)."""
        gatest_extension = VirtualMachineExtensionClient(
            self._context.vm, VmExtensionIds.GATestExtension,
            resource_name="GATestExt")
        log.info("Installing %s", gatest_extension)
        gatest_extension.enable()
        gatest_extension.assert_instance_view()


    def _install_cse(self):
        # Use custom script to output the cgroups assigned to it at runtime and save to /var/lib/waagent/tmp/custom_script_check.
        script_contents = """
mkdir /var/lib/waagent/tmp
cp /proc/$$/cgroup /var/lib/waagent/tmp/custom_script_check
"""
        custom_script_2_0 = VirtualMachineExtensionClient(
            self._context.vm,
            VmExtensionIds.CustomScript,
            resource_name="CustomScript")

        log.info("Installing %s", custom_script_2_0)
        custom_script_2_0.enable(
            protected_settings={
                'commandToExecute': f"echo \'{script_contents}\' | bash"
            }
        )
        custom_script_2_0.assert_instance_view()


# ---- tests_e2e/tests/lib/cgroup_helpers.py ----

import os
import re

from assertpy import assert_that, fail

from azurelinuxagent.common.osutil import systemd
from azurelinuxagent.common.utils import shellutil
from azurelinuxagent.common.version import DISTRO_NAME, DISTRO_VERSION
from tests_e2e.tests.lib.agent_log import AgentLog
from tests_e2e.tests.lib.logging import log

# Root of the cgroup file system
BASE_CGROUP = '/sys/fs/cgroup'
AGENT_CGROUP_NAME = 'WALinuxAgent'
AGENT_SERVICE_NAME = systemd.get_agent_unit_name()
AGENT_CONTROLLERS = ['cpu', 'memory']
EXT_CONTROLLERS = ['cpu', 'memory']

# NOTE(review): the named group in this pattern was garbled to '(?P[^\s]+)' in the source, which is invalid
# regex; restored as '(?P<path>...)' -- confirm against the agent log format this is matched against.
CGROUP_TRACKED_PATTERN = re.compile(r'Started tracking cgroup ([^\s]+)\s+\[(?P<path>[^\s]+)\]')

GATESTEXT_FULL_NAME = "Microsoft.Azure.Extensions.Edp.GATestExtGo"
GATESTEXT_SERVICE = "gatestext.service"
AZUREMONITOREXT_FULL_NAME = "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent"
AZUREMONITORAGENT_SERVICE = "azuremonitoragent.service"
MDSD_SERVICE = "mdsd.service"


def verify_if_distro_supports_cgroup():
    """
    checks if agent is running in a distro that supports cgroups
    """
    log.info("===== Checking if distro supports cgroups")

    base_cgroup_fs_exists = os.path.exists(BASE_CGROUP)

    assert_that(base_cgroup_fs_exists).is_true().described_as("Cgroup file system:{0} not found in Distro {1}-{2}".format(BASE_CGROUP, DISTRO_NAME, DISTRO_VERSION))
def print_cgroups():
    """
    log the mounted cgroups information
    """
    log.info("====== Currently mounted cgroups ======")
    for m in shellutil.run_command(['mount']).splitlines():
        # output is similar to
        #     mount
        #     sysfs on /sys type sysfs (rw,nosuid,nodev,noexec,relatime,seclabel)
        #     proc on /proc type proc (rw,nosuid,nodev,noexec,relatime)
        #     cgroup on /sys/fs/cgroup/systemd type cgroup (rw,nosuid,nodev,noexec,relatime,seclabel,xattr,name=systemd)
        #     cgroup on /sys/fs/cgroup/memory type cgroup (rw,nosuid,nodev,noexec,relatime,seclabel,memory)
        #     cgroup on /sys/fs/cgroup/blkio type cgroup (rw,nosuid,nodev,noexec,relatime,seclabel,blkio)
        if 'type cgroup' in m:
            log.info('\t%s', m)


def print_service_status():
    """
    Logs the systemd status output of the agent service.
    """
    log.info("====== Agent Service status ======")
    output = shellutil.run_command(["systemctl", "status", systemd.get_agent_unit_name()])
    for line in output.splitlines():
        log.info("\t%s", line)


def get_agent_cgroup_mount_path():
    """
    Returns the expected cgroup path of the agent service under azure.slice.
    """
    return os.path.join('/', 'azure.slice', AGENT_SERVICE_NAME)


def get_extension_cgroup_mount_path(extension_name):
    """
    Returns the expected cgroup path for the given extension under azure-vmextensions.slice.
    """
    return os.path.join('/', 'azure.slice/azure-vmextensions.slice',
                        "azure-vmextensions-" + extension_name + ".slice")


def get_unit_cgroup_mount_path(unit_name):
    """
    Returns the cgroup mount path for the given unit
    """
    output = shellutil.run_command(["systemctl", "show", unit_name, "--property", "ControlGroup"])
    # Output is similar to
    #     systemctl show walinuxagent.service --property ControlGroup
    #     ControlGroup=/azure.slice/walinuxagent.service
    # Extract the value on the right-hand side of '='.
    # BUG FIX: the named group had been garbled to '(?P.+)' which is invalid regex (re.error at match time);
    # restored to '(?P<value>.+)' to match the .group('value') call below.
    match = re.match("[^=]+=(?P<value>.+)", output)
    if match is not None:
        return match.group('value')
    return None


def verify_agent_cgroup_assigned_correctly():
    """
    This method checks agent is running and assigned to the correct cgroup using service status output
    """
    log.info("===== Verifying the daemon and the agent are assigned to the same correct cgroup using systemd")
    service_status = shellutil.run_command(["systemctl", "status", systemd.get_agent_unit_name()])
    log.info("Agent service status output:\n%s", service_status)
    is_active = False
    is_cgroup_assigned = False
    cgroup_mount_path = get_agent_cgroup_mount_path()
    is_active_pattern = re.compile(r".*Active:\s+active.*")

    for line in service_status.splitlines():
        if re.match(is_active_pattern, line):
            is_active = True
        elif cgroup_mount_path in line:
            is_cgroup_assigned = True

    if not is_active:
        fail('walinuxagent service was not active/running. Service status:{0}'.format(service_status))
    if not is_cgroup_assigned:
        fail('walinuxagent service was not assigned to the expected cgroup:{0}'.format(cgroup_mount_path))

    log.info("Successfully verified the agent cgroup assigned correctly by systemd\n")


def get_agent_cpu_quota():
    """
    Returns the cpu quota for the agent service
    """
    output = shellutil.run_command(["systemctl", "show", AGENT_SERVICE_NAME, "--property", "CPUQuotaPerSecUSec"])
    # Output is similar to
    #     systemctl show walinuxagent --property CPUQuotaPerSecUSec
    #     CPUQuotaPerSecUSec=infinity
    # BUG FIX: named group restored from the garbled, invalid '(?P.+)' to '(?P<value>.+)'.
    match = re.match("[^=]+=(?P<value>.+)", output)
    if match is not None:
        return match.group('value')
    return None


def check_agent_quota_disabled():
    """
    Returns True if the cpu quota is infinity
    """
    cpu_quota = get_agent_cpu_quota()
    return cpu_quota == 'infinity'


def check_cgroup_disabled_with_unknown_process():
    """
    Returns True if the cgroup is disabled with unknown process
    """
    for record in AgentLog().read():
        match = re.search("Disabling resource usage monitoring. Reason: Check on cgroups failed:.+UNKNOWN",
                          record.message, flags=re.DOTALL)
        if match is not None:
            log.info("Found message:\n\t%s", record.text.replace("\n", "\n\t"))
            return True
    return False
diff --git a/tests_e2e/tests/scripts/agent_cgroups-check_cgroups_agent.py b/tests_e2e/tests/scripts/agent_cgroups-check_cgroups_agent.py new file mode 100755 index 0000000000..2f3b877a0b --- /dev/null +++ b/tests_e2e/tests/scripts/agent_cgroups-check_cgroups_agent.py @@ -0,0 +1,115 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import re + +from assertpy import fail + +from tests_e2e.tests.lib.agent_log import AgentLog +from tests_e2e.tests.lib.cgroup_helpers import BASE_CGROUP, AGENT_CONTROLLERS, get_agent_cgroup_mount_path, \ + AGENT_SERVICE_NAME, verify_if_distro_supports_cgroup, print_cgroups, \ + verify_agent_cgroup_assigned_correctly +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.remote_test import run_remote_test + + +def verify_if_cgroup_controllers_are_mounted(): + """ + Checks if controllers CPU, Memory that agent use are mounted in the system + """ + log.info("===== Verifying cgroup controllers that agent use are mounted in the system") + + all_controllers_present = os.path.exists(BASE_CGROUP) + missing_controllers = [] + mounted_controllers = [] + + for controller in AGENT_CONTROLLERS: + controller_path = os.path.join(BASE_CGROUP, controller) + if not os.path.exists(controller_path): + all_controllers_present = False + missing_controllers.append(controller_path) + else: + mounted_controllers.append(controller_path) + + 
if not all_controllers_present: + fail('Not all of the controllers {0} mounted in expected cgroups. Mounted controllers are: {1}.\n ' + 'Missing controllers are: {2} \n System mounted cgroups are:\n{3}'.format(AGENT_CONTROLLERS, mounted_controllers, missing_controllers, print_cgroups())) + + log.info('Verified all cgroup controllers are present.\n {0}'.format(mounted_controllers)) + + +def verify_agent_cgroup_created_on_file_system(): + """ + Checks agent service is running in azure.slice/{agent_service) cgroup and mounted in same system cgroup controllers mounted path + """ + log.info("===== Verifying the agent cgroup paths exist on file system") + agent_cgroup_mount_path = get_agent_cgroup_mount_path() + all_agent_cgroup_controllers_path_exist = True + missing_agent_cgroup_controllers_path = [] + verified_agent_cgroup_controllers_path = [] + + log.info("expected agent cgroup mount path: %s", agent_cgroup_mount_path) + + for controller in AGENT_CONTROLLERS: + agent_controller_path = os.path.join(BASE_CGROUP, controller, agent_cgroup_mount_path[1:]) + + if not os.path.exists(agent_controller_path): + all_agent_cgroup_controllers_path_exist = False + missing_agent_cgroup_controllers_path.append(agent_controller_path) + else: + verified_agent_cgroup_controllers_path.append(agent_controller_path) + + if not all_agent_cgroup_controllers_path_exist: + fail("Agent's cgroup paths couldn't be found on file system. Missing agent cgroups path :{0}.\n Verified agent cgroups path:{1}".format(missing_agent_cgroup_controllers_path, verified_agent_cgroup_controllers_path)) + + log.info('Verified all agent cgroup paths are present.\n {0}'.format(verified_agent_cgroup_controllers_path)) + + +def verify_agent_cgroups_tracked(): + """ + Checks if agent is tracking agent cgroups path for polling resource usage. 
This is verified by checking the agent log for the message "Started tracking cgroup" + """ + log.info("===== Verifying agent started tracking cgroups from the log") + + tracking_agent_cgroup_message_re = r'Started tracking cgroup [^\s]+\s+\[(?P[^\s]+)\]' + tracked_cgroups = [] + + for record in AgentLog().read(): + match = re.search(tracking_agent_cgroup_message_re, record.message) + if match is not None: + tracked_cgroups.append(match.group('path')) + + for controller in AGENT_CONTROLLERS: + if not any(AGENT_SERVICE_NAME in cgroup_path and controller in cgroup_path for cgroup_path in tracked_cgroups): + fail('Agent {0} is not being tracked. Tracked cgroups:{1}'.format(controller, tracked_cgroups)) + + log.info("Agent is tracking cgroups correctly.\n%s", tracked_cgroups) + + +def main(): + verify_if_distro_supports_cgroup() + + verify_if_cgroup_controllers_are_mounted() + verify_agent_cgroup_created_on_file_system() + + verify_agent_cgroup_assigned_correctly() + verify_agent_cgroups_tracked() + + +run_remote_test(main) diff --git a/tests_e2e/tests/scripts/ext_cgroups-check_cgroups_extensions.py b/tests_e2e/tests/scripts/ext_cgroups-check_cgroups_extensions.py new file mode 100755 index 0000000000..48bd3f902e --- /dev/null +++ b/tests_e2e/tests/scripts/ext_cgroups-check_cgroups_extensions.py @@ -0,0 +1,224 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+
+import os
+
+from assertpy import fail
+
+from tests_e2e.tests.lib.agent_log import AgentLog
+from tests_e2e.tests.lib.cgroup_helpers import verify_if_distro_supports_cgroup, \
+    verify_agent_cgroup_assigned_correctly, BASE_CGROUP, EXT_CONTROLLERS, get_unit_cgroup_mount_path, \
+    GATESTEXT_SERVICE, AZUREMONITORAGENT_SERVICE, MDSD_SERVICE, check_agent_quota_disabled, \
+    check_cgroup_disabled_with_unknown_process, CGROUP_TRACKED_PATTERN, AZUREMONITOREXT_FULL_NAME, GATESTEXT_FULL_NAME, \
+    print_cgroups
+from tests_e2e.tests.lib.logging import log
+from tests_e2e.tests.lib.remote_test import run_remote_test
+
+
+def verify_custom_script_cgroup_assigned_correctly():
+    """
+    This method verifies that the CSE script created the expected folder after install and also checks if CSE ran under the expected cgroups
+    """
+    log.info("===== Verifying custom script was assigned to the correct cgroups")
+
+    # CSE creates this folder to save the output of the cgroup information under which the CSE script was executed. Since the CSE process exits after execution
+    # and cgroup paths get cleaned up by the system, this information is saved at run time, while the extension is executing.
+ check_temporary_folder_exists() + + cpu_mounted = False + memory_mounted = False + + log.info("custom script cgroup mounts:") + + with open('/var/lib/waagent/tmp/custom_script_check') as fh: + controllers = fh.read() + log.info("%s", controllers) + + extension_path = "/azure.slice/azure-vmextensions.slice/azure-vmextensions-Microsoft.Azure.Extensions.CustomScript" + + correct_cpu_mount_v1 = "cpu,cpuacct:{0}".format(extension_path) + correct_cpu_mount_v2 = "cpuacct,cpu:{0}".format(extension_path) + + correct_memory_mount = "memory:{0}".format(extension_path) + + for mounted_controller in controllers.split("\n"): + if correct_cpu_mount_v1 in mounted_controller or correct_cpu_mount_v2 in mounted_controller: + log.info('Custom script extension mounted under correct cgroup ' + 'for CPU: %s', mounted_controller) + cpu_mounted = True + elif correct_memory_mount in mounted_controller: + log.info('Custom script extension mounted under correct cgroup ' + 'for Memory: %s', mounted_controller) + memory_mounted = True + + if not cpu_mounted: + fail('Custom script not mounted correctly for CPU! Expected {0} or {1}'.format(correct_cpu_mount_v1, correct_cpu_mount_v2)) + + if not memory_mounted: + fail('Custom script not mounted correctly for Memory! 
Expected {0}'.format(correct_memory_mount)) + + +def check_temporary_folder_exists(): + tmp_folder = "/var/lib/waagent/tmp" + if not os.path.exists(tmp_folder): + fail("Temporary folder {0} was not created which means CSE script did not run!".format(tmp_folder)) + + +def verify_ext_cgroup_controllers_created_on_file_system(): + """ + This method ensure that extension cgroup controllers are created on file system after extension install + """ + log.info("===== Verifying ext cgroup controllers exist on file system") + + all_controllers_present = os.path.exists(BASE_CGROUP) + missing_controllers_path = [] + verified_controllers_path = [] + + for controller in EXT_CONTROLLERS: + controller_path = os.path.join(BASE_CGROUP, controller) + if not os.path.exists(controller_path): + all_controllers_present = False + missing_controllers_path.append(controller_path) + else: + verified_controllers_path.append(controller_path) + + if not all_controllers_present: + fail('Expected all of the extension controller: {0} paths present in the file system after extension install. 
But missing cgroups paths are :{1}\n' + 'and verified cgroup paths are: {2} \nSystem mounted cgroups are \n{3}'.format(EXT_CONTROLLERS, missing_controllers_path, verified_controllers_path, print_cgroups())) + + log.info('Verified all extension cgroup controller paths are present and they are: \n {0}'.format(verified_controllers_path)) + + +def verify_extension_service_cgroup_created_on_file_system(): + """ + This method ensure that extension service cgroup paths are created on file system after running extension + """ + log.info("===== Verifying the extension service cgroup paths exist on file system") + + # GA Test Extension Service + gatestext_cgroup_mount_path = get_unit_cgroup_mount_path(GATESTEXT_SERVICE) + verify_extension_service_cgroup_created(GATESTEXT_SERVICE, gatestext_cgroup_mount_path) + + # Azure Monitor Extension Service + azuremonitoragent_cgroup_mount_path = get_unit_cgroup_mount_path(AZUREMONITORAGENT_SERVICE) + azuremonitoragent_service_name = AZUREMONITORAGENT_SERVICE + # Old versions of AMA extension has different service name + if azuremonitoragent_cgroup_mount_path is None: + azuremonitoragent_cgroup_mount_path = get_unit_cgroup_mount_path(MDSD_SERVICE) + azuremonitoragent_service_name = MDSD_SERVICE + verify_extension_service_cgroup_created(azuremonitoragent_service_name, azuremonitoragent_cgroup_mount_path) + + log.info('Verified all extension service cgroup paths created in file system .\n') + + +def verify_extension_service_cgroup_created(service_name, cgroup_mount_path): + log.info("expected extension service cgroup mount path: %s", cgroup_mount_path) + + all_controllers_present = True + missing_cgroups_path = [] + verified_cgroups_path = [] + + for controller in EXT_CONTROLLERS: + # cgroup_mount_path is similar to /azure.slice/walinuxagent.service + # cgroup_mount_path[1:] = azure.slice/walinuxagent.service + # expected extension_service_controller_path similar to /sys/fs/cgroup/cpu/azure.slice/walinuxagent.service + 
extension_service_controller_path = os.path.join(BASE_CGROUP, controller, cgroup_mount_path[1:]) + + if not os.path.exists(extension_service_controller_path): + all_controllers_present = False + missing_cgroups_path.append(extension_service_controller_path) + else: + verified_cgroups_path.append(extension_service_controller_path) + + if not all_controllers_present: + fail("Extension service: [{0}] cgroup paths couldn't be found on file system. Missing cgroup paths are: {1} \n Verified cgroup paths are: {2} \n " + "System mounted cgroups are \n{3}".format(service_name, missing_cgroups_path, verified_cgroups_path, print_cgroups())) + + +def verify_ext_cgroups_tracked(): + """ + Checks if ext cgroups are tracked by the agent. This is verified by checking the agent log for the message "Started tracking cgroup {extension_name}" + """ + log.info("===== Verifying ext cgroups tracked") + + cgroups_added_for_telemetry = [] + gatestext_cgroups_tracked = False + azuremonitoragent_cgroups_tracked = False + gatestext_service_cgroups_tracked = False + azuremonitoragent_service_cgroups_tracked = False + + for record in AgentLog().read(): + + # Cgroup tracking logged as + # 2021-11-14T13:09:59.351961Z INFO ExtHandler ExtHandler Started tracking cgroup Microsoft.Azure.Extensions.Edp.GATestExtGo-1.0.0.2 + # [/sys/fs/cgroup/cpu,cpuacct/azure.slice/azure-vmextensions.slice/azure-vmextensions-Microsoft.Azure.Extensions.Edp.GATestExtGo_1.0.0.2.slice] + cgroup_tracked_match = CGROUP_TRACKED_PATTERN.findall(record.message) + if len(cgroup_tracked_match) != 0: + name, path = cgroup_tracked_match[0][0], cgroup_tracked_match[0][1] + if name.startswith(GATESTEXT_FULL_NAME): + gatestext_cgroups_tracked = True + elif name.startswith(AZUREMONITOREXT_FULL_NAME): + azuremonitoragent_cgroups_tracked = True + elif name.startswith(GATESTEXT_SERVICE): + gatestext_service_cgroups_tracked = True + elif name.startswith(AZUREMONITORAGENT_SERVICE) or name.startswith(MDSD_SERVICE): + 
azuremonitoragent_service_cgroups_tracked = True + cgroups_added_for_telemetry.append((name, path)) + + # agent, gatest extension, azuremonitor extension and extension service cgroups + if len(cgroups_added_for_telemetry) < 1: + fail('Expected cgroups were not tracked, according to the agent log. ' + 'Pattern searched for: {0} and found \n{1}'.format(CGROUP_TRACKED_PATTERN.pattern, cgroups_added_for_telemetry)) + + if not gatestext_cgroups_tracked: + fail('Expected gatestext cgroups were not tracked, according to the agent log. ' + 'Pattern searched for: {0} and found \n{1}'.format(CGROUP_TRACKED_PATTERN.pattern, cgroups_added_for_telemetry)) + + if not azuremonitoragent_cgroups_tracked: + fail('Expected azuremonitoragent cgroups were not tracked, according to the agent log. ' + 'Pattern searched for: {0} and found \n{1}'.format(CGROUP_TRACKED_PATTERN.pattern, cgroups_added_for_telemetry)) + + if not gatestext_service_cgroups_tracked: + fail('Expected gatestext service cgroups were not tracked, according to the agent log. ' + 'Pattern searched for: {0} and found \n{1}'.format(CGROUP_TRACKED_PATTERN.pattern, cgroups_added_for_telemetry)) + + if not azuremonitoragent_service_cgroups_tracked: + fail('Expected azuremonitoragent service cgroups were not tracked, according to the agent log. 
' + 'Pattern searched for: {0} and found \n{1}'.format(CGROUP_TRACKED_PATTERN.pattern, cgroups_added_for_telemetry)) + + log.info("Extension cgroups tracked as expected\n%s", cgroups_added_for_telemetry) + + +def main(): + verify_if_distro_supports_cgroup() + verify_ext_cgroup_controllers_created_on_file_system() + verify_custom_script_cgroup_assigned_correctly() + verify_agent_cgroup_assigned_correctly() + verify_extension_service_cgroup_created_on_file_system() + verify_ext_cgroups_tracked() + + +try: + run_remote_test(main) +except Exception as e: + # It is possible that agent cgroup can be disabled due to UNKNOWN process or throttled before we run this check, in that case, we should ignore the validation + if check_agent_quota_disabled() and check_cgroup_disabled_with_unknown_process(): + log.info("Cgroup is disabled due to UNKNOWN process, ignoring ext cgroups validations") + else: + raise From d8b3c3b429d6c360edaf6f10ac0b53c9966115e8 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 22 Aug 2023 10:01:47 -0700 Subject: [PATCH 061/240] agent firewall scenario (#2879) * agent firewall scenario * address comments * improved logging * pylint warn * address comments * updated * address comments * pylint warning * pylint warning * address comment * merge conflicts --- tests_e2e/orchestrator/runbook.yml | 2 +- tests_e2e/test_suites/agent_firewall.yml | 15 + .../tests/agent_firewall/agent_firewall.py | 42 ++ .../tests/lib/add_network_security_group.py | 10 +- tests_e2e/tests/lib/retry.py | 4 +- ...gent_firewall-verify_all_firewall_rules.py | 473 ++++++++++++++++++ 6 files changed, 538 insertions(+), 8 deletions(-) create mode 100644 tests_e2e/test_suites/agent_firewall.yml create mode 100644 tests_e2e/tests/agent_firewall/agent_firewall.py create mode 100755 tests_e2e/tests/scripts/agent_firewall-verify_all_firewall_rules.py diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml 
index 5b18c83f14..016f79546e 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -51,7 +51,7 @@ variable: # # The test suites to execute - name: test_suites - value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_update, agent_status, multi_config_ext, agent_cgroups, ext_cgroups" + value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_update, agent_status, multi_config_ext, agent_cgroups, ext_cgroups, agent_firewall" - name: cloud value: "AzureCloud" is_case_visible: true diff --git a/tests_e2e/test_suites/agent_firewall.yml b/tests_e2e/test_suites/agent_firewall.yml new file mode 100644 index 0000000000..0e095ba39e --- /dev/null +++ b/tests_e2e/test_suites/agent_firewall.yml @@ -0,0 +1,15 @@ +# +# This test verifies that the agent firewall rules are set correctly. The expected firewall rules are: +# 0 0 ACCEPT tcp -- * * 0.0.0.0/0 168.63.129.16 tcp dpt:53 +# 0 0 ACCEPT tcp -- * * 0.0.0.0/0 168.63.129.16 owner UID match 0 +# 0 0 DROP tcp -- * * 0.0.0.0/0 168.63.129.16 ctstate INVALID,NEW +# The first rule allows tcp traffic to port 53 for non root user. The second rule allows traffic to wireserver for root user. +# The third rule drops all other traffic to wireserver. +# +name: "AgentFirewall" +tests: + - "agent_firewall/agent_firewall.py" +images: + - "endorsed" + - "endorsed-arm64" +owns_vm: true # This vm cannot be shared with other tests because it modifies the firewall rules and agent status. 
\ No newline at end of file diff --git a/tests_e2e/tests/agent_firewall/agent_firewall.py b/tests_e2e/tests/agent_firewall/agent_firewall.py new file mode 100644 index 0000000000..804443a470 --- /dev/null +++ b/tests_e2e/tests/agent_firewall/agent_firewall.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test_context import AgentTestContext +from tests_e2e.tests.lib.logging import log + + +class AgentFirewall(AgentTest): + """ + This test verifies the agent firewall rules are added properly. It checks each firewall rule is present and working as expected. 
+ """ + + def __init__(self, context: AgentTestContext): + super().__init__(context) + self._ssh_client = self._context.create_ssh_client() + + def run(self): + log.info("Checking iptable rules added by the agent") + self._run_remote_test(f"agent_firewall-verify_all_firewall_rules.py --user {self._context.username}", use_sudo=True) + log.info("Successfully verified all rules present and working as expected.") + + +if __name__ == "__main__": + AgentFirewall.run_from_command_line() + + diff --git a/tests_e2e/tests/lib/add_network_security_group.py b/tests_e2e/tests/lib/add_network_security_group.py index 2c65e27f9e..28cf69b59f 100644 --- a/tests_e2e/tests/lib/add_network_security_group.py +++ b/tests_e2e/tests/lib/add_network_security_group.py @@ -16,10 +16,10 @@ # import json -import http.client from typing import Any, Dict, List +from azurelinuxagent.common.utils import shellutil from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.retry import retry from tests_e2e.tests.lib.update_arm_template import UpdateArmTemplate @@ -140,10 +140,10 @@ def _my_ip_address(self) -> str: """ if self.__my_ip_address is None: def get_my_address(): - connection = http.client.HTTPSConnection("ifconfig.io") - connection.request("GET", "/ip") - response = connection.getresponse() - return response.read().decode().strip() + # Forcing -4 option to fetch the ipv4 address + cmd = ["curl", "-4", "ifconfig.io/ip"] + stdout = shellutil.run_command(cmd) + return stdout.strip() self.__my_ip_address = retry(get_my_address, attempts=3, delay=10) return self.__my_ip_address diff --git a/tests_e2e/tests/lib/retry.py b/tests_e2e/tests/lib/retry.py index eec6e08a0b..3996b3ba3e 100644 --- a/tests_e2e/tests/lib/retry.py +++ b/tests_e2e/tests/lib/retry.py @@ -72,9 +72,9 @@ def retry_if_false(operation: Callable[[], bool], attempts: int = 5, delay: int log.warning("Error in operation: %s", e) if attempts == 0: raise - if not success: + if not success and attempts != 0: log.info("Current 
operation failed, retrying in %s secs.", delay) - time.sleep(delay) + time.sleep(delay) return success diff --git a/tests_e2e/tests/scripts/agent_firewall-verify_all_firewall_rules.py b/tests_e2e/tests/scripts/agent_firewall-verify_all_firewall_rules.py new file mode 100755 index 0000000000..3af6bf69e1 --- /dev/null +++ b/tests_e2e/tests/scripts/agent_firewall-verify_all_firewall_rules.py @@ -0,0 +1,473 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# This script checks all agent firewall rules added properly and working as expected +# +import argparse +import os +import pwd +import socket +from typing import List, Tuple + +from assertpy import fail + +from azurelinuxagent.common.utils import shellutil +from azurelinuxagent.common.utils.shellutil import CommandError +from azurelinuxagent.common.utils.textutil import format_exception +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.remote_test import run_remote_test +import http.client as httpclient + +from tests_e2e.tests.lib.retry import retry_if_false, retry + +ROOT_USER = 'root' +WIRESERVER_ENDPOINT_FILE = '/var/lib/waagent/WireServerEndpoint' +WIRESERVER_IP = '168.63.129.16' +VERSIONS_PATH = '/?comp=versions' +FIREWALL_PERIOD = 60 + + +class FirewallRules(object): + # -D deletes the specific rule in the iptable chain + DELETE_COMMAND = "-D" + + # -C checks if a specific rule exists + CHECK_COMMAND = "-C" + + +def get_wireserver_ip() -> str: + try: + with open(WIRESERVER_ENDPOINT_FILE, 'r') as f: + wireserver_ip = f.read() + except Exception: + wireserver_ip = WIRESERVER_IP + return wireserver_ip + + +def switch_user(user: str) -> None: + """ + This function switches the function to a given user + """ + try: + uid = pwd.getpwnam(user)[2] + log.info("uid:%s and user name:%s", uid, user) + os.seteuid(uid) + except Exception as e: + raise Exception("Error -- failed to switch user to {0} : Failed with exception {1}".format(user, e)) + + +def get_root_accept_rule_command(command: str) -> List[str]: + return ['sudo', 'iptables', '-t', 'security', command, 'OUTPUT', '-d', get_wireserver_ip(), '-p', 'tcp', '-m', + 'owner', + '--uid-owner', + '0', '-j', 'ACCEPT', '-w'] + + +def get_non_root_accept_rule_command(command: str) -> List[str]: + return ['sudo', 'iptables', '-t', 'security', command, 'OUTPUT', '-d', get_wireserver_ip(), '-p', 'tcp', + '--destination-port', '53', '-j', + 'ACCEPT', '-w'] + + +def 
get_non_root_drop_rule_command(command: str) -> List[str]: + return ['sudo', 'iptables', '-t', 'security', command, 'OUTPUT', '-d', get_wireserver_ip(), '-p', 'tcp', '-m', + 'conntrack', '--ctstate', + 'INVALID,NEW', '-j', 'DROP', '-w'] + + +def execute_cmd(cmd: List[str]): + """ + Note: The shellutil.run_command return stdout if exit_code=0, otherwise returns commanderror + """ + try: + stdout = shellutil.run_command(cmd) + except CommandError as e: + return e.returncode, e.stdout, e.stderr + return 0, stdout, "" + + +def check_if_iptable_rule_is_available(full_command: List[str]) -> bool: + """ + This function is used to check if given rule is present in iptable rule set + "-C" return exit code 0 if the rule is available. + """ + exit_code, _, _ = execute_cmd(full_command) + if exit_code == 0: + return True + return False + + +def print_current_iptable_rules(): + """ + This function prints the current iptable rules + """ + try: + cmd = ['sudo', 'iptables', '-L', 'OUTPUT', '-t', 'security', '-nxv'] + exit_code, stdout, stderr = execute_cmd(cmd) + if exit_code != 0: + log.warning("Warning -- Failed to fetch the ip table rules with error code: %s and error: %s", exit_code, stderr) + else: + for line in stdout.splitlines(): + log.info(str(line)) + except Exception as error: + raise Exception("Error -- Failed to fetch the ip table rule set {0}".format(error)) + + +def get_all_iptable_rule_commands(command: str) -> Tuple[List[str], List[str], List[str]]: + return get_root_accept_rule_command(command), get_non_root_accept_rule_command(command), get_non_root_drop_rule_command(command) + + +def verify_all_rules_exist() -> None: + """ + This function is used to verify all the iptable rules are present in the rule set + """ + def check_all_iptables() -> bool: + root_accept, non_root_accept, non_root_drop = get_all_iptable_rule_commands(FirewallRules.CHECK_COMMAND) + found: bool = check_if_iptable_rule_is_available(root_accept) and check_if_iptable_rule_is_available( + 
non_root_accept) and check_if_iptable_rule_is_available(non_root_drop) + return found + + log.info("-----Verifying all ip table rules are present in rule set") + # Agent will re-add rules within OS.EnableFirewallPeriod, So waiting that time + some buffer + found: bool = retry_if_false(check_all_iptables, attempts=2, delay=FIREWALL_PERIOD+30) + + if not found: + fail("IP table rules missing in rule set.\n Current iptable rules:\n {0}".format( + print_current_iptable_rules())) + + log.info("verified All ip table rules are present in rule set") + + +def verify_rules_deleted_successfully(commands: List[List[str]] = None) -> None: + """ + This function is used to verify if provided rule or all(if not specified) iptable rules are deleted successfully. + """ + log.info("-----Verifying requested rules deleted successfully") + + if commands is None: + commands = [] + + if not commands: + root_accept, non_root_accept, non_root_drop = get_all_iptable_rule_commands("-C") + commands.extend([root_accept, non_root_accept, non_root_drop]) + + # "-C" return error code 1 when not available which is expected after deletion + for command in commands: + if not check_if_iptable_rule_is_available(command): + pass + else: + raise Exception("Deletion of ip table rules not successful\n.Current ip table rules:\n" + print_current_iptable_rules()) + + log.info("ip table rules deleted successfully \n %s", commands) + + +def delete_iptable_rules(commands: List[List[str]] = None) -> None: + """ + This function is used to delete the provided rule or all(if not specified) iptable rules + """ + if commands is None: + commands = [] + if not commands: + root_accept, non_root_accept, non_root_drop = get_all_iptable_rule_commands(FirewallRules.CHECK_COMMAND) + commands.extend([root_accept, non_root_accept, non_root_drop]) + + log.info("-----Deleting ip table rules \n %s", commands) + + try: + cmd = None + for command in commands: + cmd = command + retry(lambda: execute_cmd(cmd=cmd), attempts=3) + except 
Exception as e: + raise Exception("Error -- Failed to Delete the ip table rule set {0}".format(e)) + + log.info("Success --Deletion of ip table rule") + + +def verify_dns_tcp_to_wireserver_is_allowed(user: str) -> None: + """ + This function is used to verify if tcp to wireserver is allowed for the given user + """ + log.info("-----Verifying DNS tcp to wireserver is allowed") + switch_user(user) + try: + socket.create_connection((get_wireserver_ip(), 53), timeout=30) + except Exception as e: + raise Exception( + "Error -- while using DNS TCP request as user:({0}), make sure the firewall rules are set correctly {1}".format(user, + e)) + + log.info("Success -- can connect to wireserver port 53 using TCP as a user:(%s)", user) + + +def verify_dns_tcp_to_wireserver_is_blocked(user: str) -> None: + """ + This function is used to verify if tcp to wireserver is blocked for given user + """ + log.info("-----Verifying DNS tcp to wireserver is blocked") + switch_user(user) + try: + socket.create_connection((get_wireserver_ip(), 53), timeout=10) + raise Exception("Error -- unprivileged user:({0}) could connect to wireserver port 53 using TCP".format(user)) + except Exception as e: + # Expected timeout if unprivileged user reaches wireserver + if isinstance(e, socket.timeout): + log.info("Success -- unprivileged user:(%s) access to wireserver port 53 using TCP is blocked", user) + else: + raise Exception("Unexpected error while connecting to wireserver: {0}".format(format_exception(e))) + + +def verify_http_to_wireserver_blocked(user: str) -> None: + """ + This function is used to verify if http to wireserver is blocked for the given user + """ + log.info("-----Verifying http request to wireserver is blocked") + switch_user(user) + try: + client = httpclient.HTTPConnection(get_wireserver_ip(), timeout=10) + except Exception as e: + raise Exception("Error -- failed to create HTTP connection with user: {0} \n {1}".format(user, e)) + + try: + blocked = False + 
client.request('GET', VERSIONS_PATH) + except Exception as e: + # if we get timeout exception, it means the request is blocked + if isinstance(e, socket.timeout): + blocked = True + else: + raise Exception("Unexpected error while connecting to wireserver: {0}".format(format_exception(e))) + + if not blocked: + raise Exception("Error -- unprivileged user:({0}) could connect to wireserver, make sure the firewall rules are set correctly".format(user)) + + log.info("Success -- unprivileged user:(%s) access to wireserver is blocked", user) + + +def verify_http_to_wireserver_allowed(user: str) -> None: + """ + This function is used to verify if http to wireserver is allowed for the given user + """ + log.info("-----Verifying http request to wireserver is allowed") + switch_user(user) + try: + client = httpclient.HTTPConnection(get_wireserver_ip(), timeout=30) + except Exception as e: + raise Exception("Error -- failed to create HTTP connection with user:{0} \n {1}".format(user, e)) + + try: + client.request('GET', VERSIONS_PATH) + except Exception as e: + # if we get exception, it means the request is failed to connect + raise Exception("Error -- unprivileged user:({0}) access to wireserver failed:\n {1}".format(user, e)) + + log.info("Success -- privileged user:(%s) access to wireserver is allowed", user) + + +def verify_non_root_accept_rule(): + """ + This function verifies the non root accept rule and make sure it is re added by agent after deletion + """ + log.info("-----Verifying non root accept rule behavior") + log.info("Before deleting the non root accept rule , ensure a non root user can do a tcp to wireserver but cannot do a http request") + verify_dns_tcp_to_wireserver_is_allowed(NON_ROOT_USER) + verify_http_to_wireserver_blocked(NON_ROOT_USER) + + # switch to root user required to stop the agent + switch_user(ROOT_USER) + # stop the agent, so that it won't re-add rules while checking + log.info("Stop Guest Agent service") + # agent-service is script name and 
stop is argument + stop_agent = ["agent-service", "stop"] + shellutil.run_command(stop_agent) + + # deleting non root accept rule + non_root_accept_delete_cmd = get_non_root_accept_rule_command(FirewallRules.DELETE_COMMAND) + delete_iptable_rules([non_root_accept_delete_cmd]) + # verifying deletion successful + non_root_accept_check_cmd = get_non_root_accept_rule_command(FirewallRules.CHECK_COMMAND) + verify_rules_deleted_successfully([non_root_accept_check_cmd]) + + log.info("** Current IP table rules\n") + print_current_iptable_rules() + + log.info("After deleting the non root accept rule , ensure a non root user cannot do a tcp to wireserver request") + verify_dns_tcp_to_wireserver_is_blocked(NON_ROOT_USER) + + switch_user(ROOT_USER) + # restart the agent to re-add the deleted rules + log.info("Restart Guest Agent service to re-add the deleted rules") + # agent-service is script name and start is argument + start_agent = ["agent-service", "start"] + shellutil.run_command(start_agent) + + verify_all_rules_exist() + log.info("** Current IP table rules \n") + print_current_iptable_rules() + + log.info("After appending the rule back , ensure a non root user can do a tcp to wireserver but cannot do a http request\n") + verify_dns_tcp_to_wireserver_is_allowed(NON_ROOT_USER) + verify_http_to_wireserver_blocked(NON_ROOT_USER) + + log.info("Ensuring missing rules are re-added by the running agent") + # deleting non root accept rule + non_root_accept_delete_cmd = get_non_root_accept_rule_command(FirewallRules.DELETE_COMMAND) + delete_iptable_rules([non_root_accept_delete_cmd]) + + verify_all_rules_exist() + log.info("** Current IP table rules \n") + print_current_iptable_rules() + + log.info("non root accept rule verified successfully\n") + + +def verify_root_accept_rule(): + """ + This function verifies the root accept rule and make sure it is re added by agent after deletion + """ + log.info("-----Verifying root accept rule behavior") + log.info("Before deleting the 
root accept rule , ensure a root user can do a http request but non root user cannot") + verify_http_to_wireserver_allowed(ROOT_USER) + verify_http_to_wireserver_blocked(NON_ROOT_USER) + + # switch to root user required to stop the agent + switch_user(ROOT_USER) + # stop the agent, so that it won't re-add rules while checking + log.info("Stop Guest Agent service") + # agent-service is script name and stop is argument + stop_agent = ["agent-service", "stop"] + shellutil.run_command(stop_agent) + + # deleting root accept rule + root_accept_delete_cmd = get_root_accept_rule_command(FirewallRules.DELETE_COMMAND) + # deleting drop rule too otherwise after restart, the daemon will go into loop since it cannot connect to wireserver. This would block the agent initialization + drop_delete_cmd = get_non_root_drop_rule_command(FirewallRules.DELETE_COMMAND) + delete_iptable_rules([root_accept_delete_cmd, drop_delete_cmd]) + # verifying deletion successful + root_accept_check_cmd = get_root_accept_rule_command(FirewallRules.CHECK_COMMAND) + drop_check_cmd = get_non_root_drop_rule_command(FirewallRules.CHECK_COMMAND) + verify_rules_deleted_successfully([root_accept_check_cmd, drop_check_cmd]) + + log.info("** Current IP table rules\n") + print_current_iptable_rules() + + # restart the agent to re-add the deleted rules + log.info("Restart Guest Agent service to re-add the deleted rules") + # agent-service is script name and start is argument + start_agent = ["agent-service", "start"] + shellutil.run_command(start_agent) + + verify_all_rules_exist() + log.info("** Current IP table rules \n") + print_current_iptable_rules() + + log.info("After appending the rule back , ensure a root user can do a http request but non root user cannot") + verify_dns_tcp_to_wireserver_is_allowed(NON_ROOT_USER) + verify_http_to_wireserver_blocked(NON_ROOT_USER) + verify_http_to_wireserver_allowed(ROOT_USER) + + log.info("Ensuring missing rules are re-added by the running agent") + # deleting root 
accept rule + root_accept_delete_cmd = get_root_accept_rule_command(FirewallRules.DELETE_COMMAND) + delete_iptable_rules([root_accept_delete_cmd]) + + verify_all_rules_exist() + log.info("** Current IP table rules \n") + print_current_iptable_rules() + + log.info("root accept rule verified successfully\n") + + +def verify_non_root_dcp_rule(): + """ + This function verifies drop rule and make sure it is re added by agent after deletion + """ + log.info("-----Verifying non root drop rule behavior") + # switch to root user required to stop the agent + switch_user(ROOT_USER) + # stop the agent, so that it won't re-add rules while checking + log.info("Stop Guest Agent service") + # agent-service is script name and stop is argument + stop_agent = ["agent-service", "stop"] + shellutil.run_command(stop_agent) + + # deleting non root delete rule + non_root_drop_delete_cmd = get_non_root_drop_rule_command(FirewallRules.DELETE_COMMAND) + delete_iptable_rules([non_root_drop_delete_cmd]) + # verifying deletion successful + non_root_drop_check_cmd = get_non_root_drop_rule_command(FirewallRules.CHECK_COMMAND) + verify_rules_deleted_successfully([non_root_drop_check_cmd]) + + log.info("** Current IP table rules\n") + print_current_iptable_rules() + + log.info("After deleting the non root drop rule, ensure a non root user can do http request to wireserver") + verify_http_to_wireserver_allowed(NON_ROOT_USER) + + # restart the agent to re-add the deleted rules + log.info("Restart Guest Agent service to re-add the deleted rules") + # agent-service is script name and start is argument + start_agent = ["agent-service", "start"] + shellutil.run_command(start_agent) + + verify_all_rules_exist() + log.info("** Current IP table rules\n") + print_current_iptable_rules() + + log.info("After appending the rule back , ensure a non root user can do a tcp to wireserver but cannot do a http request") + verify_dns_tcp_to_wireserver_is_allowed(NON_ROOT_USER) + 
verify_http_to_wireserver_blocked(NON_ROOT_USER) + verify_http_to_wireserver_allowed(ROOT_USER) + + log.info("Ensuring missing rules are re-added by the running agent") + # deleting non root delete rule + non_root_drop_delete_cmd = get_non_root_drop_rule_command(FirewallRules.DELETE_COMMAND) + delete_iptable_rules([non_root_drop_delete_cmd]) + + verify_all_rules_exist() + log.info("** Current IP table rules\n") + print_current_iptable_rules() + + log.info("non root drop rule verified successfully\n") + + +def prepare_agent(): + log.info("Executing script update-waagent-conf to enable agent firewall config flag") + # Changing the firewall period from default 5 mins to 1 min, so that test won't wait for that long to verify rules + shellutil.run_command(["update-waagent-conf", "OS.EnableFirewall=y", f"OS.EnableFirewallPeriod={FIREWALL_PERIOD}"]) + log.info("Successfully enabled agent firewall config flag") + + +def main(): + prepare_agent() + log.info("** Current IP table rules\n") + print_current_iptable_rules() + + verify_all_rules_exist() + + verify_non_root_accept_rule() + verify_root_accept_rule() + verify_non_root_dcp_rule() + + +parser = argparse.ArgumentParser() +parser.add_argument('-u', '--user', required=True, help="Non root user") +args = parser.parse_args() +NON_ROOT_USER = args.user +run_remote_test(main) + From fa0e2cfecc0896d4c047dc23cf1f167c51fd00f0 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 22 Aug 2023 13:10:33 -0700 Subject: [PATCH 062/240] Add retry and improve the log messages in agent update test (#2890) * add retry * improve log messages * merge conflicts --- tests_e2e/tests/agent_update/rsm_update.py | 57 ++++++++++--------- ...ate-verify_agent_reported_update_status.py | 21 ++++--- ...ate-verify_versioning_supported_feature.py | 20 ++++--- .../scripts/agent_update-wait_for_rsm_gs.py | 23 ++++---- 4 files changed, 64 insertions(+), 57 deletions(-) diff --git 
a/tests_e2e/tests/agent_update/rsm_update.py b/tests_e2e/tests/agent_update/rsm_update.py index eeb287f33b..bb6dcc6a7a 100644 --- a/tests_e2e/tests/agent_update/rsm_update.py +++ b/tests_e2e/tests/agent_update/rsm_update.py @@ -27,7 +27,7 @@ from typing import List, Dict, Any import requests -from assertpy import assert_that +from assertpy import assert_that, fail from azure.identity import DefaultAzureCredential from azure.mgmt.compute.models import VirtualMachine from msrestazure.azure_cloud import Cloud @@ -123,10 +123,10 @@ def run(self) -> None: self._verify_agent_reported_update_status(version) def _check_rsm_gs(self, requested_version: str) -> None: - # This checks if RSM GS available to the agent after we mock the rsm update request - log.info('Verifying latest GS includes requested version available to the agent') - output = self._ssh_client.run_command(f"agent_update-wait_for_rsm_gs.py --version {requested_version}", use_sudo=True) - log.info('Verified latest GS includes requested version available to the agent. \n%s', output) + # This checks if RSM GS available to the agent after we send the rsm update request + log.info('Executing wait_for_rsm_gs.py remote script to verify latest GS contain requested version after rsm update requested') + self._run_remote_test(f"agent_update-wait_for_rsm_gs.py --version {requested_version}", use_sudo=True) + log.info('Verified latest GS contain requested version after rsm update requested') def _prepare_agent(self, daemon_version="1.0.0.0", update_config=True) -> None: """ @@ -134,13 +134,13 @@ def _prepare_agent(self, daemon_version="1.0.0.0", update_config=True) -> None: 1) Changing daemon version since daemon has a hard check on agent version in order to update agent. It doesn't allow versions which are less than daemon version. 2) Updating GAFamily type "Test" and GAUpdates flag to process agent updates on test versions. 
""" - log.info('Modifying agent installed version') - output = self._ssh_client.run_command(f"agent_update-modify_agent_version {daemon_version}", use_sudo=True) - log.info('Updated agent installed version \n%s', output) + log.info('Executing modify_agent_version remote script to update agent installed version to lower than requested version') + self._run_remote_test(f"agent_update-modify_agent_version {daemon_version}", use_sudo=True) + log.info('Successfully updated agent installed version') if update_config: - log.info('Modifying agent update config flags') - output = self._ssh_client.run_command("update-waagent-conf Debug.DownloadNewAgents=y AutoUpdate.GAFamily=Test", use_sudo=True) - log.info('updated agent update required config \n%s', output) + log.info('Executing update-waagent-conf remote script to update agent update config flags to allow and download test versions') + self._run_remote_test("update-waagent-conf Debug.DownloadNewAgents=y AutoUpdate.GAFamily=Test", use_sudo=True) + log.info('Successfully updated agent update config') @staticmethod def _verify_agent_update_flag_enabled(vm: VirtualMachineClient) -> bool: @@ -161,6 +161,7 @@ def _enable_agent_update_flag(self, vm: VirtualMachineClient) -> None: } } } + log.info("updating the vm with osProfile property:\n%s", osprofile) vm.update(osprofile) def _request_rsm_update(self, requested_version: str) -> None: @@ -173,7 +174,7 @@ def _request_rsm_update(self, requested_version: str) -> None: # enable the flag log.info("Attempting vm update to set the enableVMAgentPlatformUpdates flag") self._enable_agent_update_flag(vm) - log.info("Set the enableVMAgentPlatformUpdates flag to True") + log.info("Updated the enableVMAgentPlatformUpdates flag to True") else: log.info("Already enableVMAgentPlatformUpdates flag set to True") @@ -190,29 +191,33 @@ def _request_rsm_update(self, requested_version: str) -> None: "targetVersion": requested_version } + log.info("Attempting rsm upgrade post request to endpoint: 
{0} with data: {1}".format(url, data)) response = requests.post(url, data=json.dumps(data), headers=headers) if response.status_code == 202: log.info("RSM upgrade request accepted") else: - raise Exception("Error occurred while RSM upgrade request. Status code : {0} and msg: {1}".format(response.status_code, response.content)) + raise Exception("Error occurred while making RSM upgrade request. Status code : {0} and msg: {1}".format(response.status_code, response.content)) def _verify_guest_agent_update(self, requested_version: str) -> None: """ Verify current agent version running on rsm requested version """ def _check_agent_version(requested_version: str) -> bool: - stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) + waagent_version: str = self._ssh_client.run_command("waagent-version", use_sudo=True) expected_version = f"Goal state agent: {requested_version}" - if expected_version in stdout: + if expected_version in waagent_version: return True else: - raise Exception("Guest agent didn't update to requested version {0} but found \n {1}. \n " - "To debug verify if CRP has upgrade operation around that time and also check if agent log has any errors ".format(requested_version, stdout)) + return False - log.info("Verifying agent updated to requested version") - retry_if_false(lambda: _check_agent_version(requested_version)) - stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) - log.info(f"Verified agent updated to requested version. Current agent version running:\n {stdout}") + waagent_version: str = "" + log.info("Verifying agent updated to requested version: {0}".format(requested_version)) + success: bool = retry_if_false(lambda: _check_agent_version(requested_version)) + if not success: + fail("Guest agent didn't update to requested version {0} but found \n {1}. 
\n " + "To debug verify if CRP has upgrade operation around that time and also check if agent log has any errors ".format(requested_version, waagent_version)) + waagent_version: str = self._ssh_client.run_command("waagent-version", use_sudo=True) + log.info(f"Successfully verified agent updated to requested version. Current agent version running:\n {waagent_version}") def _verify_no_guest_agent_update(self, version: str) -> None: """ @@ -228,17 +233,17 @@ def _verify_agent_reported_supported_feature_flag(self): RSM update rely on supported flag that agent sends to CRP.So, checking if GA reports feature flag from the agent log """ - log.info("Verifying agent reported supported feature flag") - self._ssh_client.run_command("agent_update-verify_versioning_supported_feature.py", use_sudo=True) - log.info("Agent reported VersioningGovernance supported feature flag") + log.info("Executing verify_versioning_supported_feature.py remote script to verify agent reported supported feature flag, so that CRP can send RSM update request") + self._run_remote_test("agent_update-verify_versioning_supported_feature.py", use_sudo=True) + log.info("Successfully verified that Agent reported VersioningGovernance supported feature flag") def _verify_agent_reported_update_status(self, version: str): """ Verify if the agent reported update status to CRP after update performed """ - log.info("Verifying agent reported update status for version {0}".format(version)) - self._ssh_client.run_command(f"agent_update-verify_agent_reported_update_status.py --version {version}", use_sudo=True) + log.info("Executing verify_agent_reported_update_status.py remote script to verify agent reported update status for version {0}".format(version)) + self._run_remote_test(f"agent_update-verify_agent_reported_update_status.py --version {version}", use_sudo=True) log.info("Successfully Agent reported update status for version {0}".format(version)) diff --git 
a/tests_e2e/tests/scripts/agent_update-verify_agent_reported_update_status.py b/tests_e2e/tests/scripts/agent_update-verify_agent_reported_update_status.py index 8e8d50a482..5da40cb423 100755 --- a/tests_e2e/tests/scripts/agent_update-verify_agent_reported_update_status.py +++ b/tests_e2e/tests/scripts/agent_update-verify_agent_reported_update_status.py @@ -21,9 +21,11 @@ import argparse import glob import json -import logging -import sys +from assertpy import fail + +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.remote_test import run_remote_test from tests_e2e.tests.lib.retry import retry_if_false @@ -33,27 +35,28 @@ def check_agent_reported_update_status(expected_version: str) -> bool: for file in file_paths: with open(file, 'r') as f: data = json.load(f) - logging.info("Agent status file is %s and it's content %s", file, data) + log.info("Agent status file is %s and it's content %s", file, data) status = data["__status__"] guest_agent_status = status["aggregateStatus"]["guestAgentStatus"] if "updateStatus" in guest_agent_status.keys(): if guest_agent_status["updateStatus"]["expectedVersion"] == expected_version: + log.info("we found the expected version %s in agent status file", expected_version) return True + log.info("we did not find the expected version %s in agent status file", expected_version) return False -try: +def main(): parser = argparse.ArgumentParser() parser.add_argument('-v', '--version', required=True) args = parser.parse_args() + log.info("checking agent status file to verify if agent reported update status") found: bool = retry_if_false(lambda: check_agent_reported_update_status(args.version)) if not found: - raise Exception("Agent failed to report update status, so skipping rest of the agent update validations") + fail("Agent failed to report update status, so skipping rest of the agent update validations") + -except Exception as e: - print(f"{e}", file=sys.stderr) - sys.exit(1) +run_remote_test(main) -sys.exit(0) diff 
--git a/tests_e2e/tests/scripts/agent_update-verify_versioning_supported_feature.py b/tests_e2e/tests/scripts/agent_update-verify_versioning_supported_feature.py index b6c1307e2d..8627111a3a 100755 --- a/tests_e2e/tests/scripts/agent_update-verify_versioning_supported_feature.py +++ b/tests_e2e/tests/scripts/agent_update-verify_versioning_supported_feature.py @@ -20,9 +20,9 @@ # import glob import json -import logging -import sys +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.remote_test import run_remote_test from tests_e2e.tests.lib.retry import retry_if_false @@ -32,7 +32,7 @@ def check_agent_supports_versioning() -> bool: for file in file_paths: with open(file, 'r') as f: data = json.load(f) - logging.info("Agent status file is %s and it's content %s", file, data) + log.info("Agent status file is %s and it's content %s", file, data) status = data["__status__"] supported_features = status["supportedFeatures"] for supported_feature in supported_features: @@ -41,13 +41,15 @@ def check_agent_supports_versioning() -> bool: return False -try: +def main(): + log.info("checking agent status file for VersioningGovernance supported feature flag") found: bool = retry_if_false(check_agent_supports_versioning) if not found: - raise Exception("Agent failed to report supported feature flag, so skipping agent update validations") + raise Exception("Agent failed to report supported feature flag. 
So, skipping agent update validations " + "since CRP will not send RSM requested version in GS if feature flag not found in status") + + +run_remote_test(main) + -except Exception as e: - print(f"{e}", file=sys.stderr) - sys.exit(1) -sys.exit(0) diff --git a/tests_e2e/tests/scripts/agent_update-wait_for_rsm_gs.py b/tests_e2e/tests/scripts/agent_update-wait_for_rsm_gs.py index 5905a5fe1d..016bcd8c62 100755 --- a/tests_e2e/tests/scripts/agent_update-wait_for_rsm_gs.py +++ b/tests_e2e/tests/scripts/agent_update-wait_for_rsm_gs.py @@ -19,13 +19,13 @@ # Verify the latest goal state included rsm requested version and if not, retry # import argparse -import sys -import logging from azurelinuxagent.common.protocol.util import get_protocol_util from azurelinuxagent.common.protocol.goal_state import GoalState, GoalStateProperties from azurelinuxagent.common.protocol.wire import WireProtocol -from tests_e2e.tests.lib.retry import retry_if_false +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.remote_test import run_remote_test +from tests_e2e.tests.lib.retry import retry_if_false, retry def get_requested_version(gs: GoalState) -> str: @@ -41,6 +41,7 @@ def get_requested_version(gs: GoalState) -> str: def verify_rsm_requested_version(wire_protocol: WireProtocol, expected_version: str) -> bool: + log.info("fetching the goal state to check if it includes rsm requested version") wire_protocol.client.update_goal_state() goal_state = wire_protocol.client.get_goal_state() requested_version = get_requested_version(goal_state) @@ -50,25 +51,21 @@ def verify_rsm_requested_version(wire_protocol: WireProtocol, expected_version: return False -try: +def main(): parser = argparse.ArgumentParser() parser.add_argument('-v', '--version', required=True) args = parser.parse_args() protocol = get_protocol_util().get_protocol(init_goal_state=False) - protocol.client.reset_goal_state( - goal_state_properties=GoalStateProperties.ExtensionsGoalState) + retry(lambda: 
protocol.client.reset_goal_state( + goal_state_properties=GoalStateProperties.ExtensionsGoalState)) found: bool = retry_if_false(lambda: verify_rsm_requested_version(protocol, args.version)) if not found: - raise Exception("Latest GS does not include rsm requested version : {0}.".format(args.version)) + raise Exception("The latest goal state didn't contain requested version after we submit the rsm request for: {0}.".format(args.version)) else: - logging.info("Latest GS includes rsm requested version : %s", args.version) + log.info("Successfully verified that latest GS contains rsm requested version : %s", args.version) -except Exception as e: - print(f"{e}", file=sys.stderr) - sys.exit(1) - -sys.exit(0) +run_remote_test(main) From 3459215d5329ead10a723b072600fd61011361e7 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Tue, 22 Aug 2023 14:14:21 -0700 Subject: [PATCH 063/240] Cleanup common directory (#2902) Co-authored-by: narrieta --- azurelinuxagent/agent.py | 6 +- azurelinuxagent/daemon/main.py | 2 +- azurelinuxagent/{common => ga}/cgroup.py | 0 azurelinuxagent/{common => ga}/cgroupapi.py | 6 +- .../{common => ga}/cgroupconfigurator.py | 8 +-- .../{common => ga}/cgroupstelemetry.py | 2 +- azurelinuxagent/ga/collect_logs.py | 10 +-- .../ga/collect_telemetry_events.py | 2 +- azurelinuxagent/ga/env.py | 2 +- .../utils => ga}/extensionprocessutil.py | 0 azurelinuxagent/ga/exthandlers.py | 2 +- azurelinuxagent/{common => ga}/interfaces.py | 0 .../{common => ga}/logcollector.py | 4 +- .../{common => ga}/logcollector_manifests.py | 0 azurelinuxagent/ga/monitor.py | 8 +-- .../{common => ga}/persist_firewall_rules.py | 0 azurelinuxagent/ga/send_telemetry_events.py | 2 +- azurelinuxagent/ga/update.py | 4 +- azurelinuxagent/pa/deprovision/default.py | 4 +- azurelinuxagent/pa/rdma/centos.py | 2 +- azurelinuxagent/pa/rdma/factory.py | 2 +- azurelinuxagent/{common => pa/rdma}/rdma.py | 0 azurelinuxagent/pa/rdma/suse.py | 2 +- azurelinuxagent/pa/rdma/ubuntu.py | 2 +- 
tests/common/osutil/test_default.py | 4 +- .../utils/test_extension_process_util.py | 24 +++---- tests/{common => ga}/test_cgroupapi.py | 14 ++-- .../{common => ga}/test_cgroupconfigurator.py | 34 ++++----- tests/{common => ga}/test_cgroups.py | 2 +- tests/{common => ga}/test_cgroupstelemetry.py | 72 +++++++++---------- tests/ga/test_collect_logs.py | 6 +- tests/ga/test_extension.py | 16 ++--- tests/ga/test_exthandlers.py | 6 +- tests/{common => ga}/test_logcollector.py | 54 +++++++------- tests/ga/test_monitor.py | 12 ++-- tests/ga/test_multi_config_extension.py | 6 +- .../test_persist_firewall_rules.py | 8 +-- tests/ga/test_update.py | 8 +-- tests/lib/mock_cgroup_environment.py | 2 +- tests/test_agent.py | 5 +- 40 files changed, 172 insertions(+), 171 deletions(-) rename azurelinuxagent/{common => ga}/cgroup.py (100%) rename azurelinuxagent/{common => ga}/cgroupapi.py (98%) rename azurelinuxagent/{common => ga}/cgroupconfigurator.py (99%) rename azurelinuxagent/{common => ga}/cgroupstelemetry.py (98%) rename azurelinuxagent/{common/utils => ga}/extensionprocessutil.py (100%) rename azurelinuxagent/{common => ga}/interfaces.py (100%) rename azurelinuxagent/{common => ga}/logcollector.py (99%) rename azurelinuxagent/{common => ga}/logcollector_manifests.py (100%) rename azurelinuxagent/{common => ga}/persist_firewall_rules.py (100%) rename azurelinuxagent/{common => pa/rdma}/rdma.py (100%) rename tests/{common => ga}/test_cgroupapi.py (94%) rename tests/{common => ga}/test_cgroupconfigurator.py (97%) rename tests/{common => ga}/test_cgroups.py (98%) rename tests/{common => ga}/test_cgroupstelemetry.py (85%) rename tests/{common => ga}/test_logcollector.py (89%) rename tests/{common => ga}/test_persist_firewall_rules.py (98%) diff --git a/azurelinuxagent/agent.py b/azurelinuxagent/agent.py index af63c068b5..f565f2975f 100644 --- a/azurelinuxagent/agent.py +++ b/azurelinuxagent/agent.py @@ -28,14 +28,14 @@ import subprocess import sys import threading -from 
azurelinuxagent.common import cgroupconfigurator, logcollector -from azurelinuxagent.common.cgroupapi import SystemdCgroupsApi +from azurelinuxagent.ga import logcollector, cgroupconfigurator +from azurelinuxagent.ga.cgroupapi import SystemdCgroupsApi import azurelinuxagent.common.conf as conf import azurelinuxagent.common.event as event import azurelinuxagent.common.logger as logger from azurelinuxagent.common.future import ustr -from azurelinuxagent.common.logcollector import LogCollector, OUTPUT_RESULTS_FILE_PATH +from azurelinuxagent.ga.logcollector import LogCollector, OUTPUT_RESULTS_FILE_PATH from azurelinuxagent.common.osutil import get_osutil from azurelinuxagent.common.utils import fileutil, textutil from azurelinuxagent.common.utils.flexible_version import FlexibleVersion diff --git a/azurelinuxagent/daemon/main.py b/azurelinuxagent/daemon/main.py index d051af202b..342daf4ac9 100644 --- a/azurelinuxagent/daemon/main.py +++ b/azurelinuxagent/daemon/main.py @@ -30,7 +30,7 @@ from azurelinuxagent.common.osutil import get_osutil from azurelinuxagent.common.protocol.goal_state import GoalState, GoalStateProperties from azurelinuxagent.common.protocol.util import get_protocol_util -from azurelinuxagent.common.rdma import setup_rdma_device +from azurelinuxagent.pa.rdma.rdma import setup_rdma_device from azurelinuxagent.common.utils import textutil from azurelinuxagent.common.version import AGENT_NAME, AGENT_LONG_NAME, \ AGENT_VERSION, \ diff --git a/azurelinuxagent/common/cgroup.py b/azurelinuxagent/ga/cgroup.py similarity index 100% rename from azurelinuxagent/common/cgroup.py rename to azurelinuxagent/ga/cgroup.py diff --git a/azurelinuxagent/common/cgroupapi.py b/azurelinuxagent/ga/cgroupapi.py similarity index 98% rename from azurelinuxagent/common/cgroupapi.py rename to azurelinuxagent/ga/cgroupapi.py index ca0ef3bb5b..040333bdde 100644 --- a/azurelinuxagent/common/cgroupapi.py +++ b/azurelinuxagent/ga/cgroupapi.py @@ -23,15 +23,15 @@ import uuid from 
azurelinuxagent.common import logger -from azurelinuxagent.common.cgroup import CpuCgroup, MemoryCgroup -from azurelinuxagent.common.cgroupstelemetry import CGroupsTelemetry +from azurelinuxagent.ga.cgroup import CpuCgroup, MemoryCgroup +from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.common.conf import get_agent_pid_file_path from azurelinuxagent.common.exception import CGroupsException, ExtensionErrorCodes, ExtensionError, \ ExtensionOperationError from azurelinuxagent.common.future import ustr from azurelinuxagent.common.osutil import systemd from azurelinuxagent.common.utils import fileutil, shellutil -from azurelinuxagent.common.utils.extensionprocessutil import handle_process_completion, read_output, \ +from azurelinuxagent.ga.extensionprocessutil import handle_process_completion, read_output, \ TELEMETRY_MESSAGE_MAX_LEN from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.version import get_distro diff --git a/azurelinuxagent/common/cgroupconfigurator.py b/azurelinuxagent/ga/cgroupconfigurator.py similarity index 99% rename from azurelinuxagent/common/cgroupconfigurator.py rename to azurelinuxagent/ga/cgroupconfigurator.py index 767786f014..e52fc15d0d 100644 --- a/azurelinuxagent/common/cgroupconfigurator.py +++ b/azurelinuxagent/ga/cgroupconfigurator.py @@ -23,15 +23,15 @@ from azurelinuxagent.common import conf from azurelinuxagent.common import logger -from azurelinuxagent.common.cgroup import CpuCgroup, AGENT_NAME_TELEMETRY, MetricsCounter, MemoryCgroup -from azurelinuxagent.common.cgroupapi import CGroupsApi, SystemdCgroupsApi, SystemdRunError, EXTENSION_SLICE_PREFIX -from azurelinuxagent.common.cgroupstelemetry import CGroupsTelemetry +from azurelinuxagent.ga.cgroup import CpuCgroup, AGENT_NAME_TELEMETRY, MetricsCounter, MemoryCgroup +from azurelinuxagent.ga.cgroupapi import CGroupsApi, SystemdCgroupsApi, SystemdRunError, EXTENSION_SLICE_PREFIX +from 
azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.common.exception import ExtensionErrorCodes, CGroupsException, AgentMemoryExceededException from azurelinuxagent.common.future import ustr from azurelinuxagent.common.osutil import get_osutil, systemd from azurelinuxagent.common.version import get_distro from azurelinuxagent.common.utils import shellutil, fileutil -from azurelinuxagent.common.utils.extensionprocessutil import handle_process_completion +from azurelinuxagent.ga.extensionprocessutil import handle_process_completion from azurelinuxagent.common.event import add_event, WALAEventOperation AZURE_SLICE = "azure.slice" diff --git a/azurelinuxagent/common/cgroupstelemetry.py b/azurelinuxagent/ga/cgroupstelemetry.py similarity index 98% rename from azurelinuxagent/common/cgroupstelemetry.py rename to azurelinuxagent/ga/cgroupstelemetry.py index 7b6bba0aa7..5943b45ade 100644 --- a/azurelinuxagent/common/cgroupstelemetry.py +++ b/azurelinuxagent/ga/cgroupstelemetry.py @@ -17,7 +17,7 @@ import threading from azurelinuxagent.common import logger -from azurelinuxagent.common.cgroup import CpuCgroup +from azurelinuxagent.ga.cgroup import CpuCgroup from azurelinuxagent.common.future import ustr diff --git a/azurelinuxagent/ga/collect_logs.py b/azurelinuxagent/ga/collect_logs.py index 95c42f3a76..244d45b1e4 100644 --- a/azurelinuxagent/ga/collect_logs.py +++ b/azurelinuxagent/ga/collect_logs.py @@ -21,16 +21,16 @@ import sys import threading import time -from azurelinuxagent.common import cgroupconfigurator, logcollector +from azurelinuxagent.ga import logcollector, cgroupconfigurator import azurelinuxagent.common.conf as conf from azurelinuxagent.common import logger -from azurelinuxagent.common.cgroup import MetricsCounter +from azurelinuxagent.ga.cgroup import MetricsCounter from azurelinuxagent.common.event import elapsed_milliseconds, add_event, WALAEventOperation, report_metric from azurelinuxagent.common.future import ustr -from 
azurelinuxagent.common.interfaces import ThreadHandlerInterface -from azurelinuxagent.common.logcollector import COMPRESSED_ARCHIVE_PATH, GRACEFUL_KILL_ERRCODE -from azurelinuxagent.common.cgroupconfigurator import CGroupConfigurator, LOGCOLLECTOR_MEMORY_LIMIT +from azurelinuxagent.ga.interfaces import ThreadHandlerInterface +from azurelinuxagent.ga.logcollector import COMPRESSED_ARCHIVE_PATH, GRACEFUL_KILL_ERRCODE +from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator, LOGCOLLECTOR_MEMORY_LIMIT from azurelinuxagent.common.protocol.util import get_protocol_util from azurelinuxagent.common.utils import shellutil from azurelinuxagent.common.utils.shellutil import CommandError diff --git a/azurelinuxagent/ga/collect_telemetry_events.py b/azurelinuxagent/ga/collect_telemetry_events.py index 01049ee875..e0144a6399 100644 --- a/azurelinuxagent/ga/collect_telemetry_events.py +++ b/azurelinuxagent/ga/collect_telemetry_events.py @@ -31,7 +31,7 @@ CollectOrReportEventDebugInfo, EVENT_FILE_REGEX, parse_event from azurelinuxagent.common.exception import InvalidExtensionEventError, ServiceStoppedError from azurelinuxagent.common.future import ustr -from azurelinuxagent.common.interfaces import ThreadHandlerInterface +from azurelinuxagent.ga.interfaces import ThreadHandlerInterface from azurelinuxagent.common.telemetryevent import TelemetryEvent, TelemetryEventParam, \ GuestAgentGenericLogsSchema, GuestAgentExtensionEventsSchema from azurelinuxagent.common.utils import textutil diff --git a/azurelinuxagent/ga/env.py b/azurelinuxagent/ga/env.py index 5e17059345..76d9ee73f1 100644 --- a/azurelinuxagent/ga/env.py +++ b/azurelinuxagent/ga/env.py @@ -28,7 +28,7 @@ from azurelinuxagent.common.dhcp import get_dhcp_handler from azurelinuxagent.common.event import add_periodic, WALAEventOperation, add_event from azurelinuxagent.common.future import ustr -from azurelinuxagent.common.interfaces import ThreadHandlerInterface +from azurelinuxagent.ga.interfaces import 
ThreadHandlerInterface from azurelinuxagent.common.osutil import get_osutil from azurelinuxagent.common.protocol.util import get_protocol_util from azurelinuxagent.common.version import AGENT_NAME, CURRENT_VERSION diff --git a/azurelinuxagent/common/utils/extensionprocessutil.py b/azurelinuxagent/ga/extensionprocessutil.py similarity index 100% rename from azurelinuxagent/common/utils/extensionprocessutil.py rename to azurelinuxagent/ga/extensionprocessutil.py diff --git a/azurelinuxagent/ga/exthandlers.py b/azurelinuxagent/ga/exthandlers.py index 35fa44c076..3575c3e6d2 100644 --- a/azurelinuxagent/ga/exthandlers.py +++ b/azurelinuxagent/ga/exthandlers.py @@ -38,7 +38,7 @@ from azurelinuxagent.common import version from azurelinuxagent.common.agent_supported_feature import get_agent_supported_features_list_for_extensions, \ SupportedFeatureNames, get_supported_feature_by_name, get_agent_supported_features_list_for_crp -from azurelinuxagent.common.cgroupconfigurator import CGroupConfigurator +from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator from azurelinuxagent.common.datacontract import get_properties, set_properties from azurelinuxagent.common.errorstate import ErrorState from azurelinuxagent.common.event import add_event, elapsed_milliseconds, WALAEventOperation, \ diff --git a/azurelinuxagent/common/interfaces.py b/azurelinuxagent/ga/interfaces.py similarity index 100% rename from azurelinuxagent/common/interfaces.py rename to azurelinuxagent/ga/interfaces.py diff --git a/azurelinuxagent/common/logcollector.py b/azurelinuxagent/ga/logcollector.py similarity index 99% rename from azurelinuxagent/common/logcollector.py rename to azurelinuxagent/ga/logcollector.py index fe62a7db6a..f2947e9373 100644 --- a/azurelinuxagent/common/logcollector.py +++ b/azurelinuxagent/ga/logcollector.py @@ -26,11 +26,11 @@ from datetime import datetime from heapq import heappush, heappop -from azurelinuxagent.common.cgroup import CpuCgroup, AGENT_LOG_COLLECTOR, 
MemoryCgroup +from azurelinuxagent.ga.cgroup import CpuCgroup, AGENT_LOG_COLLECTOR, MemoryCgroup from azurelinuxagent.common.conf import get_lib_dir, get_ext_log_dir, get_agent_log_file from azurelinuxagent.common.event import initialize_event_logger_vminfo_common_parameters from azurelinuxagent.common.future import ustr -from azurelinuxagent.common.logcollector_manifests import MANIFEST_NORMAL, MANIFEST_FULL +from azurelinuxagent.ga.logcollector_manifests import MANIFEST_NORMAL, MANIFEST_FULL # Please note: be careful when adding agent dependencies in this module. # This module uses its own logger and logs to its own file, not to the agent log. diff --git a/azurelinuxagent/common/logcollector_manifests.py b/azurelinuxagent/ga/logcollector_manifests.py similarity index 100% rename from azurelinuxagent/common/logcollector_manifests.py rename to azurelinuxagent/ga/logcollector_manifests.py diff --git a/azurelinuxagent/ga/monitor.py b/azurelinuxagent/ga/monitor.py index e2744bc434..1c123d70e3 100644 --- a/azurelinuxagent/ga/monitor.py +++ b/azurelinuxagent/ga/monitor.py @@ -22,13 +22,13 @@ import azurelinuxagent.common.conf as conf import azurelinuxagent.common.logger as logger import azurelinuxagent.common.utils.networkutil as networkutil -from azurelinuxagent.common.cgroup import MetricValue, MetricsCategory, MetricsCounter -from azurelinuxagent.common.cgroupconfigurator import CGroupConfigurator -from azurelinuxagent.common.cgroupstelemetry import CGroupsTelemetry +from azurelinuxagent.ga.cgroup import MetricValue, MetricsCategory, MetricsCounter +from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator +from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.common.errorstate import ErrorState from azurelinuxagent.common.event import add_event, WALAEventOperation, report_metric from azurelinuxagent.common.future import ustr -from azurelinuxagent.common.interfaces import ThreadHandlerInterface +from 
azurelinuxagent.ga.interfaces import ThreadHandlerInterface from azurelinuxagent.common.osutil import get_osutil from azurelinuxagent.common.protocol.healthservice import HealthService from azurelinuxagent.common.protocol.imds import get_imds_client diff --git a/azurelinuxagent/common/persist_firewall_rules.py b/azurelinuxagent/ga/persist_firewall_rules.py similarity index 100% rename from azurelinuxagent/common/persist_firewall_rules.py rename to azurelinuxagent/ga/persist_firewall_rules.py diff --git a/azurelinuxagent/ga/send_telemetry_events.py b/azurelinuxagent/ga/send_telemetry_events.py index c2e277769a..2923a43b13 100644 --- a/azurelinuxagent/ga/send_telemetry_events.py +++ b/azurelinuxagent/ga/send_telemetry_events.py @@ -24,7 +24,7 @@ from azurelinuxagent.common.event import add_event, WALAEventOperation from azurelinuxagent.common.exception import ServiceStoppedError from azurelinuxagent.common.future import ustr, Queue, Empty -from azurelinuxagent.common.interfaces import ThreadHandlerInterface +from azurelinuxagent.ga.interfaces import ThreadHandlerInterface from azurelinuxagent.common.utils import textutil diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 6f666156f4..4eeb408c25 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -35,13 +35,13 @@ from azurelinuxagent.common.utils import fileutil, textutil from azurelinuxagent.common.agent_supported_feature import get_supported_feature_by_name, SupportedFeatureNames, \ get_agent_supported_features_list_for_crp -from azurelinuxagent.common.cgroupconfigurator import CGroupConfigurator +from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator from azurelinuxagent.common.event import add_event, initialize_event_logger_vminfo_common_parameters, \ WALAEventOperation, EVENTS_DIRECTORY from azurelinuxagent.common.exception import ExitException, AgentUpgradeExitException, AgentMemoryExceededException from azurelinuxagent.common.future import 
ustr from azurelinuxagent.common.osutil import get_osutil, systemd -from azurelinuxagent.common.persist_firewall_rules import PersistFirewallRulesHandler +from azurelinuxagent.ga.persist_firewall_rules import PersistFirewallRulesHandler from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol, VmSettingsNotSupported from azurelinuxagent.common.protocol.restapi import VERSION_0 from azurelinuxagent.common.protocol.util import get_protocol_util diff --git a/azurelinuxagent/pa/deprovision/default.py b/azurelinuxagent/pa/deprovision/default.py index 89492b75e2..286858f5a1 100644 --- a/azurelinuxagent/pa/deprovision/default.py +++ b/azurelinuxagent/pa/deprovision/default.py @@ -26,11 +26,11 @@ import azurelinuxagent.common.conf as conf import azurelinuxagent.common.utils.fileutil as fileutil from azurelinuxagent.common import version -from azurelinuxagent.common.cgroupconfigurator import _AGENT_DROP_IN_FILE_SLICE, _DROP_IN_FILE_CPU_ACCOUNTING, \ +from azurelinuxagent.ga.cgroupconfigurator import _AGENT_DROP_IN_FILE_SLICE, _DROP_IN_FILE_CPU_ACCOUNTING, \ _DROP_IN_FILE_CPU_QUOTA, _DROP_IN_FILE_MEMORY_ACCOUNTING, LOGCOLLECTOR_SLICE from azurelinuxagent.common.exception import ProtocolError from azurelinuxagent.common.osutil import get_osutil, systemd -from azurelinuxagent.common.persist_firewall_rules import PersistFirewallRulesHandler +from azurelinuxagent.ga.persist_firewall_rules import PersistFirewallRulesHandler from azurelinuxagent.common.protocol.util import get_protocol_util from azurelinuxagent.ga.exthandlers import HANDLER_COMPLETE_NAME_PATTERN diff --git a/azurelinuxagent/pa/rdma/centos.py b/azurelinuxagent/pa/rdma/centos.py index 87e2eff74c..5e82acf531 100644 --- a/azurelinuxagent/pa/rdma/centos.py +++ b/azurelinuxagent/pa/rdma/centos.py @@ -23,7 +23,7 @@ import time import azurelinuxagent.common.logger as logger import azurelinuxagent.common.utils.shellutil as shellutil -from azurelinuxagent.common.rdma import RDMAHandler +from 
azurelinuxagent.pa.rdma.rdma import RDMAHandler class CentOSRDMAHandler(RDMAHandler): diff --git a/azurelinuxagent/pa/rdma/factory.py b/azurelinuxagent/pa/rdma/factory.py index c114dc3803..ec4a8bc48b 100644 --- a/azurelinuxagent/pa/rdma/factory.py +++ b/azurelinuxagent/pa/rdma/factory.py @@ -18,7 +18,7 @@ from distutils.version import LooseVersion as Version # pylint: disable=no-name-in-module, import-error import azurelinuxagent.common.logger as logger -from azurelinuxagent.common.rdma import RDMAHandler +from azurelinuxagent.pa.rdma.rdma import RDMAHandler from azurelinuxagent.common.version import DISTRO_FULL_NAME, DISTRO_VERSION from .centos import CentOSRDMAHandler from .suse import SUSERDMAHandler diff --git a/azurelinuxagent/common/rdma.py b/azurelinuxagent/pa/rdma/rdma.py similarity index 100% rename from azurelinuxagent/common/rdma.py rename to azurelinuxagent/pa/rdma/rdma.py diff --git a/azurelinuxagent/pa/rdma/suse.py b/azurelinuxagent/pa/rdma/suse.py index 66e8b3720a..bcf971482e 100644 --- a/azurelinuxagent/pa/rdma/suse.py +++ b/azurelinuxagent/pa/rdma/suse.py @@ -21,7 +21,7 @@ import azurelinuxagent.common.logger as logger import azurelinuxagent.common.utils.shellutil as shellutil -from azurelinuxagent.common.rdma import RDMAHandler +from azurelinuxagent.pa.rdma.rdma import RDMAHandler from azurelinuxagent.common.version import DISTRO_VERSION from distutils.version import LooseVersion as Version diff --git a/azurelinuxagent/pa/rdma/ubuntu.py b/azurelinuxagent/pa/rdma/ubuntu.py index a56a4be4e0..bef152f2e4 100644 --- a/azurelinuxagent/pa/rdma/ubuntu.py +++ b/azurelinuxagent/pa/rdma/ubuntu.py @@ -24,7 +24,7 @@ import azurelinuxagent.common.conf as conf import azurelinuxagent.common.logger as logger import azurelinuxagent.common.utils.shellutil as shellutil -from azurelinuxagent.common.rdma import RDMAHandler +from azurelinuxagent.pa.rdma.rdma import RDMAHandler class UbuntuRDMAHandler(RDMAHandler): diff --git a/tests/common/osutil/test_default.py 
b/tests/common/osutil/test_default.py index 42a7050522..794cd449ea 100644 --- a/tests/common/osutil/test_default.py +++ b/tests/common/osutil/test_default.py @@ -687,7 +687,7 @@ def mock_popen(command, *args, **kwargs): return mock_popen.original(command, *args, **kwargs) mock_popen.original = subprocess.Popen - with patch("azurelinuxagent.common.cgroupapi.subprocess.Popen", side_effect=mock_popen) as popen_patcher: + with patch("azurelinuxagent.ga.cgroupapi.subprocess.Popen", side_effect=mock_popen) as popen_patcher: with patch('os.getuid', return_value=uid): popen_patcher.wait = wait popen_patcher.destination = destination @@ -910,7 +910,7 @@ def mock_popen(command, *args, **kwargs): return mock_popen.original(command, *args, **kwargs) mock_popen.original = subprocess.Popen - with patch("azurelinuxagent.common.cgroupapi.subprocess.Popen", side_effect=mock_popen): + with patch("azurelinuxagent.ga.cgroupapi.subprocess.Popen", side_effect=mock_popen): success = osutil.DefaultOSUtil().remove_firewall(mock_iptables.destination, mock_iptables.uid, mock_iptables.wait) delete_conntrack_accept_command = TestOSUtil._command_to_string(osutil.get_firewall_delete_conntrack_accept_command(mock_iptables.wait, mock_iptables.destination)) diff --git a/tests/common/utils/test_extension_process_util.py b/tests/common/utils/test_extension_process_util.py index 7f2d4aadf6..316bad6a37 100644 --- a/tests/common/utils/test_extension_process_util.py +++ b/tests/common/utils/test_extension_process_util.py @@ -19,10 +19,10 @@ import subprocess import tempfile -from azurelinuxagent.common.cgroup import CpuCgroup +from azurelinuxagent.ga.cgroup import CpuCgroup from azurelinuxagent.common.exception import ExtensionError, ExtensionErrorCodes from azurelinuxagent.common.future import ustr -from azurelinuxagent.common.utils.extensionprocessutil import format_stdout_stderr, read_output, \ +from azurelinuxagent.ga.extensionprocessutil import format_stdout_stderr, read_output, \ 
wait_for_process_completion_or_timeout, handle_process_completion from tests.lib.tools import AgentTestCase, patch, data_dir @@ -68,7 +68,7 @@ def test_wait_for_process_completion_or_timeout_should_kill_process_on_timeout(s preexec_fn=os.setsid) # We don't actually mock the kill, just wrap it so we can assert its call count - with patch('azurelinuxagent.common.utils.extensionprocessutil.os.killpg', wraps=os.killpg) as patch_kill: + with patch('azurelinuxagent.ga.extensionprocessutil.os.killpg', wraps=os.killpg) as patch_kill: with patch('time.sleep') as mock_sleep: timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=timeout, cpu_cgroup=None) @@ -211,20 +211,20 @@ def test_handle_process_completion_should_raise_on_nonzero_exit_code(self): self.assertIn("Non-zero exit code:", ustr(context_manager.exception)) def test_read_output_should_return_no_content(self): - with patch('azurelinuxagent.common.utils.extensionprocessutil.TELEMETRY_MESSAGE_MAX_LEN', 0): + with patch('azurelinuxagent.ga.extensionprocessutil.TELEMETRY_MESSAGE_MAX_LEN', 0): expected = "" actual = read_output(self.stdout, self.stderr) self.assertEqual(expected, actual) def test_read_output_should_truncate_the_content(self): - with patch('azurelinuxagent.common.utils.extensionprocessutil.TELEMETRY_MESSAGE_MAX_LEN', 50): + with patch('azurelinuxagent.ga.extensionprocessutil.TELEMETRY_MESSAGE_MAX_LEN', 50): expected = "[stdout]\nr the lazy dog.\n\n" \ "[stderr]\ns jump quickly." actual = read_output(self.stdout, self.stderr) self.assertEqual(expected, actual) def test_read_output_should_not_truncate_the_content(self): - with patch('azurelinuxagent.common.utils.extensionprocessutil.TELEMETRY_MESSAGE_MAX_LEN', 90): + with patch('azurelinuxagent.ga.extensionprocessutil.TELEMETRY_MESSAGE_MAX_LEN', 90): expected = "[stdout]\nThe quick brown fox jumps over the lazy dog.\n\n" \ "[stderr]\nThe five boxing wizards jump quickly." 
actual = read_output(self.stdout, self.stderr) @@ -240,7 +240,7 @@ def test_format_stdout_stderr00(self): stderr = "The five boxing wizards jump quickly." expected = "[stdout]\n{0}\n\n[stderr]\n{1}".format(stdout, stderr) - with patch('azurelinuxagent.common.utils.extensionprocessutil.TELEMETRY_MESSAGE_MAX_LEN', 1000): + with patch('azurelinuxagent.ga.extensionprocessutil.TELEMETRY_MESSAGE_MAX_LEN', 1000): actual = format_stdout_stderr(stdout, stderr) self.assertEqual(expected, actual) @@ -254,7 +254,7 @@ def test_format_stdout_stderr01(self): # noinspection SpellCheckingInspection expected = '[stdout]\ns over the lazy dog.\n\n[stderr]\nizards jump quickly.' - with patch('azurelinuxagent.common.utils.extensionprocessutil.TELEMETRY_MESSAGE_MAX_LEN', 60): + with patch('azurelinuxagent.ga.extensionprocessutil.TELEMETRY_MESSAGE_MAX_LEN', 60): actual = format_stdout_stderr(stdout, stderr) self.assertEqual(expected, actual) self.assertEqual(60, len(actual)) @@ -268,7 +268,7 @@ def test_format_stdout_stderr02(self): stderr = "The five boxing wizards jump quickly." expected = '[stdout]\nempty\n\n[stderr]\ns jump quickly.' - with patch('azurelinuxagent.common.utils.extensionprocessutil.TELEMETRY_MESSAGE_MAX_LEN', 40): + with patch('azurelinuxagent.ga.extensionprocessutil.TELEMETRY_MESSAGE_MAX_LEN', 40): actual = format_stdout_stderr(stdout, stderr) self.assertEqual(expected, actual) self.assertEqual(40, len(actual)) @@ -282,7 +282,7 @@ def test_format_stdout_stderr03(self): stderr = "empty" expected = '[stdout]\nr the lazy dog.\n\n[stderr]\nempty' - with patch('azurelinuxagent.common.utils.extensionprocessutil.TELEMETRY_MESSAGE_MAX_LEN', 40): + with patch('azurelinuxagent.ga.extensionprocessutil.TELEMETRY_MESSAGE_MAX_LEN', 40): actual = format_stdout_stderr(stdout, stderr) self.assertEqual(expected, actual) self.assertEqual(40, len(actual)) @@ -296,7 +296,7 @@ def test_format_stdout_stderr04(self): stderr = "The five boxing wizards jump quickly." 
expected = '' - with patch('azurelinuxagent.common.utils.extensionprocessutil.TELEMETRY_MESSAGE_MAX_LEN', 4): + with patch('azurelinuxagent.ga.extensionprocessutil.TELEMETRY_MESSAGE_MAX_LEN', 4): actual = format_stdout_stderr(stdout, stderr) self.assertEqual(expected, actual) self.assertEqual(0, len(actual)) @@ -307,6 +307,6 @@ def test_format_stdout_stderr05(self): """ expected = '[stdout]\n\n\n[stderr]\n' - with patch('azurelinuxagent.common.utils.extensionprocessutil.TELEMETRY_MESSAGE_MAX_LEN', 1000): + with patch('azurelinuxagent.ga.extensionprocessutil.TELEMETRY_MESSAGE_MAX_LEN', 1000): actual = format_stdout_stderr('', '') self.assertEqual(expected, actual) diff --git a/tests/common/test_cgroupapi.py b/tests/ga/test_cgroupapi.py similarity index 94% rename from tests/common/test_cgroupapi.py rename to tests/ga/test_cgroupapi.py index a7b47b44f6..6b15af1ebf 100644 --- a/tests/common/test_cgroupapi.py +++ b/tests/ga/test_cgroupapi.py @@ -22,8 +22,8 @@ import subprocess import tempfile -from azurelinuxagent.common.cgroupapi import CGroupsApi, SystemdCgroupsApi -from azurelinuxagent.common.cgroupstelemetry import CGroupsTelemetry +from azurelinuxagent.ga.cgroupapi import CGroupsApi, SystemdCgroupsApi +from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.common.osutil import systemd from azurelinuxagent.common.utils import fileutil from tests.lib.mock_cgroup_environment import mock_cgroup_environment @@ -39,7 +39,7 @@ def setUp(self): os.mkdir(os.path.join(self.cgroups_file_system_root, "cpu")) os.mkdir(os.path.join(self.cgroups_file_system_root, "memory")) - self.mock_cgroups_file_system_root = patch("azurelinuxagent.common.cgroupapi.CGROUPS_FILE_SYSTEM_ROOT", self.cgroups_file_system_root) + self.mock_cgroups_file_system_root = patch("azurelinuxagent.ga.cgroupapi.CGROUPS_FILE_SYSTEM_ROOT", self.cgroups_file_system_root) self.mock_cgroups_file_system_root.start() def tearDown(self): @@ -73,7 +73,7 @@ def 
test_cgroups_should_be_supported_only_on_ubuntu16_centos7dot4_redhat7dot4_an ] for (distro, supported) in test_cases: - with patch("azurelinuxagent.common.cgroupapi.get_distro", return_value=distro): + with patch("azurelinuxagent.ga.cgroupapi.get_distro", return_value=distro): self.assertEqual(CGroupsApi.cgroups_supported(), supported, "cgroups_supported() failed on {0}".format(distro)) @@ -150,7 +150,7 @@ def mock_popen(command, *args, **kwargs): with mock_cgroup_environment(self.tmp_dir): with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as output_file: - with patch("azurelinuxagent.common.cgroupapi.subprocess.Popen", side_effect=mock_popen) as popen_patch: # pylint: disable=unused-variable + with patch("subprocess.Popen", side_effect=mock_popen) as popen_patch: # pylint: disable=unused-variable command_output = SystemdCgroupsApi().start_extension_command( extension_name="Microsoft.Compute.TestExtension-1.2.3", command="A_TEST_COMMAND", @@ -191,7 +191,7 @@ def test_start_extension_command_should_execute_the_command_in_a_cgroup(self, _) @patch('time.sleep', side_effect=lambda _: mock_sleep()) def test_start_extension_command_should_use_systemd_to_execute_the_command(self, _): with mock_cgroup_environment(self.tmp_dir): - with patch("azurelinuxagent.common.cgroupapi.subprocess.Popen", wraps=subprocess.Popen) as popen_patch: + with patch("subprocess.Popen", wraps=subprocess.Popen) as popen_patch: SystemdCgroupsApi().start_extension_command( extension_name="Microsoft.Compute.TestExtension-1.2.3", command="the-test-extension-command", @@ -219,7 +219,7 @@ def test_cleanup_legacy_cgroups_should_remove_legacy_cgroups(self): legacy_cpu_cgroup = CGroupsTools.create_legacy_agent_cgroup(self.cgroups_file_system_root, "cpu", '') legacy_memory_cgroup = CGroupsTools.create_legacy_agent_cgroup(self.cgroups_file_system_root, "memory", '') - with patch("azurelinuxagent.common.cgroupapi.get_agent_pid_file_path", return_value=daemon_pid_file): + with 
patch("azurelinuxagent.ga.cgroupapi.get_agent_pid_file_path", return_value=daemon_pid_file): legacy_cgroups = SystemdCgroupsApi().cleanup_legacy_cgroups() self.assertEqual(legacy_cgroups, 2, "cleanup_legacy_cgroups() did not find all the expected cgroups") diff --git a/tests/common/test_cgroupconfigurator.py b/tests/ga/test_cgroupconfigurator.py similarity index 97% rename from tests/common/test_cgroupconfigurator.py rename to tests/ga/test_cgroupconfigurator.py index ebf7bac0e0..b5a9e09941 100644 --- a/tests/common/test_cgroupconfigurator.py +++ b/tests/ga/test_cgroupconfigurator.py @@ -29,9 +29,9 @@ from nose.plugins.attrib import attr from azurelinuxagent.common import conf -from azurelinuxagent.common.cgroup import AGENT_NAME_TELEMETRY, MetricsCounter, MetricValue, MetricsCategory, CpuCgroup -from azurelinuxagent.common.cgroupconfigurator import CGroupConfigurator, DisableCgroups -from azurelinuxagent.common.cgroupstelemetry import CGroupsTelemetry +from azurelinuxagent.ga.cgroup import AGENT_NAME_TELEMETRY, MetricsCounter, MetricValue, MetricsCategory, CpuCgroup +from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator, DisableCgroups +from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.common.event import WALAEventOperation from azurelinuxagent.common.exception import CGroupsException, ExtensionError, ExtensionErrorCodes, \ AgentMemoryExceededException @@ -361,7 +361,7 @@ def test_start_extension_command_should_not_use_systemd_when_cgroups_are_not_ena with self._get_cgroup_configurator() as configurator: configurator.disable("UNIT TEST", DisableCgroups.ALL) - with patch("azurelinuxagent.common.cgroupapi.subprocess.Popen", wraps=subprocess.Popen) as patcher: + with patch("azurelinuxagent.ga.cgroupapi.subprocess.Popen", wraps=subprocess.Popen) as patcher: configurator.start_extension_command( extension_name="Microsoft.Compute.TestExtension-1.2.3", command="date", @@ -381,7 +381,7 @@ def 
test_start_extension_command_should_not_use_systemd_when_cgroups_are_not_ena @patch('time.sleep', side_effect=lambda _: mock_sleep()) def test_start_extension_command_should_use_systemd_run_when_cgroups_are_enabled(self, _): with self._get_cgroup_configurator() as configurator: - with patch("azurelinuxagent.common.cgroupapi.subprocess.Popen", wraps=subprocess.Popen) as popen_patch: + with patch("azurelinuxagent.ga.cgroupapi.subprocess.Popen", wraps=subprocess.Popen) as popen_patch: configurator.start_extension_command( extension_name="Microsoft.Compute.TestExtension-1.2.3", command="the-test-extension-command", @@ -432,7 +432,7 @@ def mock_popen(command_arg, *args, **kwargs): raise Exception("A TEST EXCEPTION") return original_popen(command_arg, *args, **kwargs) - with patch("azurelinuxagent.common.cgroupapi.subprocess.Popen", side_effect=mock_popen): + with patch("azurelinuxagent.ga.cgroupapi.subprocess.Popen", side_effect=mock_popen): with self.assertRaises(Exception) as context_manager: configurator.start_extension_command( extension_name="Microsoft.Compute.TestExtension-1.2.3", @@ -454,7 +454,7 @@ def test_start_extension_command_should_disable_cgroups_and_invoke_the_command_d configurator.mocks.add_command(MockCommand("systemd-run", return_value=1, stdout='', stderr='Failed to start transient scope unit: syntax error')) with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as output_file: - with patch("azurelinuxagent.common.cgroupconfigurator.add_event") as mock_add_event: + with patch("azurelinuxagent.ga.cgroupconfigurator.add_event") as mock_add_event: with patch("subprocess.Popen", wraps=subprocess.Popen) as popen_patch: CGroupsTelemetry.reset() @@ -539,7 +539,7 @@ def test_start_extension_command_should_not_use_fallback_option_if_extension_fai with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stdout: with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stderr: - with patch("azurelinuxagent.common.cgroupapi.subprocess.Popen", 
wraps=subprocess.Popen) as popen_patch: + with patch("azurelinuxagent.ga.cgroupapi.subprocess.Popen", wraps=subprocess.Popen) as popen_patch: with self.assertRaises(ExtensionError) as context_manager: configurator.start_extension_command( extension_name="Microsoft.Compute.TestExtension-1.2.3", @@ -567,7 +567,7 @@ def test_start_extension_command_should_not_use_fallback_option_if_extension_fai @skip_if_predicate_true(is_python_version_26_or_34, "Disabled on Python 2.6 and 3.4 for now. Need to revisit to fix it") @attr('requires_sudo') @patch('time.sleep', side_effect=lambda _: mock_sleep()) - @patch("azurelinuxagent.common.utils.extensionprocessutil.TELEMETRY_MESSAGE_MAX_LEN", 5) + @patch("azurelinuxagent.ga.extensionprocessutil.TELEMETRY_MESSAGE_MAX_LEN", 5) def test_start_extension_command_should_not_use_fallback_option_if_extension_fails_with_long_output(self, *args): self.assertTrue(i_am_root(), "Test does not run when non-root") @@ -579,7 +579,7 @@ def test_start_extension_command_should_not_use_fallback_option_if_extension_fai with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stdout: with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stderr: - with patch("azurelinuxagent.common.cgroupapi.subprocess.Popen", wraps=subprocess.Popen) as popen_patch: + with patch("azurelinuxagent.ga.cgroupapi.subprocess.Popen", wraps=subprocess.Popen) as popen_patch: with self.assertRaises(ExtensionError) as context_manager: configurator.start_extension_command( extension_name="Microsoft.Compute.TestExtension-1.2.3", @@ -613,9 +613,9 @@ def test_start_extension_command_should_not_use_fallback_option_if_extension_tim with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stdout: with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stderr: - with patch("azurelinuxagent.common.utils.extensionprocessutil.wait_for_process_completion_or_timeout", + with patch("azurelinuxagent.ga.extensionprocessutil.wait_for_process_completion_or_timeout", 
return_value=[True, None, 0]): - with patch("azurelinuxagent.common.cgroupapi.SystemdCgroupsApi._is_systemd_failure", + with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupsApi._is_systemd_failure", return_value=False): with self.assertRaises(ExtensionError) as context_manager: configurator.start_extension_command( @@ -654,7 +654,7 @@ def mock_popen(command, *args, **kwargs): with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stdout: with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stderr: - with patch("azurelinuxagent.common.cgroupapi.subprocess.Popen", side_effect=mock_popen): + with patch("azurelinuxagent.ga.cgroupapi.subprocess.Popen", side_effect=mock_popen): # We expect this call to fail because of the syntax error process_output = configurator.start_extension_command( extension_name="Microsoft.Compute.TestExtension-1.2.3", @@ -896,7 +896,7 @@ def mock_popen(command, *args, **kwargs): return process with patch('time.sleep', side_effect=lambda _: original_sleep(0.1)): # start_extension_command has a small delay; skip it - with patch("azurelinuxagent.common.cgroupapi.subprocess.Popen", side_effect=mock_popen): + with patch("azurelinuxagent.ga.cgroupapi.subprocess.Popen", side_effect=mock_popen): with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stdout: with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stderr: configurator.start_extension_command( @@ -943,7 +943,7 @@ def get_completed_process(): agent_processes = [os.getppid(), os.getpid()] + agent_command_processes + [start_extension.systemd_run_pid] other_processes = [1, get_completed_process()] + extension_processes - with patch("azurelinuxagent.common.cgroupconfigurator.CGroupsApi.get_processes_in_cgroup", return_value=agent_processes + other_processes): + with patch("azurelinuxagent.ga.cgroupconfigurator.CGroupsApi.get_processes_in_cgroup", return_value=agent_processes + other_processes): with self.assertRaises(CGroupsException) as context_manager: 
configurator._check_processes_in_agent_cgroup() @@ -987,7 +987,7 @@ def test_check_cgroups_should_disable_cgroups_when_a_check_fails(self): patchers.append(p) p.start() - with patch("azurelinuxagent.common.cgroupconfigurator.add_event") as add_event: + with patch("azurelinuxagent.ga.cgroupconfigurator.add_event") as add_event: configurator.enable() tracked_metrics = [ @@ -1017,7 +1017,7 @@ def test_check_agent_memory_usage_should_raise_a_cgroups_exception_when_the_limi with self.assertRaises(AgentMemoryExceededException) as context_manager: with self._get_cgroup_configurator() as configurator: - with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_tracked_metrics") as tracked_metrics: + with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_tracked_metrics") as tracked_metrics: tracked_metrics.return_value = metrics configurator.check_agent_memory_usage() diff --git a/tests/common/test_cgroups.py b/tests/ga/test_cgroups.py similarity index 98% rename from tests/common/test_cgroups.py rename to tests/ga/test_cgroups.py index 58e179d82a..0ffcfed1bd 100644 --- a/tests/common/test_cgroups.py +++ b/tests/ga/test_cgroups.py @@ -22,7 +22,7 @@ import random import shutil -from azurelinuxagent.common.cgroup import CpuCgroup, MemoryCgroup, MetricsCounter, CounterNotFound +from azurelinuxagent.ga.cgroup import CpuCgroup, MemoryCgroup, MetricsCounter, CounterNotFound from azurelinuxagent.common.exception import CGroupsException from azurelinuxagent.common.osutil import get_osutil from azurelinuxagent.common.utils import fileutil diff --git a/tests/common/test_cgroupstelemetry.py b/tests/ga/test_cgroupstelemetry.py similarity index 85% rename from tests/common/test_cgroupstelemetry.py rename to tests/ga/test_cgroupstelemetry.py index e9b163437b..26fcecbf65 100644 --- a/tests/common/test_cgroupstelemetry.py +++ b/tests/ga/test_cgroupstelemetry.py @@ -19,8 +19,8 @@ import random import time -from azurelinuxagent.common.cgroup import CpuCgroup, MemoryCgroup -from 
azurelinuxagent.common.cgroupstelemetry import CGroupsTelemetry +from azurelinuxagent.ga.cgroup import CpuCgroup, MemoryCgroup +from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.common.utils import fileutil from tests.lib.tools import AgentTestCase, data_dir, patch @@ -136,12 +136,12 @@ def test_telemetry_polling_with_active_cgroups(self, *args): # pylint: disable= self._track_new_extension_cgroups(num_extensions) - with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage") as patch_get_memory_max_usage: - with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage: - with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage: - with patch("azurelinuxagent.common.cgroup.MemoryCgroup.try_swap_memory_usage") as patch_try_swap_memory_usage: - with patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage") as patch_get_cpu_usage: - with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as patch_is_active: + with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage") as patch_get_memory_max_usage: + with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage: + with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage: + with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.try_swap_memory_usage") as patch_try_swap_memory_usage: + with patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage") as patch_get_cpu_usage: + with patch("azurelinuxagent.ga.cgroup.CGroup.is_active") as patch_is_active: patch_is_active.return_value = True current_cpu = 30 @@ -163,10 +163,10 @@ def test_telemetry_polling_with_active_cgroups(self, *args): # pylint: disable= self.assertEqual(len(metrics), num_extensions * num_of_metrics_per_extn_expected) self._assert_polled_metrics_equal(metrics, current_cpu, current_memory, current_max_memory, current_swap_memory) - 
@patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage", side_effect=raise_ioerror) - @patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage", side_effect=raise_ioerror) - @patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage", side_effect=raise_ioerror) - @patch("azurelinuxagent.common.cgroup.CGroup.is_active", return_value=False) + @patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage", side_effect=raise_ioerror) + @patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage", side_effect=raise_ioerror) + @patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage", side_effect=raise_ioerror) + @patch("azurelinuxagent.ga.cgroup.CGroup.is_active", return_value=False) def test_telemetry_polling_with_inactive_cgroups(self, *_): num_extensions = 5 no_extensions_expected = 0 # pylint: disable=unused-variable @@ -182,10 +182,10 @@ def test_telemetry_polling_with_inactive_cgroups(self, *_): self.assertEqual(len(metrics), 0) - @patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage") - @patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") - @patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage") - @patch("azurelinuxagent.common.cgroup.CGroup.is_active") + @patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage") + @patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage") + @patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage") + @patch("azurelinuxagent.ga.cgroup.CGroup.is_active") def test_telemetry_polling_with_changing_cgroups_state(self, patch_is_active, patch_get_cpu_usage, # pylint: disable=unused-argument patch_get_mem, patch_get_max_mem, *args): num_extensions = 5 @@ -274,11 +274,11 @@ def test_telemetry_polling_to_generate_transient_logs_index_error(self): CGroupsTelemetry.poll_all_tracked() self.assertEqual(expected_call_count, patch_periodic_warn.call_count) - @patch("azurelinuxagent.common.cgroup.MemoryCgroup.try_swap_memory_usage") - 
@patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage") - @patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") - @patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage") - @patch("azurelinuxagent.common.cgroup.CGroup.is_active") + @patch("azurelinuxagent.ga.cgroup.MemoryCgroup.try_swap_memory_usage") + @patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage") + @patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage") + @patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage") + @patch("azurelinuxagent.ga.cgroup.CGroup.is_active") def test_telemetry_calculations(self, patch_is_active, patch_get_cpu_usage, patch_get_memory_usage, patch_get_memory_max_usage, patch_try_memory_swap_usage, *args): # pylint: disable=unused-argument num_polls = 10 @@ -321,13 +321,13 @@ def test_cgroup_is_tracked(self, *args): # pylint: disable=unused-argument self.assertFalse(CGroupsTelemetry.is_tracked("not_present_cpu_dummy_path")) self.assertFalse(CGroupsTelemetry.is_tracked("not_present_memory_dummy_path")) - @patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage", side_effect=raise_ioerror) + @patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage", side_effect=raise_ioerror) def test_process_cgroup_metric_with_no_memory_cgroup_mounted(self, *args): # pylint: disable=unused-argument num_extensions = 5 self._track_new_extension_cgroups(num_extensions) - with patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage") as patch_get_cpu_usage: - with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as patch_is_active: + with patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage") as patch_get_cpu_usage: + with patch("azurelinuxagent.ga.cgroup.CGroup.is_active") as patch_is_active: patch_is_active.return_value = True current_cpu = 30 @@ -341,16 +341,16 @@ def test_process_cgroup_metric_with_no_memory_cgroup_mounted(self, *args): # py self.assertEqual(len(metrics), num_extensions * 1) # Only 
CPU populated self._assert_polled_metrics_equal(metrics, current_cpu, 0, 0, 0) - @patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage", side_effect=raise_ioerror) + @patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage", side_effect=raise_ioerror) def test_process_cgroup_metric_with_no_cpu_cgroup_mounted(self, *args): # pylint: disable=unused-argument num_extensions = 5 self._track_new_extension_cgroups(num_extensions) - with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage") as patch_get_memory_max_usage: - with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage: - with patch("azurelinuxagent.common.cgroup.MemoryCgroup.try_swap_memory_usage") as patch_try_swap_memory_usage: - with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as patch_is_active: + with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage") as patch_get_memory_max_usage: + with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage: + with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.try_swap_memory_usage") as patch_try_swap_memory_usage: + with patch("azurelinuxagent.ga.cgroup.CGroup.is_active") as patch_is_active: patch_is_active.return_value = True current_memory = 209715200 @@ -367,14 +367,14 @@ def test_process_cgroup_metric_with_no_cpu_cgroup_mounted(self, *args): # pylin self.assertEqual(len(metrics), num_extensions * 3) self._assert_polled_metrics_equal(metrics, 0, current_memory, current_max_memory, current_swap_memory) - @patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage", side_effect=raise_ioerror) - @patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage", side_effect=raise_ioerror) - @patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage", side_effect=raise_ioerror) + @patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage", side_effect=raise_ioerror) + 
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage", side_effect=raise_ioerror) + @patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage", side_effect=raise_ioerror) def test_extension_telemetry_not_sent_for_empty_perf_metrics(self, *args): # pylint: disable=unused-argument num_extensions = 5 self._track_new_extension_cgroups(num_extensions) - with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as patch_is_active: + with patch("azurelinuxagent.ga.cgroup.CGroup.is_active") as patch_is_active: patch_is_active.return_value = False poll_count = 1 @@ -383,9 +383,9 @@ def test_extension_telemetry_not_sent_for_empty_perf_metrics(self, *args): # py metrics = CGroupsTelemetry.poll_all_tracked() self.assertEqual(0, len(metrics)) - @patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage") - @patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_throttled_time") - @patch("azurelinuxagent.common.cgroup.CGroup.is_active") + @patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage") + @patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_throttled_time") + @patch("azurelinuxagent.ga.cgroup.CGroup.is_active") def test_cgroup_telemetry_should_not_report_cpu_negative_value(self, patch_is_active, path_get_throttled_time, patch_get_cpu_usage): num_polls = 5 diff --git a/tests/ga/test_collect_logs.py b/tests/ga/test_collect_logs.py index fa0add0153..4ac3f03fb4 100644 --- a/tests/ga/test_collect_logs.py +++ b/tests/ga/test_collect_logs.py @@ -18,8 +18,8 @@ import os from azurelinuxagent.common import logger, conf -from azurelinuxagent.common.cgroup import CpuCgroup, MemoryCgroup, MetricValue -from azurelinuxagent.common.cgroupconfigurator import CGroupConfigurator +from azurelinuxagent.ga.cgroup import CpuCgroup, MemoryCgroup, MetricValue +from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator from azurelinuxagent.common.logger import Logger from azurelinuxagent.common.protocol.util import ProtocolUtil from azurelinuxagent.common.utils 
import fileutil @@ -225,7 +225,7 @@ def test_send_extension_metrics_telemetry(self, patch_poll_resource_usage, patch @patch("azurelinuxagent.ga.collect_logs.LogCollectorMonitorHandler._poll_resource_usage") def test_verify_log_collector_memory_limit_exceeded(self, patch_poll_resource_usage, mock_exit): with _create_log_collector_monitor_handler() as log_collector_monitor_handler: - with patch("azurelinuxagent.common.cgroupconfigurator.LOGCOLLECTOR_MEMORY_LIMIT", 8): + with patch("azurelinuxagent.ga.cgroupconfigurator.LOGCOLLECTOR_MEMORY_LIMIT", 8): patch_poll_resource_usage.return_value = [MetricValue("Process", "% Processor Time", "service", 1), MetricValue("Process", "Throttled Time", "service", 1), MetricValue("Memory", "Total Memory Usage", "service", 9), diff --git a/tests/ga/test_extension.py b/tests/ga/test_extension.py index ff7f170060..e3e365d9b6 100644 --- a/tests/ga/test_extension.py +++ b/tests/ga/test_extension.py @@ -30,7 +30,7 @@ from azurelinuxagent.common import conf from azurelinuxagent.common.agent_supported_feature import get_agent_supported_features_list_for_extensions, \ get_agent_supported_features_list_for_crp -from azurelinuxagent.common.cgroupconfigurator import CGroupConfigurator +from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator from azurelinuxagent.common.datacontract import get_properties from azurelinuxagent.common.event import WALAEventOperation from azurelinuxagent.common.utils import fileutil @@ -198,7 +198,7 @@ def mock_fail_popen(*args, **kwargs): # pylint: disable=unused-argument return original_popen("fail_this_command", **kwargs) with self._setup_test_env(wire_protocol_data.DATA_FILE_EXT_SINGLE) as (exthandlers_handler, protocol, no_of_exts): - with patch("azurelinuxagent.common.cgroupapi.subprocess.Popen", mock_fail_popen): + with patch("azurelinuxagent.ga.cgroupapi.subprocess.Popen", mock_fail_popen): exthandlers_handler.run() exthandlers_handler.report_ext_handlers_status() @@ -1331,7 +1331,7 @@ def 
mock_popen(cmd, *args, **kwargs): os.remove(status_path) return original_popen(["echo", "Yes"], *args, **kwargs) - with patch('azurelinuxagent.common.cgroupapi.subprocess.Popen', side_effect=mock_popen): + with patch('azurelinuxagent.ga.cgroupapi.subprocess.Popen', side_effect=mock_popen): with patch('azurelinuxagent.ga.exthandlers._DEFAULT_EXT_TIMEOUT_MINUTES', 0.01): exthandlers_handler.run() exthandlers_handler.report_ext_handlers_status() @@ -1374,7 +1374,7 @@ def mock_popen(cmd, *_, **kwargs): exthandlers_handler, protocol = self._create_mock(wire_protocol_data.WireProtocolData(aks_test_mock), mock_http_get, mock_crypt_util, *args) - with patch('azurelinuxagent.common.cgroupapi.subprocess.Popen', side_effect=mock_popen): + with patch('azurelinuxagent.ga.cgroupapi.subprocess.Popen', side_effect=mock_popen): exthandlers_handler.run() exthandlers_handler.report_ext_handlers_status() @@ -1422,7 +1422,7 @@ def mock_popen(cmd, *args, **kwargs): return original_popen(["echo", "Yes"], *args, **kwargs) - with patch('azurelinuxagent.common.cgroupapi.subprocess.Popen', side_effect=mock_popen): + with patch('azurelinuxagent.ga.cgroupapi.subprocess.Popen', side_effect=mock_popen): exthandlers_handler.run() exthandlers_handler.report_ext_handlers_status() @@ -1473,7 +1473,7 @@ def mock_popen(cmd, *args, **kwargs): return original_popen(["/fail/this/command"], *args, **kwargs) return original_popen(cmd, *args, **kwargs) - with patch('azurelinuxagent.common.cgroupapi.subprocess.Popen', side_effect=mock_popen): + with patch('azurelinuxagent.ga.cgroupapi.subprocess.Popen', side_effect=mock_popen): exthandlers_handler.run() exthandlers_handler.report_ext_handlers_status() @@ -1835,7 +1835,7 @@ def mock_popen(*args, **kwargs): extension_calls.append(args[0]) return original_popen(*args, **kwargs) - with patch('azurelinuxagent.common.cgroupapi.subprocess.Popen', side_effect=mock_popen): + with patch('azurelinuxagent.ga.cgroupapi.subprocess.Popen', side_effect=mock_popen): 
exthandlers_handler.run() exthandlers_handler.report_ext_handlers_status() @@ -2160,7 +2160,7 @@ def test_ext_path_and_version_env_variables_set_for_ever_operation(self, *args): self._assert_handler_status(protocol.report_vm_status, "Ready", expected_ext_count=1, version="1.0.0") - @patch("azurelinuxagent.common.cgroupconfigurator.handle_process_completion", side_effect="Process Successful") + @patch("azurelinuxagent.ga.cgroupconfigurator.handle_process_completion", side_effect="Process Successful") def test_ext_sequence_no_should_be_set_for_every_command_call(self, _, *args): test_data = wire_protocol_data.WireProtocolData(wire_protocol_data.DATA_FILE_MULTIPLE_EXT) exthandlers_handler, protocol = self._create_mock(test_data, *args) # pylint: disable=no-value-for-parameter diff --git a/tests/ga/test_exthandlers.py b/tests/ga/test_exthandlers.py index 10303ce7a1..2f03396599 100644 --- a/tests/ga/test_exthandlers.py +++ b/tests/ga/test_exthandlers.py @@ -28,7 +28,7 @@ from azurelinuxagent.common.protocol.util import ProtocolUtil from azurelinuxagent.common.protocol.wire import WireProtocol from azurelinuxagent.common.utils import fileutil -from azurelinuxagent.common.utils.extensionprocessutil import TELEMETRY_MESSAGE_MAX_LEN, format_stdout_stderr, \ +from azurelinuxagent.ga.extensionprocessutil import TELEMETRY_MESSAGE_MAX_LEN, format_stdout_stderr, \ read_output from azurelinuxagent.ga.exthandlers import parse_ext_status, ExtHandlerInstance, ExtCommandEnvVariable, \ ExtensionStatusError, _DEFAULT_SEQ_NO, get_exthandlers_handler, ExtHandlerState @@ -653,7 +653,7 @@ def test_it_should_read_only_the_head_of_large_outputs(self): # Mocking the call to file.read() is difficult, so instead we mock the call to format_stdout_stderr, which takes the # return value of the calls to file.read(). 
The intention of the test is to verify we never read (and load in memory) # more than a few KB of data from the files used to capture stdout/stderr - with patch('azurelinuxagent.common.utils.extensionprocessutil.format_stdout_stderr', side_effect=format_stdout_stderr) as mock_format: + with patch('azurelinuxagent.ga.extensionprocessutil.format_stdout_stderr', side_effect=format_stdout_stderr) as mock_format: output = self.ext_handler_instance.launch_command(command) self.assertGreaterEqual(len(output), 1024) @@ -686,7 +686,7 @@ def test_it_should_handle_errors_while_reading_the_command_output(self): def capture_process_output(stdout_file, stderr_file): # pylint: disable=unused-argument return original_capture_process_output(None, None) - with patch('azurelinuxagent.common.utils.extensionprocessutil.read_output', side_effect=capture_process_output): + with patch('azurelinuxagent.ga.extensionprocessutil.read_output', side_effect=capture_process_output): output = self.ext_handler_instance.launch_command(command) self.assertIn("[stderr]\nCannot read stdout/stderr:", output) diff --git a/tests/common/test_logcollector.py b/tests/ga/test_logcollector.py similarity index 89% rename from tests/common/test_logcollector.py rename to tests/ga/test_logcollector.py index 1ef2474c34..0fefedea23 100644 --- a/tests/common/test_logcollector.py +++ b/tests/ga/test_logcollector.py @@ -22,7 +22,7 @@ import tempfile import zipfile -from azurelinuxagent.common.logcollector import LogCollector +from azurelinuxagent.ga.logcollector import LogCollector from azurelinuxagent.common.utils import fileutil from azurelinuxagent.common.utils.fileutil import rm_dirs, mkdir, rm_files from tests.lib.tools import AgentTestCase, is_python_version_26, patch, skip_if_predicate_true, data_dir @@ -48,26 +48,26 @@ def setUpClass(cls): @classmethod def _mock_constants(cls): - cls.mock_manifest = patch("azurelinuxagent.common.logcollector.MANIFEST_NORMAL", cls._build_manifest()) + cls.mock_manifest = 
patch("azurelinuxagent.ga.logcollector.MANIFEST_NORMAL", cls._build_manifest()) cls.mock_manifest.start() cls.log_collector_dir = os.path.join(cls.tmp_dir, "logcollector") - cls.mock_log_collector_dir = patch("azurelinuxagent.common.logcollector._LOG_COLLECTOR_DIR", + cls.mock_log_collector_dir = patch("azurelinuxagent.ga.logcollector._LOG_COLLECTOR_DIR", cls.log_collector_dir) cls.mock_log_collector_dir.start() cls.truncated_files_dir = os.path.join(cls.tmp_dir, "truncated") - cls.mock_truncated_files_dir = patch("azurelinuxagent.common.logcollector._TRUNCATED_FILES_DIR", + cls.mock_truncated_files_dir = patch("azurelinuxagent.ga.logcollector._TRUNCATED_FILES_DIR", cls.truncated_files_dir) cls.mock_truncated_files_dir.start() cls.output_results_file_path = os.path.join(cls.log_collector_dir, "results.txt") - cls.mock_output_results_file_path = patch("azurelinuxagent.common.logcollector.OUTPUT_RESULTS_FILE_PATH", + cls.mock_output_results_file_path = patch("azurelinuxagent.ga.logcollector.OUTPUT_RESULTS_FILE_PATH", cls.output_results_file_path) cls.mock_output_results_file_path.start() cls.compressed_archive_path = os.path.join(cls.log_collector_dir, "logs.zip") - cls.mock_compressed_archive_path = patch("azurelinuxagent.common.logcollector.COMPRESSED_ARCHIVE_PATH", + cls.mock_compressed_archive_path = patch("azurelinuxagent.ga.logcollector.COMPRESSED_ARCHIVE_PATH", cls.compressed_archive_path) cls.mock_compressed_archive_path.start() @@ -210,8 +210,8 @@ def test_log_collector_parses_commands_in_manifest(self): copy,{1} diskinfo,""".format(folder_to_list, file_to_collect) - with patch("azurelinuxagent.common.logcollector.MANIFEST_NORMAL", manifest): - with patch('azurelinuxagent.common.logcollector.LogCollector._initialize_telemetry'): + with patch("azurelinuxagent.ga.logcollector.MANIFEST_NORMAL", manifest): + with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): log_collector = LogCollector(cpu_cgroup_path="dummy_cpu_path", 
memory_cgroup_path="dummy_memory_path") archive = log_collector.collect_logs_and_get_archive() @@ -239,8 +239,8 @@ def test_log_collector_uses_full_manifest_when_full_mode_enabled(self): copy,{0} """.format(file_to_collect) - with patch("azurelinuxagent.common.logcollector.MANIFEST_FULL", manifest): - with patch('azurelinuxagent.common.logcollector.LogCollector._initialize_telemetry'): + with patch("azurelinuxagent.ga.logcollector.MANIFEST_FULL", manifest): + with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): log_collector = LogCollector(is_full_mode=True, cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path") archive = log_collector.collect_logs_and_get_archive() @@ -254,7 +254,7 @@ def test_log_collector_should_collect_all_files(self): # All files in the manifest should be collected, since none of them are over the individual file size limit, # and combined they do not cross the archive size threshold. - with patch('azurelinuxagent.common.logcollector.LogCollector._initialize_telemetry'): + with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): log_collector = LogCollector(cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path") archive = log_collector.collect_logs_and_get_archive() @@ -275,8 +275,8 @@ def test_log_collector_should_collect_all_files(self): def test_log_collector_should_truncate_large_text_files_and_ignore_large_binary_files(self): # Set the size limit so that some files are too large to collect in full. 
- with patch("azurelinuxagent.common.logcollector._FILE_SIZE_LIMIT", SMALL_FILE_SIZE): - with patch('azurelinuxagent.common.logcollector.LogCollector._initialize_telemetry'): + with patch("azurelinuxagent.ga.logcollector._FILE_SIZE_LIMIT", SMALL_FILE_SIZE): + with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): log_collector = LogCollector(cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path") archive = log_collector.collect_logs_and_get_archive() @@ -308,9 +308,9 @@ def test_log_collector_should_prioritize_important_files_if_archive_too_big(self os.path.join(self.root_collect_dir, "less_important_file*") ] - with patch("azurelinuxagent.common.logcollector._UNCOMPRESSED_ARCHIVE_SIZE_LIMIT", 10 * 1024 * 1024): - with patch("azurelinuxagent.common.logcollector._MUST_COLLECT_FILES", must_collect_files): - with patch('azurelinuxagent.common.logcollector.LogCollector._initialize_telemetry'): + with patch("azurelinuxagent.ga.logcollector._UNCOMPRESSED_ARCHIVE_SIZE_LIMIT", 10 * 1024 * 1024): + with patch("azurelinuxagent.ga.logcollector._MUST_COLLECT_FILES", must_collect_files): + with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): log_collector = LogCollector(cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path") archive = log_collector.collect_logs_and_get_archive() @@ -336,8 +336,8 @@ def test_log_collector_should_prioritize_important_files_if_archive_too_big(self # if there is enough space. 
rm_files(os.path.join(self.root_collect_dir, "waagent.log.3.gz")) - with patch("azurelinuxagent.common.logcollector._UNCOMPRESSED_ARCHIVE_SIZE_LIMIT", 10 * 1024 * 1024): - with patch("azurelinuxagent.common.logcollector._MUST_COLLECT_FILES", must_collect_files): + with patch("azurelinuxagent.ga.logcollector._UNCOMPRESSED_ARCHIVE_SIZE_LIMIT", 10 * 1024 * 1024): + with patch("azurelinuxagent.ga.logcollector._MUST_COLLECT_FILES", must_collect_files): second_archive = log_collector.collect_logs_and_get_archive() expected_files = [ @@ -361,7 +361,7 @@ def test_log_collector_should_prioritize_important_files_if_archive_too_big(self def test_log_collector_should_update_archive_when_files_are_new_or_modified_or_deleted(self): # Ensure the archive reflects the state of files on the disk at collection time. If a file was updated, it # needs to be updated in the archive, deleted if removed from disk, and added if not previously seen. - with patch('azurelinuxagent.common.logcollector.LogCollector._initialize_telemetry'): + with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): log_collector = LogCollector(cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path") first_archive = log_collector.collect_logs_and_get_archive() self._assert_archive_created(first_archive) @@ -429,10 +429,10 @@ def test_log_collector_should_clean_up_uncollected_truncated_files(self): # Set the archive size limit so that not all files can be collected. In that case, files will be added to the # archive according to their priority. # Set the size limit so that only two files can be collected, of which one needs to be truncated. 
- with patch("azurelinuxagent.common.logcollector._UNCOMPRESSED_ARCHIVE_SIZE_LIMIT", 2 * SMALL_FILE_SIZE): - with patch("azurelinuxagent.common.logcollector._MUST_COLLECT_FILES", must_collect_files): - with patch("azurelinuxagent.common.logcollector._FILE_SIZE_LIMIT", SMALL_FILE_SIZE): - with patch('azurelinuxagent.common.logcollector.LogCollector._initialize_telemetry'): + with patch("azurelinuxagent.ga.logcollector._UNCOMPRESSED_ARCHIVE_SIZE_LIMIT", 2 * SMALL_FILE_SIZE): + with patch("azurelinuxagent.ga.logcollector._MUST_COLLECT_FILES", must_collect_files): + with patch("azurelinuxagent.ga.logcollector._FILE_SIZE_LIMIT", SMALL_FILE_SIZE): + with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): log_collector = LogCollector(cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path") archive = log_collector.collect_logs_and_get_archive() @@ -451,10 +451,10 @@ def test_log_collector_should_clean_up_uncollected_truncated_files(self): # removed both from the archive and from the filesystem. 
rm_files(os.path.join(self.root_collect_dir, "waagent.log.1")) - with patch("azurelinuxagent.common.logcollector._UNCOMPRESSED_ARCHIVE_SIZE_LIMIT", 2 * SMALL_FILE_SIZE): - with patch("azurelinuxagent.common.logcollector._MUST_COLLECT_FILES", must_collect_files): - with patch("azurelinuxagent.common.logcollector._FILE_SIZE_LIMIT", SMALL_FILE_SIZE): - with patch('azurelinuxagent.common.logcollector.LogCollector._initialize_telemetry'): + with patch("azurelinuxagent.ga.logcollector._UNCOMPRESSED_ARCHIVE_SIZE_LIMIT", 2 * SMALL_FILE_SIZE): + with patch("azurelinuxagent.ga.logcollector._MUST_COLLECT_FILES", must_collect_files): + with patch("azurelinuxagent.ga.logcollector._FILE_SIZE_LIMIT", SMALL_FILE_SIZE): + with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): log_collector = LogCollector(cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path") second_archive = log_collector.collect_logs_and_get_archive() diff --git a/tests/ga/test_monitor.py b/tests/ga/test_monitor.py index 9b11d81114..1dbec27c39 100644 --- a/tests/ga/test_monitor.py +++ b/tests/ga/test_monitor.py @@ -21,8 +21,8 @@ import string from azurelinuxagent.common import event, logger -from azurelinuxagent.common.cgroup import CpuCgroup, MemoryCgroup, MetricValue, _REPORT_EVERY_HOUR -from azurelinuxagent.common.cgroupstelemetry import CGroupsTelemetry +from azurelinuxagent.ga.cgroup import CpuCgroup, MemoryCgroup, MetricValue, _REPORT_EVERY_HOUR +from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.common.event import EVENTS_DIRECTORY from azurelinuxagent.common.protocol.healthservice import HealthService from azurelinuxagent.common.protocol.util import ProtocolUtil @@ -198,7 +198,7 @@ def tearDown(self): self.get_protocol.stop() @patch('azurelinuxagent.common.event.EventLogger.add_metric') - @patch("azurelinuxagent.common.cgroupstelemetry.CGroupsTelemetry.poll_all_tracked") + 
@patch("azurelinuxagent.ga.cgroupstelemetry.CGroupsTelemetry.poll_all_tracked") def test_send_extension_metrics_telemetry(self, patch_poll_all_tracked, # pylint: disable=unused-argument patch_add_metric, *args): patch_poll_all_tracked.return_value = [MetricValue("Process", "% Processor Time", "service", 1), @@ -212,7 +212,7 @@ def test_send_extension_metrics_telemetry(self, patch_poll_all_tracked, # pylin self.assertEqual(4, patch_add_metric.call_count) # Four metrics being sent. @patch('azurelinuxagent.common.event.EventLogger.add_metric') - @patch("azurelinuxagent.common.cgroupstelemetry.CGroupsTelemetry.poll_all_tracked") + @patch("azurelinuxagent.ga.cgroupstelemetry.CGroupsTelemetry.poll_all_tracked") def test_send_extension_metrics_telemetry_for_empty_cgroup(self, patch_poll_all_tracked, # pylint: disable=unused-argument patch_add_metric, *args): patch_poll_all_tracked.return_value = [] @@ -222,7 +222,7 @@ def test_send_extension_metrics_telemetry_for_empty_cgroup(self, patch_poll_all_ self.assertEqual(0, patch_add_metric.call_count) @patch('azurelinuxagent.common.event.EventLogger.add_metric') - @patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") + @patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage") @patch('azurelinuxagent.common.logger.Logger.periodic_warn') def test_send_extension_metrics_telemetry_handling_memory_cgroup_exceptions_errno2(self, patch_periodic_warn, # pylint: disable=unused-argument patch_get_memory_usage, @@ -238,7 +238,7 @@ def test_send_extension_metrics_telemetry_handling_memory_cgroup_exceptions_errn self.assertEqual(0, patch_add_metric.call_count) # No metrics should be sent. 
@patch('azurelinuxagent.common.event.EventLogger.add_metric') - @patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage") + @patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage") @patch('azurelinuxagent.common.logger.Logger.periodic_warn') def test_send_extension_metrics_telemetry_handling_cpu_cgroup_exceptions_errno2(self, patch_periodic_warn, # pylint: disable=unused-argument patch_cpu_usage, patch_add_metric, diff --git a/tests/ga/test_multi_config_extension.py b/tests/ga/test_multi_config_extension.py index 66b366fd0a..0fe8dea5a3 100644 --- a/tests/ga/test_multi_config_extension.py +++ b/tests/ga/test_multi_config_extension.py @@ -761,7 +761,7 @@ def mock_popen(cmd, *_, **kwargs): self.test_data['ext_conf'] = os.path.join(self._MULTI_CONFIG_TEST_DATA, "ext_conf_multi_config_no_dependencies.xml") with self._setup_test_env(mock_manifest=True) as (exthandlers_handler, protocol, no_of_extensions): - with patch('azurelinuxagent.common.cgroupapi.subprocess.Popen', side_effect=mock_popen): + with patch('azurelinuxagent.ga.cgroupapi.subprocess.Popen', side_effect=mock_popen): # Case 1: Check normal scenario - Install/Enable mc_handlers, sc_handler = self.__run_and_assert_generic_case(exthandlers_handler, protocol, no_of_extensions) @@ -924,7 +924,7 @@ def mock_popen(cmd, *_, **kwargs): self.test_data['ext_conf'] = os.path.join(self._MULTI_CONFIG_TEST_DATA, "ext_conf_multi_config_no_dependencies.xml") with self._setup_test_env(mock_manifest=True) as (exthandlers_handler, protocol, no_of_extensions): - with patch('azurelinuxagent.common.cgroupapi.subprocess.Popen', side_effect=mock_popen): + with patch('azurelinuxagent.ga.cgroupapi.subprocess.Popen', side_effect=mock_popen): exthandlers_handler.run() exthandlers_handler.report_ext_handlers_status() self.assertEqual(no_of_extensions, @@ -1209,7 +1209,7 @@ def mock_popen(cmd, *_, **kwargs): return original_popen(cmd, *_, **kwargs) with self._setup_test_env(mock_manifest=True) as (exthandlers_handler, protocol, 
no_of_extensions): - with patch('azurelinuxagent.common.cgroupapi.subprocess.Popen', side_effect=mock_popen): + with patch('azurelinuxagent.ga.cgroupapi.subprocess.Popen', side_effect=mock_popen): exthandlers_handler.run() exthandlers_handler.report_ext_handlers_status() diff --git a/tests/common/test_persist_firewall_rules.py b/tests/ga/test_persist_firewall_rules.py similarity index 98% rename from tests/common/test_persist_firewall_rules.py rename to tests/ga/test_persist_firewall_rules.py index da414c952d..5ee397baf3 100644 --- a/tests/common/test_persist_firewall_rules.py +++ b/tests/ga/test_persist_firewall_rules.py @@ -25,7 +25,7 @@ import azurelinuxagent.common.conf as conf from azurelinuxagent.common.future import ustr from azurelinuxagent.common.osutil.default import DefaultOSUtil -from azurelinuxagent.common.persist_firewall_rules import PersistFirewallRulesHandler +from azurelinuxagent.ga.persist_firewall_rules import PersistFirewallRulesHandler from azurelinuxagent.common.utils import fileutil, shellutil from azurelinuxagent.common.utils.networkutil import AddFirewallRules, FirewallCmdDirectCommands from tests.lib.tools import AgentTestCase, MagicMock, patch @@ -90,9 +90,9 @@ def _get_persist_firewall_rules_handler(self, systemd=True): # Just for these tests, ignoring the mode of mkdir to allow non-sudo tests orig_mkdir = fileutil.mkdir - with patch("azurelinuxagent.common.persist_firewall_rules.fileutil.mkdir", + with patch("azurelinuxagent.ga.persist_firewall_rules.fileutil.mkdir", side_effect=lambda path, **mode: orig_mkdir(path)): - with patch("azurelinuxagent.common.persist_firewall_rules.get_osutil", return_value=osutil): + with patch("azurelinuxagent.ga.persist_firewall_rules.get_osutil", return_value=osutil): with patch('azurelinuxagent.common.osutil.systemd.is_systemd', return_value=systemd): with patch("azurelinuxagent.common.utils.shellutil.subprocess.Popen", side_effect=self.__mock_popen): yield 
PersistFirewallRulesHandler(self.__test_dst_ip, self.__test_uid) @@ -343,7 +343,7 @@ def mock_write_file(path, _, *__): test_files = [self._binary_file, self._network_service_unit_file] for file_to_fail in test_files: files_to_fail = [file_to_fail] - with patch("azurelinuxagent.common.persist_firewall_rules.fileutil.write_file", + with patch("azurelinuxagent.ga.persist_firewall_rules.fileutil.write_file", side_effect=mock_write_file): with self.assertRaises(Exception) as context_manager: handler.setup() diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index ef2e3e66e5..43935b525d 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -32,7 +32,7 @@ from azurelinuxagent.common.exception import HttpError, \ ExitException, AgentMemoryExceededException from azurelinuxagent.common.future import ustr, httpclient -from azurelinuxagent.common.persist_firewall_rules import PersistFirewallRulesHandler +from azurelinuxagent.ga.persist_firewall_rules import PersistFirewallRulesHandler from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol from azurelinuxagent.common.protocol.restapi import VMAgentFamily, \ ExtHandlerPackage, ExtHandlerPackageList, Extension, VMStatus, ExtHandlerStatus, ExtensionStatus, \ @@ -1875,7 +1875,7 @@ def iterator(*_, **__): with patch('azurelinuxagent.ga.remoteaccess.get_remote_access_handler'): with patch('azurelinuxagent.ga.agent_update_handler.get_agent_update_handler'): with patch('azurelinuxagent.ga.update.initialize_event_logger_vminfo_common_parameters'): - with patch('azurelinuxagent.common.cgroupapi.CGroupsApi.cgroups_supported', return_value=False): # skip all cgroup stuff + with patch('azurelinuxagent.ga.cgroupapi.CGroupsApi.cgroups_supported', return_value=False): # skip all cgroup stuff with patch('azurelinuxagent.ga.update.is_log_collection_allowed', return_value=True): with patch('time.sleep'): with patch('sys.exit'): @@ -2374,7 +2374,7 @@ class AgentMemoryCheckTestCase(AgentTestCase): 
@patch("azurelinuxagent.common.logger.info") @patch("azurelinuxagent.ga.update.add_event") def test_check_agent_memory_usage_raises_exit_exception(self, patch_add_event, patch_info, *_): - with patch("azurelinuxagent.common.cgroupconfigurator.CGroupConfigurator._Impl.check_agent_memory_usage", side_effect=AgentMemoryExceededException()): + with patch("azurelinuxagent.ga.cgroupconfigurator.CGroupConfigurator._Impl.check_agent_memory_usage", side_effect=AgentMemoryExceededException()): with patch('azurelinuxagent.common.conf.get_enable_agent_memory_usage_check', return_value=True): with self.assertRaises(ExitException) as context_manager: update_handler = get_update_handler() @@ -2390,7 +2390,7 @@ def test_check_agent_memory_usage_raises_exit_exception(self, patch_add_event, p @patch("azurelinuxagent.common.logger.warn") @patch("azurelinuxagent.ga.update.add_event") def test_check_agent_memory_usage_fails(self, patch_add_event, patch_warn, *_): - with patch("azurelinuxagent.common.cgroupconfigurator.CGroupConfigurator._Impl.check_agent_memory_usage", side_effect=Exception()): + with patch("azurelinuxagent.ga.cgroupconfigurator.CGroupConfigurator._Impl.check_agent_memory_usage", side_effect=Exception()): with patch('azurelinuxagent.common.conf.get_enable_agent_memory_usage_check', return_value=True): update_handler = get_update_handler() diff --git a/tests/lib/mock_cgroup_environment.py b/tests/lib/mock_cgroup_environment.py index 408e1c15cc..3b51dce8fe 100644 --- a/tests/lib/mock_cgroup_environment.py +++ b/tests/lib/mock_cgroup_environment.py @@ -117,7 +117,7 @@ def mock_cgroup_environment(tmp_dir): (os.path.join(data_dir, 'init', 'azure-vmextensions.slice'), UnitFilePaths.vmextensions) ] - with patch('azurelinuxagent.common.cgroupapi.CGroupsApi.cgroups_supported', return_value=True): + with patch('azurelinuxagent.ga.cgroupapi.CGroupsApi.cgroups_supported', return_value=True): with patch('azurelinuxagent.common.osutil.systemd.is_systemd', return_value=True): with 
MockEnvironment(tmp_dir, commands=_MOCKED_COMMANDS, paths=_MOCKED_PATHS, files=_MOCKED_FILES, data_files=data_files) as mock: yield mock diff --git a/tests/test_agent.py b/tests/test_agent.py index 14053de41f..a2509ad851 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -18,8 +18,9 @@ import os.path from azurelinuxagent.agent import parse_args, Agent, usage, AgentCommands -from azurelinuxagent.common import cgroupconfigurator, conf, logcollector -from azurelinuxagent.common.cgroupapi import SystemdCgroupsApi +from azurelinuxagent.common import conf +from azurelinuxagent.ga import logcollector, cgroupconfigurator +from azurelinuxagent.ga.cgroupapi import SystemdCgroupsApi from azurelinuxagent.common.utils import fileutil from azurelinuxagent.ga.collect_logs import CollectLogsHandler from tests.lib.tools import AgentTestCase, data_dir, Mock, patch From 51d0ba5fdb90a53b2605aca9024ca3cf70a8f87f Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 22 Aug 2023 17:44:19 -0700 Subject: [PATCH 064/240] improved logging (#2893) --- .../tests/agent_publish/agent_publish.py | 8 ++--- .../scripts/agent_publish-check_update.py | 33 +++++++++---------- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/tests_e2e/tests/agent_publish/agent_publish.py b/tests_e2e/tests/agent_publish/agent_publish.py index 397ecd0e33..eaddc74ede 100644 --- a/tests_e2e/tests/agent_publish/agent_publish.py +++ b/tests_e2e/tests/agent_publish/agent_publish.py @@ -57,13 +57,13 @@ def _get_agent_info(self) -> None: def _prepare_agent(self) -> None: log.info("Modifying agent update related config flags") - output = self._ssh_client.run_command("update-waagent-conf Debug.DownloadNewAgents=y AutoUpdate.GAFamily=Test", use_sudo=True) - log.info('Updated agent-update related config flags \n%s', output) + self._run_remote_test("update-waagent-conf Debug.DownloadNewAgents=y AutoUpdate.GAFamily=Test", use_sudo=True) + log.info('Updated 
agent-update DownloadNewAgents GAFamily config flags') def _check_update(self) -> None: log.info("Verifying for agent update status") - output = self._ssh_client.run_command("agent_publish-check_update.py") - log.info('Checked the agent update \n%s', output) + self._run_remote_test("agent_publish-check_update.py") + log.info('Successfully checked the agent update') def _check_cse(self) -> None: custom_script_2_1 = VirtualMachineExtensionClient( diff --git a/tests_e2e/tests/scripts/agent_publish-check_update.py b/tests_e2e/tests/scripts/agent_publish-check_update.py index 9f8f66c4f2..38ae00a909 100755 --- a/tests_e2e/tests/scripts/agent_publish-check_update.py +++ b/tests_e2e/tests/scripts/agent_publish-check_update.py @@ -17,10 +17,12 @@ # limitations under the License. # import re -import sys -import logging + +from assertpy import fail from tests_e2e.tests.lib.agent_log import AgentLog +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.remote_test import run_remote_test from tests_e2e.tests.lib.retry import retry_if_false @@ -65,9 +67,9 @@ def verify_agent_update_from_log(): update_successful = False update_version = '' - log = AgentLog() + agentlog = AgentLog() - for record in log.read(): + for record in agentlog.read(): if 'TelemetryData' in record.text: continue @@ -76,37 +78,34 @@ def verify_agent_update_from_log(): if update_match: detected_update = True update_version = update_match.groups()[2] - logging.info('found the agent update log: %s', record.text) + log.info('found the agent update log: %s', record.text) break if detected_update: running_match = re.match(_RUNNING_PATTERN_00, record.text) if running_match and update_version == running_match.groups()[0]: update_successful = True - logging.info('found the agent started new version log: %s', record.text) + log.info('found the agent started new version log: %s', record.text) if detected_update: - logging.info('update was detected: %s', update_version) + log.info('update was detected: 
%s', update_version) if update_successful: - logging.info('update was successful') + log.info('update was successful') else: - logging.warning('update was not successful') + log.warning('update was not successful') exit_code = 1 else: - logging.warning('update was not detected') + log.warning('update was not detected') exit_code = 1 return exit_code == 0 # This method will trace agent update messages in the agent log and determine if the update was successful or not. -try: - logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.DEBUG, stream=sys.stdout) +def main(): found: bool = retry_if_false(verify_agent_update_from_log) if not found: - raise Exception('update was not found in the logs') -except Exception as e: - logging.error(e) - sys.exit(1) + fail('update was not found in the logs') + -sys.exit(0) +run_remote_test(main) From b138dfe443155f9de6330d5b5e7ae62b7ee595d8 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Wed, 23 Aug 2023 11:57:47 -0700 Subject: [PATCH 065/240] skip test in mooncake and usgov (#2904) --- tests_e2e/test_suites/ext_cgroups.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests_e2e/test_suites/ext_cgroups.yml b/tests_e2e/test_suites/ext_cgroups.yml index 5b3e017f52..4603393bfa 100644 --- a/tests_e2e/test_suites/ext_cgroups.yml +++ b/tests_e2e/test_suites/ext_cgroups.yml @@ -7,4 +7,7 @@ tests: - "ext_cgroups/ext_cgroups.py" images: "cgroups-endorsed" # The DCR test extension installs sample service, so this test suite uses it to test services cgroups but this is only published in southcentralus region in public cloud. 
-locations: "AzureCloud:southcentralus" \ No newline at end of file +locations: "AzureCloud:southcentralus" +skip_on_clouds: + - "AzureChinaCloud" + - "AzureUSGovernment" \ No newline at end of file From e86f1a5556341e376deb94d2b9389988beca259f Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Mon, 28 Aug 2023 15:37:14 -0700 Subject: [PATCH 066/240] extension telemetry pipeline scenario (#2901) * Update version to dummy 1.0.0.0' * Revert version change * Barebones for etp * Scenario should own VM because of conf change * Add extension telemetry pipeline test * Clean up code * Improve log messages * Fix pylint errors * Improve logging * Improve code comments * VmAccess is not supported on flatcar * Address PR comments * Add support_distros in VmExtensionIdentifier * Fix logic for support_distros in VmExtensionIdentifier * Use run_remote_test for remote script --- tests_e2e/orchestrator/runbook.yml | 2 +- .../test_suites/ext_telemetry_pipeline.yml | 9 + tests_e2e/tests/agent_bvt/vm_access.py | 4 +- .../ext_telemetry_pipeline.py | 109 +++++++++ tests_e2e/tests/lib/identifiers.py | 15 ++ ...telemetry_pipeline-add_extension_events.py | 224 ++++++++++++++++++ 6 files changed, 360 insertions(+), 3 deletions(-) create mode 100644 tests_e2e/test_suites/ext_telemetry_pipeline.yml create mode 100755 tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py create mode 100755 tests_e2e/tests/scripts/ext_telemetry_pipeline-add_extension_events.py diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index 016f79546e..3492e9c80c 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -51,7 +51,7 @@ variable: # # The test suites to execute - name: test_suites - value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_update, agent_status, multi_config_ext, agent_cgroups, ext_cgroups, agent_firewall" + 
value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_update, agent_status, multi_config_ext, agent_cgroups, ext_cgroups, agent_firewall, ext_telemetry_pipeline" - name: cloud value: "AzureCloud" is_case_visible: true diff --git a/tests_e2e/test_suites/ext_telemetry_pipeline.yml b/tests_e2e/test_suites/ext_telemetry_pipeline.yml new file mode 100644 index 0000000000..f309f5cb8a --- /dev/null +++ b/tests_e2e/test_suites/ext_telemetry_pipeline.yml @@ -0,0 +1,9 @@ +# +# This test ensures that the agent does not throw any errors while trying to transmit events to wireserver. It does not +# validate if the events actually make it to wireserver +# +name: "ExtTelemetryPipeline" +tests: + - "agent_bvt/vm_access.py" + - "ext_telemetry_pipeline/ext_telemetry_pipeline.py" +images: "random(endorsed)" diff --git a/tests_e2e/tests/agent_bvt/vm_access.py b/tests_e2e/tests/agent_bvt/vm_access.py index 7983d41479..9b52ac2453 100755 --- a/tests_e2e/tests/agent_bvt/vm_access.py +++ b/tests_e2e/tests/agent_bvt/vm_access.py @@ -39,8 +39,8 @@ class VmAccessBvt(AgentTest): def run(self): ssh: SshClient = self._context.create_ssh_client() - if "-flatcar" in ssh.run_command("uname -a"): - raise TestSkipped("Currently VMAccess is not supported on Flatcar") + if not VmExtensionIds.VmAccess.supports_distro(ssh.run_command("uname -a")): + raise TestSkipped("Currently VMAccess is not supported on this distro") # Try to use a unique username for each test run (note that we truncate to 32 chars to # comply with the rules for usernames) diff --git a/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py b/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py new file mode 100755 index 0000000000..de051485ad --- /dev/null +++ b/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# 
Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This test ensures that the agent does not throw any errors while trying to transmit events to wireserver. It does not +# validate if the events actually make it to wireserver +# TODO: Update this test suite to verify that the agent picks up AND sends telemetry produced by extensions +# (work item https://dev.azure.com/msazure/One/_workitems/edit/24903999) +# + +import random +from typing import List, Dict, Any + +from azurelinuxagent.common.conf import get_etp_collection_period + +from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.identifiers import VmExtensionIds +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.ssh_client import SshClient +from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient + + +class ExtTelemetryPipeline(AgentTest): + def run(self): + ssh_client: SshClient = self._context.create_ssh_client() + + # Extensions we will create events for + extensions = ["Microsoft.Azure.Extensions.CustomScript"] + if VmExtensionIds.VmAccess.supports_distro(ssh_client.run_command("uname -a")): + extensions.append("Microsoft.OSTCExtensions.VMAccessForLinux") + + # Set the etp collection period to 30 seconds instead of default 5 minutes + default_collection_period = get_etp_collection_period() + log.info("") + log.info("Set ETP collection period to 30 seconds on the test VM [%s]", self._context.vm.name) + output = 
ssh_client.run_command("update-waagent-conf Debug.EtpCollectionPeriod=30", use_sudo=True) + log.info("Updated waagent conf with Debug.ETPCollectionPeriod=30 completed:\n%s", output) + + # Add CSE to the test VM twice to ensure its events directory still exists after re-enabling + log.info("") + log.info("Add CSE to the test VM...") + cse = VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.CustomScript, resource_name="CustomScript") + cse.enable(settings={'commandToExecute': "echo 'enable'"}) + cse.assert_instance_view() + + log.info("") + log.info("Add CSE to the test VM again...") + cse.enable(settings={'commandToExecute': "echo 'enable again'"}) + cse.assert_instance_view() + + # Check agent log to verify ETP is enabled + command = "agent_ext_workflow-check_data_in_agent_log.py --data 'Extension Telemetry pipeline enabled: True'" + log.info("") + log.info("Check agent log to verify ETP is enabled...") + log.info("Remote command [%s] completed:\n%s", command, ssh_client.run_command(command)) + + # Add good extension events for each extension and check that the TelemetryEventsCollector collects them + # TODO: Update test suite to check that the agent is picking up the events generated by the extension, instead + # of generating on the extensions' behalf + # (work item - https://dev.azure.com/msazure/One/_workitems/edit/24903999) + log.info("") + log.info("Add good extension events and check they are reported...") + max_events = random.randint(10, 50) + self._run_remote_test(f"ext_telemetry_pipeline-add_extension_events.py " + f"--extensions {','.join(extensions)} " + f"--num_events_total {max_events}", use_sudo=True) + log.info("") + log.info("Good extension events were successfully reported.") + + # Add invalid events for each extension and check that the TelemetryEventsCollector drops them + log.info("") + log.info("Add bad extension events and check they are reported...") + self._run_remote_test(f"ext_telemetry_pipeline-add_extension_events.py " + 
f"--extensions {','.join(extensions)} " + f"--num_events_total {max_events} " + f"--num_events_bad {random.randint(5, max_events-5)}", use_sudo=True) + log.info("") + log.info("Bad extension events were successfully dropped.") + + # Reset the etp collection period to the default value so this VM can be shared with other suites + log.info("") + log.info("Reset ETP collection period to {0} seconds on the test VM [{1}]".format(default_collection_period, self._context.vm.name)) + output = ssh_client.run_command("update-waagent-conf Debug.EtpCollectionPeriod={0}".format(default_collection_period), use_sudo=True) + log.info("Updated waagent conf with default collection period completed:\n%s", output) + + def get_ignore_error_rules(self) -> List[Dict[str, Any]]: + return [ + {'message': r"Dropped events for Extension.*"} + ] + + +if __name__ == "__main__": + ExtTelemetryPipeline.run_from_command_line() diff --git a/tests_e2e/tests/lib/identifiers.py b/tests_e2e/tests/lib/identifiers.py index 7bb067a835..45af22745f 100644 --- a/tests_e2e/tests/lib/identifiers.py +++ b/tests_e2e/tests/lib/identifiers.py @@ -15,6 +15,8 @@ # limitations under the License. # +from typing import Dict, List + class VmIdentifier(object): def __init__(self, cloud: str, location: str, subscription: str, resource_group: str, name: str): @@ -45,6 +47,19 @@ def __init__(self, publisher: str, ext_type: str, version: str): self.type: str = ext_type self.version: str = version + unsupported_distros: Dict[str, List[str]] = { + "Microsoft.OSTCExtensions.VMAccessForLinux": ["flatcar"] + } + + def supports_distro(self, system_info: str) -> bool: + """ + Returns true if an unsupported distro name for the extension is found in the provided system info + """ + ext_unsupported_distros = VmExtensionIdentifier.unsupported_distros.get(self.publisher + "." 
+ self.type) + if ext_unsupported_distros is not None and any(distro in system_info for distro in ext_unsupported_distros): + return False + return True + def __str__(self): return f"{self.publisher}.{self.type}" diff --git a/tests_e2e/tests/scripts/ext_telemetry_pipeline-add_extension_events.py b/tests_e2e/tests/scripts/ext_telemetry_pipeline-add_extension_events.py new file mode 100755 index 0000000000..2e5776c714 --- /dev/null +++ b/tests_e2e/tests/scripts/ext_telemetry_pipeline-add_extension_events.py @@ -0,0 +1,224 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Adds extension events for each provided extension and verifies the TelemetryEventsCollector collected or dropped them +# + +import argparse +import json +import os +import sys +import time +import uuid + +from assertpy import fail +from datetime import datetime, timedelta +from random import choice +from typing import List + +from tests_e2e.tests.lib.agent_log import AgentLog +from tests_e2e.tests.lib.logging import log + + +def add_extension_events(extensions: List[str], bad_event_count=0, no_of_events_per_extension=50): + def missing_key(bad_event): + key = choice(list(bad_event.keys())) + del bad_event[key] + return "MissingKeyError: {0}".format(key) + + def oversize_error(bad_event): + bad_event["EventLevel"] = "ThisIsAnOversizeError\n" * 300 + return "OversizeEventError" + + def empty_message(bad_event): + bad_event["Message"] = "" + return "EmptyMessageError" + + errors = [ + missing_key, + oversize_error, + empty_message + ] + + sample_ext_event = { + "EventLevel": "INFO", + "Message": "Starting IaaS ScriptHandler Extension v1", + "Version": "1.0", + "TaskName": "Extension Info", + "EventPid": "3228", + "EventTid": "1", + "OperationId": "519e4beb-018a-4bd9-8d8e-c5226cf7f56e", + "TimeStamp": "2019-12-12T01:20:05.0950244Z" + } + + sample_messages = [ + "Starting IaaS ScriptHandler Extension v1", + "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.", + "The quick brown fox jumps over the lazy dog", + "Cursus risus at ultrices mi.", + "Doing Something", + "Iaculis eu non diam phasellus.", + "Doing other thing", + "Look ma, lemons", + "Pretium quam vulputate dignissim suspendisse.", + "Man this is insane", + "I wish it worked as it should and not as it ain't", + "Ut faucibus pulvinar elementum integer enim neque volutpat ac tincidunt." 
+ "Did you get any of that?", + "Non-English message - 此文字不是英文的" + "κόσμε", + "�", + "Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen Wolther spillede på xylofon.", + "Falsches Üben von Xylophonmusik quält jeden größeren Zwerg", + "Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich", + "Heizölrückstoßabdämpfung", + "Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ στὸ χρυσαφὶ ξέφωτο", + "Ξεσκεπάζω τὴν ψυχοφθόρα βδελυγμία", + "El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y frío, añoraba a su querido cachorro.", + "Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à côté de l'alcôve ovoïde, où les bûches", + "se consument dans l'âtre, ce qui lui permet de penser à la cænogenèse de l'être dont il est question", + "dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, pense-t-il, diminue çà et là la qualité de son œuvre.", + "D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh", + "Árvíztűrő tükörfúrógép", + "Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa", + "Sævör grét áðan því úlpan var ónýt", + "いろはにほへとちりぬるを わかよたれそつねならむ うゐのおくやまけふこえて あさきゆめみしゑひもせす", + "イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン", + "? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה" + "Pchnąć w tę łódź jeża lub ośm skrzyń fig", + "В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!", + "๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน", + "Pijamalı hasta, yağız şoföre çabucak güvendi." + ] + + for ext in extensions: + bad_count = bad_event_count + event_dir = os.path.join("/var/log/azure/", ext, "events") + if not os.path.isdir(event_dir): + fail(f"Expected events dir: {event_dir} does not exist") + + log.info("") + log.info("Expected dir: {0} exists".format(event_dir)) + log.info("Creating random extension events for {0}. 
No of Good Events: {1}, No of Bad Events: {2}".format( + ext, no_of_events_per_extension - bad_event_count, bad_event_count)) + + new_opr_id = str(uuid.uuid4()) + event_list = [] + + for _ in range(no_of_events_per_extension): + event = sample_ext_event.copy() + event["OperationId"] = new_opr_id + event["TimeStamp"] = datetime.utcnow().strftime(u'%Y-%m-%dT%H:%M:%S.%fZ') + event["Message"] = choice(sample_messages) + + if bad_count != 0: + # Make this event a bad event + reason = choice(errors)(event) + bad_count -= 1 + + # Missing key error might delete the TaskName key from the event + if "TaskName" in event: + event["TaskName"] = "{0}. This is a bad event: {1}".format(event["TaskName"], reason) + else: + event["EventLevel"] = "{0}. This is a bad event: {1}".format(event["EventLevel"], reason) + + event_list.append(event) + + file_name = os.path.join(event_dir, '{0}.json'.format(int(time.time() * 1000000))) + log.info("Create json with extension events in event directory: {0}".format(file_name)) + with open("{0}.tmp".format(file_name), 'w+') as f: + json.dump(event_list, f) + os.rename("{0}.tmp".format(file_name), file_name) + + +def wait_for_extension_events_dir_empty(extensions: List[str]): + # By ensuring events dir to be empty, we verify that the telemetry events collector has completed its run + start_time = datetime.now() + timeout = timedelta(minutes=2) + ext_event_dirs = [os.path.join("/var/log/azure/", ext, "events") for ext in extensions] + + while (start_time + timeout) >= datetime.now(): + log.info("") + log.info("Waiting for extension event directories to be empty...") + all_dir_empty = True + for event_dir in ext_event_dirs: + if not os.path.exists(event_dir) or len(os.listdir(event_dir)) != 0: + log.info("Dir: {0} is not yet empty".format(event_dir)) + all_dir_empty = False + + if all_dir_empty: + log.info("Extension event directories are empty: \n{0}".format(ext_event_dirs)) + return + + time.sleep(20) + + fail("Extension events dir not empty 
before 2 minute timeout") + + +def main(): + # This test is a best effort test to ensure that the agent does not throw any errors while trying to transmit + # events to wireserver. We're not validating if the events actually make it to wireserver. + + parser = argparse.ArgumentParser() + parser.add_argument("--extensions", dest='extensions', type=str, required=True) + parser.add_argument("--num_events_total", dest='num_events_total', type=int, required=True) + parser.add_argument("--num_events_bad", dest='num_events_bad', type=int, required=False, default=0) + args, _ = parser.parse_known_args() + + extensions = args.extensions.split(',') + add_extension_events(extensions=extensions, bad_event_count=args.num_events_bad, + no_of_events_per_extension=args.num_events_total) + + # Ensure that the event collector ran after adding the events + wait_for_extension_events_dir_empty(extensions=extensions) + + # Sleep for a min to ensure that the TelemetryService has enough time to send events and report errors if any + time.sleep(60) + found_error = False + agent_log = AgentLog() + + log.info("") + log.info("Check that the TelemetryEventsCollector did not emit any errors while collecting and reporting events...") + telemetry_event_collector_name = "TelemetryEventsCollector" + for agent_record in agent_log.read(): + if agent_record.thread == telemetry_event_collector_name and agent_record.level == "ERROR": + found_error = True + log.info("waagent.log contains the following errors emitted by the {0} thread: \n{1}".format(telemetry_event_collector_name, agent_record)) + + if found_error: + fail("Found error(s) emitted by the TelemetryEventsCollector, but none were expected.") + log.info("The TelemetryEventsCollector did not emit any errors while collecting and reporting events") + + for ext in extensions: + good_count = args.num_events_total - args.num_events_bad + log.info("") + if not agent_log.agent_log_contains("Collected {0} events for extension: {1}".format(good_count, 
ext)): + fail("The TelemetryEventsCollector did not collect the expected number of events: {0} for {1}".format(good_count, ext)) + log.info("All {0} good events for {1} were collected by the TelemetryEventsCollector".format(good_count, ext)) + + if args.num_events_bad != 0: + log.info("") + if not agent_log.agent_log_contains("Dropped events for Extension: {0}".format(ext)): + fail("The TelemetryEventsCollector did not drop bad events for {0} as expected".format(ext)) + log.info("The TelemetryEventsCollector dropped bad events for {0} as expected".format(ext)) + + sys.exit(0) + + +if __name__ == "__main__": + main() From ae01d59f2d43314f4c8cb2d8120a3c5892086de6 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Tue, 29 Aug 2023 09:58:01 -0700 Subject: [PATCH 067/240] Ignore logcollector fetch failure if it recovers (#2906) --- tests_e2e/tests/lib/agent_log.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tests_e2e/tests/lib/agent_log.py b/tests_e2e/tests/lib/agent_log.py index 1926c0d4db..4ef884d70c 100644 --- a/tests_e2e/tests/lib/agent_log.py +++ b/tests_e2e/tests/lib/agent_log.py @@ -351,7 +351,15 @@ def get_errors(self) -> List[AgentLogRecord]: { 'message': r"Microsoft.Azure.Security.Monitoring.AzureSecurityLinuxAgent.*op=Install.*Non-zero exit code: 56,", }, - + # + # Ignore LogCollector failure to fetch vmSettings if it recovers + # + # 2023-08-27T08:13:42.520557Z WARNING MainThread LogCollector Fetch failed: [HttpError] [HTTP Failed] GET https://md-hdd-tkst3125n3x0.blob.core.chinacloudapi.cn/$system/lisa-WALinuxAgent-20230827-080144-029-e0-n0.cb9a406f-584b-4702-98bb-41a3ad5e334f.vmSettings -- IOError timed out -- 6 attempts made + # + { + 'message': r"Fetch failed:.*GET.*vmSettings.*timed out", + 'if': lambda r: r.prefix == 'LogCollector' and self.agent_log_contains("LogCollector Log collection successfully completed", after_timestamp=r.timestamp) + }, ] def is_error(r: 
AgentLogRecord) -> bool: @@ -381,14 +389,16 @@ def is_error(r: AgentLogRecord) -> bool: return errors - def agent_log_contains(self, data: str): + def agent_log_contains(self, data: str, after_timestamp: str = datetime.min): """ This function looks for the specified test data string in the WALinuxAgent logs and returns if found or not. :param data: The string to look for in the agent logs - :return: True if test data string found in the agent log and False if not. + :param after_timestamp: A timestamp + appears after this timestamp + :return: True if test data string found in the agent log after after_timestamp and False if not. """ for record in self.read(): - if data in record.text: + if data in record.text and record.timestamp > after_timestamp: return True return False From 4a28ace3b8da8a84e3c6b60455936a1f8a216cb7 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Thu, 31 Aug 2023 17:18:25 -0700 Subject: [PATCH 068/240] download_fail unit test should use agent version in common instead of 9.9.9.9 (#2908) (#2912) (cherry picked from commit ed80388c02471a1e196fd8d77cf0a74eab13c5c7) --- tests/ga/test_guestagent.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/ga/test_guestagent.py b/tests/ga/test_guestagent.py index a127341d15..3d25ff7921 100644 --- a/tests/ga/test_guestagent.py +++ b/tests/ga/test_guestagent.py @@ -173,6 +173,7 @@ def http_get_handler(uri, *_, **__): return MockHttpResponse(status=httpclient.SERVICE_UNAVAILABLE) return None + agent_version = self._get_agent_version() pkg = ExtHandlerPackage(version=str(self._get_agent_version())) pkg.uris.append(agent_uri) @@ -185,7 +186,7 @@ def http_get_handler(uri, *_, **__): messages = [kwargs['message'] for _, kwargs in add_event.call_args_list if kwargs['op'] == 'Install' and kwargs['is_success'] == False] self.assertEqual(1, len(messages), "Expected exactly 1 install error/ Got: {0}".format(add_event.call_args_list)) - 
self.assertIn('[UpdateError] Unable to download Agent WALinuxAgent-9.9.9.9', messages[0], "The install error does not include the expected message") + self.assertIn(str.format('[UpdateError] Unable to download Agent WALinuxAgent-{0}', agent_version), messages[0], "The install error does not include the expected message") self.assertFalse(agent.is_blacklisted, "Download failures should not blacklist the Agent") From bfeb1703c7694d2372f15dfed11a82a75e679219 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Thu, 31 Aug 2023 17:24:35 -0700 Subject: [PATCH 069/240] Download certs on FT GS after check_certificates only when missing from disk (#2907) (#2913) * Download certs on FT GS only when missing from disk * Improve telemetry for inconsistent GS * Fix string format (cherry picked from commit c13f7500c4e3c93f081d0ff6cdb46c6ffdcdd43a) --- azurelinuxagent/common/protocol/goal_state.py | 44 ++++++++++++++++--- tests/common/protocol/test_goal_state.py | 4 +- 2 files changed, 40 insertions(+), 8 deletions(-) diff --git a/azurelinuxagent/common/protocol/goal_state.py b/azurelinuxagent/common/protocol/goal_state.py index 3d02268ced..267b01c585 100644 --- a/azurelinuxagent/common/protocol/goal_state.py +++ b/azurelinuxagent/common/protocol/goal_state.py @@ -208,9 +208,15 @@ def update(self, silent=False): try: self._update(force_update=False) except GoalStateInconsistentError as e: - self.logger.warn("Detected an inconsistency in the goal state: {0}", ustr(e)) + message = "Detected an inconsistency in the goal state: {0}".format(ustr(e)) + self.logger.warn(message) + add_event(op=WALAEventOperation.GoalState, is_success=False, message=message) + self._update(force_update=True) - self.logger.info("The goal state is consistent") + + message = "The goal state is consistent" + self.logger.info(message) + add_event(op=WALAEventOperation.GoalState, message=message) def _update(self, force_update): # @@ -219,7 +225,9 @@ def _update(self, 
force_update): timestamp = datetime.datetime.utcnow() if force_update: - self.logger.info("Refreshing goal state and vmSettings") + message = "Refreshing goal state and vmSettings" + self.logger.info(message) + add_event(op=WALAEventOperation.GoalState, message=message) incarnation, xml_text, xml_doc = GoalState._fetch_goal_state(self._wire_client) goal_state_updated = force_update or incarnation != self._incarnation @@ -292,11 +300,9 @@ def _update(self, force_update): # if self._extensions_goal_state.source == GoalStateSource.FastTrack and self._goal_state_properties & GoalStateProperties.Certificates: self._check_certificates() + self._check_and_download_missing_certs_on_disk() def _check_certificates(self): - # Re-download certificates in case they have been removed from disk since last download - if self._certs_uri is not None: - self._download_certificates(self._certs_uri) # Check that certificates needed by extensions are in goal state certs.summary for extension in self.extensions_goal_state.extensions: for settings in extension.settings: @@ -321,6 +327,32 @@ def _download_certificates(self, certs_uri): self._history.save_certificates(json.dumps(certs.summary)) return certs + def _check_and_download_missing_certs_on_disk(self): + # Re-download certificates if any have been removed from disk since last download + if self._certs_uri is not None: + certificates = self.certs.summary + certs_missing_from_disk = False + + for c in certificates: + cert_path = os.path.join(conf.get_lib_dir(), c['thumbprint'] + '.crt') + if not os.path.isfile(cert_path): + certs_missing_from_disk = True + message = "Certificate required by goal state is not on disk: {0}".format(cert_path) + self.logger.info(message) + add_event(op=WALAEventOperation.GoalState, message=message) + if certs_missing_from_disk: + # Try to re-download certs. 
Sometimes download may fail if certs_uri is outdated/contains wrong + # container id (for example, when the VM is moved to a new container after resuming from + # hibernation). If download fails we should report and continue with goal state processing, as some + # extensions in the goal state may succeed. + try: + self._download_certificates(self._certs_uri) + except Exception as e: + message = "Unable to download certificates. Goal state processing will continue, some " \ + "extensions requiring certificates may fail. Error: {0}".format(ustr(e)) + self.logger.warn(message) + add_event(op=WALAEventOperation.GoalState, is_success=False, message=message) + def _restore_wire_server_goal_state(self, incarnation, xml_text, xml_doc, vm_settings_support_stopped_error): msg = 'The HGAP stopped supporting vmSettings; will fetched the goal state from the WireServer.' self.logger.info(msg) diff --git a/tests/common/protocol/test_goal_state.py b/tests/common/protocol/test_goal_state.py index 3f00a62d8b..c33851e9fb 100644 --- a/tests/common/protocol/test_goal_state.py +++ b/tests/common/protocol/test_goal_state.py @@ -157,7 +157,7 @@ def http_get_handler(url, *_, **__): protocol.set_http_handlers(http_get_handler=None) goal_state.update() self._assert_directory_contents( - self._find_history_subdirectory("234-987"), ["VmSettings.json", "Certificates.json"]) + self._find_history_subdirectory("234-987"), ["VmSettings.json"]) def test_it_should_redact_the_protected_settings_when_saving_to_the_history_directory(self): with mock_wire_protocol(wire_protocol_data.DATA_FILE_VM_SETTINGS) as protocol: @@ -464,7 +464,7 @@ def http_get_handler(url, *_, **__): goal_state = GoalState(protocol.client) self.assertEqual(2, protocol.mock_wire_data.call_counts['goalstate'], "There should have been exactly 2 requests for the goal state (original + refresh)") - self.assertEqual(4, http_get_handler.certificate_requests, "There should have been exactly 4 requests for the goal state certificates (2x 
original + 2x refresh)") + self.assertEqual(2, http_get_handler.certificate_requests, "There should have been exactly 2 requests for the goal state certificates (original + refresh)") thumbprints = [c.thumbprint for c in goal_state.certs.cert_list.certificates] From 601954a9df506dfb6ae1c50ed7246e874faaf6d1 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Fri, 1 Sep 2023 10:29:47 -0700 Subject: [PATCH 070/240] Update pipeline.yml to increase timeout to 90 minutes (#2910) Runs have been timing out after 60 minutes due to multiple scenarios sharing VMs --- tests_e2e/pipeline/pipeline.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/tests_e2e/pipeline/pipeline.yml b/tests_e2e/pipeline/pipeline.yml index 59420dd88d..6940f2a7d2 100644 --- a/tests_e2e/pipeline/pipeline.yml +++ b/tests_e2e/pipeline/pipeline.yml @@ -107,6 +107,7 @@ jobs: - bash: $(Build.SourcesDirectory)/tests_e2e/pipeline/scripts/execute_tests.sh displayName: "Execute tests" continueOnError: true + timeoutInMinutes: 90 env: SUBSCRIPTION_ID: $(SUBSCRIPTION-ID) AZURE_CLIENT_ID: $(AZURE-CLIENT-ID) From 7df55065127721e5c62adc6d2a80fdc2abdafba4 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Wed, 6 Sep 2023 17:17:00 -0700 Subject: [PATCH 071/240] Fix agent memory usage check (#2903) * fix memory usage check * add test * added comment * fix test --- azurelinuxagent/ga/update.py | 10 +++++++--- tests/ga/test_update.py | 13 +++++++++++-- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 4eeb408c25..8065194d7b 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -145,7 +145,8 @@ def __init__(self): self._heartbeat_id = str(uuid.uuid4()).upper() self._heartbeat_counter = 0 - self._last_check_memory_usage = datetime.min + self._initial_attempt_check_memory_usage = True + self._last_check_memory_usage_time = 
time.time() self._check_memory_usage_last_error_report = datetime.min # VM Size is reported via the heartbeat, default it here. @@ -1016,8 +1017,11 @@ def _check_agent_memory_usage(self): """ try: if conf.get_enable_agent_memory_usage_check() and self._extensions_summary.converged: - if self._last_check_memory_usage == datetime.min or datetime.utcnow() >= (self._last_check_memory_usage + UpdateHandler.CHECK_MEMORY_USAGE_PERIOD): - self._last_check_memory_usage = datetime.utcnow() + # we delay first attempt memory usage check, so that current agent won't get blacklisted due to multiple restarts(because of memory limit reach) too frequently + if (self._initial_attempt_check_memory_usage and time.time() - self._last_check_memory_usage_time > CHILD_LAUNCH_INTERVAL) or \ + (not self._initial_attempt_check_memory_usage and time.time() - self._last_check_memory_usage_time > conf.get_cgroup_check_period()): + self._last_check_memory_usage_time = time.time() + self._initial_attempt_check_memory_usage = False CGroupConfigurator.get_instance().check_agent_memory_usage() except AgentMemoryExceededException as exception: msg = "Check on agent memory usage:\n{0}".format(ustr(exception)) diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 43935b525d..129ce3cad7 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -2378,7 +2378,7 @@ def test_check_agent_memory_usage_raises_exit_exception(self, patch_add_event, p with patch('azurelinuxagent.common.conf.get_enable_agent_memory_usage_check', return_value=True): with self.assertRaises(ExitException) as context_manager: update_handler = get_update_handler() - + update_handler._last_check_memory_usage_time = time.time() - 24 * 60 update_handler._check_agent_memory_usage() self.assertEqual(1, patch_add_event.call_count) self.assertTrue(any("Check on agent memory usage" in call_args[0] @@ -2393,7 +2393,7 @@ def test_check_agent_memory_usage_fails(self, patch_add_event, patch_warn, *_): with 
patch("azurelinuxagent.ga.cgroupconfigurator.CGroupConfigurator._Impl.check_agent_memory_usage", side_effect=Exception()): with patch('azurelinuxagent.common.conf.get_enable_agent_memory_usage_check', return_value=True): update_handler = get_update_handler() - + update_handler._last_check_memory_usage_time = time.time() - 24 * 60 update_handler._check_agent_memory_usage() self.assertTrue(any("Error checking the agent's memory usage" in call_args[0] for call_args in patch_warn.call_args), @@ -2409,6 +2409,15 @@ def test_check_agent_memory_usage_fails(self, patch_add_event, patch_warn, *_): add_events[0]["message"], "The error message is not correct when memory usage check failed") + @patch("azurelinuxagent.ga.cgroupconfigurator.CGroupConfigurator._Impl.check_agent_memory_usage") + @patch("azurelinuxagent.ga.update.add_event") + def test_check_agent_memory_usage_not_called(self, patch_add_event, patch_memory_usage, *_): + # This test ensures that agent not called immediately on startup, instead waits for CHILD_LAUNCH_INTERVAL + with patch('azurelinuxagent.common.conf.get_enable_agent_memory_usage_check', return_value=True): + update_handler = get_update_handler() + update_handler._check_agent_memory_usage() + self.assertEqual(0, patch_memory_usage.call_count) + self.assertEqual(0, patch_add_event.call_count) class GoalStateIntervalTestCase(AgentTestCase): def test_initial_goal_state_period_should_default_to_goal_state_period(self): From 087e9264163c964dde73635567840013cbc7356b Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Thu, 7 Sep 2023 17:08:17 -0700 Subject: [PATCH 072/240] disable ga versioning changes (#2917) * Disable ga versioning changes (#2909) * disbale rsm changes * add flag (cherry picked from commit 5a4fae833a92de4d44b1939e48678a043132fbd4) * merge conflicts --- azurelinuxagent/common/conf.py | 2 +- azurelinuxagent/ga/agent_update_handler.py | 10 +- tests/ga/test_agent_update_handler.py | 9 +- 
tests/ga/test_update.py | 128 ++++++++++--------- tests_e2e/orchestrator/scripts/install-agent | 2 +- tests_e2e/tests/agent_update/rsm_update.py | 2 +- 6 files changed, 78 insertions(+), 75 deletions(-) diff --git a/azurelinuxagent/common/conf.py b/azurelinuxagent/common/conf.py index 0e0eb7f18e..167d520d09 100644 --- a/azurelinuxagent/common/conf.py +++ b/azurelinuxagent/common/conf.py @@ -643,7 +643,7 @@ def get_enable_ga_versioning(conf=__conf__): If True, the agent looks for rsm updates(checking requested version in GS) otherwise it will fall back to self-update and finds the highest version from PIR. NOTE: This option is experimental and may be removed in later versions of the Agent. """ - return conf.get_switch("Debug.EnableGAVersioning", True) + return conf.get_switch("Debug.EnableGAVersioning", False) def get_firewall_rules_log_period(conf=__conf__): diff --git a/azurelinuxagent/ga/agent_update_handler.py b/azurelinuxagent/ga/agent_update_handler.py index 212ca1f945..6c93e092ca 100644 --- a/azurelinuxagent/ga/agent_update_handler.py +++ b/azurelinuxagent/ga/agent_update_handler.py @@ -58,12 +58,7 @@ def __should_update_agent(self, requested_version): largest version update(self-update): update is allowed once per (as specified in the conf.get_hotfix_upgrade_frequency() or conf.get_normal_upgrade_frequency()) return false when we don't allow updates. - Note: Downgrades are not allowed for self-update. 
""" - - if not self.__check_if_downgrade_is_requested_and_allowed(requested_version): - return False - now = datetime.datetime.now() if self._is_requested_version_update: @@ -329,6 +324,11 @@ def run(self, goal_state): self.__log_event(LogLevel.WARNING, warn_msg) try: + # Downgrades are not allowed for self-update version + # Added it in try block after agent update timewindow check so that we don't log it too frequently + if not self.__check_if_downgrade_is_requested_and_allowed(requested_version): + return + daemon_version = get_daemon_version() if requested_version < daemon_version: # Don't process the update if the requested version is less than daemon version, diff --git a/tests/ga/test_agent_update_handler.py b/tests/ga/test_agent_update_handler.py index a78471df08..d387164147 100644 --- a/tests/ga/test_agent_update_handler.py +++ b/tests/ga/test_agent_update_handler.py @@ -57,10 +57,11 @@ def put_handler(url, *args, **_): with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=autoupdate_enabled): with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=autoupdate_frequency): with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"): - with patch("azurelinuxagent.ga.agent_update_handler.add_event") as mock_telemetry: - agent_update_handler = get_agent_update_handler(protocol) - agent_update_handler._protocol = protocol - yield agent_update_handler, mock_telemetry + with patch("azurelinuxagent.common.conf.get_enable_ga_versioning", return_value=True): + with patch("azurelinuxagent.ga.agent_update_handler.add_event") as mock_telemetry: + agent_update_handler = get_agent_update_handler(protocol) + agent_update_handler._protocol = protocol + yield agent_update_handler, mock_telemetry def __assert_agent_directories_available(self, versions): diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 129ce3cad7..b3cf721580 100644 --- a/tests/ga/test_update.py +++ 
b/tests/ga/test_update.py @@ -1250,68 +1250,69 @@ def test_it_should_recreate_extension_event_directories_for_existing_extensions_ def test_it_should_report_update_status_in_status_blob(self): with mock_wire_protocol(DATA_FILE) as protocol: with patch.object(conf, "get_autoupdate_gafamily", return_value="Prod"): - with patch("azurelinuxagent.common.logger.warn") as patch_warn: - - protocol.aggregate_status = None - protocol.incarnation = 1 - - def get_handler(url, **kwargs): - if HttpRequestPredicates.is_agent_package_request(url): - return MockHttpResponse(status=httpclient.SERVICE_UNAVAILABLE) - return protocol.mock_wire_data.mock_http_get(url, **kwargs) - - def put_handler(url, *args, **_): - if HttpRequestPredicates.is_host_plugin_status_request(url): - # Skip reading the HostGA request data as its encoded - return MockHttpResponse(status=500) - protocol.aggregate_status = json.loads(args[0]) - return MockHttpResponse(status=201) - - def update_goal_state_and_run_handler(autoupdate_enabled = True): - protocol.incarnation += 1 - protocol.mock_wire_data.set_incarnation(protocol.incarnation) - self._add_write_permission_to_goal_state_files() - with _get_update_handler(iterations=1, protocol=protocol, autoupdate_enabled=autoupdate_enabled) as (update_handler, _): - GAUpdateReportState.report_error_msg = "" - update_handler.run(debug=True) - self.assertEqual(0, update_handler.get_exit_code(), - "Exit code should be 0; List of all warnings logged by the agent: {0}".format( - patch_warn.call_args_list)) - - protocol.set_http_handlers(http_get_handler=get_handler, http_put_handler=put_handler) - - # Case 1: Requested version removed in GS; report missing requested version errr - protocol.mock_wire_data.set_extension_config("wire/ext_conf.xml") - protocol.mock_wire_data.reload() - update_goal_state_and_run_handler() - self.assertTrue("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], - "updateStatus should be reported") - update_status = 
protocol.aggregate_status['aggregateStatus']['guestAgentStatus']["updateStatus"] - self.assertEqual(VMAgentUpdateStatuses.Error, update_status['status'], "Status should be an error") - self.assertEqual(update_status['code'], 1, "incorrect code reported") - self.assertIn("Missing requested version", update_status['formattedMessage']['message'], "incorrect message reported") - - # Case 2: Requested version in GS == Current Version; updateStatus should be Success - protocol.mock_wire_data.set_extension_config("wire/ext_conf_requested_version.xml") - protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) - update_goal_state_and_run_handler() - self.assertTrue("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], - "updateStatus should be reported if asked in GS") - update_status = protocol.aggregate_status['aggregateStatus']['guestAgentStatus']["updateStatus"] - self.assertEqual(VMAgentUpdateStatuses.Success, update_status['status'], "Status should be successful") - self.assertEqual(update_status['expectedVersion'], str(CURRENT_VERSION), "incorrect version reported") - self.assertEqual(update_status['code'], 0, "incorrect code reported") - - # Case 3: Requested version in GS != Current Version; update fail and report error - protocol.mock_wire_data.set_extension_config("wire/ext_conf_requested_version.xml") - protocol.mock_wire_data.set_extension_config_requested_version("5.2.0.1") - update_goal_state_and_run_handler() - self.assertTrue("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], - "updateStatus should be in status blob. 
Warns: {0}".format(patch_warn.call_args_list)) - update_status = protocol.aggregate_status['aggregateStatus']['guestAgentStatus']["updateStatus"] - self.assertEqual(VMAgentUpdateStatuses.Error, update_status['status'], "Status should be an error") - self.assertEqual(update_status['expectedVersion'], "5.2.0.1", "incorrect version reported") - self.assertEqual(update_status['code'], 1, "incorrect code reported") + with patch("azurelinuxagent.common.conf.get_enable_ga_versioning", return_value=True): + with patch("azurelinuxagent.common.logger.warn") as patch_warn: + + protocol.aggregate_status = None + protocol.incarnation = 1 + + def get_handler(url, **kwargs): + if HttpRequestPredicates.is_agent_package_request(url): + return MockHttpResponse(status=httpclient.SERVICE_UNAVAILABLE) + return protocol.mock_wire_data.mock_http_get(url, **kwargs) + + def put_handler(url, *args, **_): + if HttpRequestPredicates.is_host_plugin_status_request(url): + # Skip reading the HostGA request data as its encoded + return MockHttpResponse(status=500) + protocol.aggregate_status = json.loads(args[0]) + return MockHttpResponse(status=201) + + def update_goal_state_and_run_handler(autoupdate_enabled = True): + protocol.incarnation += 1 + protocol.mock_wire_data.set_incarnation(protocol.incarnation) + self._add_write_permission_to_goal_state_files() + with _get_update_handler(iterations=1, protocol=protocol, autoupdate_enabled=autoupdate_enabled) as (update_handler, _): + GAUpdateReportState.report_error_msg = "" + update_handler.run(debug=True) + self.assertEqual(0, update_handler.get_exit_code(), + "Exit code should be 0; List of all warnings logged by the agent: {0}".format( + patch_warn.call_args_list)) + + protocol.set_http_handlers(http_get_handler=get_handler, http_put_handler=put_handler) + + # Case 1: Requested version removed in GS; report missing requested version errr + protocol.mock_wire_data.set_extension_config("wire/ext_conf.xml") + protocol.mock_wire_data.reload() + 
update_goal_state_and_run_handler() + self.assertTrue("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], + "updateStatus should be reported") + update_status = protocol.aggregate_status['aggregateStatus']['guestAgentStatus']["updateStatus"] + self.assertEqual(VMAgentUpdateStatuses.Error, update_status['status'], "Status should be an error") + self.assertEqual(update_status['code'], 1, "incorrect code reported") + self.assertIn("Missing requested version", update_status['formattedMessage']['message'], "incorrect message reported") + + # Case 2: Requested version in GS == Current Version; updateStatus should be Success + protocol.mock_wire_data.set_extension_config("wire/ext_conf_requested_version.xml") + protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) + update_goal_state_and_run_handler() + self.assertTrue("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], + "updateStatus should be reported if asked in GS") + update_status = protocol.aggregate_status['aggregateStatus']['guestAgentStatus']["updateStatus"] + self.assertEqual(VMAgentUpdateStatuses.Success, update_status['status'], "Status should be successful") + self.assertEqual(update_status['expectedVersion'], str(CURRENT_VERSION), "incorrect version reported") + self.assertEqual(update_status['code'], 0, "incorrect code reported") + + # Case 3: Requested version in GS != Current Version; update fail and report error + protocol.mock_wire_data.set_extension_config("wire/ext_conf_requested_version.xml") + protocol.mock_wire_data.set_extension_config_requested_version("5.2.0.1") + update_goal_state_and_run_handler() + self.assertTrue("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], + "updateStatus should be in status blob. 
Warns: {0}".format(patch_warn.call_args_list)) + update_status = protocol.aggregate_status['aggregateStatus']['guestAgentStatus']["updateStatus"] + self.assertEqual(VMAgentUpdateStatuses.Error, update_status['status'], "Status should be an error") + self.assertEqual(update_status['expectedVersion'], "5.2.0.1", "incorrect version reported") + self.assertEqual(update_status['code'], 1, "incorrect code reported") def test_it_should_wait_to_fetch_first_goal_state(self): with _get_update_handler() as (update_handler, protocol): @@ -1438,7 +1439,8 @@ def create_conf_mocks(self, autoupdate_frequency, hotfix_frequency, normal_frequ with patch("azurelinuxagent.common.conf.get_hotfix_upgrade_frequency", return_value=hotfix_frequency): with patch("azurelinuxagent.common.conf.get_normal_upgrade_frequency", return_value=normal_frequency): with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"): - yield + with patch("azurelinuxagent.common.conf.get_enable_ga_versioning", return_value=True): + yield @contextlib.contextmanager def __get_update_handler(self, iterations=1, test_data=None, diff --git a/tests_e2e/orchestrator/scripts/install-agent b/tests_e2e/orchestrator/scripts/install-agent index 7c80f6caf6..0496784766 100755 --- a/tests_e2e/orchestrator/scripts/install-agent +++ b/tests_e2e/orchestrator/scripts/install-agent @@ -115,7 +115,7 @@ echo "Agent's conf path: $waagent_conf_path" sed -i 's/AutoUpdate.Enabled=n/AutoUpdate.Enabled=y/g' "$waagent_conf_path" # By default GAUpdates flag set to True, so that agent go through update logic to look for new agents. # But in e2e tests this flag needs to be off in test version 9.9.9.9 to stop the agent updates, so that our scenarios run on 9.9.9.9. 
-sed -i '$a Debug.DownloadNewAgents=n' "$waagent_conf_path" +# sed -i '$a Debug.DownloadNewAgents=n' "$waagent_conf_path" # Logging and exiting tests if Extensions.Enabled flag is disabled for other distros than debian if grep -q "Extensions.Enabled=n" $waagent_conf_path; then pypy_get_distro=$(pypy3 -c 'from azurelinuxagent.common.version import get_distro; print(get_distro())') diff --git a/tests_e2e/tests/agent_update/rsm_update.py b/tests_e2e/tests/agent_update/rsm_update.py index bb6dcc6a7a..8d2ac149c1 100644 --- a/tests_e2e/tests/agent_update/rsm_update.py +++ b/tests_e2e/tests/agent_update/rsm_update.py @@ -139,7 +139,7 @@ def _prepare_agent(self, daemon_version="1.0.0.0", update_config=True) -> None: log.info('Successfully updated agent installed version') if update_config: log.info('Executing update-waagent-conf remote script to update agent update config flags to allow and download test versions') - self._run_remote_test("update-waagent-conf Debug.DownloadNewAgents=y AutoUpdate.GAFamily=Test", use_sudo=True) + self._run_remote_test("update-waagent-conf Debug.EnableGAVersioning=y AutoUpdate.GAFamily=Test", use_sudo=True) log.info('Successfully updated agent update config') @staticmethod From 0fc8b466fac730fce4dc5bee3ef02cc9fde0f869 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Fri, 8 Sep 2023 09:08:07 -0700 Subject: [PATCH 073/240] fix the ignore rule in agent update test (#2915) (#2918) * ignore the agent installed version * address comments * address comments * fixes (cherry picked from commit 8985a4207b8279b07fdc5186e22b001aaadbd27d) --- .../orchestrator/lib/agent_test_suite.py | 20 +++++------ tests_e2e/tests/agent_update/rsm_update.py | 35 ++++++++++++++----- 2 files changed, 35 insertions(+), 20 deletions(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index eb90e4edcc..4a14b1f665 100644 --- 
a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -467,8 +467,6 @@ def _execute(self, environment: Environment, variables: Dict[str, Any]): self.context.lisa_log.info("Executing Test Suite %s", suite.name) test_suite_success = self._execute_test_suite(suite) and test_suite_success - test_suite_success = self._check_agent_log() and test_suite_success - finally: collect = self.context.collect_logs if collect == CollectLogs.Always or collect == CollectLogs.Failed and not test_suite_success: @@ -512,6 +510,7 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool: log.info("") summary: List[str] = [] + ignore_error_rules: List[Dict[str, Any]] = [] for test in suite.tests: test_full_name = f"{suite_name}-{test.name}" @@ -522,9 +521,9 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool: test_success: bool = True + test_instance = test.test_class(self.context) try: - test.test_class(self.context).run() - + test_instance.run() summary.append(f"[Passed] {test.name}") log.info("******** [Passed] %s", test.name) self.context.lisa_log.info("[Passed] %s", test_full_name) @@ -583,6 +582,8 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool: suite_success = suite_success and test_success + ignore_error_rules.extend(test_instance.get_ignore_error_rules()) + if not test_success and test.blocks_suite: log.warning("%s failed and blocks the suite. 
Stopping suite execution.", test.name) break @@ -607,9 +608,11 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool: if not suite_success: self._mark_log_as_failed() + suite_success = suite_success and self._check_agent_log(ignore_error_rules) + return suite_success - def _check_agent_log(self) -> bool: + def _check_agent_log(self, ignore_error_rules: List[Dict[str, Any]]) -> bool: """ Checks the agent log for errors; returns true on success (no errors int the log) """ @@ -623,13 +626,6 @@ def _check_agent_log(self) -> bool: errors = json.loads(output, object_hook=AgentLogRecord.from_dictionary) # Individual tests may have rules to ignore known errors; filter those out - ignore_error_rules = [] - # pylint seems to think self.context.test_suites is not iterable. Suppressing warning, since its type is List[AgentTestSuite] - # E1133: Non-iterable value self.context.test_suites is used in an iterating context (not-an-iterable) - for suite in self.context.test_suites: # pylint: disable=E1133 - for test in suite.tests: - ignore_error_rules.extend(test.test_class(self.context).get_ignore_error_rules()) - if len(ignore_error_rules) > 0: new = [] for e in errors: diff --git a/tests_e2e/tests/agent_update/rsm_update.py b/tests_e2e/tests/agent_update/rsm_update.py index 8d2ac149c1..d31e8ce3e6 100644 --- a/tests_e2e/tests/agent_update/rsm_update.py +++ b/tests_e2e/tests/agent_update/rsm_update.py @@ -24,6 +24,7 @@ # For each scenario, we initiate the rsm request with target version and then verify agent updated to that target version. 
# import json +import re from typing import List, Dict, Any import requests @@ -49,6 +50,8 @@ def __init__(self, context: AgentTestContext): ip_address=self._context.vm_ip_address, username=self._context.username, private_key_file=self._context.private_key_file) + self._installed_agent_version = "9.9.9.9" + self._downgrade_version = "9.9.9.9" def get_ignore_error_rules(self) -> List[Dict[str, Any]]: ignore_rules = [ @@ -56,9 +59,10 @@ def get_ignore_error_rules(self) -> List[Dict[str, Any]]: # This is expected as we validate the downgrade scenario # # WARNING ExtHandler ExtHandler Agent WALinuxAgent-9.9.9.9 is permanently blacklisted - # + # Note: Version varies depending on the pipeline branch the test is running on { - 'message': r"Agent WALinuxAgent-9.9.9.9 is permanently blacklisted" + 'message': rf"Agent WALinuxAgent-{self._installed_agent_version} is permanently blacklisted", + 'if': lambda r: r.prefix == 'ExtHandler' and self._installed_agent_version > self._downgrade_version }, # We don't allow downgrades below then daemon version # 2023-07-11T02:28:21.249836Z WARNING ExtHandler ExtHandler [AgentUpdateError] The Agent received a request to downgrade to version 1.4.0.0, but downgrading to a version less than the Agent installed on the image (1.4.0.1) is not supported. Skipping downgrade. 
@@ -71,20 +75,22 @@ def get_ignore_error_rules(self) -> List[Dict[str, Any]]: return ignore_rules def run(self) -> None: + # retrieve the installed agent version in the vm before run the scenario + self._retrieve_installed_agent_version() # Allow agent to send supported feature flag self._verify_agent_reported_supported_feature_flag() log.info("*******Verifying the Agent Downgrade scenario*******") stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) log.info("Current agent version running on the vm before update is \n%s", stdout) - downgrade_version: str = "1.5.0.0" - log.info("Attempting downgrade version %s", downgrade_version) - self._request_rsm_update(downgrade_version) - self._check_rsm_gs(downgrade_version) + self._downgrade_version: str = "1.5.0.0" + log.info("Attempting downgrade version %s", self._downgrade_version) + self._request_rsm_update(self._downgrade_version) + self._check_rsm_gs(self._downgrade_version) self._prepare_agent() # Verify downgrade scenario - self._verify_guest_agent_update(downgrade_version) - self._verify_agent_reported_update_status(downgrade_version) + self._verify_guest_agent_update(self._downgrade_version) + self._verify_agent_reported_update_status(self._downgrade_version) # Verify upgrade scenario @@ -246,6 +252,19 @@ def _verify_agent_reported_update_status(self, version: str): self._run_remote_test(f"agent_update-verify_agent_reported_update_status.py --version {version}", use_sudo=True) log.info("Successfully Agent reported update status for version {0}".format(version)) + def _retrieve_installed_agent_version(self): + """ + Retrieve the installed agent version + """ + log.info("Retrieving installed agent version") + stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) + log.info("Retrieved installed agent version \n {0}".format(stdout)) + match = re.search(r'.*Goal state agent: (\S*)', stdout) + if match: + self._installed_agent_version = match.groups()[0] + else: + 
log.warning("Unable to retrieve installed agent version and set to default value {0}".format(self._installed_agent_version)) + if __name__ == "__main__": RsmUpdateBvt.run_from_command_line() From ef2a7e8927641cd78ed3d35f2c778f671cce31b0 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Fri, 8 Sep 2023 10:21:21 -0700 Subject: [PATCH 074/240] Use Mariner 2 in FIPS test (#2916) * Use Mariner 2 in FIPS test --------- Co-authored-by: narrieta --- tests_e2e/test_suites/fips.yml | 10 +++- tests_e2e/tests/fips/fips.py | 31 +++------- .../tests/scripts/fips-check_fips_mariner | 56 +++++++++++++++++++ .../tests/scripts/fips-enable_fips_mariner | 53 ++++++++++++++++++ 4 files changed, 126 insertions(+), 24 deletions(-) create mode 100755 tests_e2e/tests/scripts/fips-check_fips_mariner create mode 100755 tests_e2e/tests/scripts/fips-enable_fips_mariner diff --git a/tests_e2e/test_suites/fips.yml b/tests_e2e/test_suites/fips.yml index 785671d0c1..bdff00098a 100644 --- a/tests_e2e/test_suites/fips.yml +++ b/tests_e2e/test_suites/fips.yml @@ -1,10 +1,16 @@ # # FIPS should not affect extension processing. The test enables FIPS and then executes an extension. # -# NOTE: Enabling FIPS is very specific to the distro. This test is only executed on RHEL 9.0. +# NOTE: Enabling FIPS is very specific to the distro. This test is only executed on Mariner 2. +# +# TODO: Add other distros. +# +# NOTE: FIPS can be enabled on RHEL9 using these instructions: see https://access.redhat.com/solutions/137833#rhel9), +# but extensions with protected settings do not work end-to-end, since the Agent can't decrypt the tenant +# certificate. 
# name: "FIPS" tests: - source: "fips/fips.py" -images: "rhel_90" +images: "mariner_2" owns_vm: true diff --git a/tests_e2e/tests/fips/fips.py b/tests_e2e/tests/fips/fips.py index f8c27b900b..9f490de4ca 100755 --- a/tests_e2e/tests/fips/fips.py +++ b/tests_e2e/tests/fips/fips.py @@ -19,7 +19,6 @@ import uuid from assertpy import fail -from typing import Any, Dict, List from tests_e2e.tests.lib.agent_test import AgentTest from tests_e2e.tests.lib.logging import log @@ -32,18 +31,15 @@ class Fips(AgentTest): """ - Enables FIPS on the test VM, which is a RHEL 9 VM (see https://access.redhat.com/solutions/137833#rhel9), then executes the CustomScript extension. - - TODO: Investigate whether extensions with protected settings are supported on FIPS-enabled systems. The Agent has issues handling the tenant - certificate on those systems (additional configuration on FIPS may be needed). + Enables FIPS on the test VM, which is Mariner 2 VM, and verifies that extensions with protected settings are handled correctly under FIPS. 
""" def run(self): ssh_client: SshClient = self._context.create_ssh_client() try: - command = "fips-mode-setup --enable" + command = "fips-enable_fips_mariner" log.info("Enabling FIPS on the test VM [%s]", command) - output = ssh_client.run_command(command, use_sudo=True) + output = ssh_client.run_command(command) log.info("Enable FIPS completed\n%s", output) except CommandError as e: raise Exception(f"Failed to enable FIPS: {e}") @@ -53,34 +49,25 @@ def run(self): vm.restart(wait_for_boot=True, ssh_client=ssh_client) try: - command = "fips-mode-setup --check" + command = "fips-check_fips_mariner" log.info("Verifying that FIPS is enabled [%s]", command) output = ssh_client.run_command(command).rstrip() if output != "FIPS mode is enabled.": - fail(f"FIPS i not enabled - '{command}' returned '{output}'") + fail(f"FIPS is not enabled - '{command}' returned '{output}'") log.info(output) except CommandError as e: raise Exception(f"Failed to verify that FIPS is enabled: {e}") + # Execute an extension with protected settings to ensure the tenant certificate can be decrypted under FIPS custom_script = VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.CustomScript, resource_name="CustomScript") - log.info("Installing %s", custom_script) message = f"Hello {uuid.uuid4()}!" custom_script.enable( - settings={ + protected_settings={ 'commandToExecute': f"echo \'{message}\'" - }, - auto_upgrade_minor_version=False + } ) - custom_script.assert_instance_view(expected_version="2.0", expected_message=message) - - def get_ignore_error_rules(self) -> List[Dict[str, Any]]: - """ - Some extensions added by policy on the test subscription use protected settings, which produce this error. 
- """ - return [ - {'message': r'Failed to decrypt /var/lib/waagent/Certificates.p7m'} - ] + custom_script.assert_instance_view(expected_message=message) if __name__ == "__main__": diff --git a/tests_e2e/tests/scripts/fips-check_fips_mariner b/tests_e2e/tests/scripts/fips-check_fips_mariner new file mode 100755 index 0000000000..e5a7730be7 --- /dev/null +++ b/tests_e2e/tests/scripts/fips-check_fips_mariner @@ -0,0 +1,56 @@ +#!/usr/bin/env bash + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# +# Verifies whether FIPS is enabled on Mariner 2.0 +# + +set -euo pipefail + +# Check if FIPS mode is enabled by the kernel (returns 1 if enabled) +fips_enabled=$(sudo cat /proc/sys/crypto/fips_enabled) +if [ "$fips_enabled" != "1" ]; then + echo "FIPS is not enabled by the kernel: $fips_enabled" + exit 1 +fi + +# Check if sysctl is configured (returns crypto.fips_enabled = 1 if enabled) +sysctl_configured=$(sudo sysctl crypto.fips_enabled) +if [ "$sysctl_configured" != "crypto.fips_enabled = 1" ]; then + echo "sysctl is not configured for FIPS: $sysctl_configured" + exit 1 +fi + +# Check if openssl library is running in FIPS mode +# MD5 should fail; the command's output should be similar to: +# Error setting digest +# 131590634539840:error:060800C8:digital envelope routines:EVP_DigestInit_ex:disabled for FIPS:crypto/evp/digest.c:135: +openssl=$(openssl md5 < /dev/null 2>&1 || true) +if [[ "$openssl" != *"disabled for FIPS"* ]]; then + echo "openssl is not running in FIPS mode: $openssl" + exit 1 +fi + +# Check if dracut-fips is installed (returns dracut-fips-) +dracut_fips=$( (rpm -qa | grep dracut-fips) || true ) +if [[ "$dracut_fips" != *"dracut-fips"* ]]; then + echo "dracut-fips is not installed: $dracut_fips" + exit 1 +fi + +echo "FIPS mode is enabled." \ No newline at end of file diff --git a/tests_e2e/tests/scripts/fips-enable_fips_mariner b/tests_e2e/tests/scripts/fips-enable_fips_mariner new file mode 100755 index 0000000000..8259b8d6c2 --- /dev/null +++ b/tests_e2e/tests/scripts/fips-enable_fips_mariner @@ -0,0 +1,53 @@ +#!/usr/bin/env bash + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# Enables FIPS on Mariner 2.0 +# + +set -euo pipefail + +echo "Installing packages required packages to enable FIPS..." +sudo tdnf install -y grubby dracut-fips + +# +# Set boot_uuid variable for the boot partition if different from the root +# +boot_dev="$(df /boot/ | tail -1 | cut -d' ' -f1)" +echo "Boot partition: $boot_dev" + +root_dev="$(df / | tail -1 | cut -d' ' -f1)" +echo "Root partition: $root_dev" + +boot_uuid="" +if [ "$boot_dev" != "$root_dev" ]; then + boot_uuid="boot=UUID=$(blkid $boot_dev -s UUID -o value)" + echo "Boot UUID: $boot_uuid" +fi + +# +# Enable FIPS and set boot= parameter +# +echo "Enabling FIPS..." 
+if sudo grub2-editenv - list | grep -q kernelopts; then + set -x + sudo grub2-editenv - set "$(sudo grub2-editenv - list | grep kernelopts) fips=1 $boot_uuid" +else + set -x + sudo grubby --update-kernel=ALL --args="fips=1 $boot_uuid" +fi \ No newline at end of file From 0a159d60c96b680969f5a5ec466a22f61d0254cc Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Thu, 14 Sep 2023 15:48:02 -0700 Subject: [PATCH 075/240] Change pipeline timeout to 90 minutes (#2925) --- tests_e2e/pipeline/pipeline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_e2e/pipeline/pipeline.yml b/tests_e2e/pipeline/pipeline.yml index 6940f2a7d2..9dc1062751 100644 --- a/tests_e2e/pipeline/pipeline.yml +++ b/tests_e2e/pipeline/pipeline.yml @@ -62,6 +62,7 @@ pool: jobs: - job: "ExecuteTests" + timeoutInMinutes: 90 steps: - task: UsePythonVersion@0 @@ -107,7 +108,6 @@ jobs: - bash: $(Build.SourcesDirectory)/tests_e2e/pipeline/scripts/execute_tests.sh displayName: "Execute tests" continueOnError: true - timeoutInMinutes: 90 env: SUBSCRIPTION_ID: $(SUBSCRIPTION-ID) AZURE_CLIENT_ID: $(AZURE-CLIENT-ID) From 2bbb57ac68ed25c37fc170e835b7a6253284ae7c Mon Sep 17 00:00:00 2001 From: Zheyu Shen Date: Tue, 19 Sep 2023 00:53:52 +0800 Subject: [PATCH 076/240] fix version checking (#2920) Co-authored-by: Norberto Arrieta --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 8f5d92b42e..6b54d09e76 100755 --- a/setup.py +++ b/setup.py @@ -319,7 +319,7 @@ def run(self): # implementation may be broken prior to Python 3.7 wher the functionality # will be removed from Python 3 requires = [] # pylint: disable=invalid-name -if float(sys.version[:3]) >= 3.7: +if sys.version_info[0] >= 3 and sys.version_info[1] >= 7: requires = ['distro'] # pylint: disable=invalid-name modules = [] # pylint: disable=invalid-name From 19b970b4dfdf8789dfa1cc97427f6cfb33f135c9 Mon Sep 17 00:00:00 2001 From: Nageswara 
Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Wed, 20 Sep 2023 15:02:16 -0700 Subject: [PATCH 077/240] mariner container image (#2926) * mariner container image * added packages repo * addressed comments * addressed comments --- tests_e2e/orchestrator/docker/Dockerfile | 30 ++++++++---------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/tests_e2e/orchestrator/docker/Dockerfile b/tests_e2e/orchestrator/docker/Dockerfile index bbd460e6e7..33ef0b44bd 100644 --- a/tests_e2e/orchestrator/docker/Dockerfile +++ b/tests_e2e/orchestrator/docker/Dockerfile @@ -7,7 +7,7 @@ # # docker run --rm -it -v /home/nam/src/WALinuxAgent:/home/waagent/WALinuxAgent waagenttests bash --login # -FROM ubuntu:latest +FROM mcr.microsoft.com/cbl-mariner/base/core:2.0 LABEL description="Test environment for WALinuxAgent" SHELL ["/bin/bash", "-c"] @@ -18,24 +18,23 @@ SHELL ["/bin/bash", "-c"] USER root RUN \ - apt-get update && \ - \ + tdnf -y update && \ + # mariner packages can be found in this repository https://cvedashboard.azurewebsites.net/#/packages \ # \ # Install basic dependencies \ # \ - apt-get install -y git python3.10 python3.10-dev wget bzip2 && \ - ln /usr/bin/python3.10 /usr/bin/python3 && \ + tdnf -y install git python3 python3-devel wget bzip2 ca-certificates && \ \ # \ # Install LISA dependencies \ # \ - apt-get install -y git gcc libgirepository1.0-dev libcairo2-dev qemu-utils libvirt-dev \ - python3-pip python3-venv && \ + tdnf install -y git gcc gobject-introspection-devel cairo-devel pkg-config python3-devel libvirt-devel \ + cairo-gobject binutils kernel-headers glibc-devel python3-pip python3-virtualenv && \ \ # \ # Install test dependencies \ # \ - apt-get install -y zip && \ + tdnf -y install zip tar && \ \ # \ # Create user waagent, which is used to execute the tests \ @@ -46,17 +45,8 @@ RUN \ # \ # Install the Azure CLI \ # \ - apt-get install ca-certificates curl apt-transport-https lsb-release gnupg && \ - mkdir -p 
/etc/apt/keyrings && \ - curl -sLS https://packages.microsoft.com/keys/microsoft.asc \ - | gpg --dearmor \ - | tee /etc/apt/keyrings/microsoft.gpg > /dev/null && \ - chmod go+r /etc/apt/keyrings/microsoft.gpg && \ - AZ_REPO=$(lsb_release -cs) && \ - echo "deb [arch=`dpkg --print-architecture` signed-by=/etc/apt/keyrings/microsoft.gpg] https://packages.microsoft.com/repos/azure-cli/ $AZ_REPO main" \ - | tee /etc/apt/sources.list.d/azure-cli.list && \ - apt-get update && \ - apt-get install azure-cli && \ + tdnf -y install azure-cli && \ + tdnf clean all && \ : # @@ -77,7 +67,7 @@ RUN \ cd $HOME && \ git clone https://github.com/microsoft/lisa.git && \ cd lisa && \ - git checkout a030c5e6a0695db77dbf5bd52a45d07cbbf00087 && \ + git checkout 7396cbd6d9b31a99b13c184f735ce9827334f21b && \ \ python3 -m pip install --upgrade pip && \ python3 -m pip install --editable .[azure,libvirt] --config-settings editable_mode=compat && \ From c5181c05024e6bb03caccf877b82004db8b7ac09 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Mon, 2 Oct 2023 12:09:31 -0700 Subject: [PATCH 078/240] Fix for "local variable _COLLECT_NOEXEC_ERRORS referenced before assignment" (#2935) * Fix for "local variable _COLLECT_NOEXEC_ERRORS referenced before assignment" * pylint --------- Co-authored-by: narrieta --- azurelinuxagent/ga/extensionprocessutil.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/azurelinuxagent/ga/extensionprocessutil.py b/azurelinuxagent/ga/extensionprocessutil.py index c24ebf1946..d2b37551ba 100644 --- a/azurelinuxagent/ga/extensionprocessutil.py +++ b/azurelinuxagent/ga/extensionprocessutil.py @@ -107,6 +107,10 @@ def _check_noexec(): """ Check if /var is mounted with the noexec flag. """ + # W0603: Using the global statement (global-statement) + # OK to disable; _COLLECT_NOEXEC_ERRORS is used only within _check_noexec, but needs to persist across calls. 
+ global _COLLECT_NOEXEC_ERRORS # pylint: disable=W0603 + try: agent_dir = conf.get_lib_dir() with open('/proc/mounts', 'r') as f: From 791b8dbfc710de1d48d0582ed4b30425c4b26e0c Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Mon, 2 Oct 2023 17:19:40 -0700 Subject: [PATCH 079/240] fix agent manifest call frequency (#2923) (#2932) * fix agent manifest call frequency * new approach (cherry picked from commit 655403254331a7f7413c3d7448d83193daa08af3) --- azurelinuxagent/ga/agent_update_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/ga/agent_update_handler.py b/azurelinuxagent/ga/agent_update_handler.py index 6c93e092ca..a8390c1c7d 100644 --- a/azurelinuxagent/ga/agent_update_handler.py +++ b/azurelinuxagent/ga/agent_update_handler.py @@ -87,7 +87,6 @@ def __update_last_attempt_update_times(self): else: self.update_state.last_attempted_normal_update_time = now self.update_state.last_attempted_hotfix_update_time = now - self.update_state.last_attempted_manifest_download_time = now def __should_agent_attempt_manifest_download(self): """ @@ -103,6 +102,7 @@ def __should_agent_attempt_manifest_download(self): if next_attempt_time > now: return False + self.update_state.last_attempted_manifest_download_time = now return True @staticmethod From ffa99edda1841d558d5e939d2d550a935f8c38a8 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 3 Oct 2023 08:53:48 -0700 Subject: [PATCH 080/240] enable rhel/centos cgroups (#2922) --- azurelinuxagent/ga/cgroupapi.py | 3 ++- tests/ga/test_cgroupapi.py | 14 ++++++++------ tests_e2e/test_suites/images.yml | 4 ++++ tests_e2e/tests/agent_cgroups/agent_cgroups.py | 3 +++ 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/azurelinuxagent/ga/cgroupapi.py b/azurelinuxagent/ga/cgroupapi.py index 040333bdde..6f4bf4ab34 100644 --- a/azurelinuxagent/ga/cgroupapi.py +++ 
b/azurelinuxagent/ga/cgroupapi.py @@ -59,7 +59,8 @@ def cgroups_supported(): distro_version = FlexibleVersion(distro_info[1]) except ValueError: return False - return distro_name.lower() == 'ubuntu' and distro_version.major >= 16 + return (distro_name.lower() == 'ubuntu' and distro_version.major >= 16) or \ + (distro_name.lower() in ('centos', 'redhat') and 8 <= distro_version.major < 9) @staticmethod def track_cgroups(extension_cgroups): diff --git a/tests/ga/test_cgroupapi.py b/tests/ga/test_cgroupapi.py index 6b15af1ebf..ad8ef80c2c 100644 --- a/tests/ga/test_cgroupapi.py +++ b/tests/ga/test_cgroupapi.py @@ -56,17 +56,19 @@ def test_cgroups_should_be_supported_only_on_ubuntu16_centos7dot4_redhat7dot4_an (['ubuntu', '18.10', 'cosmic'], True), (['ubuntu', '20.04', 'focal'], True), (['ubuntu', '20.10', 'groovy'], True), - (['centos', '7.8', 'Source'], False), - (['redhat', '7.8', 'Maipo'], False), - (['redhat', '7.9.1908', 'Core'], False), - (['centos', '8.1', 'Source'], False), - (['redhat', '8.2', 'Maipo'], False), - (['redhat', '8.2.2111', 'Core'], False), (['centos', '7.4', 'Source'], False), (['redhat', '7.4', 'Maipo'], False), (['centos', '7.5', 'Source'], False), (['centos', '7.3', 'Maipo'], False), (['redhat', '7.2', 'Maipo'], False), + (['centos', '7.8', 'Source'], False), + (['redhat', '7.8', 'Maipo'], False), + (['redhat', '7.9.1908', 'Core'], False), + (['centos', '8.1', 'Source'], True), + (['redhat', '8.2', 'Maipo'], True), + (['redhat', '8.2.2111', 'Core'], True), + (['redhat', '9.1', 'Core'], False), + (['centos', '9.1', 'Source'], False), (['bigip', '15.0.1', 'Final'], False), (['gaia', '273.562', 'R80.30'], False), (['debian', '9.1', ''], False), diff --git a/tests_e2e/test_suites/images.yml b/tests_e2e/test_suites/images.yml index 433d0733d9..c254cccd71 100644 --- a/tests_e2e/test_suites/images.yml +++ b/tests_e2e/test_suites/images.yml @@ -12,6 +12,7 @@ image-sets: # - "alma_9" - "centos_79" + - "centos_82" - "debian_9" - "debian_10" - 
"debian_11" @@ -39,6 +40,8 @@ image-sets: # As of today agent only support and enabled resource governance feature on following distros cgroups-endorsed: + - "centos_82" + - "rhel_82" - "ubuntu_1604" - "ubuntu_1804" - "ubuntu_2004" @@ -78,6 +81,7 @@ images: AzureChinaCloud: [] centos_610: "OpenLogic CentOS 6.10 latest" centos_79: "OpenLogic CentOS 7_9 latest" + centos_82: "OpenLogic CentOS 8_2 latest" debian_8: "credativ Debian 8 latest" debian_9: "credativ Debian 9 latest" debian_10: "Debian debian-10 10 latest" diff --git a/tests_e2e/tests/agent_cgroups/agent_cgroups.py b/tests_e2e/tests/agent_cgroups/agent_cgroups.py index d976c0338e..c0394f6c62 100644 --- a/tests_e2e/tests/agent_cgroups/agent_cgroups.py +++ b/tests_e2e/tests/agent_cgroups/agent_cgroups.py @@ -31,6 +31,9 @@ def __init__(self, context: AgentTestContext): self._ssh_client = self._context.create_ssh_client() def run(self): + log.info("=====Prepare agent=====") + log.info("Restarting agent service to make sure service starts with new configuration that was setup by the cgroupconfigurator") + self._ssh_client.run_command("agent-service restart", use_sudo=True) log.info("=====Validating agent cgroups=====") self._run_remote_test("agent_cgroups-check_cgroups_agent.py") log.info("Successfully Verified that agent present in correct cgroups") From 8bfad4d7b1cbfb35a4ea8d1170248060bb4fdacd Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Thu, 5 Oct 2023 07:13:05 -0700 Subject: [PATCH 081/240] Add support for EC certificates (#2936) * Add support for EC certificates * pylint * pylint * typo --------- Co-authored-by: narrieta --- azurelinuxagent/common/event.py | 1 + azurelinuxagent/common/protocol/goal_state.py | 10 +- azurelinuxagent/common/utils/cryptutil.py | 19 +++- azurelinuxagent/ga/update.py | 27 ++++- tests/common/utils/test_crypt_util.py | 13 +++ tests/data/wire/ec-key.pem | 5 + tests/data/wire/ec-key.pub.pem | 4 + tests/data/wire/rsa-key.pem | 28 ++++++ tests/data/wire/rsa-key.pub.pem | 9 ++ 
.../test_suites/keyvault_certificates.yml | 9 ++ .../keyvault_certificates.py | 98 +++++++++++++++++++ tests_e2e/tests/lib/virtual_machine_client.py | 9 ++ 12 files changed, 218 insertions(+), 14 deletions(-) create mode 100644 tests/data/wire/ec-key.pem create mode 100644 tests/data/wire/ec-key.pub.pem create mode 100644 tests/data/wire/rsa-key.pem create mode 100644 tests/data/wire/rsa-key.pub.pem create mode 100644 tests_e2e/test_suites/keyvault_certificates.yml create mode 100755 tests_e2e/tests/keyvault_certificates/keyvault_certificates.py diff --git a/azurelinuxagent/common/event.py b/azurelinuxagent/common/event.py index 4679608067..95abf09edf 100644 --- a/azurelinuxagent/common/event.py +++ b/azurelinuxagent/common/event.py @@ -106,6 +106,7 @@ class WALAEventOperation: LogCollection = "LogCollection" NoExec = "NoExec" OSInfo = "OSInfo" + OpenSsl = "OpenSsl" Partition = "Partition" PersistFirewallRules = "PersistFirewallRules" PluginSettingsVersionMismatch = "PluginSettingsVersionMismatch" diff --git a/azurelinuxagent/common/protocol/goal_state.py b/azurelinuxagent/common/protocol/goal_state.py index 267b01c585..1b4bcea829 100644 --- a/azurelinuxagent/common/protocol/goal_state.py +++ b/azurelinuxagent/common/protocol/goal_state.py @@ -579,8 +579,6 @@ def __init__(self, xml_text, my_logger): # The parsing process use public key to match prv and crt. 
buf = [] - begin_crt = False # pylint: disable=W0612 - begin_prv = False # pylint: disable=W0612 prvs = {} thumbprints = {} index = 0 @@ -588,17 +586,12 @@ def __init__(self, xml_text, my_logger): with open(pem_file) as pem: for line in pem.readlines(): buf.append(line) - if re.match(r'[-]+BEGIN.*KEY[-]+', line): - begin_prv = True - elif re.match(r'[-]+BEGIN.*CERTIFICATE[-]+', line): - begin_crt = True - elif re.match(r'[-]+END.*KEY[-]+', line): + if re.match(r'[-]+END.*KEY[-]+', line): tmp_file = Certificates._write_to_tmp_file(index, 'prv', buf) pub = cryptutil.get_pubkey_from_prv(tmp_file) prvs[pub] = tmp_file buf = [] index += 1 - begin_prv = False elif re.match(r'[-]+END.*CERTIFICATE[-]+', line): tmp_file = Certificates._write_to_tmp_file(index, 'crt', buf) pub = cryptutil.get_pubkey_from_crt(tmp_file) @@ -613,7 +606,6 @@ def __init__(self, xml_text, my_logger): os.rename(tmp_file, os.path.join(conf.get_lib_dir(), crt)) buf = [] index += 1 - begin_crt = False # Rename prv key with thumbprint as the file name for pubkey in prvs: diff --git a/azurelinuxagent/common/utils/cryptutil.py b/azurelinuxagent/common/utils/cryptutil.py index 5514cb5052..b7c9422747 100644 --- a/azurelinuxagent/common/utils/cryptutil.py +++ b/azurelinuxagent/common/utils/cryptutil.py @@ -53,10 +53,21 @@ def gen_transport_cert(self, prv_file, crt_file): def get_pubkey_from_prv(self, file_name): if not os.path.exists(file_name): raise IOError(errno.ENOENT, "File not found", file_name) - else: - cmd = [self.openssl_cmd, "rsa", "-in", file_name, "-pubout"] - pub = shellutil.run_command(cmd, log_error=True) - return pub + + # OpenSSL's pkey command may not be available on older versions so try 'rsa' first. 
+ try: + command = [self.openssl_cmd, "rsa", "-in", file_name, "-pubout"] + return shellutil.run_command(command, log_error=False) + except shellutil.CommandError as error: + if not ("Not an RSA key" in error.stderr or "expecting an rsa key" in error.stderr): + logger.error( + "Command: [{0}], return code: [{1}], stdout: [{2}] stderr: [{3}]", + " ".join(command), + error.returncode, + error.stdout, + error.stderr) + raise + return shellutil.run_command([self.openssl_cmd, "pkey", "-in", file_name, "-pubout"], log_error=True) def get_pubkey_from_crt(self, file_name): if not os.path.exists(file_name): diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 8065194d7b..cd37a32ee7 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -316,7 +316,8 @@ def run(self, debug=False): logger.info("Python: {0}.{1}.{2}", PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO) os_info_msg = u"Distro: {dist_name}-{dist_ver}; "\ - u"OSUtil: {util_name}; AgentService: {service_name}; "\ + u"OSUtil: {util_name}; "\ + u"AgentService: {service_name}; "\ u"Python: {py_major}.{py_minor}.{py_micro}; "\ u"systemd: {systemd}; "\ u"LISDrivers: {lis_ver}; "\ @@ -343,6 +344,7 @@ def run(self, debug=False): # Send telemetry for the OS-specific info. 
add_event(AGENT_NAME, op=WALAEventOperation.OSInfo, message=os_info_msg) + self._log_openssl_info() # # Perform initialization tasks @@ -409,6 +411,29 @@ def run(self, debug=False): self._shutdown() sys.exit(0) + @staticmethod + def _log_openssl_info(): + try: + version = shellutil.run_command(["openssl", "version"]) + message = "OpenSSL version: {0}".format(version) + logger.info(message) + add_event(op=WALAEventOperation.OpenSsl, message=message, is_success=True) + except Exception as e: + message = "Failed to get OpenSSL version: {0}".format(e) + logger.info(message) + add_event(op=WALAEventOperation.OpenSsl, message=message, is_success=False, log_event=False) + # + # Collect telemetry about the 'pkey' command. CryptUtil get_pubkey_from_prv() uses the 'pkey' command only as a fallback after trying 'rsa'. + # 'pkey' also works for RSA keys, but it may not be available on older versions of OpenSSL. Check telemetry after a few releases and if there + # are no versions of OpenSSL that do not support 'pkey' consider removing the use of 'rsa' altogether. + # + try: + shellutil.run_command(["openssl", "help", "pkey"]) + except Exception as e: + message = "OpenSSL does not support the pkey command: {0}".format(e) + logger.info(message) + add_event(op=WALAEventOperation.OpenSsl, message=message, is_success=False, log_event=False) + def _initialize_goal_state(self, protocol): # # Block until we can fetch the first goal state (self._try_update_goal_state() does its own logging and error handling). 
diff --git a/tests/common/utils/test_crypt_util.py b/tests/common/utils/test_crypt_util.py index c724c246c8..4bd3429768 100644 --- a/tests/common/utils/test_crypt_util.py +++ b/tests/common/utils/test_crypt_util.py @@ -67,6 +67,19 @@ def test_get_pubkey_from_crt(self): with open(expected_pub_key) as fh: self.assertEqual(fh.read(), crypto.get_pubkey_from_prv(prv_key)) + def test_get_pubkey_from_prv(self): + crypto = CryptUtil(conf.get_openssl_cmd()) + + def do_test(prv_key, expected_pub_key): + prv_key = os.path.join(data_dir, "wire", prv_key) + expected_pub_key = os.path.join(data_dir, "wire", expected_pub_key) + + with open(expected_pub_key) as fh: + self.assertEqual(fh.read(), crypto.get_pubkey_from_prv(prv_key)) + + do_test("rsa-key.pem", "rsa-key.pub.pem") + do_test("ec-key.pem", "ec-key.pub.pem") + def test_get_pubkey_from_crt_invalid_file(self): crypto = CryptUtil(conf.get_openssl_cmd()) prv_key = os.path.join(data_dir, "wire", "trans_prv_does_not_exist") diff --git a/tests/data/wire/ec-key.pem b/tests/data/wire/ec-key.pem new file mode 100644 index 0000000000..d157a12bbf --- /dev/null +++ b/tests/data/wire/ec-key.pem @@ -0,0 +1,5 @@ +-----BEGIN EC PRIVATE KEY----- +MHcCAQEEIEydYXZkSbZjdKaNEurW6x2W3dEOC5+yDxM/Wkq1m6lUoAoGCCqGSM49 +AwEHoUQDQgAE8H1M+73QdzCyIDToTyU7OTMfi9cnIt8B4sz7e127ydNBVWjDwgGV +bKXPNtuQSWNgkfGW8A3tf9S8VcKNFxXaZg== +-----END EC PRIVATE KEY----- diff --git a/tests/data/wire/ec-key.pub.pem b/tests/data/wire/ec-key.pub.pem new file mode 100644 index 0000000000..e29d8fb0b7 --- /dev/null +++ b/tests/data/wire/ec-key.pub.pem @@ -0,0 +1,4 @@ +-----BEGIN PUBLIC KEY----- +MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAE8H1M+73QdzCyIDToTyU7OTMfi9cn +It8B4sz7e127ydNBVWjDwgGVbKXPNtuQSWNgkfGW8A3tf9S8VcKNFxXaZg== +-----END PUBLIC KEY----- diff --git a/tests/data/wire/rsa-key.pem b/tests/data/wire/rsa-key.pem new file mode 100644 index 0000000000..d59f8391bc --- /dev/null +++ b/tests/data/wire/rsa-key.pem @@ -0,0 +1,28 @@ +-----BEGIN PRIVATE KEY----- 
+MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDe7cwx76yO+OjR +hWHJrKt0L1ih9F/Bctyq7Ddi/v3CitVBvkQUve4k+xeT538mHyeoOuGI3QFs5mLh +i535zbOFaHwfMMQI/CI4ZDtRrQh59XrJSsPytu0fXihsJ81IwNURuNDKwxYR0tKI +KUuUN4YxsDSBeqvP5vjSKT05f90gniscuGvPJ6Zgyynmg56KQtSXKaetbyNzPW/4 +QFmadyqsgdR7oZHEYj+1Tl6T9/tAPg/dgO55hT7WVdC8JxXeSiaDyRS1NRMFL0bC +fcnLNsO4tni2WJsfuju9a4GTrWe3NQ3+vsQV5s59MtuOhoObuYNVcETYiEjBVVsf ++shxRxL/AgMBAAECggEAfslt/eSbFoFYIHmkoQe0R5L57LpIj4QdHpTT91igyDkf +ipGEtOtEewHXagYaWXsUmehLBwTy35W0HSTDxyQHetNu7GpWw+lqKPpQhmZL0Nkd +aUg9Y1hISjPJ96E3bq5FQBwFm5wSfDaUCF68HmLpzm6xngY/mzF4yEYuDPq8r+RV +SDhVtrovSImpwLbKmPdn634PqC6bPDgO5htkT/lL/TVkR3Sla3U/YYMu90m7DiAA +46DEblx0yt+zBB+mKR3TU4zIPSFiTWYs/Srsm6nUnNqjf5rvupvXFZt0/eDZat7/ +L+/V5HPV0BxGIkCGt0Uv+qZYMGpC3eU+aEbByOr/wQKBgQDy+l4Rvgl0i+XzUPyw +N6UrDDpxBVsZ/w48DrBEBMQqTbZxVDK77E2CeMK/JlYMFYFdIT/c9W0U7eWPqe35 +kk9jVsPXc3xeoSiZvqK4CZeHEugE9OtJ4jJL1CfDXMcgPM+iSSj/QOJc5v7891QH +3gMOvmVk3Kk/I2MyBAEE6p6WHwKBgQDq4FvO77tsIZRkgmp3gPg4iImcTgwrgDxz +aHqlSVc98o4jzWsUShbZTwRgfcZm+kD3eas+gkux8CevYhwjafWiukrnwu3xvUaO +AKmgXU7ud/kS9bK/AT6ZpJsfoZzM/CQsConFbz0eXVb/tmipCBpyzi2yskLdk6SP +pEZYISknIQKBgHwE9PzjXdoiChYekUu0q1aEoFPN4wkq2W4oJSoisKnTDrtbuaWX +4Jwm3WhJvgPe+i+55+n1T18uakzg9Hm9h03yHHYdGS8H3TxURKPhKXmlWc4l4O7O +SNPRjxY1heHbiDOSWh2nVaMLuL0P1NFLLY5Z+lD4HF8AxgHib06+HoILAoGBALvg +oa+jNhGlvrSzWYSkJmnaVfEwwS1e03whe9GRG/cSeb6Lx3agWSyUt1ST50tiLOuI +aIGE6hW4m5X/7bAqRvFXASnoVDtFgxV91DHR0ZyRXSxcWxHMZg2yjN89gFa77hdI +irHibEpIsZm0iH2FXNqusAE79J6XRlAcQKSoSenhAoGARAP9q1WaftXdK4X7L1Ut +wnWJSVYMx6AsEo58SsJgNGqpbCl/vZMCwnSo6pdgO4xInu2tld3TKdPWZLoRCGCo +PDYVM1GXj5SS8QPmq+h/6fxS65Gl0h0oHUcKXoPD+AxHn2MWWqWzxMdRuthUQATE +MT+l5wgZPiEuiceY3Bp1hYk= +-----END PRIVATE KEY----- diff --git a/tests/data/wire/rsa-key.pub.pem b/tests/data/wire/rsa-key.pub.pem new file mode 100644 index 0000000000..940785f403 --- /dev/null +++ b/tests/data/wire/rsa-key.pub.pem @@ -0,0 +1,9 @@ +-----BEGIN PUBLIC KEY----- +MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA3u3MMe+sjvjo0YVhyayr 
+dC9YofRfwXLcquw3Yv79worVQb5EFL3uJPsXk+d/Jh8nqDrhiN0BbOZi4Yud+c2z +hWh8HzDECPwiOGQ7Ua0IefV6yUrD8rbtH14obCfNSMDVEbjQysMWEdLSiClLlDeG +MbA0gXqrz+b40ik9OX/dIJ4rHLhrzyemYMsp5oOeikLUlymnrW8jcz1v+EBZmncq +rIHUe6GRxGI/tU5ek/f7QD4P3YDueYU+1lXQvCcV3komg8kUtTUTBS9Gwn3JyzbD +uLZ4tlibH7o7vWuBk61ntzUN/r7EFebOfTLbjoaDm7mDVXBE2IhIwVVbH/rIcUcS +/wIDAQAB +-----END PUBLIC KEY----- diff --git a/tests_e2e/test_suites/keyvault_certificates.yml b/tests_e2e/test_suites/keyvault_certificates.yml new file mode 100644 index 0000000000..00c51db7d2 --- /dev/null +++ b/tests_e2e/test_suites/keyvault_certificates.yml @@ -0,0 +1,9 @@ +# +# This test verifies that the Agent can download and extract KeyVault certificates that use different encryption algorithms +# +name: "KeyvaultCertificates" +tests: + - "keyvault_certificates/keyvault_certificates.py" +images: + - "endorsed" + - "endorsed-arm64" diff --git a/tests_e2e/tests/keyvault_certificates/keyvault_certificates.py b/tests_e2e/tests/keyvault_certificates/keyvault_certificates.py new file mode 100755 index 0000000000..676d7ed249 --- /dev/null +++ b/tests_e2e/tests/keyvault_certificates/keyvault_certificates.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This test verifies that the Agent can download and extract KeyVault certificates that use different encryption algorithms (currently EC and RSA). 
+# +from assertpy import fail + +from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.shell import CommandError +from tests_e2e.tests.lib.ssh_client import SshClient +from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient + + +class KeyvaultCertificates(AgentTest): + def run(self): + test_certificates = { + 'C49A06B3044BD1778081366929B53EBF154133B3': { + 'AzureCloud': 'https://waagenttests.vault.azure.net/secrets/ec-cert/39862f0c6dff4b35bc8a83a5770c2102', + 'AzureChinaCloud': 'https://waagenttests.vault.azure.cn/secrets/ec-cert/bb610217ef70412bb3b3c8d7a7fabfdc', + 'AzureUSGovernment': 'https://waagenttests.vault.usgovcloudapi.net/secrets/ec-cert/9c20ef55c7074a468f04a168b3488933' + }, + '2F846E657258E50C7011E1F68EA9AD129BA4AB31': { + 'AzureCloud': 'https://waagenttests.vault.azure.net/secrets/rsa-cert/0b5eac1e66fb457bb3c3419fce17e705', + 'AzureChinaCloud': 'https://waagenttests.vault.azure.cn/secrets/rsa-cert/98679243f8d6493e95281a852d8cee00', + 'AzureUSGovernment': 'https://waagenttests.vault.usgovcloudapi.net/secrets/rsa-cert/463a8a6be3b3436d85d3d4e406621c9e' + } + } + thumbprints = test_certificates.keys() + certificate_urls = [u[self._context.vm.cloud] for u in test_certificates.values()] + + # The test certificates should be downloaded to these locations + expected_certificates = " ".join([f"/var/lib/waagent/{t}.{{crt,prv}}" for t in thumbprints]) + + # The test may be running on a VM that has already been tested (e.g. 
while debugging the test), so we need to delete any existing test certificates first + # (note that rm -f does not fail if the given files do not exist) + ssh_client: SshClient = self._context.create_ssh_client() + log.info("Deleting any existing test certificates on the test VM.") + existing_certificates = ssh_client.run_command(f"rm -f -v {expected_certificates}", use_sudo=True) + if existing_certificates == "": + log.info("No existing test certificates were found on the test VM.") + else: + log.info("Some test certificates had already been downloaded to the test VM (they have been deleted now):\n%s", existing_certificates) + + vm: VirtualMachineClient = VirtualMachineClient(self._context.vm) + + osprofile = { + "location": self._context.vm.location, + "properties": { + "osProfile": { + "secrets": [ + { + "sourceVault": { + "id": f"/subscriptions/{self._context.vm.subscription}/resourceGroups/waagent-tests/providers/Microsoft.KeyVault/vaults/waagenttests" + }, + "vaultCertificates": [{"certificateUrl": url} for url in certificate_urls] + } + ], + } + } + } + log.info("updating the vm's osProfile with the certificates to download:\n%s", osprofile) + vm.update(osprofile) + + # If the test has already run on the VM, force a new goal state to ensure the certificates are downloaded since the VM model most likely already had the certificates + # and the update operation would not have triggered a goal state + if existing_certificates != "": + log.info("Reapplying the goal state to ensure the test certificates are downloaded.") + vm.reapply() + + try: + output = ssh_client.run_command(f"ls {expected_certificates}", use_sudo=True) + log.info("Found all the expected certificates:\n%s", output) + except CommandError as error: + if error.stdout != "": + log.info("Found some of the expected certificates:\n%s", error.stdout) + fail(f"Failed to find certificates\n{error.stderr}") + + +if __name__ == "__main__": + KeyvaultCertificates.run_from_command_line() diff --git 
a/tests_e2e/tests/lib/virtual_machine_client.py b/tests_e2e/tests/lib/virtual_machine_client.py index 38d35aee52..dd739fe535 100644 --- a/tests_e2e/tests/lib/virtual_machine_client.py +++ b/tests_e2e/tests/lib/virtual_machine_client.py @@ -108,6 +108,15 @@ def update(self, properties: Dict[str, Any], timeout: int = AzureClient._DEFAULT operation_name=f"Update {self._identifier}", timeout=timeout) + def reapply(self, timeout: int = AzureClient._DEFAULT_TIMEOUT) -> None: + """ + Reapplies the goal state on the virtual machine + """ + self._execute_async_operation( + lambda: self._compute_client.virtual_machines.begin_reapply(self._identifier.resource_group, self._identifier.name), + operation_name=f"Reapply {self._identifier}", + timeout=timeout) + def restart( self, wait_for_boot, From 6dd91e50aee04c19ba9f5effdf7f5dce7c6fb389 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Sun, 8 Oct 2023 11:16:12 -0700 Subject: [PATCH 082/240] Add Cpu Arch in local logs and telemetry events (#2938) * Add cpu arch to telem and local logs * Change get_vm_arch to static method * update unit tests * Remove e2e pipeline file * Remove arch from heartbeat * Move get_vm_arch to osutil * fix syntax issue * Fix unit test --- azurelinuxagent/common/event.py | 7 +++++-- azurelinuxagent/common/osutil/default.py | 8 ++++++++ azurelinuxagent/ga/update.py | 13 +++++++------ tests/common/test_event.py | 3 ++- tests/ga/test_send_telemetry_events.py | 4 ++-- 5 files changed, 24 insertions(+), 11 deletions(-) diff --git a/azurelinuxagent/common/event.py b/azurelinuxagent/common/event.py index 95abf09edf..514c727fff 100644 --- a/azurelinuxagent/common/event.py +++ b/azurelinuxagent/common/event.py @@ -366,10 +366,14 @@ def __init__(self): # Parameters from OS osutil = get_osutil() + keyword_name = { + "CpuArchitecture": osutil.get_vm_arch() + } self._common_parameters.append(TelemetryEventParam(CommonTelemetryEventSchema.OSVersion, 
EventLogger._get_os_version())) self._common_parameters.append(TelemetryEventParam(CommonTelemetryEventSchema.ExecutionMode, AGENT_EXECUTION_MODE)) self._common_parameters.append(TelemetryEventParam(CommonTelemetryEventSchema.RAM, int(EventLogger._get_ram(osutil)))) self._common_parameters.append(TelemetryEventParam(CommonTelemetryEventSchema.Processors, int(EventLogger._get_processors(osutil)))) + self._common_parameters.append(TelemetryEventParam(CommonTelemetryEventSchema.KeywordName, json.dumps(keyword_name))) # Parameters from goal state self._common_parameters.append(TelemetryEventParam(CommonTelemetryEventSchema.TenantName, "TenantName_UNINITIALIZED")) @@ -597,8 +601,7 @@ def add_common_event_parameters(self, event, event_timestamp): TelemetryEventParam(CommonTelemetryEventSchema.OpcodeName, event_timestamp.strftime(logger.Logger.LogTimeFormatInUTC)), TelemetryEventParam(CommonTelemetryEventSchema.EventTid, threading.current_thread().ident), TelemetryEventParam(CommonTelemetryEventSchema.EventPid, os.getpid()), - TelemetryEventParam(CommonTelemetryEventSchema.TaskName, threading.current_thread().getName()), - TelemetryEventParam(CommonTelemetryEventSchema.KeywordName, '')] + TelemetryEventParam(CommonTelemetryEventSchema.TaskName, threading.current_thread().getName())] if event.eventId == TELEMETRY_EVENT_EVENT_ID and event.providerId == TELEMETRY_EVENT_PROVIDER_ID: # Currently only the GuestAgentExtensionEvents has these columns, the other tables dont have them so skipping diff --git a/azurelinuxagent/common/osutil/default.py b/azurelinuxagent/common/osutil/default.py index 6430f83ec8..69e20bea7a 100644 --- a/azurelinuxagent/common/osutil/default.py +++ b/azurelinuxagent/common/osutil/default.py @@ -149,6 +149,14 @@ def get_systemd_unit_file_install_path(): def get_agent_bin_path(): return "/usr/sbin" + @staticmethod + def get_vm_arch(): + try: + return platform.machine() + except Exception as e: + logger.warn("Unable to determine cpu architecture: {0}", 
ustr(e)) + return "unknown" + def get_firewall_dropped_packets(self, dst_ip=None): # If a previous attempt failed, do not retry global _enable_firewall # pylint: disable=W0603 diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index cd37a32ee7..147402709c 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -315,10 +315,14 @@ def run(self, debug=False): logger.info("OS: {0} {1}", DISTRO_NAME, DISTRO_VERSION) logger.info("Python: {0}.{1}.{2}", PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO) + vm_arch = self.osutil.get_vm_arch() + logger.info("CPU Arch: {0}", vm_arch) + os_info_msg = u"Distro: {dist_name}-{dist_ver}; "\ u"OSUtil: {util_name}; "\ u"AgentService: {service_name}; "\ u"Python: {py_major}.{py_minor}.{py_micro}; "\ + u"Arch: {vm_arch}; "\ u"systemd: {systemd}; "\ u"LISDrivers: {lis_ver}; "\ u"logrotate: {has_logrotate};".format( @@ -326,7 +330,7 @@ def run(self, debug=False): util_name=type(self.osutil).__name__, service_name=self.osutil.service_name, py_major=PY_VERSION_MAJOR, py_minor=PY_VERSION_MINOR, - py_micro=PY_VERSION_MICRO, systemd=systemd.is_systemd(), + py_micro=PY_VERSION_MICRO, vm_arch=vm_arch, systemd=systemd.is_systemd(), lis_ver=get_lis_version(), has_logrotate=has_logrotate() ) logger.info(os_info_msg) @@ -1013,13 +1017,10 @@ def _send_heartbeat_telemetry(self, protocol): if datetime.utcnow() >= (self._last_telemetry_heartbeat + UpdateHandler.TELEMETRY_HEARTBEAT_PERIOD): dropped_packets = self.osutil.get_firewall_dropped_packets(protocol.get_endpoint()) auto_update_enabled = 1 if conf.get_autoupdate_enabled() else 0 - # Include vm architecture in the heartbeat message because the kusto table does not have - # a separate column for it. 
- vmarch = self._get_vm_arch() - telemetry_msg = "{0};{1};{2};{3};{4};{5}".format(self._heartbeat_counter, self._heartbeat_id, dropped_packets, + telemetry_msg = "{0};{1};{2};{3};{4}".format(self._heartbeat_counter, self._heartbeat_id, dropped_packets, self._heartbeat_update_goal_state_error_count, - auto_update_enabled, vmarch) + auto_update_enabled) debug_log_msg = "[DEBUG HeartbeatCounter: {0};HeartbeatId: {1};DroppedPackets: {2};" \ "UpdateGSErrors: {3};AutoUpdate: {4}]".format(self._heartbeat_counter, self._heartbeat_id, dropped_packets, diff --git a/tests/common/test_event.py b/tests/common/test_event.py index 28f2e3860d..435ac2e80d 100644 --- a/tests/common/test_event.py +++ b/tests/common/test_event.py @@ -20,6 +20,7 @@ import json import os +import platform import re import shutil import threading @@ -70,7 +71,7 @@ def setUp(self): CommonTelemetryEventSchema.EventTid: threading.current_thread().ident, CommonTelemetryEventSchema.EventPid: os.getpid(), CommonTelemetryEventSchema.TaskName: threading.current_thread().getName(), - CommonTelemetryEventSchema.KeywordName: '', + CommonTelemetryEventSchema.KeywordName: json.dumps({"CpuArchitecture": platform.machine()}), # common parameters computed from the OS platform CommonTelemetryEventSchema.OSVersion: EventLoggerTools.get_expected_os_version(), CommonTelemetryEventSchema.ExecutionMode: AGENT_EXECUTION_MODE, diff --git a/tests/ga/test_send_telemetry_events.py b/tests/ga/test_send_telemetry_events.py index c9e04a38ca..a9c87dde9a 100644 --- a/tests/ga/test_send_telemetry_events.py +++ b/tests/ga/test_send_telemetry_events.py @@ -368,13 +368,13 @@ def test_it_should_enqueue_and_send_events_properly(self, mock_lib_dir, *_): '' \ '' \ '' \ - '' \ '' \ '' \ '' \ '' \ '' \ '' \ + '' \ '' \ '' \ '' \ @@ -385,7 +385,7 @@ def test_it_should_enqueue_and_send_events_properly(self, mock_lib_dir, *_): '' \ ']]>'.format(AGENT_VERSION, TestSendTelemetryEventsHandler._TEST_EVENT_OPERATION, CURRENT_AGENT, test_opcodename, 
test_eventtid, test_eventpid, test_taskname, osversion, int(osutil.get_total_mem()), - osutil.get_processor_cores()).encode('utf-8') + osutil.get_processor_cores(), json.dumps({"CpuArchitecture": platform.machine()})).encode('utf-8') self.assertIn(sample_message, collected_event) From 5bad0b4b19c907386b80ec18ad1423cdb7f3a050 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Mon, 9 Oct 2023 11:14:30 -0700 Subject: [PATCH 083/240] skip cgorup monitor (#2939) --- azurelinuxagent/agent.py | 30 ++++++++++++++++++++++-------- azurelinuxagent/ga/collect_logs.py | 10 +++++----- azurelinuxagent/ga/logcollector.py | 15 +-------------- tests/ga/test_logcollector.py | 16 ++++++++-------- tests/test_agent.py | 8 ++++---- 5 files changed, 40 insertions(+), 39 deletions(-) diff --git a/azurelinuxagent/agent.py b/azurelinuxagent/agent.py index f565f2975f..2811e215ed 100644 --- a/azurelinuxagent/agent.py +++ b/azurelinuxagent/agent.py @@ -29,6 +29,7 @@ import sys import threading from azurelinuxagent.ga import logcollector, cgroupconfigurator +from azurelinuxagent.ga.cgroup import AGENT_LOG_COLLECTOR, CpuCgroup, MemoryCgroup from azurelinuxagent.ga.cgroupapi import SystemdCgroupsApi import azurelinuxagent.common.conf as conf @@ -204,11 +205,10 @@ def collect_logs(self, is_full_mode): logger.info("Running log collector mode normal") # Check the cgroups unit - cpu_cgroup_path, memory_cgroup_path, log_collector_monitor = None, None, None - if CollectLogsHandler.should_validate_cgroups(): - cgroups_api = SystemdCgroupsApi() - cpu_cgroup_path, memory_cgroup_path = cgroups_api.get_process_cgroup_paths("self") - + log_collector_monitor = None + cgroups_api = SystemdCgroupsApi() + cpu_cgroup_path, memory_cgroup_path = cgroups_api.get_process_cgroup_paths("self") + if CollectLogsHandler.is_enabled_monitor_cgroups_check(): cpu_slice_matches = (cgroupconfigurator.LOGCOLLECTOR_SLICE in cpu_cgroup_path) memory_slice_matches = 
(cgroupconfigurator.LOGCOLLECTOR_SLICE in memory_cgroup_path) @@ -221,10 +221,24 @@ def collect_logs(self, is_full_mode): sys.exit(logcollector.INVALID_CGROUPS_ERRCODE) + def initialize_cgroups_tracking(cpu_cgroup_path, memory_cgroup_path): + cpu_cgroup = CpuCgroup(AGENT_LOG_COLLECTOR, cpu_cgroup_path) + msg = "Started tracking cpu cgroup {0}".format(cpu_cgroup) + logger.info(msg) + cpu_cgroup.initialize_cpu_usage() + memory_cgroup = MemoryCgroup(AGENT_LOG_COLLECTOR, memory_cgroup_path) + msg = "Started tracking memory cgroup {0}".format(memory_cgroup) + logger.info(msg) + return [cpu_cgroup, memory_cgroup] + try: - log_collector = LogCollector(is_full_mode, cpu_cgroup_path, memory_cgroup_path) - log_collector_monitor = get_log_collector_monitor_handler(log_collector.cgroups) - log_collector_monitor.run() + log_collector = LogCollector(is_full_mode) + # Running log collector resource(CPU, Memory) monitoring only if agent starts the log collector. + # If Log collector start by any other means, then it will not be monitored. + if CollectLogsHandler.is_enabled_monitor_cgroups_check(): + tracked_cgroups = initialize_cgroups_tracking(cpu_cgroup_path, memory_cgroup_path) + log_collector_monitor = get_log_collector_monitor_handler(tracked_cgroups) + log_collector_monitor.run() archive = log_collector.collect_logs_and_get_archive() logger.info("Log collection successfully completed. 
Archive can be found at {0} " "and detailed log output can be found at {1}".format(archive, OUTPUT_RESULTS_FILE_PATH)) diff --git a/azurelinuxagent/ga/collect_logs.py b/azurelinuxagent/ga/collect_logs.py index 244d45b1e4..4987d865e9 100644 --- a/azurelinuxagent/ga/collect_logs.py +++ b/azurelinuxagent/ga/collect_logs.py @@ -83,16 +83,16 @@ def get_thread_name(): return CollectLogsHandler._THREAD_NAME @staticmethod - def enable_cgroups_validation(): + def enable_monitor_cgroups_check(): os.environ[CollectLogsHandler.__CGROUPS_FLAG_ENV_VARIABLE] = "1" @staticmethod - def disable_cgroups_validation(): + def disable_monitor_cgroups_check(): if CollectLogsHandler.__CGROUPS_FLAG_ENV_VARIABLE in os.environ: del os.environ[CollectLogsHandler.__CGROUPS_FLAG_ENV_VARIABLE] @staticmethod - def should_validate_cgroups(): + def is_enabled_monitor_cgroups_check(): if CollectLogsHandler.__CGROUPS_FLAG_ENV_VARIABLE in os.environ: return os.environ[CollectLogsHandler.__CGROUPS_FLAG_ENV_VARIABLE] == "1" return False @@ -147,7 +147,7 @@ def daemon(self): time.sleep(_INITIAL_LOG_COLLECTION_DELAY) try: - CollectLogsHandler.enable_cgroups_validation() + CollectLogsHandler.enable_monitor_cgroups_check() if self.protocol_util is None or self.protocol is None: self.init_protocols() @@ -162,7 +162,7 @@ def daemon(self): except Exception as e: logger.error("An error occurred in the log collection thread; will exit the thread.\n{0}", ustr(e)) finally: - CollectLogsHandler.disable_cgroups_validation() + CollectLogsHandler.disable_monitor_cgroups_check() def collect_and_send_logs(self): if self._collect_logs(): diff --git a/azurelinuxagent/ga/logcollector.py b/azurelinuxagent/ga/logcollector.py index f2947e9373..393dd3c2ef 100644 --- a/azurelinuxagent/ga/logcollector.py +++ b/azurelinuxagent/ga/logcollector.py @@ -26,7 +26,6 @@ from datetime import datetime from heapq import heappush, heappop -from azurelinuxagent.ga.cgroup import CpuCgroup, AGENT_LOG_COLLECTOR, MemoryCgroup from 
azurelinuxagent.common.conf import get_lib_dir, get_ext_log_dir, get_agent_log_file from azurelinuxagent.common.event import initialize_event_logger_vminfo_common_parameters from azurelinuxagent.common.future import ustr @@ -71,14 +70,13 @@ class LogCollector(object): _TRUNCATED_FILE_PREFIX = "truncated_" - def __init__(self, is_full_mode=False, cpu_cgroup_path=None, memory_cgroup_path=None): + def __init__(self, is_full_mode=False): self._is_full_mode = is_full_mode self._manifest = MANIFEST_FULL if is_full_mode else MANIFEST_NORMAL self._must_collect_files = self._expand_must_collect_files() self._create_base_dirs() self._set_logger() self._initialize_telemetry() - self.cgroups = self._set_resource_usage_cgroups(cpu_cgroup_path, memory_cgroup_path) @staticmethod def _mkdir(dirname): @@ -105,17 +103,6 @@ def _set_logger(): _LOGGER.addHandler(_f_handler) _LOGGER.setLevel(logging.INFO) - @staticmethod - def _set_resource_usage_cgroups(cpu_cgroup_path, memory_cgroup_path): - cpu_cgroup = CpuCgroup(AGENT_LOG_COLLECTOR, cpu_cgroup_path) - msg = "Started tracking cpu cgroup {0}".format(cpu_cgroup) - _LOGGER.info(msg) - cpu_cgroup.initialize_cpu_usage() - memory_cgroup = MemoryCgroup(AGENT_LOG_COLLECTOR, memory_cgroup_path) - msg = "Started tracking memory cgroup {0}".format(memory_cgroup) - _LOGGER.info(msg) - return [cpu_cgroup, memory_cgroup] - @staticmethod def _initialize_telemetry(): protocol = get_protocol_util().get_protocol(init_goal_state=False) diff --git a/tests/ga/test_logcollector.py b/tests/ga/test_logcollector.py index 0fefedea23..cedf894b09 100644 --- a/tests/ga/test_logcollector.py +++ b/tests/ga/test_logcollector.py @@ -212,7 +212,7 @@ def test_log_collector_parses_commands_in_manifest(self): with patch("azurelinuxagent.ga.logcollector.MANIFEST_NORMAL", manifest): with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): - log_collector = LogCollector(cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path") + 
log_collector = LogCollector() archive = log_collector.collect_logs_and_get_archive() with open(self.output_results_file_path, "r") as fh: @@ -241,7 +241,7 @@ def test_log_collector_uses_full_manifest_when_full_mode_enabled(self): with patch("azurelinuxagent.ga.logcollector.MANIFEST_FULL", manifest): with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): - log_collector = LogCollector(is_full_mode=True, cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path") + log_collector = LogCollector(is_full_mode=True) archive = log_collector.collect_logs_and_get_archive() self._assert_archive_created(archive) @@ -255,7 +255,7 @@ def test_log_collector_should_collect_all_files(self): # and combined they do not cross the archive size threshold. with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): - log_collector = LogCollector(cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path") + log_collector = LogCollector() archive = log_collector.collect_logs_and_get_archive() self._assert_archive_created(archive) @@ -277,7 +277,7 @@ def test_log_collector_should_truncate_large_text_files_and_ignore_large_binary_ # Set the size limit so that some files are too large to collect in full. 
with patch("azurelinuxagent.ga.logcollector._FILE_SIZE_LIMIT", SMALL_FILE_SIZE): with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): - log_collector = LogCollector(cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path") + log_collector = LogCollector() archive = log_collector.collect_logs_and_get_archive() self._assert_archive_created(archive) @@ -311,7 +311,7 @@ def test_log_collector_should_prioritize_important_files_if_archive_too_big(self with patch("azurelinuxagent.ga.logcollector._UNCOMPRESSED_ARCHIVE_SIZE_LIMIT", 10 * 1024 * 1024): with patch("azurelinuxagent.ga.logcollector._MUST_COLLECT_FILES", must_collect_files): with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): - log_collector = LogCollector(cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path") + log_collector = LogCollector() archive = log_collector.collect_logs_and_get_archive() self._assert_archive_created(archive) @@ -362,7 +362,7 @@ def test_log_collector_should_update_archive_when_files_are_new_or_modified_or_d # Ensure the archive reflects the state of files on the disk at collection time. If a file was updated, it # needs to be updated in the archive, deleted if removed from disk, and added if not previously seen. 
with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): - log_collector = LogCollector(cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path") + log_collector = LogCollector() first_archive = log_collector.collect_logs_and_get_archive() self._assert_archive_created(first_archive) @@ -433,7 +433,7 @@ def test_log_collector_should_clean_up_uncollected_truncated_files(self): with patch("azurelinuxagent.ga.logcollector._MUST_COLLECT_FILES", must_collect_files): with patch("azurelinuxagent.ga.logcollector._FILE_SIZE_LIMIT", SMALL_FILE_SIZE): with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): - log_collector = LogCollector(cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path") + log_collector = LogCollector() archive = log_collector.collect_logs_and_get_archive() self._assert_archive_created(archive) @@ -455,7 +455,7 @@ def test_log_collector_should_clean_up_uncollected_truncated_files(self): with patch("azurelinuxagent.ga.logcollector._MUST_COLLECT_FILES", must_collect_files): with patch("azurelinuxagent.ga.logcollector._FILE_SIZE_LIMIT", SMALL_FILE_SIZE): with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): - log_collector = LogCollector(cpu_cgroup_path="dummy_cpu_path", memory_cgroup_path="dummy_memory_path") + log_collector = LogCollector() second_archive = log_collector.collect_logs_and_get_archive() expected_files = [ diff --git a/tests/test_agent.py b/tests/test_agent.py index a2509ad851..414faa7266 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -233,7 +233,7 @@ def test_calls_collect_logs_with_proper_mode(self, mock_log_collector, *args): @patch("azurelinuxagent.agent.LogCollector") def test_calls_collect_logs_on_valid_cgroups(self, mock_log_collector): try: - CollectLogsHandler.enable_cgroups_validation() + CollectLogsHandler.enable_monitor_cgroups_check() mock_log_collector.run = Mock() def mock_cgroup_paths(*args, **kwargs): 
@@ -248,12 +248,12 @@ def mock_cgroup_paths(*args, **kwargs): mock_log_collector.assert_called_once() finally: - CollectLogsHandler.disable_cgroups_validation() + CollectLogsHandler.disable_monitor_cgroups_check() @patch("azurelinuxagent.agent.LogCollector") def test_doesnt_call_collect_logs_on_invalid_cgroups(self, mock_log_collector): try: - CollectLogsHandler.enable_cgroups_validation() + CollectLogsHandler.enable_monitor_cgroups_check() mock_log_collector.run = Mock() def mock_cgroup_paths(*args, **kwargs): @@ -272,7 +272,7 @@ def mock_cgroup_paths(*args, **kwargs): mock_exit.assert_called_once_with(logcollector.INVALID_CGROUPS_ERRCODE) self.assertEqual(exit_error, re) finally: - CollectLogsHandler.disable_cgroups_validation() + CollectLogsHandler.disable_monitor_cgroups_check() def test_it_should_parse_setup_firewall_properly(self): From 6e0e3f1fc192f96ab5550dce95edc45421e2daf2 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Mon, 9 Oct 2023 16:57:12 -0700 Subject: [PATCH 084/240] Clarify support status of installing from source. (#2941) Co-authored-by: narrieta --- README.md | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index ae6a851064..4227d53592 100644 --- a/README.md +++ b/README.md @@ -84,11 +84,13 @@ Waagent depends on some system packages in order to function properly: ## Installation -Installation via your distribution's package repository is preferred. -You can also customize your own RPM or DEB packages using the configuration -samples provided (see deb and rpm sections below). +Installing via your distribution's package repository is the only method that is supported. -For more advanced installation options, such as installing to custom locations or prefixes, you can use **setuptools** to install from source by running: +You can install from source for more advanced options, such as installing to a custom location or creating +custom images. 
Installing from source, though, may override customizations done to the Agent by your +distribution, and is meant only for advanced users. We provide very limited support for this method. + +To install from source, you can use **setuptools**: ```bash sudo python setup.py install --register-service @@ -108,11 +110,18 @@ You can view more installation options by running: The agent's log file is kept at `/var/log/waagent.log`. +Lastly, you can also customize your own RPM or DEB packages using the configuration +samples provided in the deb and rpm sections below. This method is also meant for advanced users and we +provide very limited support for it. + + ## Upgrade -Upgrading via your distribution's package repository is strongly preferred. +Upgrading via your distribution's package repository or using automatic updates are the only supported +methods. More information can be found here: [Update Linux Agent](https://learn.microsoft.com/en-us/azure/virtual-machines/extensions/update-linux-agent) -If upgrading manually, same with installation above by running: +To upgrade the Agent from source, you can use **setuptools**. Upgrading from source is meant for advanced +users and we provide very limited support for it. 
```bash sudo python setup.py install --force From 47ea3b36f13e12680397ee82e7334f032c399e44 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Fri, 20 Oct 2023 14:41:52 -0700 Subject: [PATCH 085/240] agent cpu quota scenario (#2937) * agent_cpu_quota scenario * addressed comments * addressed comments --- tests_e2e/test_suites/agent_cgroups.yml | 3 +- tests_e2e/test_suites/images.yml | 9 +- .../tests/agent_cgroups/agent_cpu_quota.py | 39 ++++ tests_e2e/tests/lib/cgroup_helpers.py | 1 + .../agent_cpu_quota-check_agent_cpu_quota.py | 213 ++++++++++++++++++ .../scripts/agent_cpu_quota-start_service.py | 96 ++++++++ 6 files changed, 359 insertions(+), 2 deletions(-) create mode 100644 tests_e2e/tests/agent_cgroups/agent_cpu_quota.py create mode 100755 tests_e2e/tests/scripts/agent_cpu_quota-check_agent_cpu_quota.py create mode 100755 tests_e2e/tests/scripts/agent_cpu_quota-start_service.py diff --git a/tests_e2e/test_suites/agent_cgroups.yml b/tests_e2e/test_suites/agent_cgroups.yml index 239f37e32a..32a290c9ef 100644 --- a/tests_e2e/test_suites/agent_cgroups.yml +++ b/tests_e2e/test_suites/agent_cgroups.yml @@ -1,7 +1,8 @@ # -# The test suite verify the agent running in expected cgroups and also, checks agent tracking the cgroups for polling resource metrics. +# The test suite verify the agent running in expected cgroups and also, checks agent tracking the cgroups for polling resource metrics. Also, it verifies the agent cpu quota is set as expected. 
# name: "AgentCgroups" tests: - "agent_cgroups/agent_cgroups.py" + - "agent_cgroups/agent_cpu_quota.py" images: "cgroups-endorsed" \ No newline at end of file diff --git a/tests_e2e/test_suites/images.yml b/tests_e2e/test_suites/images.yml index c254cccd71..02392b375b 100644 --- a/tests_e2e/test_suites/images.yml +++ b/tests_e2e/test_suites/images.yml @@ -81,7 +81,11 @@ images: AzureChinaCloud: [] centos_610: "OpenLogic CentOS 6.10 latest" centos_79: "OpenLogic CentOS 7_9 latest" - centos_82: "OpenLogic CentOS 8_2 latest" + centos_82: + urn: "OpenLogic CentOS 8_2 latest" + vm_sizes: + # Since centos derived from redhat, please see the comment for vm size in rhel_82 + - "Standard_B2s" debian_8: "credativ Debian 8 latest" debian_9: "credativ Debian 9 latest" debian_10: "Debian debian-10 10 latest" @@ -126,6 +130,9 @@ images: urn: "RedHat RHEL 8.2 latest" locations: AzureChinaCloud: [] + vm_sizes: + # Previously one user reported agent hang on this VM size for redhat 7+ but not observed in rhel 8. So I'm using same vm size to test agent cgroups scenario for rhel 8 to make sure we don't see any issue in automation. + - "Standard_B2s" rhel_90: urn: "RedHat RHEL 9_0 latest" locations: diff --git a/tests_e2e/tests/agent_cgroups/agent_cpu_quota.py b/tests_e2e/tests/agent_cgroups/agent_cpu_quota.py new file mode 100644 index 0000000000..79f95fc547 --- /dev/null +++ b/tests_e2e/tests/agent_cgroups/agent_cpu_quota.py @@ -0,0 +1,39 @@ +from typing import List, Dict, Any + +from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.logging import log + + +class AgentCPUQuota(AgentTest): + """ + The test verify that the agent detects when it is throttled for using too much CPU, that it detects processes that do belong to the agent's cgroup, and that resource metrics are generated. 
+ """ + def __init__(self, context): + super().__init__(context) + self._ssh_client = self._context.create_ssh_client() + + def run(self): + log.info("=====Validating agent cpu quota checks") + self._run_remote_test("agent_cpu_quota-check_agent_cpu_quota.py", use_sudo=True) + log.info("Successfully Verified that agent running in expected CPU quotas") + + def get_ignore_error_rules(self) -> List[Dict[str, Any]]: + ignore_rules = [ + # This is produced by the test, so it is expected + # Examples: + # 2023-10-03T17:59:03.007572Z INFO MonitorHandler ExtHandler [CGW] Disabling resource usage monitoring. Reason: Check on cgroups failed: + # [CGroupsException] The agent's cgroup includes unexpected processes: ['[PID: 3190] /usr/bin/python3\x00/home/azureuser/bin/agent_cpu_quota-start_servi', '[PID: 3293] dd\x00if=/dev/zero\x00of=/dev/null\x00'] + # [CGroupsException] The agent has been throttled for 5.7720997 seconds + {'message': r"Disabling resource usage monitoring. Reason: Check on cgroups failed"}, + # This may happen during service stop while terminating the process + # Example: + # 2022-03-11T21:11:11.713161Z ERROR E2ETest [Errno 3] No such process: + {'message': r'E2ETest.*No such process'}, + # 2022-10-26T15:38:39.655677Z ERROR E2ETest 'dd if=/dev/zero of=/dev/null' failed: -15 (): + {'message': r"E2ETest.*dd.*failed: -15"} + ] + return ignore_rules + + +if __name__ == "__main__": + AgentCPUQuota.run_from_command_line() diff --git a/tests_e2e/tests/lib/cgroup_helpers.py b/tests_e2e/tests/lib/cgroup_helpers.py index 7eb3a9b1f7..6da2865c21 100644 --- a/tests_e2e/tests/lib/cgroup_helpers.py +++ b/tests_e2e/tests/lib/cgroup_helpers.py @@ -133,6 +133,7 @@ def check_agent_quota_disabled(): Returns True if the cpu quota is infinity """ cpu_quota = get_agent_cpu_quota() + # the quota can be expressed as seconds (s) or milliseconds (ms); no quota is expressed as "infinity" return cpu_quota == 'infinity' diff --git 
a/tests_e2e/tests/scripts/agent_cpu_quota-check_agent_cpu_quota.py b/tests_e2e/tests/scripts/agent_cpu_quota-check_agent_cpu_quota.py new file mode 100755 index 0000000000..63871b43a6 --- /dev/null +++ b/tests_e2e/tests/scripts/agent_cpu_quota-check_agent_cpu_quota.py @@ -0,0 +1,213 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import datetime +import os +import re +import shutil +import time + +from assertpy import fail + +from azurelinuxagent.common.osutil import systemd +from azurelinuxagent.common.utils import shellutil +from azurelinuxagent.ga.cgroupconfigurator import _DROP_IN_FILE_CPU_QUOTA +from tests_e2e.tests.lib.agent_log import AgentLog +from tests_e2e.tests.lib.cgroup_helpers import check_agent_quota_disabled, \ + get_agent_cpu_quota +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.remote_test import run_remote_test +from tests_e2e.tests.lib.retry import retry_if_false + + +def prepare_agent(): + # This function prepares the agent: + # 1) It modifies the service unit file to wrap the agent process with a script that starts the actual agent and then + # launches an instance of the dummy process to consume the CPU. Since all these processes are in the same cgroup, + # this has the same effect as the agent itself consuming the CPU. 
+ # + # The process tree is similar to + # + # /usr/bin/python3 /home/azureuser/bin/agent_cpu_quota-start_service.py /usr/bin/python3 -u /usr/sbin/waagent -daemon + # ├─/usr/bin/python3 -u /usr/sbin/waagent -daemon + # │ └─python3 -u bin/WALinuxAgent-9.9.9.9-py3.8.egg -run-exthandlers + # │ └─4*[{python3}] + # ├─dd if=/dev/zero of=/dev/null + # │ + # └─{python3} + # + # And the agent's cgroup looks like + # + # CGroup: /azure.slice/walinuxagent.service + # ├─10507 /usr/bin/python3 /home/azureuser/bin/agent_cpu_quota-start_service.py /usr/bin/python3 -u /usr/sbin/waagent -daemon + # ├─10508 /usr/bin/python3 -u /usr/sbin/waagent -daemon + # ├─10516 python3 -u bin/WALinuxAgent-9.9.9.9-py3.8.egg -run-exthandlers + # ├─10711 dd if=/dev/zero of=/dev/null + # + # 2) It turns on a few debug flags and resart the agent + log.info("***Preparing agent for testing cpu quota") + # + # Create a drop in file to wrap "start-service.py" around the actual agent: This will ovveride the ExecStart line in the agent's unit file + # + # ExecStart= (need to be empty to clear the original ExecStart) + # ExecStart=/home/.../agent_cgroups-start-service.py /usr/bin/python3 -u /usr/sbin/waagent -daemon + # + service_file = systemd.get_agent_unit_file() + exec_start = None + with open(service_file, "r") as file_: + for line in file_: + match = re.match("ExecStart=(.+)", line) + if match is not None: + exec_start = match.group(1) + break + else: + file_.seek(0) + raise Exception("Could not find ExecStart in {0}\n:{1}".format(service_file, file_.read())) + agent_python = exec_start.split()[0] + current_directory = os.path.dirname(os.path.abspath(__file__)) + start_service_script = os.path.join(current_directory, "agent_cpu_quota-start_service.py") + drop_in_file = os.path.join(systemd.get_agent_drop_in_path(), "99-ExecStart.conf") + log.info("Creating %s...", drop_in_file) + with open(drop_in_file, "w") as file_: + file_.write(""" +[Service] +ExecStart= +ExecStart={0} {1} {2} 
+""".format(agent_python, start_service_script, exec_start)) + log.info("Executing daemon-reload") + shellutil.run_command(["systemctl", "daemon-reload"]) + + # Disable all checks on cgroups and enable log metrics every 20 sec + log.info("Executing script update-waagent-conf to enable agent cgroups config flag") + result = shellutil.run_command(["update-waagent-conf", "Debug.CgroupCheckPeriod=20", "Debug.CgroupLogMetrics=y", + "Debug.CgroupDisableOnProcessCheckFailure=n", "Debug.CgroupDisableOnQuotaCheckFailure=n"]) + log.info("Successfully enabled agent cgroups config flag: {0}".format(result)) + + +def verify_agent_reported_metrics(): + """ + This method verifies that the agent reports % Processor Time and Throttled Time metrics + """ + log.info("** Verifying agent reported metrics") + log.info("Parsing agent log for metrics") + processor_time = [] + throttled_time = [] + + def check_agent_log_for_metrics() -> bool: + for record in AgentLog().read(): + match = re.search(r"% Processor Time\s*\[walinuxagent.service\]\s*=\s*([0-9.]+)", record.message) + if match is not None: + processor_time.append(float(match.group(1))) + else: + match = re.search(r"Throttled Time\s*\[walinuxagent.service\]\s*=\s*([0-9.]+)", record.message) + if match is not None: + throttled_time.append(float(match.group(1))) + if len(processor_time) < 1 or len(throttled_time) < 1: + return False + return True + + found: bool = retry_if_false(check_agent_log_for_metrics) + if found: + log.info("%% Processor Time: %s", processor_time) + log.info("Throttled Time: %s", throttled_time) + log.info("Successfully verified agent reported resource metrics") + else: + fail( + "The agent doesn't seem to be collecting % Processor Time and Throttled Time metrics. 
Agent found Processor Time: {0}, Throttled Time: {1}".format( + processor_time, throttled_time)) + + +def wait_for_log_message(message, timeout=datetime.timedelta(minutes=5)): + log.info("Checking agent's log for message matching [%s]", message) + start_time = datetime.datetime.now() + while datetime.datetime.now() - start_time <= timeout: + for record in AgentLog().read(): + match = re.search(message, record.message, flags=re.DOTALL) + if match is not None: + log.info("Found message:\n\t%s", record.text.replace("\n", "\n\t")) + return + time.sleep(30) + fail("The agent did not find [{0}] in its log within the allowed timeout".format(message)) + + +def verify_process_check_on_agent_cgroups(): + """ + This method checks agent detect unexpected processes in its cgroup and disables the CPUQuota + """ + log.info("***Verifying process check on agent cgroups") + log.info("Ensuring agent CPUQuota is enabled and backup the drop-in file to restore later in further tests") + if check_agent_quota_disabled(): + fail("The agent's CPUQuota is not enabled: {0}".format(get_agent_cpu_quota())) + quota_drop_in = os.path.join(systemd.get_agent_drop_in_path(), _DROP_IN_FILE_CPU_QUOTA) + quota_drop_in_backup = quota_drop_in + ".bk" + log.info("Backing up %s to %s...", quota_drop_in, quota_drop_in_backup) + shutil.copy(quota_drop_in, quota_drop_in_backup) + # + # Re-enable Process checks on cgroups and verify that the agent detects unexpected processes in its cgroup and disables the CPUQuota wehen + # that happens + # + shellutil.run_command(["update-waagent-conf", "Debug.CgroupDisableOnProcessCheckFailure=y"]) + + # The log message indicating the check failed is similar to + # 2021-03-29T23:33:15.603530Z INFO MonitorHandler ExtHandler Disabling resource usage monitoring. 
Reason: Check on cgroups failed: + # [CGroupsException] The agent's cgroup includes unexpected processes: ['[PID: 25826] python3\x00/home/nam/Compute-Runtime-Tux-Pipeline/dungeon_crawler/s'] + wait_for_log_message( + "Disabling resource usage monitoring. Reason: Check on cgroups failed:.+The agent's cgroup includes unexpected processes") + if not check_agent_quota_disabled(): + fail("The agent did not disable its CPUQuota: {0}".format(get_agent_cpu_quota())) + + +def verify_throttling_time_check_on_agent_cgroups(): + """ + This method checks agent disables its CPUQuota when it exceeds its throttling limit + """ + log.info("***Verifying CPU throttling check on agent cgroups") + # Now disable the check on unexpected processes and enable the check on throttledtime and verify that the agent disables its CPUQuota when it exceeds its throttling limit + log.info("Re-enabling CPUQuota...") + quota_drop_in = os.path.join(systemd.get_agent_drop_in_path(), _DROP_IN_FILE_CPU_QUOTA) + quota_drop_in_backup = quota_drop_in + ".bk" + log.info("Restoring %s from %s...", quota_drop_in, quota_drop_in_backup) + shutil.copy(quota_drop_in_backup, quota_drop_in) + shellutil.run_command(["systemctl", "daemon-reload"]) + shellutil.run_command(["update-waagent-conf", "Debug.CgroupDisableOnProcessCheckFailure=n", "Debug.CgroupDisableOnQuotaCheckFailure=y", "Debug.AgentCpuThrottledTimeThreshold=5"]) + + # The log message indicating the check failed is similar to + # 2021-04-01T20:47:55.892569Z INFO MonitorHandler ExtHandler Disabling resource usage monitoring. Reason: Check on cgroups failed: + # [CGroupsException] The agent has been throttled for 121.339916938 seconds + # + # After we need to wait for a little longer for the agent to update systemd: + # 2021-04-14T01:51:44.399860Z INFO MonitorHandler ExtHandler Executing systemctl daemon-reload... + # + wait_for_log_message( + "Disabling resource usage monitoring. 
Reason: Check on cgroups failed:.+The agent has been throttled", + timeout=datetime.timedelta(minutes=10)) + wait_for_log_message("Stopped tracking cgroup walinuxagent.service", timeout=datetime.timedelta(minutes=10)) + wait_for_log_message("Executing systemctl daemon-reload...", timeout=datetime.timedelta(minutes=5)) + if not check_agent_quota_disabled(): + fail("The agent did not disable its CPUQuota: {0}".format(get_agent_cpu_quota())) + + +def main(): + prepare_agent() + verify_agent_reported_metrics() + verify_process_check_on_agent_cgroups() + verify_throttling_time_check_on_agent_cgroups() + + +run_remote_test(main) diff --git a/tests_e2e/tests/scripts/agent_cpu_quota-start_service.py b/tests_e2e/tests/scripts/agent_cpu_quota-start_service.py new file mode 100755 index 0000000000..ba0f5abb23 --- /dev/null +++ b/tests_e2e/tests/scripts/agent_cpu_quota-start_service.py @@ -0,0 +1,96 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# This script starts the actual agent and then launches an instance of the dummy process periodically to consume the CPU +# +import signal +import subprocess +import sys +import threading +import time +import traceback + +from azurelinuxagent.common import logger + + +class CpuConsumer(threading.Thread): + def __init__(self): + threading.Thread.__init__(self) + self._stopped = False + + def run(self): + threading.current_thread().setName("*Stress*") + + while not self._stopped: + try: + # Dummy operation(reads empty streams and drops) which creates load on the CPU + dd_command = ["dd", "if=/dev/zero", "of=/dev/null"] + logger.info("Starting dummy dd command: {0} to stress CPU", ' '.join(dd_command)) + subprocess.Popen(dd_command) + logger.info("dd command completed; sleeping...") + i = 0 + while i < 30 and not self._stopped: + time.sleep(1) + i += 1 + except Exception as exception: + logger.error("{0}:\n{1}", exception, traceback.format_exc()) + + def stop(self): + self._stopped = True + + +try: + threading.current_thread().setName("*StartService*") + logger.set_prefix("E2ETest") + logger.add_logger_appender(logger.AppenderType.FILE, logger.LogLevel.INFO, "/var/log/waagent.log") + + agent_command_line = sys.argv[1:] + + logger.info("Starting Agent: {0}", ' '.join(agent_command_line)) + agent_process = subprocess.Popen(agent_command_line) + + # sleep a little to give the agent a chance to initialize + time.sleep(15) + + cpu_consumer = CpuConsumer() + cpu_consumer.start() + + + def forward_signal(signum, _): + if signum == signal.SIGTERM: + logger.info("Stopping stress thread...") + cpu_consumer.stop() + logger.info("Forwarding signal {0} to Agent", signum) + agent_process.send_signal(signum) + + + signal.signal(signal.SIGTERM, forward_signal) + + agent_process.wait() + logger.info("Agent completed") + + cpu_consumer.stop() + cpu_consumer.join() + logger.info("Stress completed") + + logger.info("Exiting...") + sys.exit(agent_process.returncode) + +except 
Exception as exception: + logger.error("Unexpected error occurred while starting agent service : {0}", exception) + raise From c323eb481c475247e5c23739f0305dc72a80e4d5 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Mon, 23 Oct 2023 13:44:58 -0700 Subject: [PATCH 086/240] skip test version install (#2950) * skip test install * address comments * pylint * local run stuff * undo --- .../orchestrator/lib/agent_test_suite.py | 16 +++- tests_e2e/test_suites/agent_publish.yml | 3 +- .../tests/agent_publish/agent_publish.py | 7 +- tests_e2e/tests/lib/agent_test.py | 6 ++ ..._publish-get_agent_log_record_timestamp.py | 75 +++++++++++++++++++ 5 files changed, 103 insertions(+), 4 deletions(-) create mode 100755 tests_e2e/tests/scripts/agent_publish-get_agent_log_record_timestamp.py diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index 4a14b1f665..9a27ce395a 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -511,6 +511,7 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool: summary: List[str] = [] ignore_error_rules: List[Dict[str, Any]] = [] + before_timestamp = datetime.datetime.min for test in suite.tests: test_full_name = f"{suite_name}-{test.name}" @@ -584,6 +585,14 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool: ignore_error_rules.extend(test_instance.get_ignore_error_rules()) + # If the test has a timestamp before which errors should be ignored in the agent log, use that timestamp + # if multiple tests have this setting, use the earliest timestamp + if test_instance.get_ignore_errors_before_timestamp() != datetime.datetime.min: + if before_timestamp != datetime.datetime.min: + before_timestamp = min(before_timestamp, test_instance.get_ignore_errors_before_timestamp()) + else: + before_timestamp = test_instance.get_ignore_errors_before_timestamp() + if not 
test_success and test.blocks_suite: log.warning("%s failed and blocks the suite. Stopping suite execution.", test.name) break @@ -608,11 +617,11 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool: if not suite_success: self._mark_log_as_failed() - suite_success = suite_success and self._check_agent_log(ignore_error_rules) + suite_success = suite_success and self._check_agent_log(ignore_error_rules, before_timestamp) return suite_success - def _check_agent_log(self, ignore_error_rules: List[Dict[str, Any]]) -> bool: + def _check_agent_log(self, ignore_error_rules: List[Dict[str, Any]], before_timestamp: datetime) -> bool: """ Checks the agent log for errors; returns true on success (no errors int the log) """ @@ -629,6 +638,9 @@ def _check_agent_log(self, ignore_error_rules: List[Dict[str, Any]]) -> bool: if len(ignore_error_rules) > 0: new = [] for e in errors: + # Ignore errors that occurred before the timestamp + if e.timestamp < before_timestamp: + continue if not AgentLog.matches_ignore_rule(e, ignore_error_rules): new.append(e) errors = new diff --git a/tests_e2e/test_suites/agent_publish.yml b/tests_e2e/test_suites/agent_publish.yml index 9b855f4ce4..3ab29c6a0b 100644 --- a/tests_e2e/test_suites/agent_publish.yml +++ b/tests_e2e/test_suites/agent_publish.yml @@ -8,4 +8,5 @@ images: - "random(endorsed, 10)" - "random(endorsed-arm64, 2)" locations: "AzureCloud:centraluseuap" -owns_vm: true \ No newline at end of file +owns_vm: true +install_test_agent: false \ No newline at end of file diff --git a/tests_e2e/tests/agent_publish/agent_publish.py b/tests_e2e/tests/agent_publish/agent_publish.py index eaddc74ede..91befd63b5 100644 --- a/tests_e2e/tests/agent_publish/agent_publish.py +++ b/tests_e2e/tests/agent_publish/agent_publish.py @@ -17,6 +17,7 @@ # limitations under the License. 
# import uuid +from datetime import datetime from typing import Any, Dict, List from tests_e2e.tests.lib.agent_test import AgentTest @@ -51,13 +52,17 @@ def run(self): self._get_agent_info() self._check_cse() + def get_ignore_errors_before_timestamp(self) -> datetime: + timestamp = self._ssh_client.run_command("agent_publish-get_agent_log_record_timestamp.py") + return datetime.strptime(timestamp.strip(), u'%Y-%m-%d %H:%M:%S.%f') + def _get_agent_info(self) -> None: stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) log.info('Agent info \n%s', stdout) def _prepare_agent(self) -> None: log.info("Modifying agent update related config flags") - self._run_remote_test("update-waagent-conf Debug.DownloadNewAgents=y AutoUpdate.GAFamily=Test", use_sudo=True) + self._run_remote_test("update-waagent-conf Debug.DownloadNewAgents=y AutoUpdate.GAFamily=Test AutoUpdate.Enabled=y Extensions.Enabled=y", use_sudo=True) log.info('Updated agent-update DownloadNewAgents GAFamily config flags') def _check_update(self) -> None: diff --git a/tests_e2e/tests/lib/agent_test.py b/tests_e2e/tests/lib/agent_test.py index 2eac007afd..dcfc4db98e 100644 --- a/tests_e2e/tests/lib/agent_test.py +++ b/tests_e2e/tests/lib/agent_test.py @@ -20,6 +20,8 @@ import sys from abc import ABC, abstractmethod +from datetime import datetime + from assertpy import fail from typing import Any, Dict, List @@ -59,6 +61,10 @@ def get_ignore_error_rules(self) -> List[Dict[str, Any]]: # Tests can override this method to return a list with rules to ignore errors in the agent log (see agent_log.py for sample rules). 
return [] + def get_ignore_errors_before_timestamp(self) -> datetime: + # Ignore errors in the agent log before this timestamp + return datetime.min + @classmethod def run_from_command_line(cls): """ diff --git a/tests_e2e/tests/scripts/agent_publish-get_agent_log_record_timestamp.py b/tests_e2e/tests/scripts/agent_publish-get_agent_log_record_timestamp.py new file mode 100755 index 0000000000..d055fc6c25 --- /dev/null +++ b/tests_e2e/tests/scripts/agent_publish-get_agent_log_record_timestamp.py @@ -0,0 +1,75 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import re +from datetime import datetime + +from tests_e2e.tests.lib.agent_log import AgentLog + +# pylint: disable=W0105 +""" +> WALinuxAgent-2.2.18 discovered WALinuxAgent-2.2.47 as an update and will exit +(None, 'WALinuxAgent-2.2.18', '2.2.47') +""" +_UPDATE_PATTERN_00 = re.compile(r'(.*Agent\s)?(\S*)\sdiscovered\sWALinuxAgent-(\S*)\sas an update and will exit') + +""" +> Agent WALinuxAgent-2.2.45 discovered update WALinuxAgent-2.2.47 -- exiting +('Agent', 'WALinuxAgent-2.2.45', '2.2.47') +""" +_UPDATE_PATTERN_01 = re.compile(r'(.*Agent)?\s(\S*) discovered update WALinuxAgent-(\S*) -- exiting') + +""" +> Normal Agent upgrade discovered, updating to WALinuxAgent-2.9.1.0 -- exiting +('Normal Agent', WALinuxAgent, '2.9.1.0 ') +""" +_UPDATE_PATTERN_02 = re.compile(r'(.*Agent) upgrade discovered, updating to (WALinuxAgent)-(\S*) -- exiting') + +""" +> Agent update found, exiting current process to downgrade to the new Agent version 1.3.0.0 +(Agent, 'downgrade', '1.3.0.0') +""" +_UPDATE_PATTERN_03 = re.compile( + r'(.*Agent) update found, exiting current process to (\S*) to the new Agent version (\S*)') + + +""" +This script return timestamp of update message in the agent log +""" + + +def main(): + try: + agentlog = AgentLog() + + for record in agentlog.read(): + + for p in [_UPDATE_PATTERN_00, _UPDATE_PATTERN_01, _UPDATE_PATTERN_02, _UPDATE_PATTERN_03]: + update_match = re.match(p, record.text) + if update_match: + return record.timestamp + + return datetime.min + except Exception as e: + raise Exception("Error thrown when searching for update pattern in agent log to get record timestamp: {0}".format(str(e))) + + +if __name__ == "__main__": + timestamp = main() + print(timestamp) From d638a3d6e42670a7822c7541c6c1e13a25a513e9 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Fri, 27 Oct 2023 13:22:22 -0700 Subject: [PATCH 087/240] Add support for VM Scale Sets to end-to-end tests (#2954) --------- Co-authored-by: narrieta --- test-requirements.txt | 1 + 
.../orchestrator/lib/agent_test_loader.py | 18 +- .../orchestrator/lib/agent_test_suite.py | 847 +++++++++++------- .../lib/agent_test_suite_combinator.py | 447 ++++++--- .../lib/update_arm_template_hook.py | 4 +- tests_e2e/orchestrator/runbook.yml | 199 ++-- .../sample_runbooks/existing_vm.yml | 149 --- tests_e2e/orchestrator/templates/vmss.json | 253 ++++++ tests_e2e/test_suites/vmss.yml | 8 + .../tests/agent_bvt/extension_operations.py | 11 +- tests_e2e/tests/agent_bvt/run_command.py | 6 +- tests_e2e/tests/agent_bvt/vm_access.py | 16 +- .../tests/agent_cgroups/agent_cgroups.py | 10 +- .../tests/agent_cgroups/agent_cpu_quota.py | 9 +- .../agent_ext_workflow/extension_workflow.py | 15 +- .../tests/agent_firewall/agent_firewall.py | 10 +- .../agent_not_provisioned.py | 10 +- .../disable_agent_provisioning.py | 7 +- .../tests/agent_publish/agent_publish.py | 14 +- tests_e2e/tests/agent_status/agent_status.py | 13 +- tests_e2e/tests/agent_update/rsm_update.py | 29 +- tests_e2e/tests/ext_cgroups/ext_cgroups.py | 10 +- .../tests/ext_cgroups/install_extensions.py | 8 +- .../ext_telemetry_pipeline.py | 12 +- .../extensions_disabled.py | 10 +- tests_e2e/tests/fips/fips.py | 10 +- .../keyvault_certificates.py | 11 +- .../tests/lib/add_network_security_group.py | 138 +-- tests_e2e/tests/lib/agent_test.py | 40 +- tests_e2e/tests/lib/agent_test_context.py | 186 ++-- .../{azure_client.py => azure_sdk_client.py} | 19 +- tests_e2e/tests/lib/logging.py | 15 + tests_e2e/tests/lib/resource_group_client.py | 74 ++ tests_e2e/tests/lib/ssh_client.py | 20 +- tests_e2e/tests/lib/update_arm_template.py | 5 +- tests_e2e/tests/lib/virtual_machine_client.py | 106 +-- .../lib/virtual_machine_extension_client.py | 26 +- .../lib/virtual_machine_scale_set_client.py | 107 +++ ...ntifiers.py => vm_extension_identifier.py} | 31 +- .../multi_config_ext/multi_config_ext.py | 16 +- .../check_fallback_to_hgap.py | 6 +- .../check_no_outbound_connections.py | 4 +- .../deny_outbound_connections.py | 5 +- 
tests_e2e/tests/samples/error_remote_test.py | 6 +- tests_e2e/tests/samples/error_test.py | 4 +- tests_e2e/tests/samples/fail_remote_test.py | 6 +- tests_e2e/tests/samples/fail_test.py | 4 +- tests_e2e/tests/samples/pass_remote_test.py | 6 +- tests_e2e/tests/samples/pass_test.py | 4 +- tests_e2e/tests/samples/vmss_test.py | 37 + 50 files changed, 1875 insertions(+), 1127 deletions(-) delete mode 100644 tests_e2e/orchestrator/sample_runbooks/existing_vm.yml create mode 100644 tests_e2e/orchestrator/templates/vmss.json create mode 100644 tests_e2e/test_suites/vmss.yml rename tests_e2e/tests/lib/{azure_client.py => azure_sdk_client.py} (67%) create mode 100644 tests_e2e/tests/lib/resource_group_client.py create mode 100644 tests_e2e/tests/lib/virtual_machine_scale_set_client.py rename tests_e2e/tests/lib/{identifiers.py => vm_extension_identifier.py} (79%) create mode 100755 tests_e2e/tests/samples/vmss_test.py diff --git a/test-requirements.txt b/test-requirements.txt index 89a2bb2c5d..2b9467870e 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -17,6 +17,7 @@ assertpy azure-core azure-identity azure-mgmt-compute>=22.1.0 +azure-mgmt-network>=19.3.0 azure-mgmt-resource>=15.0.0 msrestazure pytz diff --git a/tests_e2e/orchestrator/lib/agent_test_loader.py b/tests_e2e/orchestrator/lib/agent_test_loader.py index fbd6cfe8f8..a1ac6c2a46 100644 --- a/tests_e2e/orchestrator/lib/agent_test_loader.py +++ b/tests_e2e/orchestrator/lib/agent_test_loader.py @@ -23,7 +23,7 @@ from typing import Any, Dict, List, Type import tests_e2e -from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test import AgentTest, AgentVmTest, AgentVmssTest class TestInfo(object): @@ -31,7 +31,7 @@ class TestInfo(object): Description of a test """ # The class that implements the test - test_class: Type[AgentTest] + test_class: Type[AgentVmTest] # If True, an error in the test blocks the execution of the test suite (defaults to False) blocks_suite: bool @@ 
-57,6 +57,8 @@ class TestSuiteInfo(object): locations: List[str] # Whether this suite must run on its own test VM owns_vm: bool + # If True, the suite must run on a scale set (instead of a single VM) + executes_on_scale_set: bool # Whether to install the test Agent on the test VM install_test_agent: bool # Customization for the ARM template used when creating the test VM @@ -222,6 +224,7 @@ def _load_test_suite(description_file: Path) -> TestSuiteInfo: This is useful for suites that modify the test VMs in such a way that the setup may cause problems in other test suites (for example, some tests targeted to the HGAP block internet access in order to force the agent to use the HGAP). + * executes_on_scale_set - [Optional; boolean] True indicates that the test runs on a scale set. * install_test_agent - [Optional; boolean] By default the setup process installs the test Agent on the test VMs; set this property to False to skip the installation. * template - [Optional; string] If given, the ARM template for the test VM is customized using the given Python module. @@ -267,8 +270,13 @@ def _load_test_suite(description_file: Path) -> TestSuiteInfo: test_suite_info.owns_vm = "owns_vm" in test_suite and test_suite["owns_vm"] test_suite_info.install_test_agent = "install_test_agent" not in test_suite or test_suite["install_test_agent"] + test_suite_info.executes_on_scale_set = "executes_on_scale_set" in test_suite and test_suite["executes_on_scale_set"] test_suite_info.template = test_suite.get("template", "") + # TODO: Add support for custom templates + if test_suite_info.executes_on_scale_set and test_suite_info.template != '': + raise Exception(f"Currently custom templates are not supported on scale sets. 
[Test suite: {test_suite_info.name}]") + skip_on_clouds = test_suite.get("skip_on_clouds") if skip_on_clouds is not None: if isinstance(skip_on_clouds, str): @@ -281,7 +289,7 @@ def _load_test_suite(description_file: Path) -> TestSuiteInfo: return test_suite_info @staticmethod - def _load_test_class(relative_path: str) -> Type[AgentTest]: + def _load_test_class(relative_path: str) -> Type[AgentVmTest]: """ Loads an AgentTest from its source code file, which is given as a path relative to WALinuxAgent/tests_e2e/tests. """ @@ -289,8 +297,8 @@ def _load_test_class(relative_path: str) -> Type[AgentTest]: spec = importlib.util.spec_from_file_location(f"tests_e2e.tests.{relative_path.replace('/', '.').replace('.py', '')}", str(full_path)) module = importlib.util.module_from_spec(spec) spec.loader.exec_module(module) - # return all the classes in the module that are subclasses of AgentTest but are not AgentTest itself. - matches = [v for v in module.__dict__.values() if isinstance(v, type) and issubclass(v, AgentTest) and v != AgentTest] + # return all the classes in the module that are subclasses of AgentTest but are not AgentVmTest or AgentVmssTest themselves. + matches = [v for v in module.__dict__.values() if isinstance(v, type) and issubclass(v, AgentTest) and v != AgentVmTest and v != AgentVmssTest] if len(matches) != 1: raise Exception(f"Error in {full_path} (each test file must contain exactly one class derived from AgentTest)") return matches[0] diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index 9a27ce395a..9209b18f01 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -14,7 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -import contextlib import datetime import json import logging @@ -22,8 +21,8 @@ import uuid from pathlib import Path -from threading import current_thread, RLock -from typing import Any, Dict, List +from threading import RLock +from typing import Any, Dict, List, Tuple # Disable those warnings, since 'lisa' is an external, non-standard, dependency # E0401: Unable to import 'lisa' (import-error) @@ -31,7 +30,6 @@ from lisa import ( # pylint: disable=E0401 Environment, Logger, - Node, notifier, simple_requirement, TestCaseMetadata, @@ -40,19 +38,27 @@ ) from lisa.environment import EnvironmentStatus # pylint: disable=E0401 from lisa.messages import TestStatus, TestResultMessage # pylint: disable=E0401 +from lisa.node import LocalNode # pylint: disable=E0401 +from lisa.util.constants import RUN_ID # pylint: disable=E0401 from lisa.sut_orchestrator.azure.common import get_node_context # pylint: disable=E0401 +from lisa.sut_orchestrator.azure.platform_ import AzurePlatform # pylint: disable=E0401 import makepkg from azurelinuxagent.common.version import AGENT_VERSION + +from tests_e2e.tests.lib.add_network_security_group import AddNetworkSecurityGroup +from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient +from tests_e2e.tests.lib.virtual_machine_scale_set_client import VirtualMachineScaleSetClient + +import tests_e2e from tests_e2e.orchestrator.lib.agent_test_loader import TestSuiteInfo from tests_e2e.tests.lib.agent_log import AgentLog from tests_e2e.tests.lib.agent_test import TestSkipped, RemoteTestError -from tests_e2e.tests.lib.agent_test_context import AgentTestContext -from tests_e2e.tests.lib.identifiers import VmIdentifier -from tests_e2e.tests.lib.logging import log -from tests_e2e.tests.lib.logging import set_current_thread_log +from tests_e2e.tests.lib.agent_test_context import AgentTestContext, AgentVmTestContext, AgentVmssTestContext +from tests_e2e.tests.lib.logging import log, set_thread_name, set_current_thread_log from 
tests_e2e.tests.lib.agent_log import AgentLogRecord -from tests_e2e.tests.lib.shell import run_command, CommandError +from tests_e2e.tests.lib.resource_group_client import ResourceGroupClient +from tests_e2e.tests.lib.shell import run_command from tests_e2e.tests.lib.ssh_client import SshClient @@ -80,19 +86,6 @@ def _initialize_lisa_logger(): _initialize_lisa_logger() -# -# Helper to change the current thread name temporarily -# -@contextlib.contextmanager -def _set_thread_name(name: str): - initial_name = current_thread().name - current_thread().name = name - try: - yield - finally: - current_thread().name = initial_name - - # # Possible values for the collect_logs parameter # @@ -102,76 +95,166 @@ class CollectLogs(object): No = 'no' # Never collect logs +# +# Possible values for the keep_environment parameter +# +class KeepEnvironment(object): + Always = 'always' # Do not delete resources created by the test suite + Failed = 'failed' # Skip delete only on test failures + No = 'no' # Always delete resources created by the test suite + + +class _TestNode(object): + """ + Name and IP address of a test VM + """ + def __init__(self, name: str, ip_address: str): + self.name = name + self.ip_address = ip_address + + def __str__(self): + return f"{self.name}:{self.ip_address}" + + @TestSuiteMetadata(area="waagent", category="", description="") class AgentTestSuite(LisaTestSuite): """ Manages the setup of test VMs and execution of Agent test suites. This class acts as the interface with the LISA framework, which will invoke the execute() method when a runbook is executed. """ - - class _Context(AgentTestContext): - def __init__(self, vm: VmIdentifier, paths: AgentTestContext.Paths, connection: AgentTestContext.Connection): - super().__init__(vm=vm, paths=paths, connection=connection) - # These are initialized by AgentTestSuite._set_context(). 
- self.log_path: Path = None - self.lisa_log: Logger = None - self.node: Node = None - self.runbook_name: str = None - self.environment_name: str = None - self.is_vhd: bool = None - self.test_suites: List[AgentTestSuite] = None - self.collect_logs: str = None - self.skip_setup: bool = None - self.ssh_client: SshClient = None - def __init__(self, metadata: TestSuiteMetadata) -> None: super().__init__(metadata) - # The context is initialized by _set_context() via the call to execute() - self.__context: AgentTestSuite._Context = None - - def _initialize(self, node: Node, variables: Dict[str, Any], lisa_working_path: str, lisa_log_path: str, lisa_log: Logger): - connection_info = node.connection_info - node_context = get_node_context(node) - - self.__context = self._Context( - vm=VmIdentifier( - cloud=self._get_required_parameter(variables, "cloud"), - location=self._get_required_parameter(variables, "c_location"), - subscription=node.features._platform.subscription_id, - resource_group=node_context.resource_group_name, - name=node_context.vm_name), - paths=AgentTestContext.Paths( - working_directory=self._get_working_directory(lisa_working_path), - remote_working_directory=Path('/home')/connection_info['username']), - connection=AgentTestContext.Connection( - ip_address=connection_info['address'], - username=connection_info['username'], - private_key_file=connection_info['private_key_file'], - ssh_port=connection_info['port'])) - - self.__context.log_path = self._get_log_path(variables, lisa_log_path) - self.__context.lisa_log = lisa_log - self.__context.node = node - self.__context.is_vhd = self._get_optional_parameter(variables, "c_vhd") != "" - self.__context.environment_name = f"{node.os.name}-vhd" if self.__context.is_vhd else self._get_required_parameter(variables, "c_env_name") - self.__context.test_suites = self._get_required_parameter(variables, "c_test_suites") - self.__context.collect_logs = self._get_required_parameter(variables, "collect_logs") - 
self.__context.skip_setup = self._get_required_parameter(variables, "skip_setup") - self.__context.ssh_client = SshClient(ip_address=self.__context.vm_ip_address, username=self.__context.username, private_key_file=self.__context.private_key_file) + self._working_directory: Path # Root directory for temporary files + self._log_path: Path # Root directory for log files + self._pypy_x64_path: Path # Path to the Pypy x64 download + self._pypy_arm64_path: Path # Path to the Pypy ARM64 download + self._test_agent_package_path: Path # Path to the package for the test Agent + self._test_source_directory: Path # Root directory of the source code for the end-to-end tests + self._test_tools_tarball_path: Path # Path to the tarball with the tools needed on the test node - @staticmethod - def _get_required_parameter(variables: Dict[str, Any], name: str) -> Any: - value = variables.get(name) - if value is None: - raise Exception(f"The runbook is missing required parameter '{name}'") - return value + self._runbook_name: str # name of the runbook execution, used as prefix on ARM resources created by the AgentTestSuite - @staticmethod - def _get_optional_parameter(variables: Dict[str, Any], name: str, default_value: Any = "") -> Any: - value = variables.get(name) - if value is None: - return default_value - return value + self._lisa_log: Logger # Main log for the LISA run + + self._lisa_environment_name: str # Name assigned by LISA to the test environment, useful for correlation with LISA logs + self._environment_name: str # Name assigned by the AgentTestSuiteCombinator to the test environment + + self._test_suites: List[AgentTestSuite] # Test suites to execute in the environment + + self._cloud: str # Azure cloud where test VMs are located + self._subscription_id: str # Azure subscription where test VMs are located + self._location: str # Azure location (region) where test VMs are located + self._image: str # Image used to create the test VMs; it can be empty if LISA chose the 
size, or when using an existing VM + + self._is_vhd: bool # True when the test VMs were created by LISA from a VHD; this is usually used to validate a new VHD and the test Agent is not installed + + # username and public SSH key for the admin account used to connect to the test VMs + self._user: str + self._identity_file: str + + self._skip_setup: bool # If True, skip the setup of the test VMs + self._collect_logs: str # Whether to collect logs from the test VMs (one of 'always', 'failed', or 'no') + self._keep_environment: str # Whether to skip deletion of the resources created by the test suite (one of 'always', 'failed', or 'no') + + # Resource group and VM/VMSS for the test machines. self._vm_name and self._vmss_name are mutually exclusive, only one of them will be set. + self._resource_group_name: str + self._vm_name: str + self._vm_ip_address: str + self._vmss_name: str + + self._test_nodes: List[_TestNode] # VMs or scale set instances the tests will run on + + # Whether to create and delete a scale set. + self._create_scale_set: bool + self._delete_scale_set: bool + + def _initialize(self, environment: Environment, variables: Dict[str, Any], lisa_working_path: str, lisa_log_path: str, lisa_log: Logger): + """ + Initializes the AgentTestSuite from the data passed as arguments by LISA. + + NOTE: All the interface with LISA should be confined to this method. The rest of the test code should not have any dependencies on LISA. 
+ """ + self._working_directory = self._get_working_directory(lisa_working_path) + self._log_path = self._get_log_path(variables, lisa_log_path) + self._test_agent_package_path = self._working_directory/"eggs"/f"WALinuxAgent-{AGENT_VERSION}.zip" + self._test_source_directory = Path(tests_e2e.__path__[0]) + self._test_tools_tarball_path = self._working_directory/"waagent-tools.tar" + self._pypy_x64_path = Path("/tmp/pypy3.7-x64.tar.bz2") + self._pypy_arm64_path = Path("/tmp/pypy3.7-arm64.tar.bz2") + + self._runbook_name = variables["name"] + + self._lisa_log = lisa_log + + self._lisa_environment_name = environment.name + self._environment_name = variables["c_env_name"] + + self._test_suites = variables["c_test_suites"] + + self._cloud = variables["cloud"] + self._subscription_id = variables["subscription_id"] + self._location = variables["c_location"] + self._image = variables["c_image"] + + self._is_vhd = variables["c_is_vhd"] + + self._user = variables["user"] + self._identity_file = variables["identity_file"] + + self._skip_setup = variables["skip_setup"] + self._keep_environment = variables["keep_environment"] + self._collect_logs = variables["collect_logs"] + + # The AgentTestSuiteCombinator can create 4 kinds of platform/environment combinations: + # + # * New VM + # The VM is created by LISA. The platform will be 'azure' and the environment will contain a single 'remote' node. + # + # * Existing VM + # The VM was passed as argument to the runbook. The platform will be 'ready' and the environment will contain a single 'remote' node. + # + # * New VMSS + # The AgentTestSuite will create the scale set before executing the tests. The platform will be 'ready' and the environment will a single 'local' node. + # + # * Existing VMSS + # The VMSS was passed as argument to the runbook. The platform will be 'ready' and the environment will contain a list of 'remote' nodes, + # one for each instance of the scale set. 
+ # + + # Note that _vm_name and _vmss_name are mutually exclusive, only one of them will be set. + self._vm_name = None + self._vm_ip_address = None + self._vmss_name = None + self._create_scale_set = False + self._delete_scale_set = False + + if isinstance(environment.nodes[0], LocalNode): + # We need to create a new VMSS. + # Use the same naming convention as LISA for the scale set name: lisa---e0-n0. Note that we hardcode the resource group + # id to "e0" and the scale set name to "n0" since we are creating a single scale set. + self._resource_group_name = f"lisa-{self._runbook_name}-{RUN_ID}-e0" + self._vmss_name = f"{self._resource_group_name}-n0" + self._test_nodes = [] # we'll fill this up when the scale set is created + self._create_scale_set = True + self._delete_scale_set = False # we set it to True once we create the scale set + else: + # Else we are using a VM that was created by LISA, or an existing VM/VMSS + node_context = get_node_context(environment.nodes[0]) + + if isinstance(environment.nodes[0].features._platform, AzurePlatform): # The test VM was created by LISA + self._resource_group_name = node_context.resource_group_name + self._vm_name = node_context.vm_name + self._vm_ip_address = environment.nodes[0].connection_info['address'] + self._test_nodes = [_TestNode(self._vm_name, self._vm_ip_address)] + else: # An existing VM/VMSS was passed as argument to the runbook + self._resource_group_name = variables["resource_group_name"] + if variables["vm_name"] != "": + self._vm_name = variables["vm_name"] + self._vm_ip_address = environment.nodes[0].connection_info['address'] + self._test_nodes = [_TestNode(self._vm_name, self._vm_ip_address)] + else: + self._vmss_name = variables["vmss_name"] + self._test_nodes = [_TestNode(node.name, node.connection_info['address']) for node in environment.nodes.list()] @staticmethod def _get_log_path(variables: Dict[str, Any], lisa_log_path: str) -> Path: @@ -188,18 +271,12 @@ def _get_log_path(variables: 
Dict[str, Any], lisa_log_path: str) -> Path: def _get_working_directory(lisa_working_path: str) -> Path: # LISA's "working_path" has a value similar to # "<--working_path>/20230322/20230322-194430-287/tests/20230322-194451-333-agent_test_suite - # where "<--working_path>" is the value given to the --working_path command line argument. Create the working for + # where "<--working_path>" is the value given to the --working_path command line argument. Create the working directory for # the AgentTestSuite as # "<--working_path>/20230322/20230322-194430-287/waagent # This directory will be unique for each execution of the runbook ("20230322-194430" is the timestamp and "287" is a # unique ID per execution) - return Path(lisa_working_path).parent.parent / "waagent" - - @property - def context(self): - if self.__context is None: - raise Exception("The context for the AgentTestSuite has not been initialized") - return self.__context + return Path(lisa_working_path).parent.parent/"waagent" # # Test suites within the same runbook may be executed concurrently, and setup needs to be done only once. 
@@ -217,13 +294,13 @@ def _create_working_directory(self) -> None: self._working_directory_lock.acquire() try: - if not self.context.working_directory.exists(): - log.info("Creating working directory: %s", self.context.working_directory) - self.context.working_directory.mkdir(parents=True) + if not self._working_directory.exists(): + log.info("Creating working directory: %s", self._working_directory) + self._working_directory.mkdir(parents=True) finally: self._working_directory_lock.release() - def _setup(self) -> None: + def _setup_test_run(self) -> None: """ Prepares the test suite for execution (currently, it just builds the agent package) @@ -232,17 +309,56 @@ def _setup(self) -> None: self._setup_lock.acquire() try: - log.info("") - log.info("**************************************** [Build] ****************************************") - log.info("") - completed: Path = self.context.working_directory/"completed" + completed: Path = self._working_directory / "completed" if completed.exists(): log.info("Found %s. 
Build has already been done, skipping.", completed) return - self.context.lisa_log.info("Building test agent") - self._build_agent_package() + log.info("") + log.info("********************************** [Preparing Test Run] **********************************") + log.info("") + + self._lisa_log.info("Building agent package to %s", self._test_agent_package_path) + log.info("Building agent package to %s", self._test_agent_package_path) + makepkg.run(agent_family="Test", output_directory=str(self._working_directory), log=log) + if not self._test_agent_package_path.exists(): # the target path is created by makepkg, ensure we are using the correct value + raise Exception(f"The test Agent package was not created at the expected path {self._test_agent_package_path}") + + # + # Ensure that Pypy (both x64 and ARM) has been downloaded to the local machine; it is pre-downloaded to /tmp on + # the container image used for Azure Pipelines runs, but for developer runs it may need to be downloaded. + # + for pypy in [self._pypy_x64_path, self._pypy_arm64_path]: + if pypy.exists(): + log.info("Found Pypy at %s", pypy) + else: + pypy_download = f"https://dcrdata.blob.core.windows.net/python/{pypy.name}" + self._lisa_log.info("Downloading %s to %s", pypy_download, pypy) + log.info("Downloading %s to %s", pypy_download, pypy) + run_command(["wget", pypy_download, "-O", pypy]) + + # + # Create a tarball with the tools we need to copy to the test node. 
The tarball includes two directories: + # + # * bin - Executables file (Bash and Python scripts) + # * lib - Library files (Python modules) + # + self._lisa_log.info("Creating %s with the tools needed on the test node", self._test_tools_tarball_path) + log.info("Creating %s with the tools needed on the test node", self._test_tools_tarball_path) + log.info("Adding orchestrator/scripts") + command = "cd {0} ; tar cf {1} --transform='s,^,bin/,' *".format(self._test_source_directory/"orchestrator"/"scripts", self._test_tools_tarball_path) + log.info("%s", command) + run_command(command, shell=True) + log.info("Adding tests/scripts") + command = "cd {0} ; tar rf {1} --transform='s,^,bin/,' *".format(self._test_source_directory/"tests"/"scripts", self._test_tools_tarball_path) + log.info("%s", command) + run_command(command, shell=True) + log.info("Adding tests/lib") + command = "cd {0} ; tar rf {1} --transform='s,^,lib/,' --exclude=__pycache__ tests_e2e/tests/lib".format(self._test_source_directory.parent, self._test_tools_tarball_path) + log.info("%s", command) + run_command(command, shell=True) + log.info("Contents of %s:\n%s", self._test_tools_tarball_path, run_command(['tar', 'tvf', str(self._test_tools_tarball_path)])) log.info("Completed setup, creating %s", completed) completed.touch() @@ -250,163 +366,113 @@ def _setup(self) -> None: finally: self._setup_lock.release() - def _build_agent_package(self) -> None: - """ - Builds the agent package and returns the path to the package. 
- """ - log.info("Building agent package to %s", self.context.working_directory) - - makepkg.run(agent_family="Test", output_directory=str(self.context.working_directory), log=log) - - package_path: Path = self._get_agent_package_path() - if not package_path.exists(): - raise Exception(f"Can't find the agent package at {package_path}") - - log.info("Built agent package as %s", package_path) - - def _get_agent_package_path(self) -> Path: + def _clean_up(self, success: bool) -> None: """ - Returns the path to the agent package. + Cleans up any items created by the test suite run. """ - return self.context.working_directory/"eggs"/f"WALinuxAgent-{AGENT_VERSION}.zip" + if self._delete_scale_set: + if self._keep_environment == KeepEnvironment.Always: + log.info("Won't delete the scale set %s, per the test suite configuration.", self._vmss_name) + elif self._keep_environment == KeepEnvironment.No or self._keep_environment == KeepEnvironment.Failed and success: + try: + self._lisa_log.info("Deleting resource group containing the test VMSS: %s", self._resource_group_name) + resource_group = ResourceGroupClient(cloud=self._cloud, location=self._location, subscription=self._subscription_id, name=self._resource_group_name) + resource_group.delete() + except Exception as error: # pylint: disable=broad-except + log.warning("Error deleting resource group %s: %s", self._resource_group_name, error) - def _clean_up(self) -> None: + def _setup_test_nodes(self) -> None: """ - Cleans up any leftovers from the test suite run. Currently just an empty placeholder for future use. 
+ Prepares the test nodes for execution of the test suite (installs tools and the test agent, etc) """ + install_test_agent = self._test_suites[0].install_test_agent # All suites in the environment have the same value for install_test_agent - def _setup_node(self, install_test_agent: bool) -> None: - """ - Prepares the remote node for executing the test suite (installs tools and the test agent, etc) - """ - self.context.lisa_log.info("Setting up test node") log.info("") - log.info("************************************** [Node Setup] **************************************") + log.info("************************************ [Test Nodes Setup] ************************************") log.info("") - log.info("Test Node: %s", self.context.vm.name) - log.info("IP Address: %s", self.context.vm_ip_address) - log.info("Resource Group: %s", self.context.vm.resource_group) - log.info("") - - # - # Ensure that the correct version (x84 vs ARM64) Pypy has been downloaded; it is pre-downloaded to /tmp on the container image - # used for Azure Pipelines runs, but for developer runs it may need to be downloaded. 
- # - if self.context.ssh_client.get_architecture() == "aarch64": - pypy_path = Path("/tmp/pypy3.7-arm64.tar.bz2") - pypy_download = "https://dcrdata.blob.core.windows.net/python/pypy3.7-arm64.tar.bz2" - else: - pypy_path = Path("/tmp/pypy3.7-x64.tar.bz2") - pypy_download = "https://dcrdata.blob.core.windows.net/python/pypy3.7-x64.tar.bz2" - if pypy_path.exists(): - log.info("Found Pypy at %s", pypy_path) - else: - log.info("Downloading %s to %s", pypy_download, pypy_path) - run_command(["wget", pypy_download, "-O", pypy_path]) - - # - # Cleanup the test node (useful for developer runs) - # - log.info('Preparing the test node for setup') - # Note that removing lib requires sudo, since a Python cache may have been created by tests using sudo - self.context.ssh_client.run_command("rm -rvf ~/{bin,lib,tmp}", use_sudo=True) - - # - # Copy Pypy and the test Agent to the test node - # - target_path = Path("~")/"tmp" - self.context.ssh_client.run_command(f"mkdir {target_path}") - log.info("Copying %s to %s:%s", pypy_path, self.context.node.name, target_path) - self.context.ssh_client.copy_to_node(pypy_path, target_path) - agent_package_path: Path = self._get_agent_package_path() - log.info("Copying %s to %s:%s", agent_package_path, self.context.node.name, target_path) - self.context.ssh_client.copy_to_node(agent_package_path, target_path) - - # tar commands sometimes fail with 'tar: Unexpected EOF in archive' error. Retry tarball creation, copy, and - # extraction if we hit this error - tar_retries = 3 - while tar_retries > 0: - try: - # - # Create a tarball with the files we need to copy to the test node. The tarball includes two directories: - # - # * bin - Executables file (Bash and Python scripts) - # * lib - Library files (Python modules) - # - # After extracting the tarball on the test node, 'bin' will be added to PATH and PYTHONPATH will be set to 'lib'. 
- # - # Note that executables are placed directly under 'bin', while the path for Python modules is preserved under 'lib. - # - tarball_path: Path = Path("/tmp/waagent.tar") - log.info("Creating %s with the files need on the test node", tarball_path) - log.info("Adding orchestrator/scripts") - command = "cd {0} ; tar cvf {1} --transform='s,^,bin/,' *".format(self.context.test_source_directory/"orchestrator"/"scripts", str(tarball_path)) - log.info("%s\n%s", command, run_command(command, shell=True)) - log.info("Adding tests/scripts") - command = "cd {0} ; tar rvf {1} --transform='s,^,bin/,' *".format(self.context.test_source_directory/"tests"/"scripts", str(tarball_path)) - log.info("%s\n%s", command, run_command(command, shell=True)) - log.info("Adding tests/lib") - command = "cd {0} ; tar rvf {1} --transform='s,^,lib/,' --exclude=__pycache__ tests_e2e/tests/lib".format(self.context.test_source_directory.parent, str(tarball_path)) - log.info("%s\n%s", command, run_command(command, shell=True)) - log.info("Contents of %s:\n\n%s", tarball_path, run_command(['tar', 'tvf', str(tarball_path)])) - - # - # Copy the tarball to the test node - # - log.info("Copying %s to %s:%s", tarball_path, self.context.node.name, target_path) - self.context.ssh_client.copy_to_node(tarball_path, target_path) - - # - # Extract the tarball and execute the install scripts - # - log.info('Installing tools on the test node') - command = f"tar xvf {target_path/tarball_path.name} && ~/bin/install-tools" - log.info("Remote command [%s] completed:\n%s", command, self.context.ssh_client.run_command(command)) - - # Tarball creation and extraction was successful - no need to retry - tar_retries = 0 - - except CommandError as error: - if "tar: Unexpected EOF in archive" in error.stderr: - tar_retries -= 1 - # Log the error with traceback to see which tar operation failed - log.info(f"Tarball creation or extraction failed: \n{error}") - # Retry tar operations - if tar_retries > 0: - log.info("Retrying 
tarball creation and extraction...") - else: - raise Exception(f"Unexpected error when creating or extracting tarball during node setup: {error}") - - if self.context.is_vhd: - log.info("Using a VHD; will not install the Test Agent.") - elif not install_test_agent: - log.info("Will not install the Test Agent per the test suite configuration.") - else: - log.info("Installing the Test Agent on the test node") - command = f"install-agent --package ~/tmp/{agent_package_path.name} --version {AGENT_VERSION}" - log.info("%s\n%s", command, self.context.ssh_client.run_command(command, use_sudo=True)) - - log.info("Completed test node setup") + for node in self._test_nodes: + self._lisa_log.info(f"Setting up test node {node}") + log.info("Test Node: %s", node.name) + log.info("IP Address: %s", node.ip_address) + log.info("") - def _collect_node_logs(self) -> None: + ssh_client = SshClient(ip_address=node.ip_address, username=self._user, identity_file=Path(self._identity_file)) + + # + # Cleanup the test node (useful for developer runs) + # + log.info('Preparing the test node for setup') + # Note that removing lib requires sudo, since a Python cache may have been created by tests using sudo + ssh_client.run_command("rm -rvf ~/{bin,lib,tmp}", use_sudo=True) + + # + # Copy Pypy, the test Agent, and the test tools to the test node + # + ssh_client = SshClient(ip_address=node.ip_address, username=self._user, identity_file=Path(self._identity_file)) + if ssh_client.get_architecture() == "aarch64": + pypy_path = self._pypy_arm64_path + else: + pypy_path = self._pypy_x64_path + target_path = Path("~")/"tmp" + ssh_client.run_command(f"mkdir {target_path}") + log.info("Copying %s to %s:%s", pypy_path, node.name, target_path) + ssh_client.copy_to_node(pypy_path, target_path) + log.info("Copying %s to %s:%s", self._test_agent_package_path, node.name, target_path) + ssh_client.copy_to_node(self._test_agent_package_path, target_path) + log.info("Copying %s to %s:%s", 
self._test_tools_tarball_path, node.name, target_path) + ssh_client.copy_to_node(self._test_tools_tarball_path, target_path) + + # + # Extract the tarball with the test tools. The tarball includes two directories: + # + # * bin - Executables file (Bash and Python scripts) + # * lib - Library files (Python modules) + # + # After extracting the tarball on the test node, 'bin' will be added to PATH and PYTHONPATH will be set to 'lib'. + # + # Note that executables are placed directly under 'bin', while the path for Python modules is preserved under 'lib. + # + log.info('Installing tools on the test node') + command = f"tar xvf {target_path/self._test_tools_tarball_path.name} && ~/bin/install-tools" + log.info("Remote command [%s] completed:\n%s", command, ssh_client.run_command(command)) + + if self._is_vhd: + log.info("Using a VHD; will not install the Test Agent.") + elif not install_test_agent: + log.info("Will not install the Test Agent per the test suite configuration.") + else: + log.info("Installing the Test Agent on the test node") + command = f"install-agent --package ~/tmp/{self._test_agent_package_path.name} --version {AGENT_VERSION}" + log.info("%s\n%s", command, ssh_client.run_command(command, use_sudo=True)) + + log.info("Completed test node setup") + + def _collect_logs_from_test_nodes(self) -> None: """ - Collects the test logs from the remote machine and copies them to the local machine + Collects the test logs from the test nodes and copies them to the local machine """ - try: - # Collect the logs on the test machine into a compressed tarball - self.context.lisa_log.info("Collecting logs on test node") - log.info("Collecting logs on test node") - stdout = self.context.ssh_client.run_command("collect-logs", use_sudo=True) - log.info(stdout) - - # Copy the tarball to the local logs directory - remote_path = "/tmp/waagent-logs.tgz" - local_path = self.context.log_path/'{0}.tgz'.format(self.context.environment_name) - log.info("Copying %s:%s to %s", 
self.context.node.name, remote_path, local_path) - self.context.ssh_client.copy_from_node(remote_path, local_path) - - except: # pylint: disable=bare-except - log.exception("Failed to collect logs from the test machine") + for node in self._test_nodes: + node_name = node.name + ssh_client = SshClient(ip_address=node.ip_address, username=self._user, identity_file=Path(self._identity_file)) + try: + # Collect the logs on the test machine into a compressed tarball + self._lisa_log.info("Collecting logs on test node %s", node_name) + log.info("Collecting logs on test node %s", node_name) + stdout = ssh_client.run_command("collect-logs", use_sudo=True) + log.info(stdout) + + # Copy the tarball to the local logs directory + tgz_name = self._environment_name + if len(self._test_nodes) > 1: + # Append instance of scale set to the end of tarball name + tgz_name += '_' + node_name.split('_')[-1] + remote_path = "/tmp/waagent-logs.tgz" + local_path = self._log_path / '{0}.tgz'.format(tgz_name) + log.info("Copying %s:%s to %s", node_name, remote_path, local_path) + ssh_client.copy_from_node(remote_path, local_path) + + except: # pylint: disable=bare-except + log.exception("Failed to collect logs from the test machine") # NOTES: # @@ -418,89 +484,81 @@ def _collect_node_logs(self) -> None: # # W0621: Redefining name 'log' from outer scope (line 53) (redefined-outer-name) @TestCaseMetadata(description="", priority=0, requirement=simple_requirement(environment_status=EnvironmentStatus.Deployed)) - def main(self, node: Node, environment: Environment, variables: Dict[str, Any], working_path: str, log_path: str, log: Logger): # pylint: disable=redefined-outer-name + def main(self, environment: Environment, variables: Dict[str, Any], working_path: str, log_path: str, log: Logger): # pylint: disable=redefined-outer-name """ Entry point from LISA """ - self._initialize(node, variables, working_path, log_path, log) - self._execute(environment, variables) + self._initialize(environment, 
variables, working_path, log_path, log) + self._execute() + + def _execute(self) -> None: + unexpected_error = False + test_suite_success = True - def _execute(self, environment: Environment, variables: Dict[str, Any]): - """ - Executes each of the AgentTests included in the "c_test_suites" variable (which is generated by the AgentTestSuitesCombinator). - """ # Set the thread name to the name of the environment. The thread name is added to each item in LISA's log. - with _set_thread_name(self.context.environment_name): - log_path: Path = self.context.log_path/f"env-{self.context.environment_name}.log" + with set_thread_name(self._environment_name): + log_path: Path = self._log_path / f"env-{self._environment_name}.log" with set_current_thread_log(log_path): start_time: datetime.datetime = datetime.datetime.now() - success = True try: # Log the environment's name and the variables received from the runbook (note that we need to expand the names of the test suites) - log.info("LISA Environment (for correlation with the LISA log): %s", environment.name) - log.info("Runbook variables:") - for name, value in variables.items(): - log.info(" %s: %s", name, value if name != 'c_test_suites' else [t.name for t in value]) + log.info("LISA Environment (for correlation with the LISA log): %s", self._lisa_environment_name) + log.info("Test suites: %s", [t.name for t in self._test_suites]) - test_suite_success = True + self._create_working_directory() - try: - self._create_working_directory() + if not self._skip_setup: + self._setup_test_run() - if not self.context.skip_setup: - self._setup() + try: + test_context = self._create_test_context() - if not self.context.skip_setup: - # pylint seems to think self.context.test_suites is not iterable. Suppressing this warning here and a few lines below, since - # its type is List[AgentTestSuite]. 
- # E1133: Non-iterable value self.context.test_suites is used in an iterating context (not-an-iterable) - install_test_agent = all([suite.install_test_agent for suite in self.context.test_suites]) # pylint: disable=E1133 + if not self._skip_setup: try: - self._setup_node(install_test_agent) + self._setup_test_nodes() except: test_suite_success = False raise - for suite in self.context.test_suites: # pylint: disable=E1133 + for suite in self._test_suites: log.info("Executing test suite %s", suite.name) - self.context.lisa_log.info("Executing Test Suite %s", suite.name) - test_suite_success = self._execute_test_suite(suite) and test_suite_success + self._lisa_log.info("Executing Test Suite %s", suite.name) + test_suite_success = self._execute_test_suite(suite, test_context) and test_suite_success finally: - collect = self.context.collect_logs - if collect == CollectLogs.Always or collect == CollectLogs.Failed and not test_suite_success: - self._collect_node_logs() + if self._collect_logs == CollectLogs.Always or self._collect_logs == CollectLogs.Failed and not test_suite_success: + self._collect_logs_from_test_nodes() except Exception as e: # pylint: disable=bare-except # Report the error and raise an exception to let LISA know that the test errored out. 
- success = False + unexpected_error = True log.exception("UNEXPECTED ERROR.") self._report_test_result( - self.context.environment_name, + self._environment_name, "Unexpected Error", TestStatus.FAILED, start_time, message="UNEXPECTED ERROR.", add_exception_stack_trace=True) - raise Exception(f"[{self.context.environment_name}] Unexpected error in AgentTestSuite: {e}") + raise Exception(f"[{self._environment_name}] Unexpected error in AgentTestSuite: {e}") finally: - self._clean_up() - if not success: + self._clean_up(test_suite_success and not unexpected_error) + if unexpected_error: self._mark_log_as_failed() - def _execute_test_suite(self, suite: TestSuiteInfo) -> bool: + def _execute_test_suite(self, suite: TestSuiteInfo, test_context: AgentTestContext) -> bool: """ Executes the given test suite and returns True if all the tests in the suite succeeded. """ suite_name = suite.name - suite_full_name = f"{suite_name}-{self.context.environment_name}" + suite_full_name = f"{suite_name}-{self._environment_name}" suite_start_time: datetime.datetime = datetime.datetime.now() - with _set_thread_name(suite_full_name): # The thread name is added to the LISA log - log_path: Path = self.context.log_path/f"{suite_full_name}.log" + with set_thread_name(suite_full_name): # The thread name is added to the LISA log + log_path: Path = self._log_path / f"{suite_full_name}.log" with set_current_thread_log(log_path): suite_success: bool = True @@ -518,16 +576,16 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool: test_start_time: datetime.datetime = datetime.datetime.now() log.info("******** Executing %s", test.name) - self.context.lisa_log.info("Executing test %s", test_full_name) + self._lisa_log.info("Executing test %s", test_full_name) test_success: bool = True - test_instance = test.test_class(self.context) + test_instance = test.test_class(test_context) try: test_instance.run() summary.append(f"[Passed] {test.name}") log.info("******** [Passed] %s", test.name) - 
self.context.lisa_log.info("[Passed] %s", test_full_name) + self._lisa_log.info("[Passed] %s", test_full_name) self._report_test_result( suite_full_name, test.name, @@ -536,7 +594,7 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool: except TestSkipped as e: summary.append(f"[Skipped] {test.name}") log.info("******** [Skipped] %s: %s", test.name, e) - self.context.lisa_log.info("******** [Skipped] %s", test_full_name) + self._lisa_log.info("******** [Skipped] %s", test_full_name) self._report_test_result( suite_full_name, test.name, @@ -547,7 +605,7 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool: test_success = False summary.append(f"[Failed] {test.name}") log.error("******** [Failed] %s: %s", test.name, e) - self.context.lisa_log.error("******** [Failed] %s", test_full_name) + self._lisa_log.error("******** [Failed] %s", test_full_name) self._report_test_result( suite_full_name, test.name, @@ -559,7 +617,7 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool: summary.append(f"[Failed] {test.name}") message = f"UNEXPECTED ERROR IN [{e.command}] {e.stderr}\n{e.stdout}" log.error("******** [Failed] %s: %s", test.name, message) - self.context.lisa_log.error("******** [Failed] %s", test_full_name) + self._lisa_log.error("******** [Failed] %s", test_full_name) self._report_test_result( suite_full_name, test.name, @@ -570,7 +628,7 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool: test_success = False summary.append(f"[Error] {test.name}") log.exception("UNEXPECTED ERROR IN %s", test.name) - self.context.lisa_log.exception("UNEXPECTED ERROR IN %s", test_full_name) + self._lisa_log.exception("UNEXPECTED ERROR IN %s", test_full_name) self._report_test_result( suite_full_name, test.name, @@ -617,61 +675,115 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool: if not suite_success: self._mark_log_as_failed() - suite_success = suite_success and self._check_agent_log(ignore_error_rules, before_timestamp) + 
suite_success = suite_success and self._check_agent_log_on_test_nodes(ignore_error_rules, before_timestamp) return suite_success - def _check_agent_log(self, ignore_error_rules: List[Dict[str, Any]], before_timestamp: datetime) -> bool: + def _check_agent_log_on_test_nodes(self, ignore_error_rules: List[Dict[str, Any]], before_timestamp: datetime) -> bool: """ - Checks the agent log for errors; returns true on success (no errors int the log) + Checks the agent log on the test nodes for errors; returns true on success (no errors in the logs) """ - start_time: datetime.datetime = datetime.datetime.now() + success: bool = True - try: - self.context.lisa_log.info("Checking agent log on the test node") - log.info("Checking agent log on the test node") - - output = self.context.ssh_client.run_command("check-agent-log.py -j") - errors = json.loads(output, object_hook=AgentLogRecord.from_dictionary) - - # Individual tests may have rules to ignore known errors; filter those out - if len(ignore_error_rules) > 0: - new = [] - for e in errors: - # Ignore errors that occurred before the timestamp - if e.timestamp < before_timestamp: - continue - if not AgentLog.matches_ignore_rule(e, ignore_error_rules): - new.append(e) - errors = new - - if len(errors) == 0: - # If no errors, we are done; don't create a log or test result. 
- log.info("There are no errors in the agent log") - return True - - message = f"Detected {len(errors)} error(s) in the agent log" - self.context.lisa_log.error(message) - log.error("%s:\n\n%s\n", message, '\n'.join(['\t\t' + e.text.replace('\n', '\n\t\t') for e in errors])) - self._mark_log_as_failed() - - self._report_test_result( - self.context.environment_name, - "CheckAgentLog", - TestStatus.FAILED, - start_time, - message=message + ' - First few errors:\n' + '\n'.join([e.text for e in errors[0:3]])) - except: # pylint: disable=bare-except - log.exception("Error checking agent log") - self._report_test_result( - self.context.environment_name, - "CheckAgentLog", - TestStatus.FAILED, - start_time, - "Error checking agent log", - add_exception_stack_trace=True) - - return False + for node in self._test_nodes: + node_name = node.name + ssh_client = SshClient(ip_address=node.ip_address, username=self._user, identity_file=Path(self._identity_file)) + + test_result_name = self._environment_name + if len(self._test_nodes) > 1: + # If there are multiple test nodes, as in a scale set, append the name of the node to the name of the result + test_result_name += '_' + node_name.split('_')[-1] + + start_time: datetime.datetime = datetime.datetime.now() + + try: + self._lisa_log.info("Checking agent log on the test node %s", node_name) + log.info("Checking agent log on the test node %s", node_name) + + output = ssh_client.run_command("check-agent-log.py -j") + errors = json.loads(output, object_hook=AgentLogRecord.from_dictionary) + + # Individual tests may have rules to ignore known errors; filter those out + if len(ignore_error_rules) > 0: + new = [] + for e in errors: + # Ignore errors that occurred before the timestamp + if e.timestamp < before_timestamp: + continue + if not AgentLog.matches_ignore_rule(e, ignore_error_rules): + new.append(e) + errors = new + + if len(errors) == 0: + # If no errors, we are done; don't create a log or test result. 
+ log.info("There are no errors in the agent log") + else: + message = f"Detected {len(errors)} error(s) in the agent log on {node_name}" + self._lisa_log.error(message) + log.error("%s:\n\n%s\n", message, '\n'.join(['\t\t' + e.text.replace('\n', '\n\t\t') for e in errors])) + self._mark_log_as_failed() + success = False + + self._report_test_result( + test_result_name, + "CheckAgentLog", + TestStatus.FAILED, + start_time, + message=message + ' - First few errors:\n' + '\n'.join([e.text for e in errors[0:3]])) + except: # pylint: disable=bare-except + log.exception("Error checking agent log on %s", node_name) + success = False + self._report_test_result( + test_result_name, + "CheckAgentLog", + TestStatus.FAILED, + start_time, + "Error checking agent log", + add_exception_stack_trace=True) + + return success + + def _create_test_context(self,) -> AgentTestContext: + """ + Creates the context for the test run. + """ + if self._vm_name is not None: + self._lisa_log.info("Creating test context for virtual machine") + vm: VirtualMachineClient = VirtualMachineClient( + cloud=self._cloud, + location=self._location, + subscription=self._subscription_id, + resource_group=self._resource_group_name, + name=self._vm_name) + return AgentVmTestContext( + working_directory=self._working_directory, + vm=vm, + ip_address=self._vm_ip_address, + username=self._user, + identity_file=self._identity_file) + else: + log.info("Creating test context for scale set") + if self._create_scale_set: + self._create_test_scale_set() + else: + log.info("Using existing scale set %s", self._vmss_name) + + scale_set = VirtualMachineScaleSetClient( + cloud=self._cloud, + location=self._location, + subscription=self._subscription_id, + resource_group=self._resource_group_name, + name=self._vmss_name) + + # If we created the scale set, fill up the test nodes + if self._create_scale_set: + self._test_nodes = [_TestNode(name=i.instance_name, ip_address=i.ip_address) for i in 
scale_set.get_instances_ip_address()] + + return AgentVmssTestContext( + working_directory=self._working_directory, + vmss=scale_set, + username=self._user, + identity_file=self._identity_file) @staticmethod def _mark_log_as_failed(): @@ -715,4 +827,55 @@ def _report_test_result( notifier.notify(msg) + def _create_test_scale_set(self) -> None: + """ + Creates a scale set for the test run + """ + self._lisa_log.info("Creating resource group %s", self._resource_group_name) + resource_group = ResourceGroupClient(cloud=self._cloud, location=self._location, subscription=self._subscription_id, name=self._resource_group_name) + resource_group.create() + self._delete_scale_set = True + + self._lisa_log.info("Creating scale set %s", self._vmss_name) + log.info("Creating scale set %s", self._vmss_name) + template, parameters = self._get_scale_set_deployment_template(self._vmss_name) + resource_group.deploy_template(template, parameters) + + def _get_scale_set_deployment_template(self, scale_set_name: str) -> Tuple[Dict[str, Any], Dict[str, Any]]: + """ + Returns the deployment template for scale sets and its parameters + """ + def read_file(path: str) -> str: + with open(path, "r") as file_: + return file_.read().strip() + + publisher, offer, sku, version = self._image.replace(":", " ").split(' ') + + template: Dict[str, Any] = json.loads(read_file(str(self._test_source_directory/"orchestrator"/"templates/vmss.json"))) + + # Scale sets for some images need to be deployed with 'plan' property + plan_required_images = ["almalinux", "kinvolk", "erockyenterprisesoftwarefoundationinc1653071250513"] + if publisher in plan_required_images: + resources: List[Dict[str, Any]] = template.get('resources') + for resource in resources: + if resource.get('type') == "Microsoft.Compute/virtualMachineScaleSets": + resource["plan"] = { + "name": "[parameters('sku')]", + "product": "[parameters('offer')]", + "publisher": "[parameters('publisher')]" + } + + 
AddNetworkSecurityGroup().update(template, is_lisa_template=False) + + return template, { + "username": {"value": self._user}, + "sshPublicKey": {"value": read_file(f"{self._identity_file}.pub")}, + "vmName": {"value": scale_set_name}, + "publisher": {"value": publisher}, + "offer": {"value": offer}, + "sku": {"value": sku}, + "version": {"value": version} + } + + diff --git a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py index be72cc4c70..b3d84a1211 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py @@ -1,9 +1,12 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. +import datetime import logging import random import re +import traceback import urllib.parse +import uuid from dataclasses import dataclass, field from typing import Any, Dict, List, Optional, Type @@ -14,74 +17,79 @@ # Disable those warnings, since 'lisa' is an external, non-standard, dependency # E0401: Unable to import 'lisa' (import-error) # etc -from lisa import schema # pylint: disable=E0401 +from lisa import notifier, schema # pylint: disable=E0401 from lisa.combinator import Combinator # pylint: disable=E0401 +from lisa.messages import TestStatus, TestResultMessage # pylint: disable=E0401 from lisa.util import field_metadata # pylint: disable=E0401 from tests_e2e.orchestrator.lib.agent_test_loader import AgentTestLoader, VmImageInfo, TestSuiteInfo +from tests_e2e.tests.lib.logging import set_thread_name +from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient +from tests_e2e.tests.lib.virtual_machine_scale_set_client import VirtualMachineScaleSetClient @dataclass_json() @dataclass class AgentTestSuitesCombinatorSchema(schema.Combinator): - test_suites: str = field( - default_factory=str, metadata=field_metadata(required=True) - ) - cloud: str = field( - default_factory=str, 
metadata=field_metadata(required=True) - ) - location: str = field( - default_factory=str, metadata=field_metadata(required=True) - ) - image: str = field( - default_factory=str, metadata=field_metadata(required=False) - ) - vm_size: str = field( - default_factory=str, metadata=field_metadata(required=False) - ) - vm_name: str = field( - default_factory=str, metadata=field_metadata(required=False) - ) + """ + Defines the parameters passed to the combinator from the runbook. + + The runbook is a static document and always passes all these parameters to the combinator, so they are all + marked as required. Optional parameters can pass an empty value to indicate that they are not specified. + """ + cloud: str = field(default_factory=str, metadata=field_metadata(required=True)) + identity_file: str = field(default_factory=str, metadata=field_metadata(required=True)) + image: str = field(default_factory=str, metadata=field_metadata(required=True)) + keep_environment: str = field(default_factory=str, metadata=field_metadata(required=True)) + location: str = field(default_factory=str, metadata=field_metadata(required=True)) + resource_group_name: str = field(default_factory=str, metadata=field_metadata(required=True)) + subscription_id: str = field(default_factory=str, metadata=field_metadata(required=True)) + test_suites: str = field(default_factory=str, metadata=field_metadata(required=True)) + user: str = field(default_factory=str, metadata=field_metadata(required=True)) + vm_name: str = field(default_factory=str, metadata=field_metadata(required=True)) + vm_size: str = field(default_factory=str, metadata=field_metadata(required=True)) + vmss_name: str = field(default_factory=str, metadata=field_metadata(required=True)) class AgentTestSuitesCombinator(Combinator): """ - The "agent_test_suites" combinator returns a list of variables that specify the environments (i.e. 
test VMs) that the agent - test suites must be executed on: - - * c_env_name: Unique name for the environment, e.g. "0001-com-ubuntu-server-focal-20_04-lts-westus2" - * c_marketplace_image: e.g. "Canonical UbuntuServer 18.04-LTS latest", - * c_location: e.g. "westus2", - * c_vm_size: e.g. "Standard_D2pls_v5" - * c_vhd: e.g "https://rhel.blob.core.windows.net/images/RHEL_8_Standard-8.3.202006170423.vhd?se=..." - * c_test_suites: e.g. [AgentBvt, FastTrack] - - (c_marketplace_image, c_location, c_vm_size) and vhd are mutually exclusive and define the environment (i.e. the test VM) - in which the test will be executed. c_test_suites defines the test suites that should be executed in that - environment. - - The 'vm_name' runbook parameter can be used to execute the test suites on an existing VM. In that case, the combinator - generates a single item with these variables: - - * c_env_name: Name for the environment, same as vm_name - * c_vm_name: Name of the test VM - * c_location: Location of the test VM e.g. "westus2", - * c_test_suites: e.g. [AgentBvt, FastTrack] + The "agent_test_suites" combinator returns a list of variables that specify the test environments (i.e. test VMs) that the + test suites must be executed on. These variables are prefixed with "c_" to distinguish them from the command line arguments + of the runbook. See the runbook definition for details on each of those variables. + + The combinator can generate environments for VMs created and managed by LISA, Scale Sets created and managed by the AgentTestSuite, + or existing VMs or Scale Sets. 
""" def __init__(self, runbook: AgentTestSuitesCombinatorSchema) -> None: super().__init__(runbook) if self.runbook.cloud not in self._DEFAULT_LOCATIONS: raise Exception(f"Invalid cloud: {self.runbook.cloud}") - if self.runbook.vm_name != '' and (self.runbook.image != '' or self.runbook.vm_size != ''): - raise Exception("Invalid runbook parameters: When 'vm_name' is specified, 'image' and 'vm_size' should not be specified.") + if self.runbook.vm_name != '' and self.runbook.vmss_name != '': + raise Exception("Invalid runbook parameters: 'vm_name' and 'vmss_name' are mutually exclusive.") if self.runbook.vm_name != '': - self._environments = self.create_environment_for_existing_vm() - else: - self._environments = self.create_environment_list() - self._index = 0 - + if self.runbook.image != '' or self.runbook.vm_size != '': + raise Exception("Invalid runbook parameters: The 'vm_name' parameter indicates an existing VM, 'image' and 'vm_size' should not be specified.") + if self.runbook.resource_group_name == '': + raise Exception("Invalid runbook parameters: The 'vm_name' parameter indicates an existing VM, a 'resource_group_name' must be specified.") + + if self.runbook.vmss_name != '': + if self.runbook.image != '' or self.runbook.vm_size != '': + raise Exception("Invalid runbook parameters: The 'vmss_name' parameter indicates an existing VMSS, 'image' and 'vm_size' should not be specified.") + if self.runbook.resource_group_name == '': + raise Exception("Invalid runbook parameters: The 'vmss_name' parameter indicates an existing VMSS, a 'resource_group_name' must be specified.") + + self._log: logging.Logger = logging.getLogger("lisa") + + with set_thread_name("AgentTestSuitesCombinator"): + if self.runbook.vm_name != '': + self._environments = [self.create_existing_vm_environment()] + elif self.runbook.vmss_name != '': + self._environments = [self.create_existing_vmss_environment()] + else: + self._environments = self.create_environment_list() + self._index = 0 
@classmethod def type_name(cls) -> str: @@ -116,27 +124,9 @@ def _next(self) -> Optional[Dict[str, Any]]: "AzureUSGovernment": "usgovarizona", } - def create_environment_for_existing_vm(self) -> List[Dict[str, Any]]: - loader = AgentTestLoader(self.runbook.test_suites, self.runbook.cloud) - - environment: Dict[str, Any] = { - "c_env_name": self.runbook.vm_name, - "c_vm_name": self.runbook.vm_name, - "c_location": self.runbook.location, - "c_test_suites": loader.test_suites, - } - - log: logging.Logger = logging.getLogger("lisa") - log.info("******** Waagent: Settings for existing VM *****") - log.info("") - log.info("Settings for %s:\n%s\n", environment['c_env_name'], self._get_env_settings(environment)) - log.info("") - - return [environment] - def create_environment_list(self) -> List[Dict[str, Any]]: """ - Examines the test_suites specified in the runbook and returns a list of the environments (i.e. test VMs) that need to be + Examines the test_suites specified in the runbook and returns a list of the environments (i.e. test VMs or scale sets) that need to be created in order to execute these suites. 
Note that if the runbook provides an 'image', 'location', or 'vm_size', those values override any values provided in the @@ -150,89 +140,261 @@ def create_environment_list(self) -> List[Dict[str, Any]]: runbook_images = self._get_runbook_images(loader) skip_test_suites: List[str] = [] - for suite_info in loader.test_suites: - if self.runbook.cloud in suite_info.skip_on_clouds: - skip_test_suites.append(suite_info.name) + for test_suite_info in loader.test_suites: + if self.runbook.cloud in test_suite_info.skip_on_clouds: + skip_test_suites.append(test_suite_info.name) continue if len(runbook_images) > 0: images_info: List[VmImageInfo] = runbook_images else: - images_info: List[VmImageInfo] = self._get_test_suite_images(suite_info, loader) + images_info: List[VmImageInfo] = self._get_test_suite_images(test_suite_info, loader) for image in images_info: # 'image.urn' can actually be the URL to a VHD if the runbook provided it in the 'image' parameter if self._is_vhd(image.urn): - c_marketplace_image = "" - c_vhd = image.urn - image_name = "vhd" + marketplace_image = "" + vhd = image.urn + image_name = urllib.parse.urlparse(vhd).path.split('/')[-1] # take the last fragment of the URL's path (e.g. "RHEL_8_Standard-8.3.202006170423.vhd") else: - c_marketplace_image = image.urn - c_vhd = "" + marketplace_image = image.urn + vhd = "" image_name = self._get_image_name(image.urn) - c_location: str = self._get_location(suite_info, image) - if c_location is None: + location: str = self._get_location(test_suite_info, image) + if location is None: continue - c_vm_size = self._get_vm_size(image) - - # Note: Disabling "W0640: Cell variable 'foo' defined in loop (cell-var-from-loop)". This is a false positive, the closure is OK - # to use, since create_environment() is called within the same iteration of the loop. 
- # pylint: disable=W0640 - def create_environment(c_env_name: str) -> Dict[str, Any]: - c_vm_tags = {} - if suite_info.template != '': - c_vm_tags["templates"] = suite_info.template - return { - "c_marketplace_image": c_marketplace_image, - "c_location": c_location, - "c_vm_size": c_vm_size, - "c_vhd": c_vhd, - "c_test_suites": [suite_info], - "c_env_name": c_env_name, - "c_marketplace_image_information_location": self._MARKETPLACE_IMAGE_INFORMATION_LOCATIONS[self.runbook.cloud], - "c_shared_resource_group_location": self._SHARED_RESOURCE_GROUP_LOCATIONS[self.runbook.cloud], - "c_vm_tags": c_vm_tags - } - # pylint: enable=W0640 - - if suite_info.owns_vm: - # create an environment for exclusive use by this suite - environments.append(create_environment(f"{image_name}-{suite_info.name}")) + vm_size = self._get_vm_size(image) + + if test_suite_info.owns_vm or not test_suite_info.install_test_agent: + # + # Create an environment for exclusive use by this suite + # + # TODO: Allow test suites that set 'install_test_agent' to False to share environments (we need to ensure that + # all the suites in the shared environment have the same value for 'install_test_agent') + # + if test_suite_info.executes_on_scale_set: + env = self.create_vmss_environment( + env_name=f"{image_name}-vmss-{test_suite_info.name}", + marketplace_image=marketplace_image, + location=location, + vm_size=vm_size, + test_suite_info=test_suite_info) + else: + env = self.create_vm_environment( + env_name=f"{image_name}-{test_suite_info.name}", + marketplace_image=marketplace_image, + vhd=vhd, + location=location, + vm_size=vm_size, + test_suite_info=test_suite_info) + environments.append(env) else: # add this suite to the shared environments - key: str = f"{image_name}-{c_location}" - env = shared_environments.get(key) + env_name: str = f"{image_name}-vmss-{location}" if test_suite_info.executes_on_scale_set else f"{image_name}-{location}" + env = shared_environments.get(env_name) if env is not None: - 
env["c_test_suites"].append(suite_info) - if suite_info.template != '': - vm_tags = env["c_vm_tags"] - if "templates" in vm_tags: - vm_tags["templates"] += ", " + suite_info.template - else: - vm_tags["templates"] = suite_info.template + env["c_test_suites"].append(test_suite_info) else: - shared_environments[key] = create_environment(key) + if test_suite_info.executes_on_scale_set: + # TODO: Add support for VHDs + if vhd != "": + raise Exception("VHDS are currently not supported on scale sets.") + env = self.create_vmss_environment( + env_name=env_name, + marketplace_image=marketplace_image, + location=location, + vm_size=vm_size, + test_suite_info=test_suite_info) + else: + env = self.create_vm_environment( + env_name=env_name, + marketplace_image=marketplace_image, + vhd=vhd, + location=location, + vm_size=vm_size, + test_suite_info=test_suite_info) + shared_environments[env_name] = env + + if test_suite_info.template != '': + vm_tags = env.get("vm_tags") + if vm_tags is not None: + if "templates" not in vm_tags: + vm_tags["templates"] = test_suite_info.template + else: + vm_tags["templates"] += "," + test_suite_info.template environments.extend(shared_environments.values()) if len(environments) == 0: raise Exception("No VM images were found to execute the test suites.") - log: logging.Logger = logging.getLogger("lisa") + # Log a summary of each environment and the suites that will be executed on it + format_suites = lambda suites: ", ".join([s.name for s in suites]) + summary = [f"{e['c_env_name']}: [{format_suites(e['c_test_suites'])}]" for e in environments] + summary.sort() + self._log.info("Executing tests on %d environments\n\n%s\n", len(environments), '\n'.join([f"\t{s}" for s in summary])) + if len(skip_test_suites) > 0: - log.info("") - log.info("Test suites skipped on %s:\n\n\t%s\n", self.runbook.cloud, '\n\t'.join(skip_test_suites)) - log.info("") - log.info("******** Waagent: Test Environments *****") - log.info("") - log.info("Will execute tests on 
%d environments:\n\n\t%s\n", len(environments), '\n\t'.join([env['c_env_name'] for env in environments])) - for env in environments: - log.info("Settings for %s:\n%s\n", env['c_env_name'], self._get_env_settings(env)) - log.info("") + self._log.info("Skipping test suites %s", skip_test_suites) return environments + def create_existing_vm_environment(self) -> Dict[str, Any]: + loader = AgentTestLoader(self.runbook.test_suites, self.runbook.cloud) + + vm: VirtualMachineClient = VirtualMachineClient( + cloud=self.runbook.cloud, + location=self.runbook.location, + subscription=self.runbook.subscription_id, + resource_group=self.runbook.resource_group_name, + name=self.runbook.vm_name) + + ip_address = vm.get_ip_address() + + return { + "c_env_name": self.runbook.vm_name, + "c_platform": [ + { + "type": "ready" + } + ], + "c_environment": { + "environments": [ + { + "nodes": [ + { + "type": "remote", + "name": self.runbook.vm_name, + "public_address": ip_address, + "public_port": 22, + "username": self.runbook.user, + "private_key_file": self.runbook.identity_file + } + ], + } + ] + }, + "c_location": self.runbook.location, + "c_test_suites": loader.test_suites, + } + + def create_existing_vmss_environment(self) -> Dict[str, Any]: + loader = AgentTestLoader(self.runbook.test_suites, self.runbook.cloud) + + vmss = VirtualMachineScaleSetClient( + cloud=self.runbook.cloud, + location=self.runbook.location, + subscription=self.runbook.subscription_id, + resource_group=self.runbook.resource_group_name, + name=self.runbook.vmss_name) + + ip_addresses = vmss.get_instances_ip_address() + + return { + "c_env_name": self.runbook.vmss_name, + "c_environment": { + "environments": [ + { + "nodes": [ + { + "type": "remote", + "name": i.instance_name, + "public_address": i.ip_address, + "public_port": 22, + "username": self.runbook.user, + "private_key_file": self.runbook.identity_file + } for i in ip_addresses + ], + } + ] + }, + "c_platform": [ + { + "type": "ready" + } + ], + 
"c_location": self.runbook.location, + "c_test_suites": loader.test_suites, + } + + def create_vm_environment(self, env_name: str, marketplace_image: str, vhd: str, location: str, vm_size: str, test_suite_info: TestSuiteInfo) -> Dict[str, Any]: + # + # Custom ARM templates (to create the test VMs) require special handling. These templates are processed by the azure_update_arm_template + # hook, which does not have access to the runbook variables. Instead, we use a dummy VM tag named "template" and pass the + # names of the custom templates in its value. The hook can then retrieve the value from the Platform object (see wiki for more details). + # We also use a dummy item, "vm_tags" in the environment dictionary in order to concatenate templates from multiple test suites when they + # share the same test environment. + # + vm_tags = {} + if test_suite_info.template != '': + vm_tags["templates"] = test_suite_info.template + return { + "c_platform": [ + { + "type": "azure", + "admin_username": self.runbook.user, + "admin_private_key_file": self.runbook.identity_file, + "keep_environment": self.runbook.keep_environment, + "azure": { + "deploy": True, + "cloud": self.runbook.cloud, + "marketplace_image_information_location": self._MARKETPLACE_IMAGE_INFORMATION_LOCATIONS[self.runbook.cloud], + "shared_resource_group_location": self._SHARED_RESOURCE_GROUP_LOCATIONS[self.runbook.cloud], + "subscription_id": self.runbook.subscription_id, + "wait_delete": False, + "vm_tags": vm_tags + }, + "requirement": { + "core_count": { + "min": 2 + }, + "azure": { + "marketplace": marketplace_image, + "vhd": vhd, + "location": location, + "vm_size": vm_size + } + } + } + ], + + "c_environment": None, + + "c_env_name": env_name, + "c_test_suites": [test_suite_info], + "c_location": location, + "c_image": marketplace_image, + "c_is_vhd": vhd != "", + "vm_tags": vm_tags + } + + def create_vmss_environment(self, env_name: str, marketplace_image: str, location: str, vm_size: str, 
test_suite_info: TestSuiteInfo) -> Dict[str, Any]: + return { + "c_platform": [ + { + "type": "ready" + } + ], + + "c_environment": { + "environments": [ + { + "nodes": [ + {"type": "local"} + ], + } + ] + }, + + "c_env_name": env_name, + "c_test_suites": [test_suite_info], + "c_location": location, + "c_image": marketplace_image, + "c_is_vhd": False, + "c_vm_size": vm_size + } + def _get_runbook_images(self, loader: AgentTestLoader) -> List[VmImageInfo]: """ Returns the images specified in the runbook, or an empty list if none are specified. @@ -322,6 +484,7 @@ def _get_vm_size(self, image: VmImageInfo) -> str: # Otherwise, set the size to empty and LISA will select an appropriate size. return "" + @staticmethod def _get_image_name(urn: str) -> str: """ @@ -332,11 +495,6 @@ def _get_image_name(urn: str) -> str: raise Exception(f"Invalid URN: {urn}") return f"{match.group('offer')}-{match.group('sku')}" - @staticmethod - def _get_env_settings(environment: Dict[str, Any]): - suite_names = [s.name for s in environment['c_test_suites']] - return '\n'.join([f"\t{name}: {value if name != 'c_test_suites' else suite_names}" for name, value in environment.items()]) - _URN = re.compile(r"(?P[^\s:]+)[\s:](?P[^\s:]+)[\s:](?P[^\s:]+)[\s:](?P[^\s:]+)") @staticmethod @@ -349,3 +507,38 @@ def _is_vhd(vhd: str) -> bool: # VHDs are given as URIs to storage; do some basic validation, not intending to be exhaustive. parsed = urllib.parse.urlparse(vhd) return parsed.scheme == 'https' and parsed.netloc != "" and parsed.path != "" + + @staticmethod + def _report_test_result( + suite_name: str, + test_name: str, + status: TestStatus, + start_time: datetime.datetime, + message: str = "", + add_exception_stack_trace: bool = False + ) -> None: + """ + Reports a test result to the junit notifier + """ + # The junit notifier requires an initial RUNNING message in order to register the test in its internal cache. 
+ msg: TestResultMessage = TestResultMessage() + msg.type = "AgentTestResultMessage" + msg.id_ = str(uuid.uuid4()) + msg.status = TestStatus.RUNNING + msg.suite_full_name = suite_name + msg.suite_name = msg.suite_full_name + msg.full_name = test_name + msg.name = msg.full_name + msg.elapsed = 0 + + notifier.notify(msg) + + # Now send the actual result. The notifier pipeline makes a deep copy of the message so it is OK to re-use the + # same object and just update a few fields. If using a different object, be sure that the "id_" is the same. + msg.status = status + msg.message = message + if add_exception_stack_trace: + msg.stacktrace = traceback.format_exc() + msg.elapsed = (datetime.datetime.now() - start_time).total_seconds() + + notifier.notify(msg) diff --git a/tests_e2e/orchestrator/lib/update_arm_template_hook.py b/tests_e2e/orchestrator/lib/update_arm_template_hook.py index 2ff910a9a7..fee943de14 100644 --- a/tests_e2e/orchestrator/lib/update_arm_template_hook.py +++ b/tests_e2e/orchestrator/lib/update_arm_template_hook.py @@ -46,7 +46,7 @@ def azure_update_arm_template(self, template: Any, environment: Environment) -> # Add the network security group for the test VM. This group includes a rule allowing SSH access from the current machine. # log.info("******** Waagent: Adding network security rule to the ARM template") - AddNetworkSecurityGroup().update(template) + AddNetworkSecurityGroup().update(template, is_lisa_template=True) # # Apply any template customizations provided by the tests. 
@@ -60,7 +60,7 @@ def azure_update_arm_template(self, template: Any, environment: Environment) -> for t in test_templates.split(","): update_arm_template = self._get_update_arm_template(t) - update_arm_template().update(template) + update_arm_template().update(template, is_lisa_template=True) _SOURCE_CODE_ROOT: Path = Path(tests_e2e.__path__[0]) diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index 3492e9c80c..a076264036 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -8,138 +8,187 @@ extension: - "./lib" variable: + # + # The test environments are generated dynamically by the AgentTestSuitesCombinator using the 'platform' and 'environment' variables. + # Most of the variables below are parameters for the combinator and/or the AgentTestSuite (marked as 'is_case_visible'), but a few of + # them, such as the runbook name and the SSH proxy variables, are handled by LISA. + # + # Many of these variables are optional, depending on the scenario. An empty values indicates that the variable has not been specified. + # + + # + # The name of the runbook, it is added as a prefix ("lisa-") to ARM resources created by the test run. + # + # Set the name to your email alias when doing developer runs. + # - name: name value: "WALinuxAgent" + is_case_visible: true + + # + # Test suites to execute + # + - name: test_suites + value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_update, agent_status, multi_config_ext, agent_cgroups, ext_cgroups, agent_firewall, ext_telemetry_pipeline" + # - # These variables define parameters handled by LISA. 
+ # Parameters used to create test VMs # - name: subscription_id value: "" - - name: user - value: "waagent" - - name: identity_file + is_case_visible: true + - name: cloud + value: "AzureCloud" + is_case_visible: true + - name: location value: "" - is_secret: true - - name: admin_password + - name: image value: "" - is_secret: true + - name: vm_size + value: "" + + # + # Whether to skip deletion of the test VMs after the test run completes. + # + # Possible values: always, no, failed + # - name: keep_environment value: "no" + is_case_visible: true + + # + # Username and SSH public key for the admin user on the test VMs + # + - name: user + value: "waagent" + is_case_visible: true + - name: identity_file + value: "" + is_case_visible: true + # - # These variables define parameters for the AgentTestSuite; see the test wiki for details. + # Set the resource group and vm, or the group and the vmss, to execute the test run on an existing VM or VMSS. # - # NOTE: c_test_suites, generated by the AgentTestSuitesCombinator, is also a parameter - # for the AgentTestSuite + - name: resource_group_name + value: "" + is_case_visible: true + - name: vm_name + value: "" + is_case_visible: true + - name: vmss_name + value: "" + is_case_visible: true + + # + # Directory for test logs # - # Root directory for log files (optional) - name: log_path value: "" is_case_visible: true + # # Whether to collect logs from the test VM + # + # Possible values: always, no, failed + # - name: collect_logs value: "failed" is_case_visible: true - # Whether to skip setup of the test VM + # + # Whether to skip setup of the test VMs. This is useful in developer runs when using existing VMs to save initialization time. 
+ # - name: skip_setup value: false is_case_visible: true # - # These variables are parameters for the AgentTestSuitesCombinator + # These variables are handled by LISA to use an SSH proxy when executing the runbook # - # The test suites to execute - - name: test_suites - value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_update, agent_status, multi_config_ext, agent_cgroups, ext_cgroups, agent_firewall, ext_telemetry_pipeline" - - name: cloud - value: "AzureCloud" - is_case_visible: true - - name: image - value: "" - - name: location + - name: proxy + value: False + - name: proxy_host value: "" - - name: vm_size + - name: proxy_user + value: "foo" + - name: proxy_identity_file value: "" + is_secret: true # - # The values for these variables are generated by the AgentTestSuitesCombinator combinator. They are + # The variables below are generated by the AgentTestSuitesCombinator combinator. They are # prefixed with "c_" to distinguish them from the rest of the variables, whose value can be set from # the command line. # - # Most of these variables are handled by LISA and are used to define the set of test VMs that need to be - # created. The variables marked with 'is_case_visible' are also referenced by the AgentTestSuite. + + # + # The combinator generates the test environments using these two variables, which are passed to LISA + # + - name: c_environment + value: {} + - name: c_platform + value: [] + # - # 'c_vm_tags' is a special case: it is used by the azure_update_arm_template hook. This hook does not - # have access to the runbook variables, so instead we use a dummy VM tag named "template" to pass the - # name of the custom ARM template that the hook needs to use (see wiki for more details). 
+ # Name of the test environment, used for mainly for logging purposes # - name: c_env_name value: "" is_case_visible: true - - name: c_marketplace_image - value: "" - - name: c_marketplace_image_information_location - value: "" - - name: c_shared_resource_group_location + + # + # Test suites assigned for execution in the current test environment. + # + # The combinator splits the test suites specified in the 'test_suites' variable in subsets and assigns each subset + # to a test environment. The AgentTestSuite uses 'c_test_suites' to execute the suites assigned to the current environment. + # + - name: c_test_suites + value: [] + is_case_visible: true + + # + # These parameters are used by the AgentTestSuite to create the test scale sets. + # + # Note that there are other 3 variables named 'image', 'vm_size' and 'location', which can be passed + # from the command line. The combinator generates the values for these parameters using test metadata, + # but they can be overriden with these command line variables. The final values are passed to the + # AgentTestSuite in the corresponding 'c_*' variables. 
+ # + - name: c_image value: "" + is_case_visible: true - name: c_vm_size value: "" - - name: c_location - value: "" is_case_visible: true - - name: c_vhd + - name: c_location value: "" is_case_visible: true - - name: c_test_suites - value: [] - is_case_visible: true - - name: c_vm_tags - value: {} # - # Set these variables to use an SSH proxy when executing the runbook + # True if the image is a VHD (instead of a URN) # - - name: proxy - value: False - - name: proxy_host - value: "" - - name: proxy_user - value: "foo" - - name: proxy_identity_file - value: "" - is_secret: true + - name: c_is_vhd + value: false + is_case_visible: true -platform: - - type: azure - admin_username: $(user) - admin_private_key_file: $(identity_file) - admin_password: $(admin_password) - keep_environment: $(keep_environment) - azure: - deploy: True - cloud: $(cloud) - marketplace_image_information_location: $(c_marketplace_image_information_location) - shared_resource_group_location: $(c_shared_resource_group_location) - subscription_id: $(subscription_id) - wait_delete: false - vm_tags: $(c_vm_tags) - requirement: - core_count: - min: 2 - azure: - marketplace: $(c_marketplace_image) - vhd: $(c_vhd) - location: $(c_location) - vm_size: $(c_vm_size) +environment: $(c_environment) + +platform: $(c_platform) combinator: type: agent_test_suites - test_suites: $(test_suites) cloud: $(cloud) + identity_file: $(identity_file) image: $(image) + keep_environment: $(keep_environment) location: $(location) + resource_group_name: $(resource_group_name) + subscription_id: $(subscription_id) + test_suites: $(test_suites) + user: $(user) + vm_name: $(vm_name) vm_size: $(vm_size) + vmss_name: $(vmss_name) concurrency: 32 diff --git a/tests_e2e/orchestrator/sample_runbooks/existing_vm.yml b/tests_e2e/orchestrator/sample_runbooks/existing_vm.yml deleted file mode 100644 index 8ef5baba28..0000000000 --- a/tests_e2e/orchestrator/sample_runbooks/existing_vm.yml +++ /dev/null @@ -1,149 +0,0 @@ -# Microsoft 
Azure Linux Agent -# -# Copyright 2018 Microsoft Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# -# Executes the test suites on an existing VM -# -name: ExistingVM - -testcase: - - criteria: - area: waagent - -extension: - - "../lib" - -variable: - # - # These variables identify the existing VM, and the user for SSH connections - # - - name: cloud - value: "AzureCloud" - is_case_visible: true - - name: subscription_id - value: "" - - name: resource_group_name - value: "" - - name: vm_name - value: "" - - name: location - value: "" - - - name: user - value: "" - - name: identity_file - value: "" - is_secret: true - - # - # The test suites to execute - # - - name: test_suites - value: "agent_bvt" - - # - # These variables define parameters for the AgentTestSuite; see the test wiki for details. - # - # NOTE: c_test_suites, generated by the AgentTestSuitesCombinator, is also a parameter - # for the AgentTestSuite - # - # Root directory for log files (optional) - - name: log_path - value: "" - is_case_visible: true - - # Whether to collect logs from the test VM - - name: collect_logs - value: "failed" - is_case_visible: true - - # Whether to skip setup of the test VM - - name: skip_setup - value: false - is_case_visible: true - - # - # The values for these variables are generated by the AgentTestSuitesCombinator. See - # tests_e2e/orchestrator/runbook.yml for details. 
- # - - name: c_env_name - value: "" - is_case_visible: true - - name: c_vm_name - value: "" - - name: c_marketplace_image_information_location - value: "" - - name: c_shared_resource_group_location - value: "" - - name: c_location - value: "" - is_case_visible: true - - name: c_test_suites - value: [] - is_case_visible: true - - name: c_vm_tags - value: {} - - # - # Set these variables to use an SSH proxy when executing the runbook - # - - name: proxy - value: False - - name: proxy_host - value: "" - - name: proxy_user - value: "foo" - - name: proxy_identity_file - value: "" - is_secret: true - -platform: - - type: azure - admin_username: $(user) - admin_private_key_file: $(identity_file) - azure: - cloud: $(cloud) - marketplace_image_information_location: $(c_marketplace_image_information_location) - shared_resource_group_location: $(c_shared_resource_group_location) - resource_group_name: $(resource_group_name) - deploy: false - subscription_id: $(subscription_id) - vm_tags: $(c_vm_tags) - requirement: - azure: - name: $(c_vm_name) - location: $(c_location) - -combinator: - type: agent_test_suites - test_suites: $(test_suites) - cloud: $(cloud) - location: $(location) - vm_name: $(vm_name) - -notifier: - - type: env_stats - - type: agent.junit - -dev: - enabled: $(proxy) - mock_tcp_ping: $(proxy) - jump_boxes: - - private_key_file: $(proxy_identity_file) - address: $(proxy_host) - username: $(proxy_user) - password: "dummy" diff --git a/tests_e2e/orchestrator/templates/vmss.json b/tests_e2e/orchestrator/templates/vmss.json new file mode 100644 index 0000000000..293edf80c1 --- /dev/null +++ b/tests_e2e/orchestrator/templates/vmss.json @@ -0,0 +1,253 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "username": { + "type": "string" + }, + "sshPublicKey": { + "type": "string" + }, + "vmName": { + "type": "string" + }, + "scenarioPrefix": { + "type": "string", + 
"defaultValue": "e2e-test" + }, + "publisher": { + "type": "string" + }, + "offer": { + "type": "string" + }, + "sku": { + "type": "string" + }, + "version": { + "type": "string" + } + }, + "variables": { + "nicName": "[concat(parameters('scenarioPrefix'),'Nic')]", + "vnetAddressPrefix": "10.130.0.0/16", + "subnetName": "[concat(parameters('scenarioPrefix'),'Subnet')]", + "subnetPrefix": "10.130.0.0/24", + "publicIPAddressName": "[concat(parameters('scenarioPrefix'),'PublicIp')]", + "lbIpName": "[concat(parameters('scenarioPrefix'),'PublicLbIp')]", + "virtualNetworkName": "[concat(parameters('scenarioPrefix'),'Vnet')]", + "lbName": "[concat(parameters('scenarioPrefix'),'lb')]", + "lbIpId": "[resourceId('Microsoft.Network/publicIPAddresses', variables('lbIpName'))]", + "bepoolName": "[concat(variables('lbName'), 'bepool')]", + "natpoolName": "[concat(variables('lbName'), 'natpool')]", + "feIpConfigName": "[concat(variables('lbName'), 'fepool', 'IpConfig')]", + "sshProbeName": "[concat(variables('lbName'), 'probe')]", + "vnetID": "[resourceId('Microsoft.Network/virtualNetworks',variables('virtualNetworkName'))]", + "subnetRef": "[concat(variables('vnetID'),'/subnets/',variables('subnetName'))]", + "lbId": "[resourceId('Microsoft.Network/loadBalancers', variables('lbName'))]", + "bepoolID": "[concat(variables('lbId'), '/backendAddressPools/', variables('bepoolName'))]", + "natpoolID": "[concat(variables('lbId'), '/inboundNatPools/', variables('natpoolName'))]", + "feIpConfigId": "[concat(variables('lbId'), '/frontendIPConfigurations/', variables('feIpConfigName'))]", + "sshProbeId": "[concat(variables('lbId'), '/probes/', variables('sshProbeName'))]", + "sshKeyPath": "[concat('/home/', parameters('username'), '/.ssh/authorized_keys')]" + }, + "resources": [ + { + "apiVersion": "2023-06-01", + "type": "Microsoft.Network/virtualNetworks", + "name": "[variables('virtualNetworkName')]", + "location": "[resourceGroup().location]", + "properties": { + "addressSpace": { + 
"addressPrefixes": [ + "[variables('vnetAddressPrefix')]" + ] + }, + "subnets": [ + { + "name": "[variables('subnetName')]", + "properties": { + "addressPrefix": "[variables('subnetPrefix')]" + } + } + ] + } + }, + { + "type": "Microsoft.Network/publicIPAddresses", + "name": "[variables('lbIpName')]", + "location": "[resourceGroup().location]", + "apiVersion": "2023-06-01", + "properties": { + "publicIPAllocationMethod": "Dynamic", + "dnsSettings": { + "domainNameLabel": "[parameters('vmName')]" + } + } + }, + { + "type": "Microsoft.Network/loadBalancers", + "name": "[variables('lbName')]", + "location": "[resourceGroup().location]", + "apiVersion": "2020-06-01", + "dependsOn": [ + "[concat('Microsoft.Network/virtualNetworks/', variables('virtualNetworkName'))]", + "[concat('Microsoft.Network/publicIPAddresses/', variables('lbIpName'))]" + ], + "properties": { + "frontendIPConfigurations": [ + { + "name": "[variables('feIpConfigName')]", + "properties": { + "PublicIpAddress": { + "id": "[variables('lbIpId')]" + } + } + } + ], + "backendAddressPools": [ + { + "name": "[variables('bepoolName')]" + } + ], + "inboundNatPools": [ + { + "name": "[variables('natpoolName')]", + "properties": { + "FrontendIPConfiguration": { + "Id": "[variables('feIpConfigId')]" + }, + "BackendPort": 22, + "Protocol": "tcp", + "FrontendPortRangeStart": 3500, + "FrontendPortRangeEnd": 4500 + } + } + ], + "loadBalancingRules": [ + { + "name": "ProbeRule", + "properties": { + "frontendIPConfiguration": { + "id": "[variables('feIpConfigId')]" + }, + "backendAddressPool": { + "id": "[variables('bepoolID')]" + }, + "protocol": "Tcp", + "frontendPort": 80, + "backendPort": 80, + "idleTimeoutInMinutes": 5, + "probe": { + "id": "[variables('sshProbeId')]" + } + } + } + ], + "probes": [ + { + "name": "[variables('sshProbeName')]", + "properties": { + "protocol": "tcp", + "port": 22, + "intervalInSeconds": 5, + "numberOfProbes": 2 + } + } + ] + } + }, + { + "apiVersion": "2023-03-01", + "type": 
"Microsoft.Compute/virtualMachineScaleSets", + "name": "[parameters('vmName')]", + "location": "[resourceGroup().location]", + "dependsOn": [ + "[concat('Microsoft.Network/virtualNetworks/', variables('virtualNetworkName'))]", + "[concat('Microsoft.Network/loadBalancers/', variables('lbName'))]" + ], + "sku": { + "name": "Standard_D2s_v3", + "tier": "Standard", + "capacity": 3 + }, + "properties": { + "orchestrationMode": "Uniform", + "overprovision": false, + "virtualMachineProfile": { + "extensionProfile": { + "extensions": [] + }, + "osProfile": { + "computerNamePrefix": "[parameters('vmName')]", + "adminUsername": "[parameters('username')]", + "linuxConfiguration": { + "disablePasswordAuthentication": true, + "ssh": { + "publicKeys": [ + { + "path": "[variables('sshKeyPath')]", + "keyData": "[parameters('sshPublicKey')]" + } + ] + } + } + }, + "storageProfile": { + "osDisk": { + "osType": "Linux", + "createOption": "FromImage", + "caching": "ReadWrite", + "managedDisk": { + "storageAccountType": "Premium_LRS" + }, + "diskSizeGB": 64 + }, + "imageReference": { + "publisher": "[parameters('publisher')]", + "offer": "[parameters('offer')]", + "sku": "[parameters('sku')]", + "version": "[parameters('version')]" + } + }, + "diagnosticsProfile": { + "bootDiagnostics": { + "enabled": true + } + }, + "networkProfile": { + "networkInterfaceConfigurations": [ + { + "name": "[variables('nicName')]", + "properties": { + "primary": true, + "ipConfigurations": [ + { + "name": "ipconfig1", + "properties": { + "primary": true, + "publicIPAddressConfiguration": { + "name": "[variables('publicIPAddressName')]", + "properties": { + "idleTimeoutInMinutes": 15 + } + }, + "subnet": { + "id": "[variables('subnetRef')]" + } + } + } + ] + } + } + ] + } + }, + "upgradePolicy": { + "mode": "Automatic" + }, + "platformFaultDomainCount": 1 + } + } + ] +} diff --git a/tests_e2e/test_suites/vmss.yml b/tests_e2e/test_suites/vmss.yml new file mode 100644 index 0000000000..d9ca6be01f --- 
/dev/null +++ b/tests_e2e/test_suites/vmss.yml @@ -0,0 +1,8 @@ +# +# Sample test for scale sets +# +name: "VMSS" +tests: + - "samples/vmss_test.py" +executes_on_scale_set: true +images: "ubuntu_2004" diff --git a/tests_e2e/tests/agent_bvt/extension_operations.py b/tests_e2e/tests/agent_bvt/extension_operations.py index e5c607c1d1..52f39c7755 100755 --- a/tests_e2e/tests/agent_bvt/extension_operations.py +++ b/tests_e2e/tests/agent_bvt/extension_operations.py @@ -31,19 +31,16 @@ from azure.core.exceptions import ResourceNotFoundError -from tests_e2e.tests.lib.agent_test import AgentTest -from tests_e2e.tests.lib.identifiers import VmExtensionIds, VmExtensionIdentifier +from tests_e2e.tests.lib.agent_test import AgentVmTest +from tests_e2e.tests.lib.vm_extension_identifier import VmExtensionIds, VmExtensionIdentifier from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.ssh_client import SshClient from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient -class ExtensionOperationsBvt(AgentTest): +class ExtensionOperationsBvt(AgentVmTest): def run(self): - ssh_client: SshClient = SshClient( - ip_address=self._context.vm_ip_address, - username=self._context.username, - private_key_file=self._context.private_key_file) + ssh_client: SshClient = self._context.create_ssh_client() is_arm64: bool = ssh_client.get_architecture() == "aarch64" diff --git a/tests_e2e/tests/agent_bvt/run_command.py b/tests_e2e/tests/agent_bvt/run_command.py index 494458eab4..df5cdcf2b4 100755 --- a/tests_e2e/tests/agent_bvt/run_command.py +++ b/tests_e2e/tests/agent_bvt/run_command.py @@ -31,14 +31,14 @@ from assertpy import assert_that, soft_assertions from typing import Callable, Dict -from tests_e2e.tests.lib.agent_test import AgentTest -from tests_e2e.tests.lib.identifiers import VmExtensionIds +from tests_e2e.tests.lib.agent_test import AgentVmTest +from tests_e2e.tests.lib.vm_extension_identifier import VmExtensionIds from 
tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.ssh_client import SshClient from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient -class RunCommandBvt(AgentTest): +class RunCommandBvt(AgentVmTest): class TestCase: def __init__(self, extension: VirtualMachineExtensionClient, get_settings: Callable[[str], Dict[str, str]]): self.extension = extension diff --git a/tests_e2e/tests/agent_bvt/vm_access.py b/tests_e2e/tests/agent_bvt/vm_access.py index 9b52ac2453..c36aef132b 100755 --- a/tests_e2e/tests/agent_bvt/vm_access.py +++ b/tests_e2e/tests/agent_bvt/vm_access.py @@ -28,18 +28,18 @@ from assertpy import assert_that from pathlib import Path -from tests_e2e.tests.lib.agent_test import AgentTest, TestSkipped -from tests_e2e.tests.lib.identifiers import VmExtensionIds +from tests_e2e.tests.lib.agent_test import AgentVmTest, TestSkipped +from tests_e2e.tests.lib.vm_extension_identifier import VmExtensionIds from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.ssh_client import SshClient from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient -class VmAccessBvt(AgentTest): +class VmAccessBvt(AgentVmTest): def run(self): - ssh: SshClient = self._context.create_ssh_client() - if not VmExtensionIds.VmAccess.supports_distro(ssh.run_command("uname -a")): + ssh_client: SshClient = self._context.create_ssh_client() + if not VmExtensionIds.VmAccess.supports_distro(ssh_client.run_command("uname -a")): raise TestSkipped("Currently VMAccess is not supported on this distro") # Try to use a unique username for each test run (note that we truncate to 32 chars to @@ -52,8 +52,8 @@ def run(self): private_key_file: Path = self._context.working_directory/f"{username}_rsa" public_key_file: Path = self._context.working_directory/f"{username}_rsa.pub" log.info("Generating SSH key as %s", private_key_file) - ssh = SshClient(ip_address=self._context.vm_ip_address, username=username, 
private_key_file=private_key_file) - ssh.generate_ssh_key(private_key_file) + ssh_client = SshClient(ip_address=self._context.ip_address, username=username, identity_file=private_key_file) + ssh_client.generate_ssh_key(private_key_file) with public_key_file.open() as f: public_key = f.read() @@ -70,7 +70,7 @@ def run(self): # Verify the user was added correctly by starting an SSH session to the VM log.info("Verifying SSH connection to the test VM") - stdout = ssh.run_command("echo -n $USER") + stdout = ssh_client.run_command("echo -n $USER") assert_that(stdout).described_as("Output from SSH command").is_equal_to(username) log.info("SSH command output ($USER): %s", stdout) diff --git a/tests_e2e/tests/agent_cgroups/agent_cgroups.py b/tests_e2e/tests/agent_cgroups/agent_cgroups.py index c0394f6c62..449c5c3629 100644 --- a/tests_e2e/tests/agent_cgroups/agent_cgroups.py +++ b/tests_e2e/tests/agent_cgroups/agent_cgroups.py @@ -16,17 +16,17 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from tests_e2e.tests.lib.agent_test import AgentTest -from tests_e2e.tests.lib.agent_test_context import AgentTestContext +from tests_e2e.tests.lib.agent_test import AgentVmTest +from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext from tests_e2e.tests.lib.logging import log -class AgentCgroups(AgentTest): +class AgentCgroups(AgentVmTest): """ This test verifies that the agent is running in the expected cgroups. 
""" - def __init__(self, context: AgentTestContext): + def __init__(self, context: AgentVmTestContext): super().__init__(context) self._ssh_client = self._context.create_ssh_client() @@ -35,7 +35,7 @@ def run(self): log.info("Restarting agent service to make sure service starts with new configuration that was setup by the cgroupconfigurator") self._ssh_client.run_command("agent-service restart", use_sudo=True) log.info("=====Validating agent cgroups=====") - self._run_remote_test("agent_cgroups-check_cgroups_agent.py") + self._run_remote_test(self._ssh_client, "agent_cgroups-check_cgroups_agent.py") log.info("Successfully Verified that agent present in correct cgroups") diff --git a/tests_e2e/tests/agent_cgroups/agent_cpu_quota.py b/tests_e2e/tests/agent_cgroups/agent_cpu_quota.py index 79f95fc547..be66428b91 100644 --- a/tests_e2e/tests/agent_cgroups/agent_cpu_quota.py +++ b/tests_e2e/tests/agent_cgroups/agent_cpu_quota.py @@ -1,20 +1,21 @@ from typing import List, Dict, Any -from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test import AgentVmTest +from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext from tests_e2e.tests.lib.logging import log -class AgentCPUQuota(AgentTest): +class AgentCPUQuota(AgentVmTest): """ The test verify that the agent detects when it is throttled for using too much CPU, that it detects processes that do belong to the agent's cgroup, and that resource metrics are generated. 
""" - def __init__(self, context): + def __init__(self, context: AgentVmTestContext): super().__init__(context) self._ssh_client = self._context.create_ssh_client() def run(self): log.info("=====Validating agent cpu quota checks") - self._run_remote_test("agent_cpu_quota-check_agent_cpu_quota.py", use_sudo=True) + self._run_remote_test(self._ssh_client, "agent_cpu_quota-check_agent_cpu_quota.py", use_sudo=True) log.info("Successfully Verified that agent running in expected CPU quotas") def get_ignore_error_rules(self) -> List[Dict[str, Any]]: diff --git a/tests_e2e/tests/agent_ext_workflow/extension_workflow.py b/tests_e2e/tests/agent_ext_workflow/extension_workflow.py index 8c08ea7d3c..98f14e2832 100644 --- a/tests_e2e/tests/agent_ext_workflow/extension_workflow.py +++ b/tests_e2e/tests/agent_ext_workflow/extension_workflow.py @@ -23,15 +23,15 @@ from random import choice import uuid -from tests_e2e.tests.lib.agent_test import AgentTest -from tests_e2e.tests.lib.agent_test_context import AgentTestContext -from tests_e2e.tests.lib.identifiers import VmExtensionIds, VmExtensionIdentifier +from tests_e2e.tests.lib.agent_test import AgentVmTest +from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext +from tests_e2e.tests.lib.vm_extension_identifier import VmExtensionIds, VmExtensionIdentifier from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.ssh_client import SshClient from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient -class ExtensionWorkflow(AgentTest): +class ExtensionWorkflow(AgentVmTest): """ This scenario tests if the correct extension workflow sequence is being executed from the agent. 
It installs the GuestAgentDcrTestExtension on the test VM and makes requests to install, enable, update, and delete the extension @@ -59,12 +59,9 @@ class ExtensionWorkflow(AgentTest): - Match the operation sequence as per the test and make sure they are in the correct chronological order - Restart the agent and verify if the correct operation sequence is followed """ - def __init__(self, context: AgentTestContext): + def __init__(self, context: AgentVmTestContext): super().__init__(context) - self._ssh_client = SshClient( - ip_address=self._context.vm_ip_address, - username=self._context.username, - private_key_file=self._context.private_key_file) + self._ssh_client = context.create_ssh_client() # This class represents the GuestAgentDcrTestExtension running on the test VM class GuestAgentDcrTestExtension: diff --git a/tests_e2e/tests/agent_firewall/agent_firewall.py b/tests_e2e/tests/agent_firewall/agent_firewall.py index 804443a470..c5b789dea7 100644 --- a/tests_e2e/tests/agent_firewall/agent_firewall.py +++ b/tests_e2e/tests/agent_firewall/agent_firewall.py @@ -16,23 +16,23 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from tests_e2e.tests.lib.agent_test import AgentTest -from tests_e2e.tests.lib.agent_test_context import AgentTestContext +from tests_e2e.tests.lib.agent_test import AgentVmTest +from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext from tests_e2e.tests.lib.logging import log -class AgentFirewall(AgentTest): +class AgentFirewall(AgentVmTest): """ This test verifies the agent firewall rules are added properly. It checks each firewall rule is present and working as expected. 
""" - def __init__(self, context: AgentTestContext): + def __init__(self, context: AgentVmTestContext): super().__init__(context) self._ssh_client = self._context.create_ssh_client() def run(self): log.info("Checking iptable rules added by the agent") - self._run_remote_test(f"agent_firewall-verify_all_firewall_rules.py --user {self._context.username}", use_sudo=True) + self._run_remote_test(self._ssh_client, f"agent_firewall-verify_all_firewall_rules.py --user {self._context.username}", use_sudo=True) log.info("Successfully verified all rules present and working as expected.") diff --git a/tests_e2e/tests/agent_not_provisioned/agent_not_provisioned.py b/tests_e2e/tests/agent_not_provisioned/agent_not_provisioned.py index 490fba3b8d..103c8b44cf 100755 --- a/tests_e2e/tests/agent_not_provisioned/agent_not_provisioned.py +++ b/tests_e2e/tests/agent_not_provisioned/agent_not_provisioned.py @@ -21,16 +21,15 @@ from azure.mgmt.compute.models import VirtualMachineInstanceView -from tests_e2e.tests.lib.agent_test import AgentTest -from tests_e2e.tests.lib.identifiers import VmExtensionIds +from tests_e2e.tests.lib.agent_test import AgentVmTest +from tests_e2e.tests.lib.vm_extension_identifier import VmExtensionIds from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.shell import CommandError from tests_e2e.tests.lib.ssh_client import SshClient -from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient -class AgentNotProvisioned(AgentTest): +class AgentNotProvisioned(AgentVmTest): """ When osProfile.linuxConfiguration.provisionVMAgent is set to 'false', this test verifies that the agent is disabled and that extension operations are not allowed. @@ -66,8 +65,7 @@ def run(self): # Validate that the agent is not reporting status. # log.info("Verifying that the Agent status is 'Not Ready' (i.e. 
it is not reporting status).") - vm: VirtualMachineClient = VirtualMachineClient(self._context.vm) - instance_view: VirtualMachineInstanceView = vm.get_instance_view() + instance_view: VirtualMachineInstanceView = self._context.vm.get_instance_view() log.info("Instance view of VM Agent:\n%s", instance_view.vm_agent.serialize()) assert_that(instance_view.vm_agent.statuses).described_as("The VM agent should have exactly 1 status").is_length(1) assert_that(instance_view.vm_agent.statuses[0].code).described_as("The VM Agent should not be available").is_equal_to('ProvisioningState/Unavailable') diff --git a/tests_e2e/tests/agent_not_provisioned/disable_agent_provisioning.py b/tests_e2e/tests/agent_not_provisioned/disable_agent_provisioning.py index 8de9e55967..6f0a562cd2 100755 --- a/tests_e2e/tests/agent_not_provisioned/disable_agent_provisioning.py +++ b/tests_e2e/tests/agent_not_provisioned/disable_agent_provisioning.py @@ -22,11 +22,14 @@ from tests_e2e.tests.lib.update_arm_template import UpdateArmTemplate -class DenyOutboundConnections(UpdateArmTemplate): +class DisableAgentProvisioning(UpdateArmTemplate): """ Updates the ARM template to set osProfile.linuxConfiguration.provisionVMAgent to false. """ - def update(self, template: Dict[str, Any]) -> None: + def update(self, template: Dict[str, Any], is_lisa_template: bool) -> None: + if not is_lisa_template: + raise Exception('This test can only customize LISA ARM templates.') + # # NOTE: LISA's template uses this function to generate the value for osProfile.linuxConfiguration. The function is # under the 'lisa' namespace. 
diff --git a/tests_e2e/tests/agent_publish/agent_publish.py b/tests_e2e/tests/agent_publish/agent_publish.py index 91befd63b5..0736a8fe8b 100644 --- a/tests_e2e/tests/agent_publish/agent_publish.py +++ b/tests_e2e/tests/agent_publish/agent_publish.py @@ -20,20 +20,20 @@ from datetime import datetime from typing import Any, Dict, List -from tests_e2e.tests.lib.agent_test import AgentTest -from tests_e2e.tests.lib.agent_test_context import AgentTestContext -from tests_e2e.tests.lib.identifiers import VmExtensionIds, VmExtensionIdentifier +from tests_e2e.tests.lib.agent_test import AgentVmTest +from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext +from tests_e2e.tests.lib.vm_extension_identifier import VmExtensionIds, VmExtensionIdentifier from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.ssh_client import SshClient from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient -class AgentPublishTest(AgentTest): +class AgentPublishTest(AgentVmTest): """ This script verifies if the agent update performed in the vm. 
""" - def __init__(self, context: AgentTestContext): + def __init__(self, context: AgentVmTestContext): super().__init__(context) self._ssh_client: SshClient = self._context.create_ssh_client() @@ -62,12 +62,12 @@ def _get_agent_info(self) -> None: def _prepare_agent(self) -> None: log.info("Modifying agent update related config flags") - self._run_remote_test("update-waagent-conf Debug.DownloadNewAgents=y AutoUpdate.GAFamily=Test AutoUpdate.Enabled=y Extensions.Enabled=y", use_sudo=True) + self._run_remote_test(self._ssh_client, "update-waagent-conf Debug.DownloadNewAgents=y AutoUpdate.GAFamily=Test AutoUpdate.Enabled=y Extensions.Enabled=y", use_sudo=True) log.info('Updated agent-update DownloadNewAgents GAFamily config flags') def _check_update(self) -> None: log.info("Verifying for agent update status") - self._run_remote_test("agent_publish-check_update.py") + self._run_remote_test(self._ssh_client, "agent_publish-check_update.py") log.info('Successfully checked the agent update') def _check_cse(self) -> None: diff --git a/tests_e2e/tests/agent_status/agent_status.py b/tests_e2e/tests/agent_status/agent_status.py index b9caef8f32..c02a3f4bf1 100644 --- a/tests_e2e/tests/agent_status/agent_status.py +++ b/tests_e2e/tests/agent_status/agent_status.py @@ -28,18 +28,17 @@ from time import sleep import json -from tests_e2e.tests.lib.agent_test import AgentTest -from tests_e2e.tests.lib.agent_test_context import AgentTestContext +from tests_e2e.tests.lib.agent_test import AgentVmTest +from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext from tests_e2e.tests.lib.logging import log -from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient class RetryableAgentStatusException(BaseException): pass -class AgentStatus(AgentTest): - def __init__(self, context: AgentTestContext): +class AgentStatus(AgentVmTest): + def __init__(self, context: AgentVmTestContext): super().__init__(context) self._ssh_client = self._context.create_ssh_client() 
@@ -139,8 +138,6 @@ def run(self): log.info("") log.info("*******Verifying the agent status updates 3 times*******") - vm = VirtualMachineClient(self._context.vm) - timeout = datetime.now() + timedelta(minutes=6) instance_view_exception = None status_updated = 0 @@ -149,7 +146,7 @@ def run(self): # Retry validating agent status updates 2 times with timeout of 6 minutes while datetime.now() <= timeout and status_updated < 2: - instance_view = vm.get_instance_view() + instance_view = self._context.vm.get_instance_view() log.info("") log.info( "Check instance view to validate that the Guest Agent reports valid status...") diff --git a/tests_e2e/tests/agent_update/rsm_update.py b/tests_e2e/tests/agent_update/rsm_update.py index d31e8ce3e6..8325599910 100644 --- a/tests_e2e/tests/agent_update/rsm_update.py +++ b/tests_e2e/tests/agent_update/rsm_update.py @@ -33,23 +33,19 @@ from azure.mgmt.compute.models import VirtualMachine from msrestazure.azure_cloud import Cloud -from tests_e2e.tests.lib.agent_test import AgentTest -from tests_e2e.tests.lib.agent_test_context import AgentTestContext +from tests_e2e.tests.lib.agent_test import AgentVmTest +from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext from tests_e2e.tests.lib.azure_clouds import AZURE_CLOUDS from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.retry import retry_if_false -from tests_e2e.tests.lib.ssh_client import SshClient from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient -class RsmUpdateBvt(AgentTest): +class RsmUpdateBvt(AgentVmTest): - def __init__(self, context: AgentTestContext): + def __init__(self, context: AgentVmTestContext): super().__init__(context) - self._ssh_client = SshClient( - ip_address=self._context.vm_ip_address, - username=self._context.username, - private_key_file=self._context.private_key_file) + self._ssh_client = self._context.create_ssh_client() self._installed_agent_version = "9.9.9.9" self._downgrade_version = "9.9.9.9" @@ 
-131,7 +127,7 @@ def run(self) -> None: def _check_rsm_gs(self, requested_version: str) -> None: # This checks if RSM GS available to the agent after we send the rsm update request log.info('Executing wait_for_rsm_gs.py remote script to verify latest GS contain requested version after rsm update requested') - self._run_remote_test(f"agent_update-wait_for_rsm_gs.py --version {requested_version}", use_sudo=True) + self._run_remote_test(self._ssh_client, f"agent_update-wait_for_rsm_gs.py --version {requested_version}", use_sudo=True) log.info('Verified latest GS contain requested version after rsm update requested') def _prepare_agent(self, daemon_version="1.0.0.0", update_config=True) -> None: @@ -141,11 +137,11 @@ def _prepare_agent(self, daemon_version="1.0.0.0", update_config=True) -> None: 2) Updating GAFamily type "Test" and GAUpdates flag to process agent updates on test versions. """ log.info('Executing modify_agent_version remote script to update agent installed version to lower than requested version') - self._run_remote_test(f"agent_update-modify_agent_version {daemon_version}", use_sudo=True) + self._run_remote_test(self._ssh_client, f"agent_update-modify_agent_version {daemon_version}", use_sudo=True) log.info('Successfully updated agent installed version') if update_config: log.info('Executing update-waagent-conf remote script to update agent update config flags to allow and download test versions') - self._run_remote_test("update-waagent-conf Debug.EnableGAVersioning=y AutoUpdate.GAFamily=Test", use_sudo=True) + self._run_remote_test(self._ssh_client, "update-waagent-conf Debug.EnableGAVersioning=y AutoUpdate.GAFamily=Test", use_sudo=True) log.info('Successfully updated agent update config') @staticmethod @@ -175,11 +171,10 @@ def _request_rsm_update(self, requested_version: str) -> None: This method is to simulate the rsm request. 
First we ensure the PlatformUpdates enabled in the vm and then make a request using rest api """ - vm: VirtualMachineClient = VirtualMachineClient(self._context.vm) - if not self._verify_agent_update_flag_enabled(vm): + if not self._verify_agent_update_flag_enabled(self._context.vm): # enable the flag log.info("Attempting vm update to set the enableVMAgentPlatformUpdates flag") - self._enable_agent_update_flag(vm) + self._enable_agent_update_flag(self._context.vm) log.info("Updated the enableVMAgentPlatformUpdates flag to True") else: log.info("Already enableVMAgentPlatformUpdates flag set to True") @@ -240,7 +235,7 @@ def _verify_agent_reported_supported_feature_flag(self): """ log.info("Executing verify_versioning_supported_feature.py remote script to verify agent reported supported feature flag, so that CRP can send RSM update request") - self._run_remote_test("agent_update-verify_versioning_supported_feature.py", use_sudo=True) + self._run_remote_test(self._ssh_client, "agent_update-verify_versioning_supported_feature.py", use_sudo=True) log.info("Successfully verified that Agent reported VersioningGovernance supported feature flag") def _verify_agent_reported_update_status(self, version: str): @@ -249,7 +244,7 @@ def _verify_agent_reported_update_status(self, version: str): """ log.info("Executing verify_agent_reported_update_status.py remote script to verify agent reported update status for version {0}".format(version)) - self._run_remote_test(f"agent_update-verify_agent_reported_update_status.py --version {version}", use_sudo=True) + self._run_remote_test(self._ssh_client, f"agent_update-verify_agent_reported_update_status.py --version {version}", use_sudo=True) log.info("Successfully Agent reported update status for version {0}".format(version)) def _retrieve_installed_agent_version(self): diff --git a/tests_e2e/tests/ext_cgroups/ext_cgroups.py b/tests_e2e/tests/ext_cgroups/ext_cgroups.py index 33092ca41e..94a0c97258 100644 --- 
a/tests_e2e/tests/ext_cgroups/ext_cgroups.py +++ b/tests_e2e/tests/ext_cgroups/ext_cgroups.py @@ -17,17 +17,17 @@ # limitations under the License. # from tests_e2e.tests.ext_cgroups.install_extensions import InstallExtensions -from tests_e2e.tests.lib.agent_test import AgentTest -from tests_e2e.tests.lib.agent_test_context import AgentTestContext +from tests_e2e.tests.lib.agent_test import AgentVmTest +from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext from tests_e2e.tests.lib.logging import log -class ExtCgroups(AgentTest): +class ExtCgroups(AgentVmTest): """ This test verifies the installed extensions assigned correctly in their cgroups. """ - def __init__(self, context: AgentTestContext): + def __init__(self, context: AgentVmTestContext): super().__init__(context) self._ssh_client = self._context.create_ssh_client() @@ -35,7 +35,7 @@ def run(self): log.info("=====Installing extensions to validate ext cgroups scenario") InstallExtensions(self._context).run() log.info("=====Executing remote script check_cgroups_extensions.py to validate extension cgroups") - self._run_remote_test("ext_cgroups-check_cgroups_extensions.py", use_sudo=True) + self._run_remote_test(self._ssh_client, "ext_cgroups-check_cgroups_extensions.py", use_sudo=True) log.info("Successfully verified that extensions present in correct cgroup") diff --git a/tests_e2e/tests/ext_cgroups/install_extensions.py b/tests_e2e/tests/ext_cgroups/install_extensions.py index 6617730ed0..aebc6e3c03 100644 --- a/tests_e2e/tests/ext_cgroups/install_extensions.py +++ b/tests_e2e/tests/ext_cgroups/install_extensions.py @@ -19,8 +19,8 @@ from datetime import datetime, timedelta from pathlib import Path -from tests_e2e.tests.lib.agent_test_context import AgentTestContext -from tests_e2e.tests.lib.identifiers import VmExtensionIds +from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext +from tests_e2e.tests.lib.vm_extension_identifier import VmExtensionIds from 
tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient @@ -30,7 +30,7 @@ class InstallExtensions: This test installs the multiple extensions in order to verify extensions cgroups in the next test. """ - def __init__(self, context: AgentTestContext): + def __init__(self, context: AgentVmTestContext): self._context = context self._ssh_client = self._context.create_ssh_client() @@ -67,7 +67,7 @@ def _install_ama(self): def _install_vmaccess(self): # fetch the public key - public_key_file: Path = Path(self._context.private_key_file).with_suffix(".pub") + public_key_file: Path = Path(self._context.identity_file).with_suffix(".pub") with public_key_file.open() as f: public_key = f.read() # Invoke the extension diff --git a/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py b/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py index de051485ad..809c32f592 100755 --- a/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py +++ b/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py @@ -29,14 +29,14 @@ from azurelinuxagent.common.conf import get_etp_collection_period -from tests_e2e.tests.lib.agent_test import AgentTest -from tests_e2e.tests.lib.identifiers import VmExtensionIds +from tests_e2e.tests.lib.agent_test import AgentVmTest +from tests_e2e.tests.lib.vm_extension_identifier import VmExtensionIds from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.ssh_client import SshClient from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient -class ExtTelemetryPipeline(AgentTest): +class ExtTelemetryPipeline(AgentVmTest): def run(self): ssh_client: SshClient = self._context.create_ssh_client() @@ -77,7 +77,8 @@ def run(self): log.info("") log.info("Add good extension events and check they are reported...") max_events = random.randint(10, 50) - 
self._run_remote_test(f"ext_telemetry_pipeline-add_extension_events.py " + self._run_remote_test(ssh_client, + f"ext_telemetry_pipeline-add_extension_events.py " f"--extensions {','.join(extensions)} " f"--num_events_total {max_events}", use_sudo=True) log.info("") @@ -86,7 +87,8 @@ def run(self): # Add invalid events for each extension and check that the TelemetryEventsCollector drops them log.info("") log.info("Add bad extension events and check they are reported...") - self._run_remote_test(f"ext_telemetry_pipeline-add_extension_events.py " + self._run_remote_test(ssh_client, + f"ext_telemetry_pipeline-add_extension_events.py " f"--extensions {','.join(extensions)} " f"--num_events_total {max_events} " f"--num_events_bad {random.randint(5, max_events-5)}", use_sudo=True) diff --git a/tests_e2e/tests/extensions_disabled/extensions_disabled.py b/tests_e2e/tests/extensions_disabled/extensions_disabled.py index 27c62427ab..002d83357c 100755 --- a/tests_e2e/tests/extensions_disabled/extensions_disabled.py +++ b/tests_e2e/tests/extensions_disabled/extensions_disabled.py @@ -31,15 +31,14 @@ from azure.mgmt.compute.models import VirtualMachineInstanceView -from tests_e2e.tests.lib.agent_test import AgentTest -from tests_e2e.tests.lib.identifiers import VmExtensionIds +from tests_e2e.tests.lib.agent_test import AgentVmTest +from tests_e2e.tests.lib.vm_extension_identifier import VmExtensionIds from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.ssh_client import SshClient -from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient -class ExtensionsDisabled(AgentTest): +class ExtensionsDisabled(AgentVmTest): class TestCase: def __init__(self, extension: VirtualMachineExtensionClient, settings: Any): self.extension = extension @@ -109,9 +108,8 @@ def run(self): # # Validate that the agent continued reporting status even if it is not processing 
extensions # - vm: VirtualMachineClient = VirtualMachineClient(self._context.vm) log.info("") - instance_view: VirtualMachineInstanceView = vm.get_instance_view() + instance_view: VirtualMachineInstanceView = self._context.vm.get_instance_view() log.info("Instance view of VM Agent:\n%s", instance_view.vm_agent.serialize()) assert_that(instance_view.vm_agent.statuses).described_as("The VM agent should have exactly 1 status").is_length(1) assert_that(instance_view.vm_agent.statuses[0].display_status).described_as("The VM Agent should be ready").is_equal_to('Ready') diff --git a/tests_e2e/tests/fips/fips.py b/tests_e2e/tests/fips/fips.py index 9f490de4ca..a5e2438a4a 100755 --- a/tests_e2e/tests/fips/fips.py +++ b/tests_e2e/tests/fips/fips.py @@ -20,16 +20,15 @@ import uuid from assertpy import fail -from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test import AgentVmTest from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.shell import CommandError from tests_e2e.tests.lib.ssh_client import SshClient -from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient -from tests_e2e.tests.lib.identifiers import VmExtensionIds +from tests_e2e.tests.lib.vm_extension_identifier import VmExtensionIds -class Fips(AgentTest): +class Fips(AgentVmTest): """ Enables FIPS on the test VM, which is Mariner 2 VM, and verifies that extensions with protected settings are handled correctly under FIPS. 
""" @@ -45,8 +44,7 @@ def run(self): raise Exception(f"Failed to enable FIPS: {e}") log.info("Restarting test VM") - vm: VirtualMachineClient = VirtualMachineClient(self._context.vm) - vm.restart(wait_for_boot=True, ssh_client=ssh_client) + self._context.vm.restart(wait_for_boot=True, ssh_client=ssh_client) try: command = "fips-check_fips_mariner" diff --git a/tests_e2e/tests/keyvault_certificates/keyvault_certificates.py b/tests_e2e/tests/keyvault_certificates/keyvault_certificates.py index 676d7ed249..7be3f272c0 100755 --- a/tests_e2e/tests/keyvault_certificates/keyvault_certificates.py +++ b/tests_e2e/tests/keyvault_certificates/keyvault_certificates.py @@ -22,14 +22,13 @@ # from assertpy import fail -from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test import AgentVmTest from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.shell import CommandError from tests_e2e.tests.lib.ssh_client import SshClient -from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient -class KeyvaultCertificates(AgentTest): +class KeyvaultCertificates(AgentVmTest): def run(self): test_certificates = { 'C49A06B3044BD1778081366929B53EBF154133B3': { @@ -59,8 +58,6 @@ def run(self): else: log.info("Some test certificates had already been downloaded to the test VM (they have been deleted now):\n%s", existing_certificates) - vm: VirtualMachineClient = VirtualMachineClient(self._context.vm) - osprofile = { "location": self._context.vm.location, "properties": { @@ -77,13 +74,13 @@ def run(self): } } log.info("updating the vm's osProfile with the certificates to download:\n%s", osprofile) - vm.update(osprofile) + self._context.vm.update(osprofile) # If the test has already run on the VM, force a new goal state to ensure the certificates are downloaded since the VM model most likely already had the certificates # and the update operation would not have triggered a goal state if existing_certificates != "": log.info("Reapplying 
the goal state to ensure the test certificates are downloaded.") - vm.reapply() + self._context.vm.reapply() try: output = ssh_client.run_command(f"ls {expected_certificates}", use_sudo=True) diff --git a/tests_e2e/tests/lib/add_network_security_group.py b/tests_e2e/tests/lib/add_network_security_group.py index 28cf69b59f..4d46cf7ca5 100644 --- a/tests_e2e/tests/lib/add_network_security_group.py +++ b/tests_e2e/tests/lib/add_network_security_group.py @@ -32,14 +32,14 @@ class AddNetworkSecurityGroup(UpdateArmTemplate): """ Updates the ARM template to add a network security group allowing SSH access from the current machine. """ - def update(self, template: Dict[str, Any]) -> None: + def update(self, template: Dict[str, Any], is_lisa_template: bool) -> None: resources: List[Dict[str, Any]] = template["resources"] # Append the NSG to the list of resources network_security_group = json.loads(f"""{{ "type": "Microsoft.Network/networkSecurityGroups", "name": "{NETWORK_SECURITY_GROUP}", - "location": "[parameters('location')]", + "location": "[resourceGroup().location]", "apiVersion": "2020-05-01", "properties": {{ "securityRules": [] @@ -66,72 +66,106 @@ def update(self, template: Dict[str, Any]) -> None: except Exception as e: log.warning("******** Waagent: Failed to create Allow security rule for SSH, skipping rule: %s", e) - - # - # Add reference to the NSG to the properties of the subnets. 
- # - # The subnets are a copy property of the virtual network in LISA's ARM template: # - # { - # "condition": "[empty(parameters('virtual_network_resource_group'))]", - # "apiVersion": "2020-05-01", - # "type": "Microsoft.Network/virtualNetworks", - # "name": "[parameters('virtual_network_name')]", - # "location": "[parameters('location')]", - # "properties": { - # "addressSpace": { - # "addressPrefixes": [ - # "10.0.0.0/16" - # ] - # }, - # "copy": [ - # { - # "name": "subnets", - # "count": "[parameters('subnet_count')]", - # "input": { - # "name": "[concat(parameters('subnet_prefix'), copyIndex('subnets'))]", - # "properties": { - # "addressPrefix": "[concat('10.0.', copyIndex('subnets'), '.0/24')]" - # } - # } - # } - # ] - # } - # } + # Add a dependency on the NSG to the virtual network # network_resource = self._get_resource(resources, "Microsoft.Network/virtualNetworks") - - # Add a dependency on the NSG - nsg_reference = f"[resourceId('Microsoft.Network/networkSecurityGroups', '{NETWORK_SECURITY_GROUP}')]" network_resource_dependencies = network_resource.get("dependsOn") + nsg_reference = f"[resourceId('Microsoft.Network/networkSecurityGroups', '{NETWORK_SECURITY_GROUP}')]" if network_resource_dependencies is None: network_resource["dependsOn"] = [nsg_reference] else: network_resource_dependencies.append(nsg_reference) - subnets_copy = network_resource["properties"].get("copy") if network_resource.get("properties") is not None else None - if subnets_copy is None: - raise Exception("Cannot find the copy property of the virtual network in the ARM template") - - subnets = [i for i in subnets_copy if "name" in i and i["name"] == 'subnets'] - if len(subnets) == 0: - raise Exception("Cannot find the subnets of the virtual network in the ARM template") - - subnets_input = subnets[0].get("input") - if subnets_input is None: - raise Exception("Cannot find the input property of the subnets in the ARM template") - + # + # Add a reference to the NSG to the properties 
of the subnets. + # nsg_reference = json.loads(f"""{{ "networkSecurityGroup": {{ "id": "[resourceId('Microsoft.Network/networkSecurityGroups', '{NETWORK_SECURITY_GROUP}')]" }} }}""") - subnets_properties = subnets_input.get("properties") - if subnets_properties is None: - subnets_input["properties"] = nsg_reference + if is_lisa_template: + # The subnets are a copy property of the virtual network in LISA's ARM template: + # + # { + # "condition": "[empty(parameters('virtual_network_resource_group'))]", + # "apiVersion": "2020-05-01", + # "type": "Microsoft.Network/virtualNetworks", + # "name": "[parameters('virtual_network_name')]", + # "location": "[parameters('location')]", + # "properties": { + # "addressSpace": { + # "addressPrefixes": [ + # "10.0.0.0/16" + # ] + # }, + # "copy": [ + # { + # "name": "subnets", + # "count": "[parameters('subnet_count')]", + # "input": { + # "name": "[concat(parameters('subnet_prefix'), copyIndex('subnets'))]", + # "properties": { + # "addressPrefix": "[concat('10.0.', copyIndex('subnets'), '.0/24')]" + # } + # } + # } + # ] + # } + # } + # + subnets_copy = network_resource["properties"].get("copy") if network_resource.get("properties") is not None else None + if subnets_copy is None: + raise Exception("Cannot find the copy property of the virtual network in the ARM template") + + subnets = [i for i in subnets_copy if "name" in i and i["name"] == 'subnets'] + if len(subnets) == 0: + raise Exception("Cannot find the subnets of the virtual network in the ARM template") + + subnets_input = subnets[0].get("input") + if subnets_input is None: + raise Exception("Cannot find the input property of the subnets in the ARM template") + + subnets_properties = subnets_input.get("properties") + if subnets_properties is None: + subnets_input["properties"] = nsg_reference + else: + subnets_properties.update(nsg_reference) else: - subnets_properties.update(nsg_reference) + # + # The subnets are simple property of the virtual network in template 
for scale sets: + # { + # "apiVersion": "2023-06-01", + # "type": "Microsoft.Network/virtualNetworks", + # "name": "[variables('virtualNetworkName')]", + # "location": "[resourceGroup().location]", + # "properties": { + # "addressSpace": { + # "addressPrefixes": [ + # "[variables('vnetAddressPrefix')]" + # ] + # }, + # "subnets": [ + # { + # "name": "[variables('subnetName')]", + # "properties": { + # "addressPrefix": "[variables('subnetPrefix')]", + # } + # } + # ] + # } + # } + subnets = network_resource["properties"].get("subnets") if network_resource.get("properties") is not None else None + if subnets is None: + raise Exception("Cannot find the subnets property of the virtual network in the ARM template") + + subnets_properties = subnets[0].get("properties") + if subnets_properties is None: + subnets["properties"] = nsg_reference + else: + subnets_properties.update(nsg_reference) @property def _my_ip_address(self) -> str: diff --git a/tests_e2e/tests/lib/agent_test.py b/tests_e2e/tests/lib/agent_test.py index dcfc4db98e..0021a8d74b 100644 --- a/tests_e2e/tests/lib/agent_test.py +++ b/tests_e2e/tests/lib/agent_test.py @@ -25,7 +25,7 @@ from assertpy import fail from typing import Any, Dict, List -from tests_e2e.tests.lib.agent_test_context import AgentTestContext +from tests_e2e.tests.lib.agent_test_context import AgentTestContext, AgentVmTestContext, AgentVmssTestContext from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.remote_test import FAIL_EXIT_CODE from tests_e2e.tests.lib.shell import CommandError @@ -47,18 +47,21 @@ class RemoteTestError(CommandError): class AgentTest(ABC): """ - Defines the interface for agent tests, which are simply constructed from an AgentTestContext and expose a single method, - run(), to execute the test. 
+ Abstract base class for Agent tests """ def __init__(self, context: AgentTestContext): - self._context = context + self._context: AgentTestContext = context @abstractmethod def run(self): - pass + """ + Test must define this method, which is used to execute the test. + """ def get_ignore_error_rules(self) -> List[Dict[str, Any]]: - # Tests can override this method to return a list with rules to ignore errors in the agent log (see agent_log.py for sample rules). + """ + Tests can override this method to return a list with rules to ignore errors in the agent log (see agent_log.py for sample rules). + """ return [] def get_ignore_errors_before_timestamp(self) -> datetime: @@ -69,10 +72,15 @@ def get_ignore_errors_before_timestamp(self) -> datetime: def run_from_command_line(cls): """ Convenience method to execute the test when it is being invoked directly from the command line (as opposed as - being invoked from a test framework or library. + being invoked from a test framework or library.) """ try: - cls(AgentTestContext.from_args()).run() + if issubclass(cls, AgentVmTest): + cls(AgentVmTestContext.from_args()).run() + elif issubclass(cls, AgentVmssTest): + cls(AgentVmssTestContext.from_args()).run() + else: + raise Exception(f"Class {cls.__name__} is not a valid test class") except SystemExit: # Bad arguments pass except AssertionError as e: @@ -84,12 +92,11 @@ def run_from_command_line(cls): sys.exit(0) - def _run_remote_test(self, command: str, use_sudo: bool = False, attempts: int = ATTEMPTS, attempt_delay: int = ATTEMPT_DELAY) -> None: + def _run_remote_test(self, ssh_client: SshClient, command: str, use_sudo: bool = False, attempts: int = ATTEMPTS, attempt_delay: int = ATTEMPT_DELAY) -> None: """ Derived classes can use this method to execute a remote test (a test that runs over SSH). 
""" try: - ssh_client: SshClient = self._context.create_ssh_client() output = ssh_client.run_command(command=command, use_sudo=use_sudo, attempts=attempts, attempt_delay=attempt_delay) log.info("*** PASSED: [%s]\n%s", command, self._indent(output)) except CommandError as error: @@ -100,3 +107,16 @@ def _run_remote_test(self, command: str, use_sudo: bool = False, attempts: int = @staticmethod def _indent(text: str, indent: str = " " * 8): return "\n".join(f"{indent}{line}" for line in text.splitlines()) + + +class AgentVmTest(AgentTest): + """ + Base class for Agent tests that run on a single VM + """ + + +class AgentVmssTest(AgentTest): + """ + Base class for Agent tests that run on a scale set + """ + diff --git a/tests_e2e/tests/lib/agent_test_context.py b/tests_e2e/tests/lib/agent_test_context.py index e791542894..b818b1298b 100644 --- a/tests_e2e/tests/lib/agent_test_context.py +++ b/tests_e2e/tests/lib/agent_test_context.py @@ -17,161 +17,107 @@ import argparse import os +from abc import ABC from pathlib import Path -import tests_e2e -from tests_e2e.tests.lib.identifiers import VmIdentifier +from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient +from tests_e2e.tests.lib.virtual_machine_scale_set_client import VirtualMachineScaleSetClient from tests_e2e.tests.lib.ssh_client import SshClient -class AgentTestContext: +class AgentTestContext(ABC): """ - Execution context for agent tests. Defines the test VM, working directories and connection info for the tests. - - NOTE: The context is shared by all tests in the same runbook execution. Tests within the same test suite - are executed sequentially, but multiple test suites may be executed concurrently depending on the - concurrency level of the runbook. + Base class for the execution context of agent tests; includes the working directories and SSH info for the tests. 
""" - class Paths: - DEFAULT_TEST_SOURCE_DIRECTORY = Path(tests_e2e.__path__[0]) - - def __init__( - self, - working_directory: Path, - remote_working_directory: Path, - test_source_directory: Path = DEFAULT_TEST_SOURCE_DIRECTORY - ): - self._test_source_directory: Path = test_source_directory - self._working_directory: Path = working_directory - self._remote_working_directory: Path = remote_working_directory - - class Connection: - DEFAULT_SSH_PORT = 22 - - def __init__( - self, - ip_address: str, - username: str, - private_key_file: Path, - ssh_port: int = DEFAULT_SSH_PORT - ): - self._ip_address: str = ip_address - self._username: str = username - self._private_key_file: Path = private_key_file - self._ssh_port: int = ssh_port - - def __init__(self, vm: VmIdentifier, paths: Paths, connection: Connection): - self._vm: VmIdentifier = vm - self._paths = paths - self._connection = connection - - @property - def vm(self) -> VmIdentifier: - """ - The test VM (the VM on which the tested Agent is running) - """ - return self._vm + DEFAULT_SSH_PORT = 22 - @property - def vm_ip_address(self) -> str: - """ - The IP address of the test VM - """ - return self._connection._ip_address + def __init__(self, working_directory: Path, username: str, identity_file: Path, ssh_port: int): + self.working_directory: Path = working_directory + self.username: str = username + self.identity_file: Path = identity_file + self.ssh_port: int = ssh_port - @property - def test_source_directory(self) -> Path: - """ - Root directory for the source code of the tests. Used to build paths to specific scripts. + @staticmethod + def _create_argument_parser() -> argparse.ArgumentParser: """ - return self._paths._test_source_directory - - @property - def working_directory(self) -> Path: + Creates an ArgumentParser that includes the arguments common to the concrete classes derived from AgentTestContext """ - Tests can create temporary files under this directory. 
+ parser = argparse.ArgumentParser() + parser.add_argument('-c', '--cloud', dest="cloud", required=False, choices=['AzureCloud', 'AzureChinaCloud', 'AzureUSGovernment'], default="AzureCloud") + parser.add_argument('-g', '--group', required=True) + parser.add_argument('-l', '--location', required=True) + parser.add_argument('-s', '--subscription', required=True) - """ - return self._paths._working_directory + parser.add_argument('-w', '--working-directory', dest="working_directory", required=False, default=str(Path().home() / "tmp")) - @property - def remote_working_directory(self) -> Path: - """ - Tests can create temporary files under this directory on the test VM. - """ - return self._paths._remote_working_directory + parser.add_argument('-u', '--username', required=False, default=os.getenv("USER")) + parser.add_argument('-k', '--identity-file', dest="identity_file", required=False, default=str(Path.home() / ".ssh" / "id_rsa")) + parser.add_argument('-p', '--ssh-port', dest="ssh_port", required=False, default=AgentTestContext.DEFAULT_SSH_PORT) - @property - def username(self) -> str: - """ - The username to use for SSH connections - """ - return self._connection._username + return parser - @property - def private_key_file(self) -> Path: - """ - The file containing the private SSH key for the username - """ - return self._connection._private_key_file - @property - def ssh_port(self) -> int: - """ - Port for SSH connections - """ - return self._connection._ssh_port +class AgentVmTestContext(AgentTestContext): + """ + Execution context for agent tests targeted to individual VMs. 
+ """ + def __init__(self, working_directory: Path, vm: VirtualMachineClient, ip_address: str, username: str, identity_file: Path, ssh_port: int = AgentTestContext.DEFAULT_SSH_PORT): + super().__init__(working_directory, username, identity_file, ssh_port) + self.vm: VirtualMachineClient = vm + self.ip_address: str = ip_address def create_ssh_client(self) -> SshClient: """ Convenience method to create an SSH client using the connection info from the context. """ return SshClient( - ip_address=self.vm_ip_address, + ip_address=self.ip_address, username=self.username, - private_key_file=self.private_key_file, + identity_file=self.identity_file, port=self.ssh_port) @staticmethod def from_args(): """ - Creates an AgentTestContext from the command line arguments. + Creates an AgentVmTestContext from the command line arguments. """ - parser = argparse.ArgumentParser() - parser.add_argument('-c', '--cloud', dest="cloud", required=False, choices=['AzureCloud', 'AzureChinaCloud', 'AzureUSGovernment'], default="AzureCloud") - parser.add_argument('-g', '--group', required=True) - parser.add_argument('-l', '--location', required=True) - parser.add_argument('-s', '--subscription', required=True) + parser = AgentTestContext._create_argument_parser() parser.add_argument('-vm', '--vm', required=True) + parser.add_argument('-a', '--ip-address', dest="ip_address", required=False) # Use the vm name as default - parser.add_argument('-rw', '--remote-working-directory', dest="remote_working_directory", required=False, default=str(Path('/home')/os.getenv("USER"))) - parser.add_argument('-t', '--test-source-directory', dest="test_source_directory", required=False, default=str(AgentTestContext.Paths.DEFAULT_TEST_SOURCE_DIRECTORY)) - parser.add_argument('-w', '--working-directory', dest="working_directory", required=False, default=str(Path().home()/"tmp")) + args = parser.parse_args() - parser.add_argument('-a', '--ip-address', dest="ip_address", required=False) # Use the vm name as default - 
parser.add_argument('-u', '--username', required=False, default=os.getenv("USER")) - parser.add_argument('-k', '--private-key-file', dest="private_key_file", required=False, default=str(Path.home()/".ssh"/"id_rsa")) - parser.add_argument('-p', '--ssh-port', dest="ssh_port", required=False, default=AgentTestContext.Connection.DEFAULT_SSH_PORT) + working_directory: Path = Path(args.working_directory) + if not working_directory.exists(): + working_directory.mkdir(exist_ok=True) + + vm: VirtualMachineClient = VirtualMachineClient(cloud=args.cloud, location=args.location, subscription=args.subscription, resource_group=args.group, name=args.vm) + ip_address = args.ip_address if args.ip_address is not None else args.vm + return AgentVmTestContext(working_directory=working_directory, vm=vm, ip_address=ip_address, username=args.username, identity_file=Path(args.identity_file), ssh_port=args.ssh_port) + + +class AgentVmssTestContext(AgentTestContext): + """ + Execution context for agent tests targeted to VM Scale Sets. + """ + def __init__(self, working_directory: Path, vmss: VirtualMachineScaleSetClient, username: str, identity_file: Path, ssh_port: int = AgentTestContext.DEFAULT_SSH_PORT): + super().__init__(working_directory, username, identity_file, ssh_port) + self.vmss: VirtualMachineScaleSetClient = vmss + + @staticmethod + def from_args(): + """ + Creates an AgentVmssTestContext from the command line arguments. 
+ """ + parser = AgentTestContext._create_argument_parser() + parser.add_argument('-vmss', '--vmss', required=True) args = parser.parse_args() - working_directory = Path(args.working_directory) + working_directory: Path = Path(args.working_directory) if not working_directory.exists(): working_directory.mkdir(exist_ok=True) - return AgentTestContext( - vm=VmIdentifier( - cloud=args.cloud, - location=args.location, - subscription=args.subscription, - resource_group=args.group, - name=args.vm), - paths=AgentTestContext.Paths( - working_directory=Path(working_directory), - remote_working_directory=Path(args.remote_working_directory), - test_source_directory=Path(args.test_source_directory)), - connection=AgentTestContext.Connection( - ip_address=args.ip_address if args.ip_address is not None else args.vm, - username=args.username, - private_key_file=Path(args.private_key_file), - ssh_port=args.ssh_port)) + vmss = VirtualMachineScaleSetClient(cloud=args.cloud, location=args.location, subscription=args.subscription, resource_group=args.group, name=args.vmss) + return AgentVmssTestContext(working_directory=working_directory, vmss=vmss, username=args.username, identity_file=Path(args.identity_file), ssh_port=args.ssh_port) + diff --git a/tests_e2e/tests/lib/azure_client.py b/tests_e2e/tests/lib/azure_sdk_client.py similarity index 67% rename from tests_e2e/tests/lib/azure_client.py rename to tests_e2e/tests/lib/azure_sdk_client.py index 3e01762e8b..f76d83ca72 100644 --- a/tests_e2e/tests/lib/azure_client.py +++ b/tests_e2e/tests/lib/azure_sdk_client.py @@ -17,18 +17,32 @@ from typing import Any, Callable +from azure.identity import DefaultAzureCredential from azure.core.polling import LROPoller +from tests_e2e.tests.lib.azure_clouds import AZURE_CLOUDS from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.retry import execute_with_retry -class AzureClient: +class AzureSdkClient: """ - Utilities for classes using the Azure SDK. 
+ Base class for classes implementing clients of the Azure SDK. """ _DEFAULT_TIMEOUT = 10 * 60 # (in seconds) + @staticmethod + def create_client(client_type: type, cloud: str, subscription_id: str): + """ + Creates an SDK client of the given 'client_type' + """ + azure_cloud = AZURE_CLOUDS[cloud] + return client_type( + base_url=azure_cloud.endpoints.resource_manager, + credential=DefaultAzureCredential(authority=azure_cloud.endpoints.active_directory), + credential_scopes=[azure_cloud.endpoints.resource_manager + "/.default"], + subscription_id=subscription_id) + @staticmethod def _execute_async_operation(operation: Callable[[], LROPoller], operation_name: str, timeout: int) -> Any: """ @@ -42,3 +56,4 @@ def _execute_async_operation(operation: Callable[[], LROPoller], operation_name: raise TimeoutError(f"[{operation_name}] did not complete within {timeout} seconds") log.info("[%s] completed", operation_name) return poller.result() + diff --git a/tests_e2e/tests/lib/logging.py b/tests_e2e/tests/lib/logging.py index a6cf6566b1..e713dce9d3 100644 --- a/tests_e2e/tests/lib/logging.py +++ b/tests_e2e/tests/lib/logging.py @@ -155,3 +155,18 @@ def set_current_thread_log(log_file: Path): log.close_current_thread_log() if initial_value is not None: log.set_current_thread_log(initial_value) + + +@contextlib.contextmanager +def set_thread_name(name: str): + """ + Context Manager to change the name of the current thread temporarily + """ + initial_name = current_thread().name + current_thread().name = name + try: + yield + finally: + current_thread().name = initial_name + + diff --git a/tests_e2e/tests/lib/resource_group_client.py b/tests_e2e/tests/lib/resource_group_client.py new file mode 100644 index 0000000000..9ca07a2602 --- /dev/null +++ b/tests_e2e/tests/lib/resource_group_client.py @@ -0,0 +1,74 @@ +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this 
file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This module includes facilities to create a resource group and deploy an arm template to it +# +from typing import Dict, Any + +from azure.mgmt.compute import ComputeManagementClient +from azure.mgmt.resource import ResourceManagementClient +from azure.mgmt.resource.resources.models import DeploymentProperties, DeploymentMode + +from tests_e2e.tests.lib.azure_sdk_client import AzureSdkClient +from tests_e2e.tests.lib.logging import log + + +class ResourceGroupClient(AzureSdkClient): + """ + Provides operations on resource groups (create, template deployment, etc). 
+ """ + def __init__(self, cloud: str, subscription: str, name: str, location: str = ""): + super().__init__() + self.cloud: str = cloud + self.location = location + self.subscription: str = subscription + self.name: str = name + self._compute_client = AzureSdkClient.create_client(ComputeManagementClient, cloud, subscription) + self._resource_client = AzureSdkClient.create_client(ResourceManagementClient, cloud, subscription) + + def create(self) -> None: + """ + Creates a resource group + """ + log.info("Creating resource group %s", self) + self._resource_client.resource_groups.create_or_update(self.name, {"location": self.location}) + + def deploy_template(self, template: Dict[str, Any], parameters: Dict[str, Any] = None): + """ + Deploys an ARM template to the resource group + """ + if parameters: + properties = DeploymentProperties(template=template, parameters=parameters, mode=DeploymentMode.incremental) + else: + properties = DeploymentProperties(template=template, mode=DeploymentMode.incremental) + + log.info("Deploying template to resource group %s...", self) + self._execute_async_operation( + operation=lambda: self._resource_client.deployments.begin_create_or_update(self.name, 'TestDeployment', {'properties': properties}), + operation_name=f"Deploy template to resource group {self}", + timeout=AzureSdkClient._DEFAULT_TIMEOUT) + + def delete(self) -> None: + """ + Deletes the resource group + """ + log.info("Deleting resource group %s (no wait)", self) + self._resource_client.resource_groups.begin_delete(self.name) # Do not wait for the deletion to complete + + def __str__(self): + return f"{self.name}" diff --git a/tests_e2e/tests/lib/ssh_client.py b/tests_e2e/tests/lib/ssh_client.py index 3e0d7269c3..ae7600c110 100644 --- a/tests_e2e/tests/lib/ssh_client.py +++ b/tests_e2e/tests/lib/ssh_client.py @@ -28,11 +28,11 @@ class SshClient(object): - def __init__(self, ip_address: str, username: str, private_key_file: Path, port: int = 22): - self._ip_address: 
str = ip_address - self._username: str = username - self._private_key_file: Path = private_key_file - self._port: int = port + def __init__(self, ip_address: str, username: str, identity_file: Path, port: int = 22): + self.ip_address: str = ip_address + self.username: str = username + self.identity_file: Path = identity_file + self.port: int = port def run_command(self, command: str, use_sudo: bool = False, attempts: int = ATTEMPTS, attempt_delay: int = ATTEMPT_DELAY) -> str: """ @@ -42,13 +42,13 @@ def run_command(self, command: str, use_sudo: bool = False, attempts: int = ATTE if re.match(r"^\s*sudo\s*", command): raise Exception("Do not include 'sudo' in the 'command' argument, use the 'use_sudo' parameter instead") - destination = f"ssh://{self._username}@{self._ip_address}:{self._port}" + destination = f"ssh://{self.username}@{self.ip_address}:{self.port}" # Note that we add ~/bin to the remote PATH, since Python (Pypy) and other test tools are installed there. # Note, too, that when using sudo we need to carry over the value of PATH to the sudo session sudo = "sudo env PATH=$PATH PYTHONPATH=$PYTHONPATH" if use_sudo else '' command = [ - "ssh", "-o", "StrictHostKeyChecking=no", "-i", self._private_key_file, + "ssh", "-o", "StrictHostKeyChecking=no", "-i", self.identity_file, destination, f"if [[ -e ~/bin/set-agent-env ]]; then source ~/bin/set-agent-env; fi; {sudo} {command}" ] @@ -79,11 +79,11 @@ def copy_from_node(self, remote_path: Path, local_path: Path, recursive: bool = def _copy(self, source: Path, target: Path, remote_source: bool, remote_target: bool, recursive: bool, attempts: int, attempt_delay: int) -> None: if remote_source: - source = f"{self._username}@{self._ip_address}:{source}" + source = f"{self.username}@{self.ip_address}:{source}" if remote_target: - target = f"{self._username}@{self._ip_address}:{target}" + target = f"{self.username}@{self.ip_address}:{target}" - command = ["scp", "-o", "StrictHostKeyChecking=no", "-i", 
self._private_key_file] + command = ["scp", "-o", "StrictHostKeyChecking=no", "-i", self.identity_file] if recursive: command.append("-r") command.extend([str(source), str(target)]) diff --git a/tests_e2e/tests/lib/update_arm_template.py b/tests_e2e/tests/lib/update_arm_template.py index 9637525f35..c50f7b74c7 100644 --- a/tests_e2e/tests/lib/update_arm_template.py +++ b/tests_e2e/tests/lib/update_arm_template.py @@ -22,12 +22,13 @@ class UpdateArmTemplate(ABC): @abstractmethod - def update(self, template: Dict[str, Any]) -> None: + def update(self, template: Dict[str, Any], is_lisa_template: bool) -> None: """ Derived classes implement this method to customize the ARM template used to create the test VMs. The 'template' parameter is a dictionary created from the template's JSON document, as parsed by json.loads(). - The original JSON document is currently at https://github.com/microsoft/lisa/blob/main/lisa/sut_orchestrator/azure/arm_template.json + If the 'is_lisa_template' parameter is True, the template was created by LISA. 
The original JSON document is located at + https://github.com/microsoft/lisa/blob/main/lisa/sut_orchestrator/azure/arm_template.json """ @staticmethod diff --git a/tests_e2e/tests/lib/virtual_machine_client.py b/tests_e2e/tests/lib/virtual_machine_client.py index dd739fe535..37dcfaef13 100644 --- a/tests_e2e/tests/lib/virtual_machine_client.py +++ b/tests_e2e/tests/lib/virtual_machine_client.py @@ -24,59 +24,65 @@ import time from typing import Any, Dict, List -from azure.identity import DefaultAzureCredential from azure.mgmt.compute import ComputeManagementClient from azure.mgmt.compute.models import VirtualMachineExtension, VirtualMachineInstanceView, VirtualMachine +from azure.mgmt.network import NetworkManagementClient +from azure.mgmt.network.models import NetworkInterface, PublicIPAddress from azure.mgmt.resource import ResourceManagementClient -from msrestazure.azure_cloud import Cloud -from tests_e2e.tests.lib.azure_clouds import AZURE_CLOUDS -from tests_e2e.tests.lib.azure_client import AzureClient -from tests_e2e.tests.lib.identifiers import VmIdentifier +from tests_e2e.tests.lib.azure_sdk_client import AzureSdkClient from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.retry import execute_with_retry from tests_e2e.tests.lib.shell import CommandError from tests_e2e.tests.lib.ssh_client import SshClient -class VirtualMachineClient(AzureClient): +class VirtualMachineClient(AzureSdkClient): """ - Provides operations on virtual machine (get instance view, update, restart, etc). + Provides operations on virtual machines (get instance view, update, restart, etc). 
""" - def __init__(self, vm: VmIdentifier): + def __init__(self, cloud: str, location: str, subscription: str, resource_group: str, name: str): super().__init__() - self._identifier: VmIdentifier = vm - cloud: Cloud = AZURE_CLOUDS[vm.cloud] - credential: DefaultAzureCredential = DefaultAzureCredential(authority=cloud.endpoints.active_directory) - self._compute_client = ComputeManagementClient( - credential=credential, - subscription_id=vm.subscription, - base_url=cloud.endpoints.resource_manager, - credential_scopes=[cloud.endpoints.resource_manager + "/.default"]) - self._resource_client = ResourceManagementClient( - credential=credential, - subscription_id=vm.subscription, - base_url=cloud.endpoints.resource_manager, - credential_scopes=[cloud.endpoints.resource_manager + "/.default"]) + self.cloud: str = cloud + self.location = location + self.subscription: str = subscription + self.resource_group: str = resource_group + self.name: str = name + self._compute_client = AzureSdkClient.create_client(ComputeManagementClient, cloud, subscription) + self._resource_client = AzureSdkClient.create_client(ResourceManagementClient, cloud, subscription) + self._network_client = AzureSdkClient.create_client(NetworkManagementClient, cloud, subscription) + + def get_ip_address(self) -> str: + """ + Retrieves the public IP address of the virtual machine + """ + vm_model = self.get_model() + nic: NetworkInterface = self._network_client.network_interfaces.get( + resource_group_name=self.resource_group, + network_interface_name=vm_model.network_profile.network_interfaces[0].id.split('/')[-1]) # the name of the interface is the last component of the id + public_ip: PublicIPAddress = self._network_client.public_ip_addresses.get( + resource_group_name=self.resource_group, + public_ip_address_name=nic.ip_configurations[0].public_ip_address.id.split('/')[-1]) # the name of the ip address is the last component of the id + return public_ip.ip_address def get_model(self) -> VirtualMachine: 
""" Retrieves the model of the virtual machine. """ - log.info("Retrieving VM model for %s", self._identifier) + log.info("Retrieving VM model for %s", self) return execute_with_retry( lambda: self._compute_client.virtual_machines.get( - resource_group_name=self._identifier.resource_group, - vm_name=self._identifier.name)) + resource_group_name=self.resource_group, + vm_name=self.name)) def get_instance_view(self) -> VirtualMachineInstanceView: """ Retrieves the instance view of the virtual machine """ - log.info("Retrieving instance view for %s", self._identifier) + log.info("Retrieving instance view for %s", self) return execute_with_retry(lambda: self._compute_client.virtual_machines.get( - resource_group_name=self._identifier.resource_group, - vm_name=self._identifier.name, + resource_group_name=self.resource_group, + vm_name=self.name, expand="instanceView" ).instance_view) @@ -84,37 +90,37 @@ def get_extensions(self) -> List[VirtualMachineExtension]: """ Retrieves the extensions installed on the virtual machine """ - log.info("Retrieving extensions for %s", self._identifier) + log.info("Retrieving extensions for %s", self) return execute_with_retry( lambda: self._compute_client.virtual_machine_extensions.list( - resource_group_name=self._identifier.resource_group, - vm_name=self._identifier.name)) + resource_group_name=self.resource_group, + vm_name=self.name)) - def update(self, properties: Dict[str, Any], timeout: int = AzureClient._DEFAULT_TIMEOUT) -> None: + def update(self, properties: Dict[str, Any], timeout: int = AzureSdkClient._DEFAULT_TIMEOUT) -> None: """ Updates a set of properties on the virtual machine """ # location is a required by begin_create_or_update, always add it properties_copy = properties.copy() - properties_copy["location"] = self._identifier.location + properties_copy["location"] = self.location - log.info("Updating %s with properties: %s", self._identifier, properties_copy) + log.info("Updating %s with properties: %s", self, 
properties_copy) self._execute_async_operation( lambda: self._compute_client.virtual_machines.begin_create_or_update( - self._identifier.resource_group, - self._identifier.name, + self.resource_group, + self.name, properties_copy), - operation_name=f"Update {self._identifier}", + operation_name=f"Update {self}", timeout=timeout) - def reapply(self, timeout: int = AzureClient._DEFAULT_TIMEOUT) -> None: + def reapply(self, timeout: int = AzureSdkClient._DEFAULT_TIMEOUT) -> None: """ Reapplies the goal state on the virtual machine """ self._execute_async_operation( - lambda: self._compute_client.virtual_machines.begin_reapply(self._identifier.resource_group, self._identifier.name), - operation_name=f"Reapply {self._identifier}", + lambda: self._compute_client.virtual_machines.begin_reapply(self.resource_group, self.name), + operation_name=f"Reapply {self}", timeout=timeout) def restart( @@ -122,7 +128,7 @@ def restart( wait_for_boot, ssh_client: SshClient = None, boot_timeout: datetime.timedelta = datetime.timedelta(minutes=5), - timeout: int = AzureClient._DEFAULT_TIMEOUT) -> None: + timeout: int = AzureSdkClient._DEFAULT_TIMEOUT) -> None: """ Restarts (reboots) the virtual machine. 
@@ -138,9 +144,9 @@ def restart( self._execute_async_operation( lambda: self._compute_client.virtual_machines.begin_restart( - resource_group_name=self._identifier.resource_group, - vm_name=self._identifier.name), - operation_name=f"Restart {self._identifier}", + resource_group_name=self.resource_group, + vm_name=self.name), + operation_name=f"Restart {self}", timeout=timeout) if not wait_for_boot: @@ -148,7 +154,7 @@ def restart( start = datetime.datetime.utcnow() while datetime.datetime.utcnow() < start + boot_timeout: - log.info("Waiting for VM %s to boot", self._identifier) + log.info("Waiting for VM %s to boot", self) time.sleep(15) # Note that we always sleep at least 1 time, to give the reboot time to start instance_view = self.get_instance_view() power_state = [s.code for s in instance_view.statuses if "PowerState" in s.code] @@ -164,19 +170,15 @@ def restart( log.info("Uptime: %s", uptime) boot_time = datetime.datetime.utcnow() - datetime.timedelta(seconds=float(uptime)) if boot_time > before_restart: - log.info("VM %s completed boot and is running. Boot time: %s", self._identifier, boot_time) + log.info("VM %s completed boot and is running. Boot time: %s", self, boot_time) return log.info("The VM has not rebooted yet. Restart time: %s. 
Boot time: %s", before_restart, boot_time) except CommandError as e: if e.exit_code == 255 and "Connection refused" in str(e): - log.info("VM %s is not yet accepting SSH connections", self._identifier) + log.info("VM %s is not yet accepting SSH connections", self) else: raise - raise Exception(f"VM {self._identifier} did not boot after {boot_timeout}") + raise Exception(f"VM {self} did not boot after {boot_timeout}") def __str__(self): - return f"{self._identifier}" - - - - + return f"{self.resource_group}:{self.name}" diff --git a/tests_e2e/tests/lib/virtual_machine_extension_client.py b/tests_e2e/tests/lib/virtual_machine_extension_client.py index 6697d594a6..699ed7cb4a 100644 --- a/tests_e2e/tests/lib/virtual_machine_extension_client.py +++ b/tests_e2e/tests/lib/virtual_machine_extension_client.py @@ -26,32 +26,24 @@ from azure.mgmt.compute import ComputeManagementClient from azure.mgmt.compute.models import VirtualMachineExtension, VirtualMachineExtensionInstanceView -from azure.identity import DefaultAzureCredential -from msrestazure.azure_cloud import Cloud -from tests_e2e.tests.lib.azure_clouds import AZURE_CLOUDS -from tests_e2e.tests.lib.azure_client import AzureClient -from tests_e2e.tests.lib.identifiers import VmIdentifier, VmExtensionIdentifier +from tests_e2e.tests.lib.azure_sdk_client import AzureSdkClient +from tests_e2e.tests.lib.vm_extension_identifier import VmExtensionIdentifier from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.retry import execute_with_retry +from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient -class VirtualMachineExtensionClient(AzureClient): +class VirtualMachineExtensionClient(AzureSdkClient): """ Client for operations virtual machine extensions. 
""" - def __init__(self, vm: VmIdentifier, extension: VmExtensionIdentifier, resource_name: str = None): + def __init__(self, vm: VirtualMachineClient, extension: VmExtensionIdentifier, resource_name: str = None): super().__init__() - self._vm: VmIdentifier = vm + self._vm: VirtualMachineClient = vm self._identifier = extension self._resource_name = resource_name or extension.type - cloud: Cloud = AZURE_CLOUDS[vm.cloud] - credential: DefaultAzureCredential = DefaultAzureCredential(authority=cloud.endpoints.active_directory) - self._compute_client: ComputeManagementClient = ComputeManagementClient( - credential=credential, - subscription_id=vm.subscription, - base_url=cloud.endpoints.resource_manager, - credential_scopes=[cloud.endpoints.resource_manager + "/.default"]) + self._compute_client: ComputeManagementClient = AzureSdkClient.create_client(ComputeManagementClient, self._vm.cloud, self._vm.subscription) def get_instance_view(self) -> VirtualMachineExtensionInstanceView: """ @@ -73,7 +65,7 @@ def enable( auto_upgrade_minor_version: bool = True, force_update: bool = False, force_update_tag: str = None, - timeout: int = AzureClient._DEFAULT_TIMEOUT + timeout: int = AzureSdkClient._DEFAULT_TIMEOUT ) -> None: """ Performs an enable operation on the extension. 
@@ -116,7 +108,7 @@ def enable( log.info("Provisioning state: %s", result.provisioning_state) - def delete(self, timeout: int = AzureClient._DEFAULT_TIMEOUT) -> None: + def delete(self, timeout: int = AzureSdkClient._DEFAULT_TIMEOUT) -> None: """ Performs a delete operation on the extension """ diff --git a/tests_e2e/tests/lib/virtual_machine_scale_set_client.py b/tests_e2e/tests/lib/virtual_machine_scale_set_client.py new file mode 100644 index 0000000000..92738576ca --- /dev/null +++ b/tests_e2e/tests/lib/virtual_machine_scale_set_client.py @@ -0,0 +1,107 @@ +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This module includes facilities to execute operations on virtual machines scale sets (list instances, delete, etc). 
+# + +import re + +from typing import List + +from azure.mgmt.compute import ComputeManagementClient +from azure.mgmt.compute.models import VirtualMachineScaleSetVM, VirtualMachineScaleSetInstanceView +from azure.mgmt.network import NetworkManagementClient + +from tests_e2e.tests.lib.azure_sdk_client import AzureSdkClient +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.retry import execute_with_retry + + +class VmssInstanceIpAddress(object): + """ + IP address of a virtual machine scale set instance + """ + def __init__(self, instance_name: str, ip_address: str): + self.instance_name: str = instance_name + self.ip_address: str = ip_address + + def __str__(self): + return f"{self.instance_name}:{self.ip_address}" + + +class VirtualMachineScaleSetClient(AzureSdkClient): + """ + Provides operations on virtual machine scale sets. + """ + def __init__(self, cloud: str, location: str, subscription: str, resource_group: str, name: str): + super().__init__() + self.cloud: str = cloud + self.location = location + self.subscription: str = subscription + self.resource_group: str = resource_group + self.name: str = name + self._compute_client = AzureSdkClient.create_client(ComputeManagementClient, cloud, subscription) + self._network_client = AzureSdkClient.create_client(NetworkManagementClient, cloud, subscription) + + def list_vms(self) -> List[VirtualMachineScaleSetVM]: + """ + Returns the VM instances of the virtual machine scale set + """ + log.info("Retrieving instances of scale set %s", self) + return list(self._compute_client.virtual_machine_scale_set_vms.list(resource_group_name=self.resource_group, virtual_machine_scale_set_name=self.name)) + + def get_instances_ip_address(self) -> List[VmssInstanceIpAddress]: + """ + Returns a list containing the IP addresses of scale set instances + """ + log.info("Retrieving IP addresses of scale set %s", self) + ip_addresses = 
self._network_client.public_ip_addresses.list_virtual_machine_scale_set_public_ip_addresses(resource_group_name=self.resource_group, virtual_machine_scale_set_name=self.name)
+        ip_addresses = list(ip_addresses)
+
+        def parse_instance(resource_id: str) -> str:
+            # the resource_id looks like /subscriptions/{subs}}/resourceGroups/{rg}/providers/Microsoft.Compute/virtualMachineScaleSets/{vmss}/virtualMachines/{instance}/networkInterfaces/{netiace}/ipConfigurations/ipconfig1/publicIPAddresses/{name}
+            match = re.search(r'virtualMachines/(?P<instance>[0-9])/networkInterfaces', resource_id)
+            if match is None:
+                raise Exception(f"Unable to parse instance from IP address ID:{resource_id}")
+            return match.group('instance')
+
+        return [VmssInstanceIpAddress(instance_name=f"{self.name}_{parse_instance(a.id)}", ip_address=a.ip_address) for a in ip_addresses if a.ip_address is not None]
+
+    def delete_extension(self, extension: str, timeout: int = AzureSdkClient._DEFAULT_TIMEOUT) -> None:
+        """
+        Deletes the given operation
+        """
+        log.info("Deleting extension %s from %s", extension, self)
+        self._execute_async_operation(
+            operation=lambda: self._compute_client.virtual_machine_scale_set_extensions.begin_delete(resource_group_name=self.resource_group, vm_scale_set_name=self.name, vmss_extension_name=extension),
+            operation_name=f"Delete {extension} from {self}",
+            timeout=timeout)
+
+    def get_instance_view(self) -> VirtualMachineScaleSetInstanceView:
+        """
+        Retrieves the instance view of the virtual machine
+        """
+        log.info("Retrieving instance view for %s", self)
+        return execute_with_retry(lambda: self._compute_client.virtual_machine_scale_sets.get_instance_view(
+            resource_group_name=self.resource_group,
+            vm_scale_set_name=self.name
+        ))
+
+    def __str__(self):
+        return f"{self.resource_group}:{self.name}"
+
diff --git a/tests_e2e/tests/lib/identifiers.py b/tests_e2e/tests/lib/vm_extension_identifier.py
similarity index 79%
rename from tests_e2e/tests/lib/identifiers.py
rename to 
tests_e2e/tests/lib/vm_extension_identifier.py index 45af22745f..26113e445c 100644 --- a/tests_e2e/tests/lib/identifiers.py +++ b/tests_e2e/tests/lib/vm_extension_identifier.py @@ -18,31 +18,16 @@ from typing import Dict, List -class VmIdentifier(object): - def __init__(self, cloud: str, location: str, subscription: str, resource_group: str, name: str): - """ - Represents the information that identifies a VM to the ARM APIs - """ - self.cloud: str = cloud - self.location = location - self.subscription: str = subscription - self.resource_group: str = resource_group - self.name: str = name - - def __str__(self): - return f"{self.resource_group}:{self.name}" - - class VmExtensionIdentifier(object): - def __init__(self, publisher: str, ext_type: str, version: str): - """ - Represents the information that identifies an extension to the ARM APIs + """ + Represents the information that identifies an extension to the ARM APIs - publisher - e.g. Microsoft.Azure.Extensions - type - e.g. CustomScript - version - e.g. 2.1, 2.* - name - arbitrary name for the extension ARM resource - """ + publisher - e.g. Microsoft.Azure.Extensions + type - e.g. CustomScript + version - e.g. 
2.1, 2.* + name - arbitrary name for the extension ARM resource + """ + def __init__(self, publisher: str, ext_type: str, version: str): self.publisher: str = publisher self.type: str = ext_type self.version: str = version diff --git a/tests_e2e/tests/multi_config_ext/multi_config_ext.py b/tests_e2e/tests/multi_config_ext/multi_config_ext.py index a42ca8900c..4df75fd2be 100644 --- a/tests_e2e/tests/multi_config_ext/multi_config_ext.py +++ b/tests_e2e/tests/multi_config_ext/multi_config_ext.py @@ -27,14 +27,14 @@ from assertpy import fail from azure.mgmt.compute.models import VirtualMachineInstanceView -from tests_e2e.tests.lib.agent_test import AgentTest -from tests_e2e.tests.lib.identifiers import VmExtensionIds +from tests_e2e.tests.lib.agent_test import AgentVmTest +from tests_e2e.tests.lib.vm_extension_identifier import VmExtensionIds from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient -class MultiConfigExt(AgentTest): +class MultiConfigExt(AgentVmTest): class TestCase: def __init__(self, extension: VirtualMachineExtensionClient, get_settings: Callable[[str], Dict[str, str]]): self.extension = extension @@ -68,8 +68,16 @@ def delete_extensions(self, test_cases: Dict[str, TestCase]): test_case.extension.delete() log.info("") - vm: VirtualMachineClient = VirtualMachineClient(self._context.vm) + + vm: VirtualMachineClient = VirtualMachineClient( + cloud=self._context.vm.cloud, + location=self._context.vm.location, + subscription=self._context.vm.subscription, + resource_group=self._context.vm.resource_group, + name=self._context.vm.name) + instance_view: VirtualMachineInstanceView = vm.get_instance_view() + if instance_view.extensions is not None: for ext in instance_view.extensions: if ext.name in test_cases.keys(): diff --git a/tests_e2e/tests/no_outbound_connections/check_fallback_to_hgap.py 
b/tests_e2e/tests/no_outbound_connections/check_fallback_to_hgap.py index b767dc93cd..48827dbe14 100755 --- a/tests_e2e/tests/no_outbound_connections/check_fallback_to_hgap.py +++ b/tests_e2e/tests/no_outbound_connections/check_fallback_to_hgap.py @@ -18,12 +18,12 @@ # from assertpy import assert_that -from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test import AgentVmTest from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.ssh_client import SshClient -class NoOutboundConnections(AgentTest): +class CheckFallbackToHGAP(AgentVmTest): """ Check the agent log to verify that the default channel was changed to HostGAPlugin before executing any extensions. """ @@ -47,5 +47,5 @@ def run(self): if __name__ == "__main__": - NoOutboundConnections.run_from_command_line() + CheckFallbackToHGAP.run_from_command_line() diff --git a/tests_e2e/tests/no_outbound_connections/check_no_outbound_connections.py b/tests_e2e/tests/no_outbound_connections/check_no_outbound_connections.py index 66cc707d26..985e77b70f 100755 --- a/tests_e2e/tests/no_outbound_connections/check_no_outbound_connections.py +++ b/tests_e2e/tests/no_outbound_connections/check_no_outbound_connections.py @@ -18,13 +18,13 @@ # from assertpy import fail -from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test import AgentVmTest from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.shell import CommandError from tests_e2e.tests.lib.ssh_client import SshClient -class CheckNoOutboundConnections(AgentTest): +class CheckNoOutboundConnections(AgentVmTest): """ Verifies that there is no outbound connectivity on the test VM. 
""" diff --git a/tests_e2e/tests/no_outbound_connections/deny_outbound_connections.py b/tests_e2e/tests/no_outbound_connections/deny_outbound_connections.py index 114999d5b4..838082d345 100755 --- a/tests_e2e/tests/no_outbound_connections/deny_outbound_connections.py +++ b/tests_e2e/tests/no_outbound_connections/deny_outbound_connections.py @@ -29,7 +29,10 @@ class DenyOutboundConnections(UpdateArmTemplate): """ Updates the ARM template to add a security rule that denies all outbound connections. """ - def update(self, template: Dict[str, Any]) -> None: + def update(self, template: Dict[str, Any], is_lisa_template: bool) -> None: + if not is_lisa_template: + raise Exception('This test can only customize LISA ARM templates.') + resources = template["resources"] nsg = self._get_resource_by_name(resources, NETWORK_SECURITY_GROUP, "Microsoft.Network/networkSecurityGroups") properties = nsg.get("properties") diff --git a/tests_e2e/tests/samples/error_remote_test.py b/tests_e2e/tests/samples/error_remote_test.py index 29612f4246..6b52e46cd0 100755 --- a/tests_e2e/tests/samples/error_remote_test.py +++ b/tests_e2e/tests/samples/error_remote_test.py @@ -17,15 +17,15 @@ # limitations under the License. # -from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test import AgentVmTest -class ErrorRemoteTest(AgentTest): +class ErrorRemoteTest(AgentVmTest): """ A trivial remote test that fails """ def run(self): - self._run_remote_test("samples-error_remote_test.py") + self._run_remote_test(self._context.create_ssh_client(), "samples-error_remote_test.py") if __name__ == "__main__": diff --git a/tests_e2e/tests/samples/error_test.py b/tests_e2e/tests/samples/error_test.py index 4c24080687..e2d584c6e1 100755 --- a/tests_e2e/tests/samples/error_test.py +++ b/tests_e2e/tests/samples/error_test.py @@ -17,10 +17,10 @@ # limitations under the License. 
# -from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test import AgentVmTest -class ErrorTest(AgentTest): +class ErrorTest(AgentVmTest): """ A trivial test that errors out """ diff --git a/tests_e2e/tests/samples/fail_remote_test.py b/tests_e2e/tests/samples/fail_remote_test.py index f0a50495ad..7a05b67a99 100755 --- a/tests_e2e/tests/samples/fail_remote_test.py +++ b/tests_e2e/tests/samples/fail_remote_test.py @@ -17,15 +17,15 @@ # limitations under the License. # -from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test import AgentVmTest -class FailRemoteTest(AgentTest): +class FailRemoteTest(AgentVmTest): """ A trivial remote test that fails """ def run(self): - self._run_remote_test("samples-fail_remote_test.py") + self._run_remote_test(self._context.create_ssh_client(), "samples-fail_remote_test.py") if __name__ == "__main__": diff --git a/tests_e2e/tests/samples/fail_test.py b/tests_e2e/tests/samples/fail_test.py index fcebd99183..dfdecb52fb 100755 --- a/tests_e2e/tests/samples/fail_test.py +++ b/tests_e2e/tests/samples/fail_test.py @@ -18,10 +18,10 @@ # from assertpy import fail -from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test import AgentVmTest -class FailTest(AgentTest): +class FailTest(AgentVmTest): """ A trivial test that fails """ diff --git a/tests_e2e/tests/samples/pass_remote_test.py b/tests_e2e/tests/samples/pass_remote_test.py index 94e0cb604c..609ef4d4c7 100755 --- a/tests_e2e/tests/samples/pass_remote_test.py +++ b/tests_e2e/tests/samples/pass_remote_test.py @@ -17,15 +17,15 @@ # limitations under the License. 
# -from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test import AgentVmTest -class PassRemoteTest(AgentTest): +class PassRemoteTest(AgentVmTest): """ A trivial remote test that succeeds """ def run(self): - self._run_remote_test("samples-pass_remote_test.py") + self._run_remote_test(self._context.create_ssh_client(), "samples-pass_remote_test.py") if __name__ == "__main__": diff --git a/tests_e2e/tests/samples/pass_test.py b/tests_e2e/tests/samples/pass_test.py index 580db2dc08..d7c85a3552 100755 --- a/tests_e2e/tests/samples/pass_test.py +++ b/tests_e2e/tests/samples/pass_test.py @@ -17,11 +17,11 @@ # limitations under the License. # -from tests_e2e.tests.lib.agent_test import AgentTest +from tests_e2e.tests.lib.agent_test import AgentVmTest from tests_e2e.tests.lib.logging import log -class PassTest(AgentTest): +class PassTest(AgentVmTest): """ A trivial test that passes. """ diff --git a/tests_e2e/tests/samples/vmss_test.py b/tests_e2e/tests/samples/vmss_test.py new file mode 100755 index 0000000000..0f50dad8f4 --- /dev/null +++ b/tests_e2e/tests/samples/vmss_test.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from tests_e2e.tests.lib.agent_test import AgentVmssTest +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.ssh_client import SshClient + + +class VmssTest(AgentVmssTest): + """ + Sample test for scale sets + """ + def run(self): + for address in self._context.vmss.get_instances_ip_address(): + ssh_client: SshClient = SshClient(ip_address=address.ip_address, username=self._context.username, identity_file=self._context.identity_file) + log.info("%s: Hostname: %s", address.instance_name, ssh_client.run_command("hostname").strip()) + log.info("* PASSED *") + + +if __name__ == "__main__": + VmssTest.run_from_command_line() From 9dbcecabcffa1ab9a334d1ec3fe9842f142ed24f Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Mon, 30 Oct 2023 13:28:44 -0700 Subject: [PATCH 088/240] Ignore dependencies when the extension does not have any settings (#2957) (#2962) * Ignore dependencies when the extension does not have any settings * Remove message --------- Co-authored-by: narrieta (cherry picked from commit 79bc12c8ca9f8aaacfb44a070812afe31123a600) --- azurelinuxagent/common/event.py | 1 + .../extensions_goal_state_from_vm_settings.py | 24 ++++++++++++++++--- azurelinuxagent/ga/agent_update_handler.py | 3 --- 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/azurelinuxagent/common/event.py b/azurelinuxagent/common/event.py index 514c727fff..fe313968fe 100644 --- a/azurelinuxagent/common/event.py +++ b/azurelinuxagent/common/event.py @@ -109,6 +109,7 @@ class WALAEventOperation: OpenSsl = "OpenSsl" Partition = "Partition" PersistFirewallRules = "PersistFirewallRules" + ProvisionAfterExtensions = "ProvisionAfterExtensions" PluginSettingsVersionMismatch = "PluginSettingsVersionMismatch" InvalidExtensionConfig = "InvalidExtensionConfig" Provision = "Provision" diff --git a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py index 
f6496bfd38..664d1e868f 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py @@ -22,6 +22,7 @@ from azurelinuxagent.common import logger from azurelinuxagent.common.AgentGlobals import AgentGlobals +from azurelinuxagent.common.event import WALAEventOperation, add_event from azurelinuxagent.common.future import ustr from azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState, GoalStateChannel, VmSettingsParseError from azurelinuxagent.common.protocol.restapi import VMAgentFamily, Extension, ExtensionRequestedState, ExtensionSettings @@ -492,11 +493,28 @@ def _parse_dependency_level(depends_on, extension): length = len(depends_on) if length > 1: raise Exception('dependsOn should be an array with exactly one item for single-config extensions ({0}) (got {1})'.format(extension.name, depends_on)) - elif length == 0: + if length == 0: logger.warn('dependsOn is an empty array for extension {0}; setting the dependency level to 0'.format(extension.name)) - extension.settings[0].dependencyLevel = 0 + dependency_level = 0 else: - extension.settings[0].dependencyLevel = depends_on[0]['dependencyLevel'] + dependency_level = depends_on[0]['dependencyLevel'] + depends_on_extension = depends_on[0].get('dependsOnExtension') + if depends_on_extension is None: + # TODO: Consider removing this check and its telemetry after a few releases if we do not receive any telemetry indicating + # that dependsOnExtension is actually missing from the vmSettings + message = 'Missing dependsOnExtension on extension {0}'.format(extension.name) + logger.warn(message) + add_event(WALAEventOperation.ProvisionAfterExtensions, message=message, is_success=False, log_event=False) + else: + message = '{0} depends on {1}'.format(extension.name, depends_on_extension) + logger.info(message) + add_event(WALAEventOperation.ProvisionAfterExtensions, message=message, 
is_success=True, log_event=False) + if len(extension.settings) == 0: + message = 'Extension {0} does not have any settings. Will ignore dependency (dependency level: {1})'.format(extension.name, dependency_level) + logger.warn(message) + add_event(WALAEventOperation.ProvisionAfterExtensions, message=message, is_success=False, log_event=False) + else: + extension.settings[0].dependencyLevel = dependency_level else: # multi-config settings_by_name = {} diff --git a/azurelinuxagent/ga/agent_update_handler.py b/azurelinuxagent/ga/agent_update_handler.py index a8390c1c7d..5ec44fab51 100644 --- a/azurelinuxagent/ga/agent_update_handler.py +++ b/azurelinuxagent/ga/agent_update_handler.py @@ -268,9 +268,6 @@ def __check_if_downgrade_is_requested_and_allowed(self, requested_version): """ if not self._is_requested_version_update: if requested_version < CURRENT_VERSION: - msg = "Downgrade requested in the GoalState, but downgrades are not supported for self-update version:{0}, " \ - "skipping agent update".format(requested_version) - self.__log_event(LogLevel.INFO, msg) return False return True From 22e9dfe037f24785b6a8af0144ede2c8157f6080 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Mon, 30 Oct 2023 16:37:41 -0700 Subject: [PATCH 089/240] Cache daemon version (#2942) (#2963) * cache daemon version * address comments * test update (cherry picked from commit 279d55725c44550da610d7e29b0e38bbdcf9fab0) --- azurelinuxagent/common/version.py | 12 +++++++----- azurelinuxagent/ga/agent_update_handler.py | 13 +++++++++++-- tests/common/test_version.py | 10 +++++++--- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/azurelinuxagent/common/version.py b/azurelinuxagent/common/version.py index c056425432..6450c4cab1 100644 --- a/azurelinuxagent/common/version.py +++ b/azurelinuxagent/common/version.py @@ -56,14 +56,16 @@ def get_daemon_version(): else: # The agent process which execute the extensions can have 
different version(after upgrades) and importing version from that process may provide wrong version for daemon. # so launching new process with sys.executable python provides the correct version for daemon which preinstalled in the image. + daemon_version = "0.0.0.0" try: cmd = ["{0}".format(sys.executable), "-c", "from azurelinuxagent.common.version import AGENT_VERSION; print(AGENT_VERSION)"] - version = shellutil.run_command(cmd) - return FlexibleVersion(version) - except Exception as e: # Make the best effort to get the daemon version, but don't fail the update if we can't. So default to 2.2.53 as env variable is not set < 2.2.53 + daemon_version = shellutil.run_command(cmd) + except Exception as e: # Make the best effort to get the daemon version, otherwise default to 0.0.0.0(unknown) logger.warn("Failed to get the daemon version: {0}", ustr(e)) - return FlexibleVersion("2.2.53") - + # set the daemon version to the environment variable to cache it for future calls. + set_daemon_version(daemon_version) + return FlexibleVersion(os.environ[__DAEMON_VERSION_ENV_VARIABLE]) + def get_f5_platform(): """ diff --git a/azurelinuxagent/ga/agent_update_handler.py b/azurelinuxagent/ga/agent_update_handler.py index 5ec44fab51..d311a5c43e 100644 --- a/azurelinuxagent/ga/agent_update_handler.py +++ b/azurelinuxagent/ga/agent_update_handler.py @@ -9,7 +9,7 @@ from azurelinuxagent.common.future import ustr from azurelinuxagent.common.logger import LogLevel from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateSource -from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatuses, VMAgentUpdateStatus +from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatuses, VMAgentUpdateStatus, VERSION_0 from azurelinuxagent.common.utils import fileutil, textutil from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.version import get_daemon_version, CURRENT_VERSION, AGENT_NAME, AGENT_DIR_PATTERN @@ 
-113,6 +113,15 @@ def __get_agent_upgrade_type(requested_version): return AgentUpgradeType.Hotfix return AgentUpgradeType.Normal + @staticmethod + def __get_daemon_version_for_update(): + daemon_version = get_daemon_version() + if daemon_version != FlexibleVersion(VERSION_0): + return daemon_version + # We return 0.0.0.0 if we failed to retrieve daemon version. In that case, + # use the min version as 2.2.53 as we started setting the daemon version starting 2.2.53. + return FlexibleVersion("2.2.53") + def __get_next_upgrade_times(self, now): """ Get the next upgrade times @@ -326,7 +335,7 @@ def run(self, goal_state): if not self.__check_if_downgrade_is_requested_and_allowed(requested_version): return - daemon_version = get_daemon_version() + daemon_version = self.__get_daemon_version_for_update() if requested_version < daemon_version: # Don't process the update if the requested version is less than daemon version, # as historically we don't support downgrades below daemon versions. So daemon will not pickup that requested version rather start with diff --git a/tests/common/test_version.py b/tests/common/test_version.py index bdc07c0046..89156f65c1 100644 --- a/tests/common/test_version.py +++ b/tests/common/test_version.py @@ -137,11 +137,15 @@ def test_get_daemon_version_should_return_the_version_that_was_previously_set(se os.environ.pop(DAEMON_VERSION_ENV_VARIABLE) def test_get_daemon_version_from_fallback_when_the_version_has_not_been_set(self): - with patch("azurelinuxagent.common.utils.shellutil.run_command", return_value=FlexibleVersion("2.2.53")): + with patch("azurelinuxagent.common.utils.shellutil.run_command", return_value="2.3.53") as mock_run_command: self.assertEqual( - FlexibleVersion("2.2.53"), get_daemon_version(), - "The daemon version should not be defined. Environment={0}".format(os.environ) + FlexibleVersion("2.3.53"), get_daemon_version(), + "The daemon version should be defined. 
Environment={0}".format(os.environ) ) + self.assertEqual(FlexibleVersion("2.3.53"), get_daemon_version(), "The daemon version should be 2.3.53") + self.assertEqual(1, mock_run_command.call_count, "The daemon version should be read from env value on second time") + + os.environ.pop(DAEMON_VERSION_ENV_VARIABLE) class TestCurrentAgentName(AgentTestCase): From cb548d29dcaac56b2413dca209e042d364d26ff6 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Mon, 30 Oct 2023 16:50:41 -0700 Subject: [PATCH 090/240] update warning message (#2946) (#2964) (cherry picked from commit 33552eecc277a44a875f862bb3ae6a6f40334c49) --- azurelinuxagent/common/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/common/version.py b/azurelinuxagent/common/version.py index 6450c4cab1..f85732da77 100644 --- a/azurelinuxagent/common/version.py +++ b/azurelinuxagent/common/version.py @@ -61,7 +61,7 @@ def get_daemon_version(): cmd = ["{0}".format(sys.executable), "-c", "from azurelinuxagent.common.version import AGENT_VERSION; print(AGENT_VERSION)"] daemon_version = shellutil.run_command(cmd) except Exception as e: # Make the best effort to get the daemon version, otherwise default to 0.0.0.0(unknown) - logger.warn("Failed to get the daemon version: {0}", ustr(e)) + logger.info("Failed to get the daemon version. The error is: {0} \n[This error can be ignored since it has no impact on customer. So we return as unknown version: 0.0.0.0]", ustr(e)) # set the daemon version to the environment variable to cache it for future calls. 
set_daemon_version(daemon_version) return FlexibleVersion(os.environ[__DAEMON_VERSION_ENV_VARIABLE]) From 0c4fe3a0c849d7a64c7d467316117f07ce434547 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 31 Oct 2023 09:27:36 -0700 Subject: [PATCH 091/240] fix self-update frequency to spread over 24 hrs for regular type and 4 hrs for hotfix (#2948) (#2965) * update self-update frequency * address comment * mark with comment * addressed comment (cherry picked from commit f15e6ef7b4d17d514b639e8bc2e78507a2d71096) --- azurelinuxagent/ga/agent_update_handler.py | 78 ++++++++++------------ tests/ga/test_agent_update_handler.py | 2 +- tests/ga/test_update.py | 2 +- 3 files changed, 38 insertions(+), 44 deletions(-) diff --git a/azurelinuxagent/ga/agent_update_handler.py b/azurelinuxagent/ga/agent_update_handler.py index d311a5c43e..a650f110ac 100644 --- a/azurelinuxagent/ga/agent_update_handler.py +++ b/azurelinuxagent/ga/agent_update_handler.py @@ -50,7 +50,7 @@ def __init__(self, protocol): self._is_requested_version_update = True # This is to track the current update type(requested version or self update) self.update_state = AgentUpdateHandlerUpdateState() - def __should_update_agent(self, requested_version): + def __check_if_agent_update_allowed_and_update_next_upgrade_times(self, requested_version): """ requested version update: update is allowed once per (as specified in the conf.get_autoupdate_frequency()) @@ -70,6 +70,7 @@ def __should_update_agent(self, requested_version): if next_attempt_time > now: return False # The time limit elapsed for us to allow updates. 
+ self.update_state.last_attempted_requested_version_update_time = now return True else: next_hotfix_time, next_normal_time = self.__get_next_upgrade_times(now) @@ -77,17 +78,12 @@ def __should_update_agent(self, requested_version): if (upgrade_type == AgentUpgradeType.Hotfix and next_hotfix_time <= now) or ( upgrade_type == AgentUpgradeType.Normal and next_normal_time <= now): + # Update the last upgrade check time even if no new agent is available for upgrade + self.update_state.last_attempted_hotfix_update_time = now + self.update_state.last_attempted_normal_update_time = now return True return False - def __update_last_attempt_update_times(self): - now = datetime.datetime.now() - if self._is_requested_version_update: - self.update_state.last_attempted_requested_version_update_time = now - else: - self.update_state.last_attempted_normal_update_time = now - self.update_state.last_attempted_hotfix_update_time = now - def __should_agent_attempt_manifest_download(self): """ The agent should attempt to download the manifest if @@ -306,6 +302,7 @@ def run(self, goal_state): if not self.__should_agent_attempt_manifest_download(): return if conf.get_enable_ga_versioning(): # log the warning only when ga versioning is enabled + # TODO: Need to revisit this msg when version is missing in Goal state. 
We may need to handle better way to report the error warn_msg = "Missing requested version in agent family: {0} for incarnation: {1}, fallback to largest version update".format(self._ga_family, self._gs_id) GAUpdateReportState.report_error_msg = warn_msg agent_manifest = goal_state.fetch_agent_manifest(agent_family.name, agent_family.uris) @@ -319,49 +316,46 @@ def run(self, goal_state): if "Missing requested version" in GAUpdateReportState.report_error_msg: GAUpdateReportState.report_error_msg = "" - if requested_version == CURRENT_VERSION: + # Check if an update is allowed and update next upgrade times even if no new agent is available for upgrade + if not self.__check_if_agent_update_allowed_and_update_next_upgrade_times(requested_version): return - # Check if an update is allowed - if not self.__should_update_agent(requested_version): + if requested_version == CURRENT_VERSION: return if warn_msg != "": self.__log_event(LogLevel.WARNING, warn_msg) - try: - # Downgrades are not allowed for self-update version - # Added it in try block after agent update timewindow check so that we don't log it too frequently - if not self.__check_if_downgrade_is_requested_and_allowed(requested_version): - return - - daemon_version = self.__get_daemon_version_for_update() - if requested_version < daemon_version: - # Don't process the update if the requested version is less than daemon version, - # as historically we don't support downgrades below daemon versions. So daemon will not pickup that requested version rather start with - # installed latest version again. When that happens agent go into loop of downloading the requested version, exiting and start again with same version. - # - raise AgentUpdateError("The Agent received a request to downgrade to version {0}, but downgrading to a version less than " - "the Agent installed on the image ({1}) is not supported. 
Skipping downgrade.".format(requested_version, daemon_version)) - - msg = "Goal state {0} is requesting a new agent version {1}, will update the agent before processing the goal state.".format( - self._gs_id, str(requested_version)) - self.__log_event(LogLevel.INFO, msg) - - agent = self.__download_and_get_agent(goal_state, agent_family, agent_manifest, requested_version) + # Downgrades are not allowed for self-update version + if not self.__check_if_downgrade_is_requested_and_allowed(requested_version): + return - if agent.is_blacklisted or not agent.is_downloaded: - msg = "Downloaded agent version is in bad state : {0} , skipping agent update".format( - str(agent.version)) - self.__log_event(LogLevel.WARNING, msg) - return + daemon_version = self.__get_daemon_version_for_update() + if requested_version < daemon_version: + # Don't process the update if the requested version is less than daemon version, + # as historically we don't support downgrades below daemon versions. So daemon will not pickup that requested version rather start with + # installed latest version again. When that happens agent go into loop of downloading the requested version, exiting and start again with same version. + # + raise AgentUpdateError("The Agent received a request to downgrade to version {0}, but downgrading to a version less than " + "the Agent installed on the image ({1}) is not supported. 
Skipping downgrade.".format(requested_version, daemon_version)) + + # Todo: Need to update the message when we fix RSM stuff + msg = "Self-update discovered new agent version:{0} in agent manifest for goal state {1}, will update the agent before processing the goal state.".format( + str(requested_version), self._gs_id) + self.__log_event(LogLevel.INFO, msg) + + agent = self.__download_and_get_agent(goal_state, agent_family, agent_manifest, requested_version) + + if agent.is_blacklisted or not agent.is_downloaded: + msg = "Downloaded agent version is in bad state : {0} , skipping agent update".format( + str(agent.version)) + self.__log_event(LogLevel.WARNING, msg) + return - # We delete the directory and the zip package from the filesystem except current version and target version - self.__purge_extra_agents_from_disk(CURRENT_VERSION, known_agents=[agent]) - self.__proceed_with_update(requested_version) + # We delete the directory and the zip package from the filesystem except current version and target version + self.__purge_extra_agents_from_disk(CURRENT_VERSION, known_agents=[agent]) + self.__proceed_with_update(requested_version) - finally: - self.__update_last_attempt_update_times() except Exception as err: if isinstance(err, AgentUpgradeExitException): diff --git a/tests/ga/test_agent_update_handler.py b/tests/ga/test_agent_update_handler.py index d387164147..d91cbb8019 100644 --- a/tests/ga/test_agent_update_handler.py +++ b/tests/ga/test_agent_update_handler.py @@ -77,7 +77,7 @@ def __assert_agent_directories_exist_and_others_dont_exist(self, versions): def __assert_agent_requested_version_in_goal_state(self, mock_telemetry, inc=1, version="9.9.9.10"): upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if - 'Goal state incarnation_{0} is requesting a new agent version {1}'.format(inc, version) in kwarg['message'] and kwarg[ + 'discovered new agent version:{0} in agent manifest for goal state incarnation_{1}'.format(version, 
inc) in kwarg['message'] and kwarg[ 'op'] == WALAEventOperation.AgentUpgrade] self.assertEqual(1, len(upgrade_event_msgs), "Did not find the event indicating that the agent requested version found. Got: {0}".format( diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index b3cf721580..4c58c850e8 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -1593,7 +1593,7 @@ def test_it_should_not_update_if_requested_version_not_found_in_manifest(self): kwarg['op'] in (WALAEventOperation.AgentUpgrade, WALAEventOperation.Download)] # This will throw if corresponding message not found so not asserting on that requested_version_found = next(kwarg for kwarg in agent_msgs if - "Goal state incarnation_1 is requesting a new agent version 5.2.1.0, will update the agent before processing the goal state" in kwarg['message']) + "discovered new agent version:5.2.1.0 in agent manifest for goal state incarnation_1, will update the agent before processing the goal state" in kwarg['message']) self.assertTrue(requested_version_found['is_success'], "The requested version found op should be reported as a success") From 5b04dc8db2b8fe2b56dd56fe265072d0c937508b Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 31 Oct 2023 11:58:49 -0700 Subject: [PATCH 092/240] Reduce the firewall check period in agent firewall tests (#2966) * reduce firewall check period * reduce firewall check period --- .../tests/scripts/agent_firewall-verify_all_firewall_rules.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests_e2e/tests/scripts/agent_firewall-verify_all_firewall_rules.py b/tests_e2e/tests/scripts/agent_firewall-verify_all_firewall_rules.py index 3af6bf69e1..2ef8454fd2 100755 --- a/tests_e2e/tests/scripts/agent_firewall-verify_all_firewall_rules.py +++ b/tests_e2e/tests/scripts/agent_firewall-verify_all_firewall_rules.py @@ -39,7 +39,7 @@ WIRESERVER_ENDPOINT_FILE = 
'/var/lib/waagent/WireServerEndpoint' WIRESERVER_IP = '168.63.129.16' VERSIONS_PATH = '/?comp=versions' -FIREWALL_PERIOD = 60 +FIREWALL_PERIOD = 30 class FirewallRules(object): @@ -144,7 +144,7 @@ def check_all_iptables() -> bool: log.info("-----Verifying all ip table rules are present in rule set") # Agent will re-add rules within OS.EnableFirewallPeriod, So waiting that time + some buffer - found: bool = retry_if_false(check_all_iptables, attempts=2, delay=FIREWALL_PERIOD+30) + found: bool = retry_if_false(check_all_iptables, attempts=2, delay=FIREWALL_PERIOD+15) if not found: fail("IP table rules missing in rule set.\n Current iptable rules:\n {0}".format( From e39336828033f2c31c983ccc764049e299ca561a Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 31 Oct 2023 12:58:56 -0700 Subject: [PATCH 093/240] undo get daemon version change (#2951) (#2967) * undo daemon change * pylint (cherry picked from commit fabe7e5843b796fcdddb303fd724d9d823d65727) --- azurelinuxagent/common/version.py | 16 ++-------------- tests/common/test_version.py | 15 +++++---------- 2 files changed, 7 insertions(+), 24 deletions(-) diff --git a/azurelinuxagent/common/version.py b/azurelinuxagent/common/version.py index f85732da77..ff9c903b93 100644 --- a/azurelinuxagent/common/version.py +++ b/azurelinuxagent/common/version.py @@ -21,7 +21,6 @@ import sys import azurelinuxagent.common.conf as conf -from azurelinuxagent.common import logger import azurelinuxagent.common.utils.shellutil as shellutil from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.future import ustr, get_linux_distribution @@ -49,22 +48,11 @@ def get_daemon_version(): The value indicates the version of the daemon that started the current agent process or, if the current process is the daemon, the version of the current process. 
If the variable is not set (because the agent is < 2.2.53, or the process was not started by the daemon and - the process is not the daemon itself) the function returns version of agent which started by the python + the process is not the daemon itself) the function returns "0.0.0.0" """ if __DAEMON_VERSION_ENV_VARIABLE in os.environ: return FlexibleVersion(os.environ[__DAEMON_VERSION_ENV_VARIABLE]) - else: - # The agent process which execute the extensions can have different version(after upgrades) and importing version from that process may provide wrong version for daemon. - # so launching new process with sys.executable python provides the correct version for daemon which preinstalled in the image. - daemon_version = "0.0.0.0" - try: - cmd = ["{0}".format(sys.executable), "-c", "from azurelinuxagent.common.version import AGENT_VERSION; print(AGENT_VERSION)"] - daemon_version = shellutil.run_command(cmd) - except Exception as e: # Make the best effort to get the daemon version, otherwise default to 0.0.0.0(unknown) - logger.info("Failed to get the daemon version. The error is: {0} \n[This error can be ignored since it has no impact on customer. So we return as unknown version: 0.0.0.0]", ustr(e)) - # set the daemon version to the environment variable to cache it for future calls. 
- set_daemon_version(daemon_version) - return FlexibleVersion(os.environ[__DAEMON_VERSION_ENV_VARIABLE]) + return FlexibleVersion("0.0.0.0") def get_f5_platform(): diff --git a/tests/common/test_version.py b/tests/common/test_version.py index 89156f65c1..156cdf1ab1 100644 --- a/tests/common/test_version.py +++ b/tests/common/test_version.py @@ -136,16 +136,11 @@ def test_get_daemon_version_should_return_the_version_that_was_previously_set(se finally: os.environ.pop(DAEMON_VERSION_ENV_VARIABLE) - def test_get_daemon_version_from_fallback_when_the_version_has_not_been_set(self): - with patch("azurelinuxagent.common.utils.shellutil.run_command", return_value="2.3.53") as mock_run_command: - self.assertEqual( - FlexibleVersion("2.3.53"), get_daemon_version(), - "The daemon version should be defined. Environment={0}".format(os.environ) - ) - self.assertEqual(FlexibleVersion("2.3.53"), get_daemon_version(), "The daemon version should be 2.3.53") - self.assertEqual(1, mock_run_command.call_count, "The daemon version should be read from env value on second time") - - os.environ.pop(DAEMON_VERSION_ENV_VARIABLE) + def test_get_daemon_version_should_return_zero_when_the_version_has_not_been_set(self): + self.assertEqual( + FlexibleVersion("0.0.0.0"), get_daemon_version(), + "The daemon version should not be defined. 
Environment={0}".format(os.environ) + ) class TestCurrentAgentName(AgentTestCase): From dffa667d2167d4d0f001f8336d3f00176d7df4d6 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 31 Oct 2023 14:32:28 -0700 Subject: [PATCH 094/240] disable agent update (#2953) (#2968) (cherry picked from commit 9b15b0486248090448cd69c46aebdd2b8f608694) --- tests_e2e/orchestrator/runbook.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index a076264036..6422bfb817 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -29,7 +29,7 @@ variable: # Test suites to execute # - name: test_suites - value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_update, agent_status, multi_config_ext, agent_cgroups, ext_cgroups, agent_firewall, ext_telemetry_pipeline" + value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_status, multi_config_ext, agent_cgroups, ext_cgroups, agent_firewall, ext_telemetry_pipeline" # # Parameters used to create test VMs From 9aecd68a4c94d10c8b0160c80ce8dfb864782387 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Fri, 3 Nov 2023 10:25:27 -0700 Subject: [PATCH 095/240] Change agent_cgroups to own Vm (#2972) * Change cgroups to own Vm * Agent cgroups should own vm --- tests_e2e/test_suites/agent_cgroups.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests_e2e/test_suites/agent_cgroups.yml b/tests_e2e/test_suites/agent_cgroups.yml index 32a290c9ef..d6d1fc0f17 100644 --- a/tests_e2e/test_suites/agent_cgroups.yml +++ b/tests_e2e/test_suites/agent_cgroups.yml @@ -5,4 +5,5 @@ name: "AgentCgroups" tests: - "agent_cgroups/agent_cgroups.py" - "agent_cgroups/agent_cpu_quota.py" -images: 
"cgroups-endorsed" \ No newline at end of file +images: "cgroups-endorsed" +owns_vm: true \ No newline at end of file From ce5b8d6b8db3047066e50834575520cebd8a53af Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Fri, 3 Nov 2023 15:02:47 -0700 Subject: [PATCH 096/240] Check SSH connectivity during end-to-end tests (#2970) Co-authored-by: narrieta --- .../orchestrator/lib/agent_test_suite.py | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index 9209b18f01..ca0243044b 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -17,6 +17,7 @@ import datetime import json import logging +import time import traceback import uuid @@ -58,7 +59,7 @@ from tests_e2e.tests.lib.logging import log, set_thread_name, set_current_thread_log from tests_e2e.tests.lib.agent_log import AgentLogRecord from tests_e2e.tests.lib.resource_group_client import ResourceGroupClient -from tests_e2e.tests.lib.shell import run_command +from tests_e2e.tests.lib.shell import run_command, CommandError from tests_e2e.tests.lib.ssh_client import SshClient @@ -398,6 +399,8 @@ def _setup_test_nodes(self) -> None: ssh_client = SshClient(ip_address=node.ip_address, username=self._user, identity_file=Path(self._identity_file)) + self._check_ssh_connectivity(ssh_client) + # # Cleanup the test node (useful for developer runs) # @@ -447,6 +450,26 @@ def _setup_test_nodes(self) -> None: log.info("Completed test node setup") + @staticmethod + def _check_ssh_connectivity(ssh_client: SshClient) -> None: + # We may be trying to connect to the test node while it is still booting. Execute a simple command to check that SSH is ready, + # and raise an exception if it is not after a few attempts. 
+ max_attempts = 5 + for attempt in range(max_attempts): + try: + log.info("Checking SSH connectivity to the test node...") + ssh_client.run_command("echo 'SSH connectivity check'") + log.info("SSH is ready.") + break + except CommandError as error: + # Check for "System is booting up. Unprivileged users are not permitted to log in yet. Please come back later. For technical details, see pam_nologin(8)." + if "Unprivileged users are not permitted to log in yet" not in error.stderr: + raise + if attempt >= max_attempts - 1: + raise Exception(f"SSH connectivity check failed after {max_attempts} attempts, giving up [{error}]") + log.info("SSH is not ready [%s], will retry after a short delay.", error) + time.sleep(15) + def _collect_logs_from_test_nodes(self) -> None: """ Collects the test logs from the test nodes and copies them to the local machine From dfc4cc8a711a381c88e15b190bc14a4ce3995492 Mon Sep 17 00:00:00 2001 From: Zhidong Peng Date: Tue, 7 Nov 2023 08:16:00 -0800 Subject: [PATCH 097/240] Gathering Guest ProxyAgent Log Files (#2975) --- azurelinuxagent/ga/logcollector_manifests.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/azurelinuxagent/ga/logcollector_manifests.py b/azurelinuxagent/ga/logcollector_manifests.py index e77da3d47f..b22316be95 100644 --- a/azurelinuxagent/ga/logcollector_manifests.py +++ b/azurelinuxagent/ga/logcollector_manifests.py @@ -119,4 +119,8 @@ echo,### Gathering Disk Info ### diskinfo, + +echo,### Gathering Guest ProxyAgent Log Files ### +copy,/var/log/proxyagent/* +echo, """ From a539385a97915329169fd20905bdfcbaaa2ae9dc Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Tue, 7 Nov 2023 09:44:53 -0800 Subject: [PATCH 098/240] Remove debug info from waagent.status.json (#2971) * Remove debug info from waagent.status.json * pylint warnings * pylint --------- Co-authored-by: narrieta --- azurelinuxagent/ga/exthandlers.py | 30 +------ azurelinuxagent/ga/update.py | 6 +- tests/ga/test_extension.py | 132 
+++++++++++++----------------- tests/ga/test_update.py | 1 - 4 files changed, 62 insertions(+), 107 deletions(-) diff --git a/azurelinuxagent/ga/exthandlers.py b/azurelinuxagent/ga/exthandlers.py index 3575c3e6d2..a09a81e96c 100644 --- a/azurelinuxagent/ga/exthandlers.py +++ b/azurelinuxagent/ga/exthandlers.py @@ -53,8 +53,7 @@ from azurelinuxagent.common.utils import textutil from azurelinuxagent.common.utils.archive import ARCHIVE_DIRECTORY_NAME from azurelinuxagent.common.utils.flexible_version import FlexibleVersion -from azurelinuxagent.common.version import AGENT_NAME, CURRENT_VERSION, \ - PY_VERSION_MAJOR, PY_VERSION_MICRO, PY_VERSION_MINOR +from azurelinuxagent.common.version import AGENT_NAME, CURRENT_VERSION _HANDLER_NAME_PATTERN = r'^([^-]+)' _HANDLER_VERSION_PATTERN = r'(\d+(?:\.\d+)*)' @@ -964,33 +963,6 @@ def report_ext_handlers_status(self, goal_state_changed=False, vm_agent_update_s message=msg) return None - def get_ext_handlers_status_debug_info(self, vm_status): - status_blob_text = self.protocol.get_status_blob_data() - if status_blob_text is None: - status_blob_text = "" - - support_multi_config = {} - vm_status_data = get_properties(vm_status) - vm_handler_statuses = vm_status_data.get('vmAgent', {}).get('extensionHandlers') - for handler_status in vm_handler_statuses: - if handler_status.get('name') is not None: - support_multi_config[handler_status.get('name')] = handler_status.get('supports_multi_config') - - debug_text = json.dumps({ - "agentName": AGENT_NAME, - "daemonVersion": str(version.get_daemon_version()), - "pythonVersion": "Python: {0}.{1}.{2}".format(PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO), - "extensionSupportedFeatures": [name for name, _ in get_agent_supported_features_list_for_extensions().items()], - "supportsMultiConfig": support_multi_config - }) - - return '''{{ - "__comment__": "The __status__ property is the actual status reported to CRP", - "__status__": {0}, - "__debug__": {1} -}} 
-'''.format(status_blob_text, debug_text) - def report_ext_handler_status(self, vm_status, ext_handler, goal_state_changed): ext_handler_i = ExtHandlerInstance(ext_handler, self.protocol) diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 147402709c..2862f8e906 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -617,8 +617,10 @@ def _report_status(self, exthandlers_handler, agent_update_handler): if vm_status is not None: self._report_extensions_summary(vm_status) if self._goal_state is not None: - agent_status = exthandlers_handler.get_ext_handlers_status_debug_info(vm_status) - self._goal_state.save_to_history(agent_status, AGENT_STATUS_FILE) + status_blob_text = exthandlers_handler.protocol.get_status_blob_data() + if status_blob_text is None: + status_blob_text = "{}" + self._goal_state.save_to_history(status_blob_text, AGENT_STATUS_FILE) if self._goal_state.extensions_goal_state.is_outdated: exthandlers_handler.protocol.client.get_host_plugin().clear_fast_track_state() diff --git a/tests/ga/test_extension.py b/tests/ga/test_extension.py index e3e365d9b6..62bd11099d 100644 --- a/tests/ga/test_extension.py +++ b/tests/ga/test_extension.py @@ -28,16 +28,14 @@ import unittest from azurelinuxagent.common import conf -from azurelinuxagent.common.agent_supported_feature import get_agent_supported_features_list_for_extensions, \ - get_agent_supported_features_list_for_crp +from azurelinuxagent.common.agent_supported_feature import get_agent_supported_features_list_for_crp from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator from azurelinuxagent.common.datacontract import get_properties from azurelinuxagent.common.event import WALAEventOperation from azurelinuxagent.common.utils import fileutil from azurelinuxagent.common.utils.fileutil import read_file from azurelinuxagent.common.utils.flexible_version import FlexibleVersion -from azurelinuxagent.common.version import PY_VERSION_MAJOR, 
PY_VERSION_MINOR, PY_VERSION_MICRO, AGENT_NAME, \ - AGENT_VERSION +from azurelinuxagent.common.version import AGENT_VERSION from azurelinuxagent.common.exception import ResourceGoneError, ExtensionDownloadError, ProtocolError, \ ExtensionErrorCodes, ExtensionError, GoalStateAggregateStatusCodes from azurelinuxagent.common.protocol.restapi import ExtensionSettings, Extension, ExtHandlerStatus, \ @@ -3247,91 +3245,75 @@ def mock_http_put(url, *args, **_): ) expected_status = { - "__comment__": "The __status__ property is the actual status reported to CRP", - "__status__": { - "version": "1.1", - "timestampUTC": "1970-01-01T00:00:00Z", - "aggregateStatus": { - "guestAgentStatus": { - "version": AGENT_VERSION, + "version": "1.1", + "timestampUTC": "1970-01-01T00:00:00Z", + "aggregateStatus": { + "guestAgentStatus": { + "version": AGENT_VERSION, + "status": "Ready", + "formattedMessage": { + "lang": "en-US", + "message": "Guest Agent is running" + } + }, + "handlerAggregateStatus": [ + { + "handlerVersion": "1.0.0", + "handlerName": "OSTCExtensions.ExampleHandlerLinux", "status": "Ready", + "code": 0, + "useExactVersion": True, "formattedMessage": { "lang": "en-US", - "message": "Guest Agent is running" - } - }, - "handlerAggregateStatus": [ - { - "handlerVersion": "1.0.0", - "handlerName": "OSTCExtensions.ExampleHandlerLinux", - "status": "Ready", - "code": 0, - "useExactVersion": True, - "formattedMessage": { - "lang": "en-US", - "message": "Plugin enabled" - }, - "runtimeSettingsStatus": { - "settingsStatus": { - "status": { - "name": "OSTCExtensions.ExampleHandlerLinux", - "configurationAppliedTime": None, - "operation": None, - "status": "success", - "code": 0, - "formattedMessage": { - "lang": "en-US", - "message": None - } - }, - "version": 1.0, - "timestampUTC": "1970-01-01T00:00:00Z" + "message": "Plugin enabled" + }, + "runtimeSettingsStatus": { + "settingsStatus": { + "status": { + "name": "OSTCExtensions.ExampleHandlerLinux", + "configurationAppliedTime": 
None, + "operation": None, + "status": "success", + "code": 0, + "formattedMessage": { + "lang": "en-US", + "message": None + } }, - "sequenceNumber": 0 - } - } - ], - "vmArtifactsAggregateStatus": { - "goalStateAggregateStatus": { - "formattedMessage": { - "lang": "en-US", - "message": "GoalState executed successfully" + "version": 1.0, + "timestampUTC": "1970-01-01T00:00:00Z" }, - "timestampUTC": "1970-01-01T00:00:00Z", - "inSvdSeqNo": "1", - "status": "Success", - "code": 0 + "sequenceNumber": 0 } } - }, - "guestOSInfo": None, - "supportedFeatures": supported_features - }, - "__debug__": { - "agentName": AGENT_NAME, - "daemonVersion": "0.0.0.0", - "pythonVersion": "Python: {0}.{1}.{2}".format(PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO), - "extensionSupportedFeatures": [name for name, _ in get_agent_supported_features_list_for_extensions().items()], - "supportsMultiConfig": { - "OSTCExtensions.ExampleHandlerLinux": False + ], + "vmArtifactsAggregateStatus": { + "goalStateAggregateStatus": { + "formattedMessage": { + "lang": "en-US", + "message": "GoalState executed successfully" + }, + "timestampUTC": "1970-01-01T00:00:00Z", + "inSvdSeqNo": "1", + "status": "Success", + "code": 0 + } } - } + }, + "guestOSInfo": None, + "supportedFeatures": supported_features } - exthandlers_handler.run() - vm_status = exthandlers_handler.report_ext_handlers_status() - actual_status_json = json.loads(exthandlers_handler.get_ext_handlers_status_debug_info(vm_status)) + exthandlers_handler.report_ext_handlers_status() - # Don't compare the guestOSInfo - status_property = actual_status_json.get("__status__") - self.assertIsNotNone(status_property, "The status file is missing the __status__ property") - self.assertIsNotNone(status_property.get("guestOSInfo"), "The status file is missing the guestOSInfo property") - status_property["guestOSInfo"] = None + actual_status = json.loads(protocol.get_status_blob_data()) - actual_status_json.pop('guestOSInfo', None) + # Don't 
compare the guestOSInfo + self.assertIsNotNone(actual_status.get("guestOSInfo"), "The status file is missing the guestOSInfo property") + actual_status["guestOSInfo"] = None - self.assertEqual(expected_status, actual_status_json) + self.assertEqual(expected_status, actual_status) def test_it_should_process_extensions_only_if_allowed(self): def assert_extensions_called(exthandlers_handler, expected_call_count=0): diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 4c58c850e8..49a4e0befa 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -2198,7 +2198,6 @@ def create_vm_status(extension_status): exthandlers_handler.report_ext_handlers_status = Mock(return_value=create_vm_status(ExtensionStatusValue.success)) else: exthandlers_handler.report_ext_handlers_status = Mock(side_effect=[create_vm_status(s) for s in extension_statuses]) - exthandlers_handler.get_ext_handlers_status_debug_info = Mock(return_value='') yield exthandlers_handler From 5542f849723ae86170b5e5f46c11bcad57c400d2 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Tue, 7 Nov 2023 10:27:24 -0800 Subject: [PATCH 099/240] Extension sequencing scenario (#2969) * update tests * cleanup * . * . * . * . * . * . * . * . * . 
* Add new test cases * Update scenario to support new tests * Scenario should support failing extensions and extensions with no settings * Clean up test * Remove locations from test suite yml * Fix deployment issue * Support creating multiple resource groups for vmss in one run * AzureMonitorLinuxAgent is not supported on flatcar * AzureMonitor is not supported on flatcar * remove agent update * Address PR comments * Fix issue with getting random ssh client * Address PR Comments * Address PR Comments * Address PR comments * Do not keep rg count in runbook * Use try/finally with lock * only check logs after scenario startS * Change to instance member --------- Co-authored-by: narrieta --- .../orchestrator/lib/agent_test_suite.py | 21 +- tests_e2e/orchestrator/runbook.yml | 2 +- tests_e2e/test_suites/ext_sequencing.yml | 10 + .../ext_sequencing/ext_seq_test_cases.py | 318 ++++++++++++++++++ .../tests/ext_sequencing/ext_sequencing.py | 306 +++++++++++++++++ .../tests/lib/vm_extension_identifier.py | 3 +- .../ext_sequencing-get_ext_enable_time.py | 87 +++++ 7 files changed, 742 insertions(+), 5 deletions(-) create mode 100644 tests_e2e/test_suites/ext_sequencing.yml create mode 100644 tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py create mode 100644 tests_e2e/tests/ext_sequencing/ext_sequencing.py create mode 100755 tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index ca0243044b..c9e177fcc5 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -169,6 +169,14 @@ def __init__(self, metadata: TestSuiteMetadata) -> None: self._create_scale_set: bool self._delete_scale_set: bool + # + # Test suites within the same runbook may be executed concurrently, and we need to keep track of how many resource + # groups are being created. 
We use this lock and counter to allow only 1 thread to increment the resource group + # count. + # + _rg_count_lock = RLock() + _rg_count = 0 + def _initialize(self, environment: Environment, variables: Dict[str, Any], lisa_working_path: str, lisa_log_path: str, lisa_log: Logger): """ Initializes the AgentTestSuite from the data passed as arguments by LISA. @@ -231,9 +239,16 @@ def _initialize(self, environment: Environment, variables: Dict[str, Any], lisa_ if isinstance(environment.nodes[0], LocalNode): # We need to create a new VMSS. - # Use the same naming convention as LISA for the scale set name: lisa---e0-n0. Note that we hardcode the resource group - # id to "e0" and the scale set name to "n0" since we are creating a single scale set. - self._resource_group_name = f"lisa-{self._runbook_name}-{RUN_ID}-e0" + # Use the same naming convention as LISA for the scale set name: lisa---e-n0 + # Note that we hardcode the scale set name to "n0" since we are creating a single scale set. + # Resource group name cannot have any uppercase characters, because the publicIP cannot have uppercase + # characters in its domain name label. 
+ AgentTestSuite._rg_count_lock.acquire() + try: + self._resource_group_name = f"lisa-{self._runbook_name.lower()}-{RUN_ID}-e{AgentTestSuite._rg_count}" + AgentTestSuite._rg_count += 1 + finally: + AgentTestSuite._rg_count_lock.release() self._vmss_name = f"{self._resource_group_name}-n0" self._test_nodes = [] # we'll fill this up when the scale set is created self._create_scale_set = True diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index 6422bfb817..336d22cf67 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -29,7 +29,7 @@ variable: # Test suites to execute # - name: test_suites - value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_status, multi_config_ext, agent_cgroups, ext_cgroups, agent_firewall, ext_telemetry_pipeline" + value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_status, multi_config_ext, agent_cgroups, ext_cgroups, agent_firewall, ext_telemetry_pipeline, ext_sequencing" # # Parameters used to create test VMs diff --git a/tests_e2e/test_suites/ext_sequencing.yml b/tests_e2e/test_suites/ext_sequencing.yml new file mode 100644 index 0000000000..1976a85025 --- /dev/null +++ b/tests_e2e/test_suites/ext_sequencing.yml @@ -0,0 +1,10 @@ +# +# Adds extensions with multiple dependencies to VMSS using 'provisionAfterExtensions' property and validates they are +# enabled in order of dependencies. +# +name: "ExtSequencing" +tests: + - "ext_sequencing/ext_sequencing.py" +images: "endorsed" +# This scenario is executed on instances of a scaleset created by the agent test suite. 
+executes_on_scale_set: true \ No newline at end of file diff --git a/tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py b/tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py new file mode 100644 index 0000000000..d1c942d0af --- /dev/null +++ b/tests_e2e/tests/ext_sequencing/ext_seq_test_cases.py @@ -0,0 +1,318 @@ +def add_one_dependent_ext_without_settings(): + # Dependent extensions without settings should be enabled with dependencies + return [ + { + "name": "AzureMonitorLinuxAgent", + "properties": { + "provisionAfterExtensions": ["CustomScript"], + "publisher": "Microsoft.Azure.Monitor", + "type": "AzureMonitorLinuxAgent", + "typeHandlerVersion": "1.5", + "autoUpgradeMinorVersion": True + } + }, + { + "name": "CustomScript", + "properties": { + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.1", + "autoUpgradeMinorVersion": True, + "settings": { + "commandToExecute": "date" + } + } + } + ] + + +def add_two_extensions_with_dependencies(): + # Checks that extensions are enabled in the correct order when there is only one valid sequence + return [ + { + "name": "AzureMonitorLinuxAgent", + "properties": { + "provisionAfterExtensions": [], + "publisher": "Microsoft.Azure.Monitor", + "type": "AzureMonitorLinuxAgent", + "typeHandlerVersion": "1.5", + "autoUpgradeMinorVersion": True + } + }, + { + "name": "RunCommandLinux", + "properties": { + "provisionAfterExtensions": ["AzureMonitorLinuxAgent"], + "publisher": "Microsoft.CPlat.Core", + "type": "RunCommandLinux", + "typeHandlerVersion": "1.0", + "autoUpgradeMinorVersion": True, + "settings": { + "commandToExecute": "date" + } + } + }, + { + "name": "CustomScript", + "properties": { + "provisionAfterExtensions": ["RunCommandLinux", "AzureMonitorLinuxAgent"], + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.1", + "autoUpgradeMinorVersion": True, + "settings": { + "commandToExecute": "date" + } + } + } + ] + + +def 
remove_one_dependent_extension(): + # Checks that remaining extensions with dependencies are enabled in the correct order after removing a dependent + # extension + return [ + { + "name": "AzureMonitorLinuxAgent", + "properties": { + "publisher": "Microsoft.Azure.Monitor", + "type": "AzureMonitorLinuxAgent", + "typeHandlerVersion": "1.5", + "autoUpgradeMinorVersion": True + } + }, + { + "name": "CustomScript", + "properties": { + "provisionAfterExtensions": ["AzureMonitorLinuxAgent"], + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.1", + "autoUpgradeMinorVersion": True, + "settings": { + "commandToExecute": "date" + } + } + } + ] + + +def remove_all_dependencies(): + # Checks that extensions are enabled after adding and removing dependencies + return [ + { + "name": "AzureMonitorLinuxAgent", + "properties": { + "publisher": "Microsoft.Azure.Monitor", + "type": "AzureMonitorLinuxAgent", + "typeHandlerVersion": "1.5", + "autoUpgradeMinorVersion": True + } + }, + { + "name": "RunCommandLinux", + "properties": { + "publisher": "Microsoft.CPlat.Core", + "type": "RunCommandLinux", + "typeHandlerVersion": "1.0", + "autoUpgradeMinorVersion": True, + "settings": { + "commandToExecute": "date" + } + } + }, + { + "name": "CustomScript", + "properties": { + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.1", + "autoUpgradeMinorVersion": True, + "settings": { + "commandToExecute": "date" + } + } + } + ] + + +def add_one_dependent_extension(): + # Checks that a valid enable sequence occurs when only one extension has dependencies + return [ + { + "name": "AzureMonitorLinuxAgent", + "properties": { + "provisionAfterExtensions": ["RunCommandLinux", "CustomScript"], + "publisher": "Microsoft.Azure.Monitor", + "type": "AzureMonitorLinuxAgent", + "typeHandlerVersion": "1.5", + "autoUpgradeMinorVersion": True + } + }, + { + "name": "RunCommandLinux", + "properties": { + "publisher": 
"Microsoft.CPlat.Core", + "type": "RunCommandLinux", + "typeHandlerVersion": "1.0", + "autoUpgradeMinorVersion": True, + "settings": { + "commandToExecute": "date" + } + } + }, + { + "name": "CustomScript", + "properties": { + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.1", + "autoUpgradeMinorVersion": True, + "settings": { + "commandToExecute": "date" + } + } + } + ] + + +def add_single_dependencies(): + # Checks that extensions are enabled in the correct order when there is only one valid sequence and each extension + # has no more than one dependency + return [ + { + "name": "AzureMonitorLinuxAgent", + "properties": { + "provisionAfterExtensions": [], + "publisher": "Microsoft.Azure.Monitor", + "type": "AzureMonitorLinuxAgent", + "typeHandlerVersion": "1.5", + "autoUpgradeMinorVersion": True + } + }, + { + "name": "RunCommandLinux", + "properties": { + "provisionAfterExtensions": ["CustomScript"], + "publisher": "Microsoft.CPlat.Core", + "type": "RunCommandLinux", + "typeHandlerVersion": "1.0", + "autoUpgradeMinorVersion": True, + "settings": { + "commandToExecute": "date" + } + } + }, + { + "name": "CustomScript", + "properties": { + "provisionAfterExtensions": ["AzureMonitorLinuxAgent"], + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.1", + "autoUpgradeMinorVersion": True, + "settings": { + "commandToExecute": "date" + } + } + } + ] + + +def remove_all_dependent_extensions(): + # Checks that remaining extensions with dependencies are enabled in the correct order after removing all dependent + # extension + return [ + { + "name": "AzureMonitorLinuxAgent", + "properties": { + "publisher": "Microsoft.Azure.Monitor", + "type": "AzureMonitorLinuxAgent", + "typeHandlerVersion": "1.5", + "autoUpgradeMinorVersion": True + } + } + ] + + +def add_failing_dependent_extension_with_one_dependency(): + # This case tests that extensions dependent on a failing extensions are 
skipped, but extensions that are not + # dependent on the failing extension still get enabled + return [ + { + "name": "AzureMonitorLinuxAgent", + "properties": { + "provisionAfterExtensions": ["CustomScript"], + "publisher": "Microsoft.Azure.Monitor", + "type": "AzureMonitorLinuxAgent", + "typeHandlerVersion": "1.5", + "autoUpgradeMinorVersion": True, + "settings": {} + } + }, + { + "name": "RunCommandLinux", + "properties": { + "publisher": "Microsoft.CPlat.Core", + "type": "RunCommandLinux", + "typeHandlerVersion": "1.0", + "autoUpgradeMinorVersion": True, + "settings": { + "commandToExecute": "date" + } + } + }, + { + "name": "CustomScript", + "properties": { + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.1", + "autoUpgradeMinorVersion": True, + "settings": { + "commandToExecute": "exit 1" + } + } + } + ] + + +def add_failing_dependent_extension_with_two_dependencies(): + # This case tests that all extensions dependent on a failing extensions are skipped + return [ + { + "name": "AzureMonitorLinuxAgent", + "properties": { + "provisionAfterExtensions": ["CustomScript"], + "publisher": "Microsoft.Azure.Monitor", + "type": "AzureMonitorLinuxAgent", + "typeHandlerVersion": "1.5", + "autoUpgradeMinorVersion": True + } + }, + { + "name": "RunCommandLinux", + "properties": { + "provisionAfterExtensions": ["CustomScript"], + "publisher": "Microsoft.CPlat.Core", + "type": "RunCommandLinux", + "typeHandlerVersion": "1.0", + "autoUpgradeMinorVersion": True, + "settings": { + "commandToExecute": "date" + } + } + }, + { + "name": "CustomScript", + "properties": { + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.1", + "autoUpgradeMinorVersion": True, + "settings": { + "commandToExecute": "exit 1" + } + } + } + ] diff --git a/tests_e2e/tests/ext_sequencing/ext_sequencing.py b/tests_e2e/tests/ext_sequencing/ext_sequencing.py new file mode 100644 index 0000000000..3af9e64fe6 --- 
/dev/null +++ b/tests_e2e/tests/ext_sequencing/ext_sequencing.py @@ -0,0 +1,306 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This test adds extensions with multiple dependencies to a VMSS using the 'provisionAfterExtensions' property and +# validates they are enabled in order of dependencies. +# +import copy +import re +import uuid +from datetime import datetime +from typing import List, Dict, Any + +from assertpy import fail +from azure.mgmt.compute.models import VirtualMachineScaleSetVMExtensionsSummary + +from tests_e2e.tests.ext_sequencing.ext_seq_test_cases import add_one_dependent_ext_without_settings, add_two_extensions_with_dependencies, \ + remove_one_dependent_extension, remove_all_dependencies, add_one_dependent_extension, \ + add_single_dependencies, remove_all_dependent_extensions, add_failing_dependent_extension_with_one_dependency, add_failing_dependent_extension_with_two_dependencies +from tests_e2e.tests.lib.agent_test import AgentVmssTest, TestSkipped +from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext +from tests_e2e.tests.lib.virtual_machine_scale_set_client import VmssInstanceIpAddress +from tests_e2e.tests.lib.vm_extension_identifier import VmExtensionIds +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.resource_group_client import ResourceGroupClient +from tests_e2e.tests.lib.ssh_client 
import SshClient + + +class ExtSequencing(AgentVmssTest): + + def __init__(self, context: AgentVmTestContext): + super().__init__(context) + self._scenario_start = datetime.min + + # Cases to test different dependency scenarios + _test_cases = [ + add_one_dependent_ext_without_settings, + add_two_extensions_with_dependencies, + # remove_one_dependent_extension should only be run after another test case which has RunCommandLinux in the + # model + remove_one_dependent_extension, + # remove_all_dependencies should only be run after another test case which has extension dependencies in the + # model + remove_all_dependencies, + add_one_dependent_extension, + add_single_dependencies, + # remove_all_dependent_extensions should only be run after another test case which has dependent extension in + # the model + remove_all_dependent_extensions, + add_failing_dependent_extension_with_one_dependency, + add_failing_dependent_extension_with_two_dependencies + ] + + @staticmethod + def _get_dependency_map(extensions: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]: + dependency_map: Dict[str, Dict[str, Any]] = dict() + + for ext in extensions: + ext_name = ext['name'] + provisioned_after = ext['properties'].get('provisionAfterExtensions') + depends_on = provisioned_after if provisioned_after else [] + # We know an extension should fail if commandToExecute is exactly "exit 1" + ext_settings = ext['properties'].get("settings") + ext_command = ext['properties']['settings'].get("commandToExecute") if ext_settings else None + should_fail = ext_command == "exit 1" + dependency_map[ext_name] = {"should_fail": should_fail, "depends_on": depends_on} + + return dependency_map + + @staticmethod + def _get_sorted_extension_names(extensions: List[VirtualMachineScaleSetVMExtensionsSummary], ssh_client: SshClient, test_case_start: datetime) -> List[str]: + # Using VmExtensionIds to get publisher for each ext to be used in remote script + extension_full_names = { + 
"AzureMonitorLinuxAgent": VmExtensionIds.AzureMonitorLinuxAgent, + "RunCommandLinux": VmExtensionIds.RunCommand, + "CustomScript": VmExtensionIds.CustomScript + } + enabled_times = [] + for ext in extensions: + # Only check extensions which succeeded provisioning + if "succeeded" in ext.statuses_summary[0].code: + enabled_time = ssh_client.run_command(f"ext_sequencing-get_ext_enable_time.py --ext_type '{extension_full_names[ext.name]}' --start_time '{str(test_case_start)}'", use_sudo=True) + enabled_times.append( + { + "name": ext.name, + "enabled_time": datetime.strptime(enabled_time.strip(), u'%Y-%m-%d %H:%M:%S') + } + ) + + # sort the extensions based on their enabled datetime + sorted_extensions = sorted(enabled_times, key=lambda ext_: ext_["enabled_time"]) + log.info("") + log.info("Extensions sorted by time they were enabled: {0}".format( + ', '.join(["{0}: {1}".format(ext["name"], ext["enabled_time"]) for ext in sorted_extensions]))) + sorted_extension_names = [ext["name"] for ext in sorted_extensions] + return sorted_extension_names + + @staticmethod + def _validate_extension_sequencing(dependency_map: Dict[str, Dict[str, Any]], sorted_extension_names: List[str], relax_check: bool): + installed_ext = dict() + + # Iterate through the extensions in the enabled order and validate if their depending extensions are already + # enabled prior to that. + for ext in sorted_extension_names: + # Check if the depending extension are already installed + if ext not in dependency_map: + # There should not be any unexpected extensions on the scale set, even in the case we share the VMSS, + # because we update the scale set model with the extensions. Any extensions that are not in the scale + # set model would be disabled. 
+ fail("Unwanted extension found in VMSS Instance view: {0}".format(ext)) + if dependency_map[ext] is not None: + dependencies = dependency_map[ext].get('depends_on') + for dep in dependencies: + if installed_ext.get(dep) is None: + # The depending extension is not installed prior to the current extension + if relax_check: + log.info("{0} is not installed prior to {1}".format(dep, ext)) + else: + fail("{0} is not installed prior to {1}".format(dep, ext)) + + # Mark the current extension as installed + installed_ext[ext] = ext + + # Validate that only extensions expected to fail, and their dependent extensions, failed + for ext, details in dependency_map.items(): + failing_ext_dependencies = [dep for dep in details['depends_on'] if dependency_map[dep]['should_fail']] + if ext not in installed_ext: + if details['should_fail']: + log.info("Extension {0} failed as expected".format(ext)) + elif failing_ext_dependencies: + log.info("Extension {0} failed as expected because it is dependent on {1}".format(ext, ' and '.join(failing_ext_dependencies))) + else: + fail("{0} unexpectedly failed. 
Only extensions that are expected to fail or depend on a failing extension should fail".format(ext)) + + log.info("Validated extension sequencing") + + def run(self): + instances_ip_address: List[VmssInstanceIpAddress] = self._context.vmss.get_instances_ip_address() + ssh_clients: Dict[str, SshClient] = dict() + for instance in instances_ip_address: + ssh_clients[instance.instance_name] = SshClient(ip_address=instance.ip_address, username=self._context.username, identity_file=self._context.identity_file) + + if not VmExtensionIds.AzureMonitorLinuxAgent.supports_distro(next(iter(ssh_clients.values())).run_command("uname -a")): + raise TestSkipped("Currently AzureMonitorLinuxAgent is not supported on this distro") + + # This is the base ARM template that's used for deploying extensions for this scenario + base_extension_template = { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json", + "contentVersion": "1.0.0.0", + "resources": [ + { + "type": "Microsoft.Compute/virtualMachineScaleSets", + "name": f"{self._context.vmss.name}", + "location": "[resourceGroup().location]", + "apiVersion": "2018-06-01", + "properties": { + "virtualMachineProfile": { + "extensionProfile": { + "extensions": [] + } + } + } + } + ] + } + + for case in self._test_cases: + test_case_start = datetime.now() + if self._scenario_start == datetime.min: + self._scenario_start = test_case_start + + # Assign unique guid to forceUpdateTag for each extension to make sure they're always unique to force CRP + # to generate a new sequence number each time + test_guid = str(uuid.uuid4()) + extensions = case() + for ext in extensions: + ext["properties"].update({ + "forceUpdateTag": test_guid + }) + + # We update the extension template here with extensions that are specific to the scenario that we want to + # test out + log.info("") + log.info("Test case: {0}".format(case.__name__.replace('_', ' '))) + ext_template = copy.deepcopy(base_extension_template) + 
ext_template['resources'][0]['properties']['virtualMachineProfile']['extensionProfile'][ + 'extensions'] = extensions + + # Log the dependency map for the extensions in this test case + dependency_map = self._get_dependency_map(extensions) + log.info("") + log.info("The dependency map of the extensions for this test case is:") + for ext, details in dependency_map.items(): + dependencies = details.get('depends_on') + dependency_list = "-" if not dependencies else ' and '.join(dependencies) + log.info("{0} depends on {1}".format(ext, dependency_list)) + + # Deploy updated extension template to the scale set. + log.info("") + log.info("Deploying extensions with the above dependencies to the scale set...") + rg_client = ResourceGroupClient(self._context.vmss.cloud, self._context.vmss.subscription, + self._context.vmss.resource_group, self._context.vmss.location) + try: + rg_client.deploy_template(template=ext_template) + except Exception as e: + # We only expect to catch an exception during deployment if we are forcing one of the extensions to + # fail. We know an extension should fail if "failing" is in the case name. Otherwise, report the + # failure. + deployment_failure_pattern = r"[\s\S]*\"details\": [\s\S]* \"code\": \"(?P.*)\"[\s\S]* \"message\": \"(?P.*)\"[\s\S]*" + msg_pattern = r"Multiple VM extensions failed to be provisioned on the VM. Please see the VM extension instance view for other failures. The first extension failed due to the error: VM Extension '.*' is marked as failed since it depends upon the VM Extension 'CustomScript' which has failed." 
+ deployment_failure_match = re.match(deployment_failure_pattern, str(e)) + if "failing" not in case.__name__: + fail("Extension template deployment unexpectedly failed: {0}".format(e)) + elif not deployment_failure_match or deployment_failure_match.group("code") != "VMExtensionProvisioningError" or not re.match(msg_pattern, deployment_failure_match.group("msg")): + fail("Extension template deployment failed as expected, but with an unexpected error: {0}".format(e)) + + # Get the extensions on the VMSS from the instance view + log.info("") + instance_view_extensions = self._context.vmss.get_instance_view().extensions + + # Validate that the extensions were enabled in the correct order on each instance of the scale set + for instance_name, ssh_client in ssh_clients.items(): + log.info("") + log.info("Validate extension sequencing on {0}:{1}...".format(instance_name, ssh_client.ip_address)) + + # Sort the VM extensions by the time they were enabled + sorted_extension_names = self._get_sorted_extension_names(instance_view_extensions, ssh_client, test_case_start) + + # Validate that the extensions were enabled in the correct order. We relax this check if no settings + # are provided for a dependent extension, since the guest agent currently ignores dependencies in this + # case. 
+ relax_check = True if "settings" in case.__name__ else False + self._validate_extension_sequencing(dependency_map, sorted_extension_names, relax_check) + + log.info("------") + + def get_ignore_errors_before_timestamp(self) -> datetime: + # Ignore errors in the agent log before the first test case starts + return self._scenario_start + + def get_ignore_error_rules(self) -> List[Dict[str, Any]]: + ignore_rules = [ + # + # WARNING ExtHandler ExtHandler Missing dependsOnExtension on extension Microsoft.Azure.Monitor.AzureMonitorLinuxAgent + # This message appears when an extension doesn't depend on another extension + # + { + 'message': r"Missing dependsOnExtension on extension .*" + }, + # + # WARNING ExtHandler ExtHandler Extension Microsoft.Azure.Monitor.AzureMonitorLinuxAgent does not have any settings. Will ignore dependency (dependency level: 1) + # We currently ignore dependencies for extensions without settings + # + { + 'message': r"Extension .* does not have any settings\. Will ignore dependency \(dependency level: \d\)" + }, + # + # 2023-10-31T17:46:59.675959Z WARNING ExtHandler ExtHandler Dependent extension Microsoft.Azure.Extensions.CustomScript failed or timed out, will skip processing the rest of the extensions + # We intentionally make CustomScript fail to test that dependent extensions are skipped + # + { + 'message': r"Dependent extension Microsoft.Azure.Extensions.CustomScript failed or timed out, will skip processing the rest of the extensions" + }, + # + # 2023-10-31T17:48:13.349214Z ERROR ExtHandler ExtHandler Event: name=Microsoft.Azure.Extensions.CustomScript, op=ExtensionProcessing, message=Dependent Extension Microsoft.Azure.Extensions.CustomScript did not succeed. 
Status was error, duration=0 + # We intentionally make CustomScript fail to test that dependent extensions are skipped + # + { + 'message': r"Event: name=Microsoft.Azure.Extensions.CustomScript, op=ExtensionProcessing, message=Dependent Extension Microsoft.Azure.Extensions.CustomScript did not succeed. Status was error, duration=0" + }, + # + # 2023-10-31T17:47:07.689083Z WARNING ExtHandler ExtHandler [PERIODIC] This status is being reported by the Guest Agent since no status file was reported by extension Microsoft.Azure.Monitor.AzureMonitorLinuxAgent: [ExtensionStatusError] Status file /var/lib/waagent/Microsoft.Azure.Monitor.AzureMonitorLinuxAgent-1.28.11/status/6.status does not exist + # We expect extensions that are dependent on a failing extension to not report status + # + { + 'message': r"\[PERIODIC\] This status is being reported by the Guest Agent since no status file was reported by extension .*: \[ExtensionStatusError\] Status file \/var\/lib\/waagent\/.*\/status\/\d.status does not exist" + }, + # + # 2023-10-31T17:48:11.306835Z WARNING ExtHandler ExtHandler A new goal state was received, but not all the extensions in the previous goal state have completed: [('Microsoft.Azure.Extensions.CustomScript', 'error'), ('Microsoft.Azure.Monitor.AzureMonitorLinuxAgent', 'transitioning'), ('Microsoft.CPlat.Core.RunCommandLinux', 'success')] + # This message appears when the previous test scenario had failing extensions due to extension dependencies + # + { + 'message': r"A new goal state was received, but not all the extensions in the previous goal state have completed: \[(\(u?'.*', u?'(error|transitioning|success)'\),?)+\]" + } + ] + return ignore_rules + + +if __name__ == "__main__": + ExtSequencing.run_from_command_line() diff --git a/tests_e2e/tests/lib/vm_extension_identifier.py b/tests_e2e/tests/lib/vm_extension_identifier.py index 26113e445c..fa304cb766 100644 --- a/tests_e2e/tests/lib/vm_extension_identifier.py +++ 
b/tests_e2e/tests/lib/vm_extension_identifier.py @@ -33,7 +33,8 @@ def __init__(self, publisher: str, ext_type: str, version: str): self.version: str = version unsupported_distros: Dict[str, List[str]] = { - "Microsoft.OSTCExtensions.VMAccessForLinux": ["flatcar"] + "Microsoft.OSTCExtensions.VMAccessForLinux": ["flatcar"], + "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent": ["flatcar"] } def supports_distro(self, system_info: str) -> bool: diff --git a/tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py b/tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py new file mode 100755 index 0000000000..b9b2c66cb5 --- /dev/null +++ b/tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py @@ -0,0 +1,87 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Gets the timestamp for when the provided extension was enabled +# + +import argparse +import re +import sys + +from datetime import datetime +from pathlib import Path + + +def main(): + """ + Returns the timestamp of when the provided extension was enabled + """ + parser = argparse.ArgumentParser() + parser.add_argument("--ext_type", dest='ext_type', required=True) + parser.add_argument("--start_time", dest='start_time', required=True) + args, _ = parser.parse_known_args() + + # Extension enabled time is in extension CommandExecution.log + command_exec_log_path = Path('/var/log/azure/' + args.ext_type + '/CommandExecution.log') + command_exec_log = open(command_exec_log_path, 'r') + enabled_match = None + for line in command_exec_log.readlines(): + line = line.rstrip() + if args.ext_type == "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent": + # AMA logs enable succeeded and its timestamp to the command execution log: + # 2023-11-01T23:22:53.124603Z INFO ExtHandler [Microsoft.Azure.Monitor.AzureMonitorLinuxAgent-1.28.11] Command: ./shim.sh -enable + # [stdout] + # 2023/09/26 04:07:33 [Microsoft.Azure.Monitor.AzureMonitorLinuxAgent-1.28.5] Enable,success,0,Enable succeeded + enable_pattern = r'.*(?P\d{4}\/\d{2}\/\d{2} \d{2}:\d{2}:\d{2}) \[Microsoft\.Azure\.Monitor\.AzureMonitorLinuxAgent\-.*] .*Enable succeeded.*' + match = re.match(enable_pattern, line) + if match: + enabled_match = match + else: + # For RC and CSE, we can determine when enable succeeded from the stdout of the enable command execution from + # the command execution log: + # 2023-09-26T04:07:39.042948Z INFO ExtHandler [Microsoft.CPlat.Core.RunCommandLinux-1.0.5] Command: bin/run-command-shim enable + # [stdout] + # ... 
+ # time=2023-09-26T04:07:37Z version=v1.0.4/git@b3be41d-dirty operation=enable seq=0 event=enabledevent=enabled + enable_pattern = r'time=(?P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z).*event=enabled' + match = re.match(enable_pattern, line) + if match: + enabled_match = match + + if not enabled_match: + # Try to get enabled time from extension command execution logs + print("Agent log does not show extension was enabled", file=sys.stderr) + sys.exit(1) + + if args.ext_type == "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent": + enable_time = datetime.strptime(enabled_match.group('timestamp'), u'%Y/%m/%d %H:%M:%S') + else: + enable_time = datetime.strptime(enabled_match.group('timestamp'), u'%Y-%m-%dT%H:%M:%SZ') + + start_time = datetime.strptime(args.start_time, u'%Y-%m-%d %H:%M:%S.%f') + if enable_time < start_time: + print("Agent log does not show extension was enabled after this test case started. Last enabled time was {0}. This test case started at {1}".format(enable_time, start_time), file=sys.stderr) + sys.exit(1) + else: + print(enable_time) + + sys.exit(0) + + +if __name__ == "__main__": + main() From 9324a894df27f51f12100b75ba45cba1b886fdb2 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Wed, 8 Nov 2023 11:30:15 -0800 Subject: [PATCH 100/240] rename log file for agent publish scenario (#2956) * rename log file * add param * address comment --- tests_e2e/tests/agent_publish/agent_publish.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests_e2e/tests/agent_publish/agent_publish.py b/tests_e2e/tests/agent_publish/agent_publish.py index 0736a8fe8b..617d25271c 100644 --- a/tests_e2e/tests/agent_publish/agent_publish.py +++ b/tests_e2e/tests/agent_publish/agent_publish.py @@ -61,9 +61,9 @@ def _get_agent_info(self) -> None: log.info('Agent info \n%s', stdout) def _prepare_agent(self) -> None: - log.info("Modifying agent update related config flags") - 
self._run_remote_test(self._ssh_client, "update-waagent-conf Debug.DownloadNewAgents=y AutoUpdate.GAFamily=Test AutoUpdate.Enabled=y Extensions.Enabled=y", use_sudo=True) - log.info('Updated agent-update DownloadNewAgents GAFamily config flags') + log.info("Modifying agent update related config flags and renaming the log file") + self._run_remote_test(self._ssh_client, "sh -c 'agent-service stop && mv /var/log/waagent.log /var/log/waagent.$(date --iso-8601=seconds).log && update-waagent-conf Debug.DownloadNewAgents=y AutoUpdate.GAFamily=Test AutoUpdate.Enabled=y Extensions.Enabled=y'", use_sudo=True) + log.info('Renamed log file and updated agent-update DownloadNewAgents GAFamily config flags') def _check_update(self) -> None: log.info("Verifying for agent update status") From 8f3ee961c2524b2c4e1a54f5823b0891399277f9 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 8 Nov 2023 12:48:34 -0800 Subject: [PATCH 101/240] Fix name collisions on resource groups created by AgentTestSuite (#2981) Co-authored-by: narrieta --- tests_e2e/orchestrator/lib/agent_test_suite.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index c9e177fcc5..bed6210763 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -239,13 +239,14 @@ def _initialize(self, environment: Environment, variables: Dict[str, Any], lisa_ if isinstance(environment.nodes[0], LocalNode): # We need to create a new VMSS. - # Use the same naming convention as LISA for the scale set name: lisa---e-n0 - # Note that we hardcode the scale set name to "n0" since we are creating a single scale set. - # Resource group name cannot have any uppercase characters, because the publicIP cannot have uppercase - # characters in its domain name label. 
+ # Use the same naming convention as LISA for the scale set name: lisa----n0, + # except that, for the "rg_name", LISA uses "e" as prefix (e.g. "e0", "e1", etc.), while we use "w" (for + # WALinuxAgent, e.g. "w0", "w1", etc.) to avoid name collisions. Also, note that we hardcode the scale set name + # to "n0" since we are creating a single scale set. Lastly, the resource group name cannot have any uppercase + # characters, because the publicIP cannot have uppercase characters in its domain name label. AgentTestSuite._rg_count_lock.acquire() try: - self._resource_group_name = f"lisa-{self._runbook_name.lower()}-{RUN_ID}-e{AgentTestSuite._rg_count}" + self._resource_group_name = f"lisa-{self._runbook_name.lower()}-{RUN_ID}-w{AgentTestSuite._rg_count}" AgentTestSuite._rg_count += 1 finally: AgentTestSuite._rg_count_lock.release() From d10cdff5d1a8979875fe1c3e202bd052818218d6 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 8 Nov 2023 15:23:20 -0800 Subject: [PATCH 102/240] Save goal state history explicitly (#2977) * Save goal state explicitly * typo * remove default value in internal method --------- Co-authored-by: narrieta --- azurelinuxagent/common/protocol/goal_state.py | 40 ++++++++++++------- azurelinuxagent/common/protocol/util.py | 8 ++-- azurelinuxagent/common/protocol/wire.py | 14 +++---- azurelinuxagent/ga/update.py | 4 +- tests/common/protocol/test_goal_state.py | 18 +++++++-- tests/common/protocol/test_protocol_util.py | 4 +- tests/ga/test_update.py | 6 +-- tests/lib/mock_wire_protocol.py | 6 ++- 8 files changed, 61 insertions(+), 39 deletions(-) diff --git a/azurelinuxagent/common/protocol/goal_state.py b/azurelinuxagent/common/protocol/goal_state.py index 1b4bcea829..2eb89c1ebb 100644 --- a/azurelinuxagent/common/protocol/goal_state.py +++ b/azurelinuxagent/common/protocol/goal_state.py @@ -70,7 +70,7 @@ def __init__(self, msg, inner=None): class GoalState(object): - def __init__(self, wire_client, 
goal_state_properties=GoalStateProperties.All, silent=False): + def __init__(self, wire_client, goal_state_properties=GoalStateProperties.All, silent=False, save_to_history=False): """ Fetches the goal state using the given wire client. @@ -84,6 +84,7 @@ def __init__(self, wire_client, goal_state_properties=GoalStateProperties.All, s try: self._wire_client = wire_client self._history = None + self._save_to_history = save_to_history self._extensions_goal_state = None # populated from vmSettings or extensionsConfig self._goal_state_properties = goal_state_properties self.logger = logger.Logger(logger.DEFAULT_LOGGER) @@ -186,7 +187,8 @@ def _fetch_manifest(self, manifest_type, name, uris): try: is_fast_track = self.extensions_goal_state.source == GoalStateSource.FastTrack xml_text = self._wire_client.fetch_manifest(manifest_type, uris, use_verify_header=is_fast_track) - self._history.save_manifest(name, xml_text) + if self._save_to_history: + self._history.save_manifest(name, xml_text) return ExtensionManifest(xml_text) except Exception as e: raise ProtocolError("Failed to retrieve {0} manifest. 
Error: {1}".format(manifest_type, ustr(e))) @@ -263,11 +265,12 @@ def _update(self, force_update): # Start a new history subdirectory and capture the updated goal state tag = "{0}".format(incarnation) if vm_settings is None else "{0}-{1}".format(incarnation, vm_settings.etag) - self._history = GoalStateHistory(timestamp, tag) - if goal_state_updated: - self._history.save_goal_state(xml_text) - if vm_settings_updated: - self._history.save_vm_settings(vm_settings.get_redacted_text()) + if self._save_to_history: + self._history = GoalStateHistory(timestamp, tag) + if goal_state_updated: + self._history.save_goal_state(xml_text) + if vm_settings_updated: + self._history.save_vm_settings(vm_settings.get_redacted_text()) # # Continue fetching the rest of the goal state @@ -324,7 +327,8 @@ def _download_certificates(self, certs_uri): if len(certs.warnings) > 0: self.logger.warn(certs.warnings) add_event(op=WALAEventOperation.GoalState, message=certs.warnings) - self._history.save_certificates(json.dumps(certs.summary)) + if self._save_to_history: + self._history.save_certificates(json.dumps(certs.summary)) return certs def _check_and_download_missing_certs_on_disk(self): @@ -357,8 +361,9 @@ def _restore_wire_server_goal_state(self, incarnation, xml_text, xml_doc, vm_set msg = 'The HGAP stopped supporting vmSettings; will fetched the goal state from the WireServer.' 
self.logger.info(msg) add_event(op=WALAEventOperation.VmSettings, message=msg) - self._history = GoalStateHistory(datetime.datetime.utcnow(), incarnation) - self._history.save_goal_state(xml_text) + if self._save_to_history: + self._history = GoalStateHistory(datetime.datetime.utcnow(), incarnation) + self._history.save_goal_state(xml_text) self._extensions_goal_state = self._fetch_full_wire_server_goal_state(incarnation, xml_doc) if self._extensions_goal_state.created_on_timestamp < vm_settings_support_stopped_error.timestamp: self._extensions_goal_state.is_outdated = True @@ -368,7 +373,8 @@ def _restore_wire_server_goal_state(self, incarnation, xml_text, xml_doc, vm_set add_event(op=WALAEventOperation.VmSettings, message=msg) def save_to_history(self, data, file_name): - self._history.save(data, file_name) + if self._save_to_history: + self._history.save(data, file_name) @staticmethod def _fetch_goal_state(wire_client): @@ -463,21 +469,24 @@ def _fetch_full_wire_server_goal_state(self, incarnation, xml_doc): else: xml_text = self._wire_client.fetch_config(extensions_config_uri, self._wire_client.get_header()) extensions_config = ExtensionsGoalStateFactory.create_from_extensions_config(incarnation, xml_text, self._wire_client) - self._history.save_extensions_config(extensions_config.get_redacted_text()) + if self._save_to_history: + self._history.save_extensions_config(extensions_config.get_redacted_text()) hosting_env = None if GoalStateProperties.HostingEnv & self._goal_state_properties: hosting_env_uri = findtext(xml_doc, "HostingEnvironmentConfig") xml_text = self._wire_client.fetch_config(hosting_env_uri, self._wire_client.get_header()) hosting_env = HostingEnv(xml_text) - self._history.save_hosting_env(xml_text) + if self._save_to_history: + self._history.save_hosting_env(xml_text) shared_config = None if GoalStateProperties.SharedConfig & self._goal_state_properties: shared_conf_uri = findtext(xml_doc, "SharedConfig") xml_text = 
self._wire_client.fetch_config(shared_conf_uri, self._wire_client.get_header()) shared_config = SharedConfig(xml_text) - self._history.save_shared_conf(xml_text) + if self._save_to_history: + self._history.save_shared_conf(xml_text) # SharedConfig.xml is used by other components (Azsec and Singularity/HPC Infiniband), so save it to the agent's root directory as well shared_config_file = os.path.join(conf.get_lib_dir(), SHARED_CONF_FILE_NAME) try: @@ -496,7 +505,8 @@ def _fetch_full_wire_server_goal_state(self, incarnation, xml_doc): if remote_access_uri is not None: xml_text = self._wire_client.fetch_config(remote_access_uri, self._wire_client.get_header_for_cert()) remote_access = RemoteAccess(xml_text) - self._history.save_remote_access(xml_text) + if self._save_to_history: + self._history.save_remote_access(xml_text) self._incarnation = incarnation self._role_instance_id = role_instance_id diff --git a/azurelinuxagent/common/protocol/util.py b/azurelinuxagent/common/protocol/util.py index 7d7f901681..b90e9542c3 100644 --- a/azurelinuxagent/common/protocol/util.py +++ b/azurelinuxagent/common/protocol/util.py @@ -188,7 +188,7 @@ def _clear_wireserver_endpoint(self): return logger.error("Failed to clear wiresever endpoint: {0}", e) - def _detect_protocol(self, init_goal_state=True): + def _detect_protocol(self, save_to_history, init_goal_state=True): """ Probe protocol endpoints in turn. """ @@ -217,7 +217,7 @@ def _detect_protocol(self, init_goal_state=True): try: protocol = WireProtocol(endpoint) - protocol.detect(init_goal_state=init_goal_state) + protocol.detect(init_goal_state=init_goal_state, save_to_history=save_to_history) self._set_wireserver_endpoint(endpoint) return protocol @@ -268,7 +268,7 @@ def clear_protocol(self): finally: self._lock.release() - def get_protocol(self, init_goal_state=True): + def get_protocol(self, init_goal_state=True, save_to_history=False): """ Detect protocol by endpoint. 
:returns: protocol instance @@ -296,7 +296,7 @@ def get_protocol(self, init_goal_state=True): logger.info("Detect protocol endpoint") - protocol = self._detect_protocol(init_goal_state=init_goal_state) + protocol = self._detect_protocol(save_to_history=save_to_history, init_goal_state=init_goal_state) IOErrorCounter.set_protocol_endpoint(endpoint=protocol.get_endpoint()) self._save_protocol(WIRE_PROTOCOL_NAME) diff --git a/azurelinuxagent/common/protocol/wire.py b/azurelinuxagent/common/protocol/wire.py index ea397f0497..c93624cb1b 100644 --- a/azurelinuxagent/common/protocol/wire.py +++ b/azurelinuxagent/common/protocol/wire.py @@ -73,7 +73,7 @@ def __init__(self, endpoint): raise ProtocolError("WireProtocol endpoint is None") self.client = WireClient(endpoint) - def detect(self, init_goal_state=True): + def detect(self, init_goal_state=True, save_to_history=False): self.client.check_wire_protocol_version() trans_prv_file = os.path.join(conf.get_lib_dir(), @@ -86,7 +86,7 @@ def detect(self, init_goal_state=True): # Initialize the goal state, including all the inner properties if init_goal_state: logger.info('Initializing goal state during protocol detection') - self.client.reset_goal_state() + self.client.reset_goal_state(save_to_history=save_to_history) def update_host_plugin_from_goal_state(self): self.client.update_host_plugin_from_goal_state() @@ -777,13 +777,13 @@ def update_host_plugin(self, container_id, role_config_name): self._host_plugin.update_container_id(container_id) self._host_plugin.update_role_config_name(role_config_name) - def update_goal_state(self, silent=False): + def update_goal_state(self, silent=False, save_to_history=False): """ Updates the goal state if the incarnation or etag changed """ try: if self._goal_state is None: - self._goal_state = GoalState(self, silent=silent) + self._goal_state = GoalState(self, silent=silent, save_to_history=save_to_history) else: self._goal_state.update(silent=silent) @@ -792,7 +792,7 @@ def 
update_goal_state(self, silent=False): except Exception as exception: raise ProtocolError("Error fetching goal state: {0}".format(ustr(exception))) - def reset_goal_state(self, goal_state_properties=GoalStateProperties.All, silent=False): + def reset_goal_state(self, goal_state_properties=GoalStateProperties.All, silent=False, save_to_history=False): """ Resets the goal state """ @@ -800,7 +800,7 @@ def reset_goal_state(self, goal_state_properties=GoalStateProperties.All, silent if not silent: logger.info("Forcing an update of the goal state.") - self._goal_state = GoalState(self, goal_state_properties=goal_state_properties, silent=silent) + self._goal_state = GoalState(self, goal_state_properties=goal_state_properties, silent=silent, save_to_history=save_to_history) except ProtocolError: raise @@ -936,7 +936,7 @@ def upload_status_blob(self): if extensions_goal_state.status_upload_blob is None: # the status upload blob is in ExtensionsConfig so force a full goal state refresh - self.reset_goal_state(silent=True) + self.reset_goal_state(silent=True, save_to_history=True) extensions_goal_state = self.get_goal_state().extensions_goal_state if extensions_goal_state.status_upload_blob is None: diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 2862f8e906..6e7b5b917c 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -339,7 +339,7 @@ def run(self, debug=False): # Initialize the goal state; some components depend on information provided by the goal state and this # call ensures the required info is initialized (e.g. telemetry depends on the container ID.) 
# - protocol = self.protocol_util.get_protocol() + protocol = self.protocol_util.get_protocol(save_to_history=True) self._initialize_goal_state(protocol) @@ -503,7 +503,7 @@ def _try_update_goal_state(self, protocol): try: max_errors_to_log = 3 - protocol.client.update_goal_state(silent=self._update_goal_state_error_count >= max_errors_to_log) + protocol.client.update_goal_state(silent=self._update_goal_state_error_count >= max_errors_to_log, save_to_history=True) self._goal_state = protocol.get_goal_state() diff --git a/tests/common/protocol/test_goal_state.py b/tests/common/protocol/test_goal_state.py index c33851e9fb..5b4a2948af 100644 --- a/tests/common/protocol/test_goal_state.py +++ b/tests/common/protocol/test_goal_state.py @@ -112,14 +112,18 @@ def test_fetching_the_goal_state_should_save_the_goal_state_to_the_history_direc protocol.mock_wire_data.set_incarnation(999) protocol.mock_wire_data.set_etag(888) - _ = GoalState(protocol.client) + _ = GoalState(protocol.client, save_to_history=True) self._assert_directory_contents( self._find_history_subdirectory("999-888"), ["GoalState.xml", "ExtensionsConfig.xml", "VmSettings.json", "Certificates.json", "SharedConfig.xml", "HostingEnvironmentConfig.xml"]) + @staticmethod + def _get_history_directory(): + return os.path.join(conf.get_lib_dir(), ARCHIVE_DIRECTORY_NAME) + def _find_history_subdirectory(self, tag): - matches = glob.glob(os.path.join(self.tmp_dir, ARCHIVE_DIRECTORY_NAME, "*_{0}".format(tag))) + matches = glob.glob(os.path.join(self._get_history_directory(), "*_{0}".format(tag))) self.assertTrue(len(matches) == 1, "Expected one history directory for tag {0}. 
Got: {1}".format(tag, matches)) return matches[0] @@ -136,7 +140,7 @@ def test_update_should_create_new_history_subdirectories(self): protocol.mock_wire_data.set_incarnation(123) protocol.mock_wire_data.set_etag(654) - goal_state = GoalState(protocol.client) + goal_state = GoalState(protocol.client, save_to_history=True) self._assert_directory_contents( self._find_history_subdirectory("123-654"), ["GoalState.xml", "ExtensionsConfig.xml", "VmSettings.json", "Certificates.json", "SharedConfig.xml", "HostingEnvironmentConfig.xml"]) @@ -164,7 +168,7 @@ def test_it_should_redact_the_protected_settings_when_saving_to_the_history_dire protocol.mock_wire_data.set_incarnation(888) protocol.mock_wire_data.set_etag(888) - goal_state = GoalState(protocol.client) + goal_state = GoalState(protocol.client, save_to_history=True) extensions_goal_state = goal_state.extensions_goal_state protected_settings = [] @@ -221,6 +225,12 @@ def test_it_should_save_vm_settings_on_parse_errors(self): self.assertEqual(expected, actual, "The vmSettings were not saved correctly") + def test_should_not_save_to_the_history_by_default(self): + with mock_wire_protocol(wire_protocol_data.DATA_FILE_VM_SETTINGS) as protocol: + _ = GoalState(protocol.client) # omit the save_to_history parameter + history = self._get_history_directory() + self.assertFalse(os.path.exists(history), "The history directory not should have been created") + @staticmethod @contextlib.contextmanager def _create_protocol_ws_and_hgap_in_sync(): diff --git a/tests/common/protocol/test_protocol_util.py b/tests/common/protocol/test_protocol_util.py index 06683cef25..b60ca9af95 100644 --- a/tests/common/protocol/test_protocol_util.py +++ b/tests/common/protocol/test_protocol_util.py @@ -127,14 +127,14 @@ def test_detect_protocol_no_dhcp(self, WireProtocol, mock_get_lib_dir, _): endpoint_file = protocol_util._get_wireserver_endpoint_file_path() # pylint: disable=unused-variable # Test wire protocol when no endpoint file has been written 
- protocol_util._detect_protocol() + protocol_util._detect_protocol(save_to_history=False) self.assertEqual(KNOWN_WIRESERVER_IP, protocol_util.get_wireserver_endpoint()) # Test wire protocol on dhcp failure protocol_util.osutil.is_dhcp_available.return_value = True protocol_util.dhcp_handler.run.side_effect = DhcpError() - self.assertRaises(ProtocolError, protocol_util._detect_protocol) + self.assertRaises(ProtocolError, lambda: protocol_util._detect_protocol(save_to_history=False)) @patch("azurelinuxagent.common.protocol.util.WireProtocol") def test_get_protocol(self, WireProtocol, _): diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 49a4e0befa..8bac67746c 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -2176,7 +2176,7 @@ def _create_update_handler(): @contextlib.contextmanager -def _mock_exthandlers_handler(extension_statuses=None): +def _mock_exthandlers_handler(extension_statuses=None, save_to_history=False): """ Creates an ExtHandlersHandler that doesn't actually handle any extensions, but that returns status for 1 extension. 
The returned ExtHandlersHandler uses a mock WireProtocol, and both the run() and report_ext_handlers_status() are @@ -2191,7 +2191,7 @@ def create_vm_status(extension_status): vm_status.vmAgent.extensionHandlers[0].extension_status.status = extension_status return vm_status - with mock_wire_protocol(DATA_FILE) as protocol: + with mock_wire_protocol(DATA_FILE, save_to_history=save_to_history) as protocol: exthandlers_handler = ExtHandlersHandler(protocol) exthandlers_handler.run = Mock() if extension_statuses is None: @@ -2237,7 +2237,7 @@ def test_it_should_process_goal_state_only_on_new_goal_state(self): self.assertEqual(3, agent_update_handler.run.call_count, "agent_update_handler.run() should have been called on the new goal state") def test_it_should_write_the_agent_status_to_the_history_folder(self): - with _mock_exthandlers_handler() as exthandlers_handler: + with _mock_exthandlers_handler(save_to_history=True) as exthandlers_handler: update_handler = _create_update_handler() remote_access_handler = Mock() remote_access_handler.run = Mock() diff --git a/tests/lib/mock_wire_protocol.py b/tests/lib/mock_wire_protocol.py index 4e3521fef2..78cbc59e2e 100644 --- a/tests/lib/mock_wire_protocol.py +++ b/tests/lib/mock_wire_protocol.py @@ -22,7 +22,7 @@ @contextlib.contextmanager -def mock_wire_protocol(mock_wire_data_file, http_get_handler=None, http_post_handler=None, http_put_handler=None, do_not_mock=lambda method, url: False, fail_on_unknown_request=True): +def mock_wire_protocol(mock_wire_data_file, http_get_handler=None, http_post_handler=None, http_put_handler=None, do_not_mock=lambda method, url: False, fail_on_unknown_request=True, save_to_history=False): """ Creates a WireProtocol object that handles requests to the WireServer, the Host GA Plugin, and some requests to storage (requests that provide mock data in wire_protocol_data.py). 
@@ -38,6 +38,8 @@ def mock_wire_protocol(mock_wire_data_file, http_get_handler=None, http_post_han The 'do_not_mock' lambda can be used to skip the mocks for specific requests; if the lambda returns True, the mocks won't be applied and the original common.utils.restutil.http_request will be invoked instead. + The 'save_to_history' parameter is passed thru in the call to WireProtocol.detect(). + The returned protocol object maintains a list of "tracked" urls. When a handler function returns a value than is not None the url for the request is automatically added to the tracked list. The handler function can add other items to this list using the track_url() method on the mock. @@ -147,7 +149,7 @@ def stop(): # go do it try: protocol.start() - protocol.detect() + protocol.detect(save_to_history=save_to_history) yield protocol finally: protocol.stop() From 584059475fec5ddcd5784f352907e1ab52c99dda Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Fri, 10 Nov 2023 10:55:48 -0800 Subject: [PATCH 103/240] Handle errors when adding logs to the archive (#2982) Co-authored-by: narrieta --- azurelinuxagent/ga/logcollector.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/azurelinuxagent/ga/logcollector.py b/azurelinuxagent/ga/logcollector.py index 393dd3c2ef..e21b1f51f1 100644 --- a/azurelinuxagent/ga/logcollector.py +++ b/azurelinuxagent/ga/logcollector.py @@ -360,9 +360,18 @@ def collect_logs_and_get_archive(self): try: compressed_archive = zipfile.ZipFile(COMPRESSED_ARCHIVE_PATH, "w", compression=zipfile.ZIP_DEFLATED) + max_errors = 8 + error_count = 0 for file_to_collect in files_to_collect: - archive_file_name = LogCollector._convert_file_name_to_archive_name(file_to_collect) - compressed_archive.write(file_to_collect.encode("utf-8"), arcname=archive_file_name) + try: + archive_file_name = LogCollector._convert_file_name_to_archive_name(file_to_collect) + compressed_archive.write(file_to_collect.encode("utf-8"), 
arcname=archive_file_name) + except Exception as e: + error_count += 1 + if error_count >= max_errors: + raise Exception("Too many errors, giving up. Last error: {0}".format(ustr(e))) + else: + _LOGGER.warning("Failed to add file %s to the archive: %s", file_to_collect, ustr(e)) compressed_archive_size = os.path.getsize(COMPRESSED_ARCHIVE_PATH) _LOGGER.info("Successfully compressed files. Compressed archive size is %s b", compressed_archive_size) From 5ea6b350abc3edd308b36e1226197f0ab31b00f8 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Mon, 13 Nov 2023 09:49:22 -0800 Subject: [PATCH 104/240] Timing issue while checking cpu quota (#2976) * timing issue * fix pylint" * undo --- .gitignore | 2 +- .../tests/scripts/agent_cpu_quota-check_agent_cpu_quota.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index fd64d3314e..79226a4920 100644 --- a/.gitignore +++ b/.gitignore @@ -90,4 +90,4 @@ ENV/ # pyenv .python-version -.vscode/ +.vscode/ \ No newline at end of file diff --git a/tests_e2e/tests/scripts/agent_cpu_quota-check_agent_cpu_quota.py b/tests_e2e/tests/scripts/agent_cpu_quota-check_agent_cpu_quota.py index 63871b43a6..c8aad49f59 100755 --- a/tests_e2e/tests/scripts/agent_cpu_quota-check_agent_cpu_quota.py +++ b/tests_e2e/tests/scripts/agent_cpu_quota-check_agent_cpu_quota.py @@ -169,7 +169,8 @@ def verify_process_check_on_agent_cgroups(): # [CGroupsException] The agent's cgroup includes unexpected processes: ['[PID: 25826] python3\x00/home/nam/Compute-Runtime-Tux-Pipeline/dungeon_crawler/s'] wait_for_log_message( "Disabling resource usage monitoring. 
Reason: Check on cgroups failed:.+The agent's cgroup includes unexpected processes") - if not check_agent_quota_disabled(): + disabled: bool = retry_if_false(check_agent_quota_disabled) + if not disabled: fail("The agent did not disable its CPUQuota: {0}".format(get_agent_cpu_quota())) @@ -199,7 +200,8 @@ def verify_throttling_time_check_on_agent_cgroups(): timeout=datetime.timedelta(minutes=10)) wait_for_log_message("Stopped tracking cgroup walinuxagent.service", timeout=datetime.timedelta(minutes=10)) wait_for_log_message("Executing systemctl daemon-reload...", timeout=datetime.timedelta(minutes=5)) - if not check_agent_quota_disabled(): + disabled: bool = retry_if_false(check_agent_quota_disabled) + if not disabled: fail("The agent did not disable its CPUQuota: {0}".format(get_agent_cpu_quota())) From 029fdb14ea84fa7fa416d5ecf140491da0c4f8cc Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Thu, 16 Nov 2023 14:11:22 -0800 Subject: [PATCH 105/240] Use case-insentive match when cleaning up test resource groups (#2986) Co-authored-by: narrieta --- tests_e2e/pipeline/pipeline-cleanup.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_e2e/pipeline/pipeline-cleanup.yml b/tests_e2e/pipeline/pipeline-cleanup.yml index 961778cf68..c679d29724 100644 --- a/tests_e2e/pipeline/pipeline-cleanup.yml +++ b/tests_e2e/pipeline/pipeline-cleanup.yml @@ -51,6 +51,6 @@ steps: --output json \ --query value \ | jq --arg date "$date" '.[] | select (.createdTime < $date).name' \ - | grep '${{ parameters.name_pattern }}' \ + | grep -i '${{ parameters.name_pattern }}' \ | xargs -l -t -r az group delete --no-wait -y -n \ || echo "No resource groups found to delete" From ce17d9ba182e32b8321f38b36c7c77f4d1047349 Mon Sep 17 00:00:00 2001 From: d1r3ct0r Date: Mon, 20 Nov 2023 19:25:26 +0300 Subject: [PATCH 106/240] Update supported Ubuntu versions (#2980) --- azurelinuxagent/common/osutil/factory.py | 11 +++++------ azurelinuxagent/common/osutil/ubuntu.py | 2 +- 
tests/common/osutil/test_factory.py | 7 +++++++ 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/azurelinuxagent/common/osutil/factory.py b/azurelinuxagent/common/osutil/factory.py index 83123e3f53..c130ebc2a4 100644 --- a/azurelinuxagent/common/osutil/factory.py +++ b/azurelinuxagent/common/osutil/factory.py @@ -66,15 +66,14 @@ def _get_osutil(distro_name, distro_code_name, distro_version, distro_full_name) return ClearLinuxUtil() if distro_name == "ubuntu": - if Version(distro_version) in [Version("12.04"), Version("12.10")]: + ubuntu_version = Version(distro_version) + if ubuntu_version in [Version("12.04"), Version("12.10")]: return Ubuntu12OSUtil() - if Version(distro_version) in [Version("14.04"), Version("14.10")]: + if ubuntu_version in [Version("14.04"), Version("14.10")]: return Ubuntu14OSUtil() - if Version(distro_version) in [Version('16.04'), Version('16.10'), Version('17.04')]: + if ubuntu_version in [Version('16.04'), Version('16.10'), Version('17.04')]: return Ubuntu16OSUtil() - if Version(distro_version) in [Version('18.04'), Version('18.10'), - Version('19.04'), Version('19.10'), - Version('20.04')]: + if ubuntu_version >= Version('18.04') and ubuntu_version <= Version('24.04'): return Ubuntu18OSUtil() if distro_full_name == "Snappy Ubuntu Core": return UbuntuSnappyOSUtil() diff --git a/azurelinuxagent/common/osutil/ubuntu.py b/azurelinuxagent/common/osutil/ubuntu.py index 5a21511c91..2b82d8ca31 100644 --- a/azurelinuxagent/common/osutil/ubuntu.py +++ b/azurelinuxagent/common/osutil/ubuntu.py @@ -88,7 +88,7 @@ def unregister_agent_service(self): class Ubuntu18OSUtil(Ubuntu16OSUtil): """ - Ubuntu 18.04, 18.10, 19.04, 19.10, 20.04 + Ubuntu >=18.04 and <=24.04 """ def __init__(self): super(Ubuntu18OSUtil, self).__init__() diff --git a/tests/common/osutil/test_factory.py b/tests/common/osutil/test_factory.py index fbe5e24d3d..46bf6a8758 100644 --- a/tests/common/osutil/test_factory.py +++ b/tests/common/osutil/test_factory.py @@ -98,6 
+98,13 @@ def test_get_osutil_it_should_return_ubuntu(self): self.assertTrue(isinstance(ret, Ubuntu18OSUtil)) self.assertEqual(ret.get_service_name(), "walinuxagent") + ret = _get_osutil(distro_name="ubuntu", + distro_code_name="focal", + distro_version="24.04", + distro_full_name="") + self.assertTrue(isinstance(ret, Ubuntu18OSUtil)) + self.assertEqual(ret.get_service_name(), "walinuxagent") + ret = _get_osutil(distro_name="ubuntu", distro_code_name="", distro_version="10.04", From da328996ab68ba8aed12f1fbafab0f5343b55127 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Mon, 20 Nov 2023 09:21:04 -0800 Subject: [PATCH 107/240] Fix pylint warning (#2988) Co-authored-by: narrieta --- azurelinuxagent/common/osutil/factory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/common/osutil/factory.py b/azurelinuxagent/common/osutil/factory.py index c130ebc2a4..e2f15afb56 100644 --- a/azurelinuxagent/common/osutil/factory.py +++ b/azurelinuxagent/common/osutil/factory.py @@ -73,7 +73,7 @@ def _get_osutil(distro_name, distro_code_name, distro_version, distro_full_name) return Ubuntu14OSUtil() if ubuntu_version in [Version('16.04'), Version('16.10'), Version('17.04')]: return Ubuntu16OSUtil() - if ubuntu_version >= Version('18.04') and ubuntu_version <= Version('24.04'): + if Version('18.04') <= ubuntu_version <= Version('24.04'): return Ubuntu18OSUtil() if distro_full_name == "Snappy Ubuntu Core": return UbuntuSnappyOSUtil() From a335d528c1260c45d0e1482918c163e0f12585f2 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Mon, 20 Nov 2023 09:36:06 -0800 Subject: [PATCH 108/240] Add information about HTTP proxies (#2985) * Add information about HTTP proxies * no_proxy --------- Co-authored-by: narrieta --- README.md | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 4227d53592..3d3a824e1f 100644 --- a/README.md +++ b/README.md @@ -58,13 +58,33 @@ The 
information flow from the platform to the agent occurs via two channels: * A TCP endpoint exposing a REST API used to obtain deployment and topology configuration. -The agent will use an HTTP proxy if provided via the `http_proxy` (for `http` requests) or -`https_proxy` (for `https` requests) environment variables. The `HttpProxy.Host` and -`HttpProxy.Port` configuration variables (see below), if used, will override the environment -settings. Due to limitations of Python, the agent *does not* support HTTP proxies requiring -authentication. Note that when the agent service is managed by systemd, environment variables -such as `http_proxy` and `https_proxy` should be defined using one the mechanisms provided by -systemd (e.g. by using Environment or EnvironmentFile in the service file). +### HTTP Proxy +The Agent will use an HTTP proxy if provided via the `http_proxy` (for `http` requests) or +`https_proxy` (for `https` requests) environment variables. Due to limitations of Python, +the agent *does not* support HTTP proxies requiring authentication. + +Similarly, the Agent will bypass the proxy if the environment variable `no_proxy` is set. + +Note that the way to define those environment variables for the Agent service varies across different distros. For distros +that use systemd, a common approach is to use Environment or EnvironmentFile in the [Service] section of the service +definition, for example using an override or a drop-in file (see "systemctl edit" for overrides). + +Example +```bash + # cat /etc/systemd/system/walinuxagent.service.d/http-proxy.conf + [Service] + Environment="http_proxy=http://proxy.example.com:80/" + Environment="https_proxy=http://proxy.example.com:80/" + # +``` + +The Agent passes its environment to the VM Extensions it executes, including `http_proxy` and `https_proxy`, so defining +a proxy for the Agent will also define it for the VM Extensions. 
+ + +The [`HttpProxy.Host` and `HttpProxy.Port`](#httpproxyhost-httpproxyport) configuration variables, if used, override +the environment settings. Note that this configuration variables are local to the Agent process and are not passed to +VM Extensions. ## Requirements @@ -564,7 +584,7 @@ directory. _Type: String_ _Default: None_ -If set, the agent will use this proxy server to access the internet. These values +If set, the agent will use this proxy server for HTTP/HTTPS requests. These values *will* override the `http_proxy` or `https_proxy` environment variables. Lastly, `HttpProxy.Host` is required (if to be used) and `HttpProxy.Port` is optional. From 5a41542bee3c00237800c26fc870ed605302d7e9 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Wed, 22 Nov 2023 15:26:34 -0800 Subject: [PATCH 109/240] agent persist firewall scenario (#2983) * agent persist firewall scenario * address comments * new comments --- .../orchestrator/lib/agent_test_loader.py | 23 +- .../lib/agent_test_suite_combinator.py | 25 +++ tests_e2e/orchestrator/runbook.yml | 2 +- tests_e2e/orchestrator/scripts/agent-service | 7 + .../test_suites/agent_persist_firewall.yml | 19 ++ .../agent_persist_firewall.py | 78 +++++++ tests_e2e/tests/lib/firewall_helpers.py | 209 ++++++++++++++++++ tests_e2e/tests/lib/retry.py | 3 +- tests_e2e/tests/lib/virtual_machine_client.py | 2 +- ...gent_firewall-verify_all_firewall_rules.py | 141 ++---------- .../agent_persist_firewall-access_wireserver | 85 +++++++ .../scripts/agent_persist_firewall-test_setup | 30 +++ ..._firewall-verify_firewall_rules_on_boot.py | 176 +++++++++++++++ ...firewall-verify_firewalld_rules_readded.py | 170 ++++++++++++++ ...verify_persist_firewall_service_running.py | 70 ++++++ 15 files changed, 915 insertions(+), 125 deletions(-) create mode 100644 tests_e2e/test_suites/agent_persist_firewall.yml create mode 100644 tests_e2e/tests/agent_persist_firewall/agent_persist_firewall.py 
create mode 100644 tests_e2e/tests/lib/firewall_helpers.py create mode 100755 tests_e2e/tests/scripts/agent_persist_firewall-access_wireserver create mode 100755 tests_e2e/tests/scripts/agent_persist_firewall-test_setup create mode 100755 tests_e2e/tests/scripts/agent_persist_firewall-verify_firewall_rules_on_boot.py create mode 100755 tests_e2e/tests/scripts/agent_persist_firewall-verify_firewalld_rules_readded.py create mode 100755 tests_e2e/tests/scripts/agent_persist_firewall-verify_persist_firewall_service_running.py diff --git a/tests_e2e/orchestrator/lib/agent_test_loader.py b/tests_e2e/orchestrator/lib/agent_test_loader.py index a1ac6c2a46..11e665c13f 100644 --- a/tests_e2e/orchestrator/lib/agent_test_loader.py +++ b/tests_e2e/orchestrator/lib/agent_test_loader.py @@ -65,6 +65,8 @@ class TestSuiteInfo(object): template: str # skip test suite if the test not supposed to run on specific clouds skip_on_clouds: List[str] + # skip test suite if test suite not suppose to run on specific images + skip_on_images: List[str] def __str__(self): return self.name @@ -168,6 +170,12 @@ def _parse_image(image: str) -> str: if suite_skip_cloud not in ["AzureCloud", "AzureChinaCloud", "AzureUSGovernment"]: raise Exception(f"Invalid cloud {suite_skip_cloud} for in {suite.name}") + # if the suite specifies skip images, validate that images used in our tests + for suite_skip_image in suite.skip_on_images: + if suite_skip_image not in self.images: + raise Exception(f"Invalid image reference in test suite {suite.name}: Can't find {suite_skip_image} in images.yml") + + @staticmethod def _load_test_suites(test_suites: str) -> List[TestSuiteInfo]: # @@ -205,6 +213,8 @@ def _load_test_suite(description_file: Path) -> TestSuiteInfo: owns_vm: true install_test_agent: true template: "bvts/template.py" + skip_on_clouds: "AzureChinaCloud" + skip_on_images: "ubuntu_2004" * name - A string used to identify the test suite * tests - A list of the tests in the suite. 
Each test can be specified by a string (the path for its source code relative to @@ -231,7 +241,9 @@ def _load_test_suite(description_file: Path) -> TestSuiteInfo: * skip_on_clouds - [Optional; string or list of strings] If given, the test suite will be skipped in the specified cloud(e.g. "AzureCloud"). If not specified, the test suite will be executed in all the clouds that we use. This is useful if you want to skip a test suite validation in a particular cloud when certain feature is not available in that cloud. - + # skip_on_images - [Optional; string or list of strings] If given, the test suite will be skipped on the specified images or image sets(e.g. "ubuntu_2004"). + If not specified, the test suite will be executed on all the images that we use. This is useful + if you want to skip a test suite validation on a particular images or image sets when certain feature is not available on that image. """ test_suite: Dict[str, Any] = AgentTestLoader._load_file(description_file) @@ -286,6 +298,15 @@ def _load_test_suite(description_file: Path) -> TestSuiteInfo: else: test_suite_info.skip_on_clouds = [] + skip_on_images = test_suite.get("skip_on_images") + if skip_on_images is not None: + if isinstance(skip_on_images, str): + test_suite_info.skip_on_images = [skip_on_images] + else: + test_suite_info.skip_on_images = skip_on_images + else: + test_suite_info.skip_on_images = [] + return test_suite_info @staticmethod diff --git a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py index b3d84a1211..ad25151b5f 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py @@ -140,6 +140,7 @@ def create_environment_list(self) -> List[Dict[str, Any]]: runbook_images = self._get_runbook_images(loader) skip_test_suites: List[str] = [] + skip_test_suites_images: List[str] = [] for test_suite_info in loader.test_suites: if 
self.runbook.cloud in test_suite_info.skip_on_clouds: skip_test_suites.append(test_suite_info.name) @@ -149,7 +150,14 @@ def create_environment_list(self) -> List[Dict[str, Any]]: else: images_info: List[VmImageInfo] = self._get_test_suite_images(test_suite_info, loader) + skip_images_info: List[VmImageInfo] = self._get_test_suite_skip_images(test_suite_info, loader) + if len(skip_images_info) > 0: + skip_test_suite_image = f"{test_suite_info.name}: {','.join([i.urn for i in skip_images_info])}" + skip_test_suites_images.append(skip_test_suite_image) + for image in images_info: + if image in skip_images_info: + continue # 'image.urn' can actually be the URL to a VHD if the runbook provided it in the 'image' parameter if self._is_vhd(image.urn): marketplace_image = "" @@ -238,6 +246,9 @@ def create_environment_list(self) -> List[Dict[str, Any]]: if len(skip_test_suites) > 0: self._log.info("Skipping test suites %s", skip_test_suites) + if len(skip_test_suites_images) > 0: + self._log.info("Skipping test suits run on images \n %s", '\n'.join([f"\t{skip}" for skip in skip_test_suites_images])) + return environments def create_existing_vm_environment(self) -> Dict[str, Any]: @@ -440,6 +451,20 @@ def _get_test_suite_images(suite: TestSuiteInfo, loader: AgentTestLoader) -> Lis unique[i.urn] = i return [v for k, v in unique.items()] + @staticmethod + def _get_test_suite_skip_images(suite: TestSuiteInfo, loader: AgentTestLoader) -> List[VmImageInfo]: + """ + Returns images that need to be skipped by the suite. + + A test suite may reference multiple image sets and sets can intersect; this method eliminates any duplicates. 
+ """ + skip_unique: Dict[str, VmImageInfo] = {} + for image in suite.skip_on_images: + image_list = loader.images[image] + for i in image_list: + skip_unique[i.urn] = i + return [v for k, v in skip_unique.items()] + def _get_location(self, suite_info: TestSuiteInfo, image: VmImageInfo) -> str: """ Returns the location on which the test VM for the given test suite and image should be created. diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index 336d22cf67..5bc48a5dfb 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -29,7 +29,7 @@ variable: # Test suites to execute # - name: test_suites - value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_status, multi_config_ext, agent_cgroups, ext_cgroups, agent_firewall, ext_telemetry_pipeline, ext_sequencing" + value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_status, multi_config_ext, agent_cgroups, ext_cgroups, agent_firewall, ext_telemetry_pipeline, ext_sequencing, agent_persist_firewall" # # Parameters used to create test VMs diff --git a/tests_e2e/orchestrator/scripts/agent-service b/tests_e2e/orchestrator/scripts/agent-service index d740ef8f41..5c4c7ee090 100755 --- a/tests_e2e/orchestrator/scripts/agent-service +++ b/tests_e2e/orchestrator/scripts/agent-service @@ -43,11 +43,13 @@ if command -v systemctl &> /dev/null; then service-stop() { systemctl stop $1; } service-restart() { systemctl restart $1; } service-start() { systemctl start $1; } + service-disable() { systemctl disable $1; } else service-status() { service $1 status; } service-stop() { service $1 stop; } service-restart() { service $1 restart; } service-start() { service $1 start; } + service-disable() { service $1 disable; } fi python=$(get-agent-python) @@ -83,3 +85,8 @@ if [[ "$cmd" == "status" ]]; then echo "Service status..." 
service-status $service_name fi + +if [[ "$cmd" == "disable" ]]; then + echo "Disabling service..." + service-disable $service_name +fi diff --git a/tests_e2e/test_suites/agent_persist_firewall.yml b/tests_e2e/test_suites/agent_persist_firewall.yml new file mode 100644 index 0000000000..137f3af87e --- /dev/null +++ b/tests_e2e/test_suites/agent_persist_firewall.yml @@ -0,0 +1,19 @@ +# +# Iptable rules that agent add not persisted on reboot. So we use firewalld service if distro supports it otherwise agent creates custom service and only runs on boot before network up. +# so that attacker will not have room to contact the wireserver +# This test verifies that either of the service is active. Ensure those rules are added on boot and working as expected. +# +name: "AgentPersistFirewall" +tests: + - "agent_persist_firewall/agent_persist_firewall.py" +images: + - "endorsed" + - "endorsed-arm64" +owns_vm: true # This vm cannot be shared with other tests because it modifies the firewall rules and agent status. +# agent persist firewall service not running on flatcar distro since agent can't install custom service due to read only filesystem. +# so skipping the test run on flatcar distro. +# (2023-11-14T19:04:13.738695Z ERROR ExtHandler ExtHandler Unable to setup the persistent firewall rules: [Errno 30] Read-only file system: '/lib/systemd/system/waagent-network-setup.service) +skip_on_images: + - "flatcar" + - "flatcar_arm64" + - "debian_9" # TODO: Reboot is slow on debian_9. Need to investigate further. 
\ No newline at end of file diff --git a/tests_e2e/tests/agent_persist_firewall/agent_persist_firewall.py b/tests_e2e/tests/agent_persist_firewall/agent_persist_firewall.py new file mode 100644 index 0000000000..5bfeb403a9 --- /dev/null +++ b/tests_e2e/tests/agent_persist_firewall/agent_persist_firewall.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from tests_e2e.tests.lib.agent_test import AgentVmTest +from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.ssh_client import SshClient + + +class AgentPersistFirewallTest(AgentVmTest): + """ + This test verifies agent setup persist firewall rules using custom network setup service or firewalld service. Ensure those rules are added on boot and working as expected. 
+ """ + + def __init__(self, context: AgentVmTestContext): + super().__init__(context) + self._ssh_client: SshClient = self._context.create_ssh_client() + + def run(self): + self._test_setup() + # Test case 1: After test agent install, verify firewalld or network.setup is running + self._verify_persist_firewall_service_running() + # Test case 2: Perform reboot and ensure firewall rules added on boot and working as expected + self._context.vm.restart(wait_for_boot=True, ssh_client=self._ssh_client) + self._verify_persist_firewall_service_running() + self._verify_firewall_rules_on_boot("first_boot") + # Test case 3: Disable the agent(so that agent won't get started after reboot) + # perform reboot and ensure firewall rules added on boot even after agent is disabled + self._disable_agent() + self._context.vm.restart(wait_for_boot=True, ssh_client=self._ssh_client) + self._verify_persist_firewall_service_running() + self._verify_firewall_rules_on_boot("second_boot") + # Test case 4: perform firewalld rules deletion and ensure deleted rules added back to rule set after agent start + self._verify_firewall_rules_readded() + + def _test_setup(self): + log.info("Doing test setup") + self._run_remote_test(self._ssh_client, f"agent_persist_firewall-test_setup {self._context.username}", + use_sudo=True) + log.info("Successfully completed test setup\n") + + def _verify_persist_firewall_service_running(self): + log.info("Verifying persist firewall service is running") + self._run_remote_test(self._ssh_client, "agent_persist_firewall-verify_persist_firewall_service_running.py", + use_sudo=True) + log.info("Successfully verified persist firewall service is running\n") + + def _verify_firewall_rules_on_boot(self, boot_name): + log.info("Verifying firewall rules on {0}".format(boot_name)) + self._run_remote_test(self._ssh_client, f"agent_persist_firewall-verify_firewall_rules_on_boot.py --user {self._context.username} --boot_name {boot_name}", + use_sudo=True) + 
log.info("Successfully verified firewall rules on {0}".format(boot_name)) + + def _disable_agent(self): + log.info("Disabling agent") + self._run_remote_test(self._ssh_client, "agent-service disable", use_sudo=True) + log.info("Successfully disabled agent\n") + + def _verify_firewall_rules_readded(self): + log.info("Verifying firewall rules readded") + self._run_remote_test(self._ssh_client, "agent_persist_firewall-verify_firewalld_rules_readded.py", + use_sudo=True) + log.info("Successfully verified firewall rules readded\n") diff --git a/tests_e2e/tests/lib/firewall_helpers.py b/tests_e2e/tests/lib/firewall_helpers.py new file mode 100644 index 0000000000..0e6ddd4051 --- /dev/null +++ b/tests_e2e/tests/lib/firewall_helpers.py @@ -0,0 +1,209 @@ +from typing import List, Tuple + +from assertpy import fail + +from azurelinuxagent.common.future import ustr +from azurelinuxagent.common.utils import shellutil +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.retry import retry_if_false + +WIRESERVER_ENDPOINT_FILE = '/var/lib/waagent/WireServerEndpoint' +WIRESERVER_IP = '168.63.129.16' +FIREWALL_PERIOD = 30 + +# helper methods shared by multiple tests + +class IPTableRules(object): + # -D deletes the specific rule in the iptable chain + DELETE_COMMAND = "-D" + + # -C checks if a specific rule exists + CHECK_COMMAND = "-C" + + +class FirewalldRules(object): + # checks if a specific rule exists + QUERY_PASSTHROUGH = "--query-passthrough" + + # removes a specific rule + REMOVE_PASSTHROUGH = "--remove-passthrough" + + +def get_wireserver_ip() -> str: + try: + with open(WIRESERVER_ENDPOINT_FILE, 'r') as f: + wireserver_ip = f.read() + except Exception: + wireserver_ip = WIRESERVER_IP + return wireserver_ip + + +def get_root_accept_rule_command(command: str) -> List[str]: + return ['sudo', 'iptables', '-t', 'security', command, 'OUTPUT', '-d', get_wireserver_ip(), '-p', 'tcp', '-m', + 'owner', + '--uid-owner', + '0', '-j', 'ACCEPT', '-w'] + + +def 
get_non_root_accept_rule_command(command: str) -> List[str]: + return ['sudo', 'iptables', '-t', 'security', command, 'OUTPUT', '-d', get_wireserver_ip(), '-p', 'tcp', + '--destination-port', '53', '-j', + 'ACCEPT', '-w'] + + +def get_non_root_drop_rule_command(command: str) -> List[str]: + return ['sudo', 'iptables', '-t', 'security', command, 'OUTPUT', '-d', get_wireserver_ip(), '-p', 'tcp', '-m', + 'conntrack', '--ctstate', + 'INVALID,NEW', '-j', 'DROP', '-w'] + + +def get_non_root_accept_tcp_firewalld_rule(command): + return ["firewall-cmd", "--permanent", "--direct", command, "ipv4", "-t", "security", "-A", "OUTPUT", "-d", + get_wireserver_ip(), + "-p", "tcp", "--destination-port", "53", "-j", "ACCEPT"] + + +def get_root_accept_firewalld_rule(command): + return ["firewall-cmd", "--permanent", "--direct", command, "ipv4", "-t", "security", "-A", "OUTPUT", "-d", + get_wireserver_ip(), + "-p", "tcp", "-m", "owner", "--uid-owner", "0", "-j", "ACCEPT"] + + +def get_non_root_drop_firewalld_rule(command): + return ["firewall-cmd", "--permanent", "--direct", command, "ipv4", "-t", "security", "-A", "OUTPUT", "-d", + get_wireserver_ip(), + "-p", "tcp", "-m", "conntrack", "--ctstate", "INVALID,NEW", "-j", "DROP"] + + +def execute_cmd(cmd: List[str]): + """ + Note: The shellutil.run_command return stdout if exit_code=0, otherwise returns Exception + """ + return shellutil.run_command(cmd, track_process=False) + + +def execute_cmd_return_err_code(cmd: List[str]): + """ + Note: The shellutil.run_command return err_code plus stdout/stderr + """ + try: + stdout = execute_cmd(cmd) + return 0, stdout + except Exception as error: + return -1, ustr(error) + + +def check_if_iptable_rule_is_available(full_command: List[str]) -> bool: + """ + This function is used to check if given rule is present in iptable rule set + "-C" return exit code 0 if the rule is available. 
+ """ + exit_code, _ = execute_cmd_return_err_code(full_command) + return exit_code == 0 + + +def print_current_iptable_rules() -> None: + """ + This function prints the current iptable rules + """ + try: + cmd = ["sudo", "iptables", "-t", "security", "-L", "-nxv"] + stdout = execute_cmd(cmd) + for line in stdout.splitlines(): + log.info(str(line)) + except Exception as error: + log.warning("Error -- Failed to fetch the ip table rule set {0}".format(error)) + + +def get_all_iptable_rule_commands(command: str) -> Tuple[List[str], List[str], List[str]]: + return get_root_accept_rule_command(command), get_non_root_accept_rule_command(command), get_non_root_drop_rule_command(command) + + +def verify_all_rules_exist() -> None: + """ + This function is used to verify all the iptable rules are present in the rule set + """ + def check_all_iptables() -> bool: + root_accept, non_root_accept, non_root_drop = get_all_iptable_rule_commands(IPTableRules.CHECK_COMMAND) + found: bool = check_if_iptable_rule_is_available(root_accept) and check_if_iptable_rule_is_available( + non_root_accept) and check_if_iptable_rule_is_available(non_root_drop) + return found + + log.info("Verifying all ip table rules are present in rule set") + # Agent will re-add rules within OS.EnableFirewallPeriod, So waiting that time + some buffer + found: bool = retry_if_false(check_all_iptables, attempts=2, delay=FIREWALL_PERIOD+15) + + if not found: + fail("IP table rules missing in rule set.\n Current iptable rules: {0}".format( + print_current_iptable_rules())) + + log.info("verified All ip table rules are present in rule set") + + +def firewalld_service_running(): + """ + Checks if firewalld service is running on the VM + Eg: firewall-cmd --state + > running + """ + cmd = ["firewall-cmd", "--state"] + exit_code, output = execute_cmd_return_err_code(cmd) + if exit_code != 0: + log.warning("Firewall service not running: {0}".format(output)) + return exit_code == 0 and output.rstrip() == "running" + + 
+def get_all_firewalld_rule_commands(command): + return get_root_accept_firewalld_rule(command), get_non_root_accept_tcp_firewalld_rule( + command), get_non_root_drop_firewalld_rule(command) + + +def check_if_firewalld_rule_is_available(command): + """ + This function is used to check if given firewalld rule is present in rule set + --query-passthrough return exit code 0 if the rule is available + """ + exit_code, _ = execute_cmd_return_err_code(command) + if exit_code == 0: + return True + return False + + +def verify_all_firewalld_rules_exist(): + """ + This function is used to verify all the firewalld rules are present in the rule set + """ + + def check_all_firewalld_rules(): + root_accept, non_root_accept, non_root_drop = get_all_firewalld_rule_commands(FirewalldRules.QUERY_PASSTHROUGH) + found = check_if_firewalld_rule_is_available(root_accept) and check_if_firewalld_rule_is_available( + non_root_accept) and check_if_firewalld_rule_is_available(non_root_drop) + return found + + log.info("Verifying all firewalld rules are present in rule set") + found = retry_if_false(check_all_firewalld_rules, attempts=2) + + if not found: + fail("Firewalld rules missing in rule set. 
{0}".format( + print_current_firewalld_rules())) + + print_current_firewalld_rules() + log.info("verified All firewalld rules are present in rule set") + + +def print_current_firewalld_rules(): + """ + This function prints the current firewalld rules + """ + try: + cmd = ["firewall-cmd", "--permanent", "--direct", "--get-all-passthroughs"] + exit_code, stdout = execute_cmd_return_err_code(cmd) + if exit_code != 0: + log.warning("Warning -- Failed to fetch firewalld rules with error code: %s and error: %s", exit_code, + stdout) + else: + log.info("Current firewalld rules:") + for line in stdout.splitlines(): + log.info(str(line)) + except Exception as error: + raise Exception("Error -- Failed to fetch the firewalld rule set {0}".format(error)) diff --git a/tests_e2e/tests/lib/retry.py b/tests_e2e/tests/lib/retry.py index 3996b3ba3e..db0a52fcf2 100644 --- a/tests_e2e/tests/lib/retry.py +++ b/tests_e2e/tests/lib/retry.py @@ -50,7 +50,8 @@ def retry_ssh_run(operation: Callable[[], Any], attempts: int, attempt_delay: in try: return operation() except CommandError as e: - retryable = e.exit_code == 255 and ("Connection timed out" in e.stderr or "Connection refused" in e.stderr) + retryable = ((e.exit_code == 255 and ("Connection timed out" in e.stderr or "Connection refused" in e.stderr)) or + "Unprivileged users are not permitted to log in yet" in e.stderr) if not retryable or i >= attempts: raise log.warning("The SSH operation failed, retrying in %s secs [Attempt %s/%s].\n%s", attempt_delay, i, attempts, e) diff --git a/tests_e2e/tests/lib/virtual_machine_client.py b/tests_e2e/tests/lib/virtual_machine_client.py index 37dcfaef13..bc38b1b35a 100644 --- a/tests_e2e/tests/lib/virtual_machine_client.py +++ b/tests_e2e/tests/lib/virtual_machine_client.py @@ -174,7 +174,7 @@ def restart( return log.info("The VM has not rebooted yet. Restart time: %s. 
Boot time: %s", before_restart, boot_time) except CommandError as e: - if e.exit_code == 255 and "Connection refused" in str(e): + if (e.exit_code == 255 and "Connection refused" in str(e)) or "Unprivileged users are not permitted to log in yet" in str(e): log.info("VM %s is not yet accepting SSH connections", self) else: raise diff --git a/tests_e2e/tests/scripts/agent_firewall-verify_all_firewall_rules.py b/tests_e2e/tests/scripts/agent_firewall-verify_all_firewall_rules.py index 2ef8454fd2..2d165bc175 100755 --- a/tests_e2e/tests/scripts/agent_firewall-verify_all_firewall_rules.py +++ b/tests_e2e/tests/scripts/agent_firewall-verify_all_firewall_rules.py @@ -22,41 +22,22 @@ import os import pwd import socket -from typing import List, Tuple +from typing import List -from assertpy import fail from azurelinuxagent.common.utils import shellutil -from azurelinuxagent.common.utils.shellutil import CommandError from azurelinuxagent.common.utils.textutil import format_exception +from tests_e2e.tests.lib.firewall_helpers import get_root_accept_rule_command, get_non_root_accept_rule_command, \ + get_non_root_drop_rule_command, print_current_iptable_rules, get_wireserver_ip, get_all_iptable_rule_commands, \ + check_if_iptable_rule_is_available, IPTableRules, verify_all_rules_exist, FIREWALL_PERIOD, execute_cmd from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.remote_test import run_remote_test import http.client as httpclient -from tests_e2e.tests.lib.retry import retry_if_false, retry +from tests_e2e.tests.lib.retry import retry ROOT_USER = 'root' -WIRESERVER_ENDPOINT_FILE = '/var/lib/waagent/WireServerEndpoint' -WIRESERVER_IP = '168.63.129.16' VERSIONS_PATH = '/?comp=versions' -FIREWALL_PERIOD = 30 - - -class FirewallRules(object): - # -D deletes the specific rule in the iptable chain - DELETE_COMMAND = "-D" - - # -C checks if a specific rule exists - CHECK_COMMAND = "-C" - - -def get_wireserver_ip() -> str: - try: - with open(WIRESERVER_ENDPOINT_FILE, 
'r') as f: - wireserver_ip = f.read() - except Exception: - wireserver_ip = WIRESERVER_IP - return wireserver_ip def switch_user(user: str) -> None: @@ -71,88 +52,6 @@ def switch_user(user: str) -> None: raise Exception("Error -- failed to switch user to {0} : Failed with exception {1}".format(user, e)) -def get_root_accept_rule_command(command: str) -> List[str]: - return ['sudo', 'iptables', '-t', 'security', command, 'OUTPUT', '-d', get_wireserver_ip(), '-p', 'tcp', '-m', - 'owner', - '--uid-owner', - '0', '-j', 'ACCEPT', '-w'] - - -def get_non_root_accept_rule_command(command: str) -> List[str]: - return ['sudo', 'iptables', '-t', 'security', command, 'OUTPUT', '-d', get_wireserver_ip(), '-p', 'tcp', - '--destination-port', '53', '-j', - 'ACCEPT', '-w'] - - -def get_non_root_drop_rule_command(command: str) -> List[str]: - return ['sudo', 'iptables', '-t', 'security', command, 'OUTPUT', '-d', get_wireserver_ip(), '-p', 'tcp', '-m', - 'conntrack', '--ctstate', - 'INVALID,NEW', '-j', 'DROP', '-w'] - - -def execute_cmd(cmd: List[str]): - """ - Note: The shellutil.run_command return stdout if exit_code=0, otherwise returns commanderror - """ - try: - stdout = shellutil.run_command(cmd) - except CommandError as e: - return e.returncode, e.stdout, e.stderr - return 0, stdout, "" - - -def check_if_iptable_rule_is_available(full_command: List[str]) -> bool: - """ - This function is used to check if given rule is present in iptable rule set - "-C" return exit code 0 if the rule is available. 
- """ - exit_code, _, _ = execute_cmd(full_command) - if exit_code == 0: - return True - return False - - -def print_current_iptable_rules(): - """ - This function prints the current iptable rules - """ - try: - cmd = ['sudo', 'iptables', '-L', 'OUTPUT', '-t', 'security', '-nxv'] - exit_code, stdout, stderr = execute_cmd(cmd) - if exit_code != 0: - log.warning("Warning -- Failed to fetch the ip table rules with error code: %s and error: %s", exit_code, stderr) - else: - for line in stdout.splitlines(): - log.info(str(line)) - except Exception as error: - raise Exception("Error -- Failed to fetch the ip table rule set {0}".format(error)) - - -def get_all_iptable_rule_commands(command: str) -> Tuple[List[str], List[str], List[str]]: - return get_root_accept_rule_command(command), get_non_root_accept_rule_command(command), get_non_root_drop_rule_command(command) - - -def verify_all_rules_exist() -> None: - """ - This function is used to verify all the iptable rules are present in the rule set - """ - def check_all_iptables() -> bool: - root_accept, non_root_accept, non_root_drop = get_all_iptable_rule_commands(FirewallRules.CHECK_COMMAND) - found: bool = check_if_iptable_rule_is_available(root_accept) and check_if_iptable_rule_is_available( - non_root_accept) and check_if_iptable_rule_is_available(non_root_drop) - return found - - log.info("-----Verifying all ip table rules are present in rule set") - # Agent will re-add rules within OS.EnableFirewallPeriod, So waiting that time + some buffer - found: bool = retry_if_false(check_all_iptables, attempts=2, delay=FIREWALL_PERIOD+15) - - if not found: - fail("IP table rules missing in rule set.\n Current iptable rules:\n {0}".format( - print_current_iptable_rules())) - - log.info("verified All ip table rules are present in rule set") - - def verify_rules_deleted_successfully(commands: List[List[str]] = None) -> None: """ This function is used to verify if provided rule or all(if not specified) iptable rules are deleted 
successfully. @@ -163,7 +62,7 @@ def verify_rules_deleted_successfully(commands: List[List[str]] = None) -> None: commands = [] if not commands: - root_accept, non_root_accept, non_root_drop = get_all_iptable_rule_commands("-C") + root_accept, non_root_accept, non_root_drop = get_all_iptable_rule_commands(IPTableRules.CHECK_COMMAND) commands.extend([root_accept, non_root_accept, non_root_drop]) # "-C" return error code 1 when not available which is expected after deletion @@ -183,7 +82,7 @@ def delete_iptable_rules(commands: List[List[str]] = None) -> None: if commands is None: commands = [] if not commands: - root_accept, non_root_accept, non_root_drop = get_all_iptable_rule_commands(FirewallRules.CHECK_COMMAND) + root_accept, non_root_accept, non_root_drop = get_all_iptable_rule_commands(IPTableRules.DELETE_COMMAND) commands.extend([root_accept, non_root_accept, non_root_drop]) log.info("-----Deleting ip table rules \n %s", commands) @@ -297,10 +196,10 @@ def verify_non_root_accept_rule(): shellutil.run_command(stop_agent) # deleting non root accept rule - non_root_accept_delete_cmd = get_non_root_accept_rule_command(FirewallRules.DELETE_COMMAND) + non_root_accept_delete_cmd = get_non_root_accept_rule_command(IPTableRules.DELETE_COMMAND) delete_iptable_rules([non_root_accept_delete_cmd]) # verifying deletion successful - non_root_accept_check_cmd = get_non_root_accept_rule_command(FirewallRules.CHECK_COMMAND) + non_root_accept_check_cmd = get_non_root_accept_rule_command(IPTableRules.CHECK_COMMAND) verify_rules_deleted_successfully([non_root_accept_check_cmd]) log.info("** Current IP table rules\n") @@ -326,7 +225,7 @@ def verify_non_root_accept_rule(): log.info("Ensuring missing rules are re-added by the running agent") # deleting non root accept rule - non_root_accept_delete_cmd = get_non_root_accept_rule_command(FirewallRules.DELETE_COMMAND) + non_root_accept_delete_cmd = get_non_root_accept_rule_command(IPTableRules.DELETE_COMMAND) 
delete_iptable_rules([non_root_accept_delete_cmd]) verify_all_rules_exist() @@ -354,13 +253,13 @@ def verify_root_accept_rule(): shellutil.run_command(stop_agent) # deleting root accept rule - root_accept_delete_cmd = get_root_accept_rule_command(FirewallRules.DELETE_COMMAND) + root_accept_delete_cmd = get_root_accept_rule_command(IPTableRules.DELETE_COMMAND) # deleting drop rule too otherwise after restart, the daemon will go into loop since it cannot connect to wireserver. This would block the agent initialization - drop_delete_cmd = get_non_root_drop_rule_command(FirewallRules.DELETE_COMMAND) + drop_delete_cmd = get_non_root_drop_rule_command(IPTableRules.DELETE_COMMAND) delete_iptable_rules([root_accept_delete_cmd, drop_delete_cmd]) # verifying deletion successful - root_accept_check_cmd = get_root_accept_rule_command(FirewallRules.CHECK_COMMAND) - drop_check_cmd = get_non_root_drop_rule_command(FirewallRules.CHECK_COMMAND) + root_accept_check_cmd = get_root_accept_rule_command(IPTableRules.CHECK_COMMAND) + drop_check_cmd = get_non_root_drop_rule_command(IPTableRules.CHECK_COMMAND) verify_rules_deleted_successfully([root_accept_check_cmd, drop_check_cmd]) log.info("** Current IP table rules\n") @@ -383,7 +282,7 @@ def verify_root_accept_rule(): log.info("Ensuring missing rules are re-added by the running agent") # deleting root accept rule - root_accept_delete_cmd = get_root_accept_rule_command(FirewallRules.DELETE_COMMAND) + root_accept_delete_cmd = get_root_accept_rule_command(IPTableRules.DELETE_COMMAND) delete_iptable_rules([root_accept_delete_cmd]) verify_all_rules_exist() @@ -393,7 +292,7 @@ def verify_root_accept_rule(): log.info("root accept rule verified successfully\n") -def verify_non_root_dcp_rule(): +def verify_non_root_drop_rule(): """ This function verifies drop rule and make sure it is re added by agent after deletion """ @@ -407,10 +306,10 @@ def verify_non_root_dcp_rule(): shellutil.run_command(stop_agent) # deleting non root delete rule - 
non_root_drop_delete_cmd = get_non_root_drop_rule_command(FirewallRules.DELETE_COMMAND) + non_root_drop_delete_cmd = get_non_root_drop_rule_command(IPTableRules.DELETE_COMMAND) delete_iptable_rules([non_root_drop_delete_cmd]) # verifying deletion successful - non_root_drop_check_cmd = get_non_root_drop_rule_command(FirewallRules.CHECK_COMMAND) + non_root_drop_check_cmd = get_non_root_drop_rule_command(IPTableRules.CHECK_COMMAND) verify_rules_deleted_successfully([non_root_drop_check_cmd]) log.info("** Current IP table rules\n") @@ -436,7 +335,7 @@ def verify_non_root_dcp_rule(): log.info("Ensuring missing rules are re-added by the running agent") # deleting non root delete rule - non_root_drop_delete_cmd = get_non_root_drop_rule_command(FirewallRules.DELETE_COMMAND) + non_root_drop_delete_cmd = get_non_root_drop_rule_command(IPTableRules.DELETE_COMMAND) delete_iptable_rules([non_root_drop_delete_cmd]) verify_all_rules_exist() @@ -462,7 +361,7 @@ def main(): verify_non_root_accept_rule() verify_root_accept_rule() - verify_non_root_dcp_rule() + verify_non_root_drop_rule() parser = argparse.ArgumentParser() diff --git a/tests_e2e/tests/scripts/agent_persist_firewall-access_wireserver b/tests_e2e/tests/scripts/agent_persist_firewall-access_wireserver new file mode 100755 index 0000000000..c38e0a5706 --- /dev/null +++ b/tests_e2e/tests/scripts/agent_persist_firewall-access_wireserver @@ -0,0 +1,85 @@ +#!/usr/bin/env bash + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +# Helper script which tries to access Wireserver on system reboot. Also prints out iptable rules if non-root and still +# able to access Wireserver + +USER=$(whoami) +echo "$(date --utc +%FT%T.%3NZ): Running as user: $USER" + +function check_online +{ + ping 8.8.8.8 -c 1 -i .2 -t 30 > /dev/null 2>&1 && echo 0 || echo 1 +} + +# Check more, sleep less +MAX_CHECKS=10 +# Initial starting value for checks +CHECKS=0 +IS_ONLINE=$(check_online) + +# Loop while we're not online. +while [ "$IS_ONLINE" -eq 1 ]; do + + CHECKS=$((CHECKS + 1)) + if [ $CHECKS -gt $MAX_CHECKS ]; then + break + fi + + echo "$(date --utc +%FT%T.%3NZ): Network still not accessible" + # We're offline. Sleep for a bit, then check again + sleep 1; + IS_ONLINE=$(check_online) + +done + +if [ "$IS_ONLINE" -eq 1 ]; then + # We will never be able to get online. Kill script. + echo "Unable to connect to network, exiting now" + echo "ExitCode: 1" + exit 1 +fi + +echo "Finally online, Time: $(date --utc +%FT%T.%3NZ)" +echo "Trying to contact Wireserver as $USER to see if accessible" + +echo "" +echo "IPTables before accessing Wireserver" +sudo iptables -t security -L -nxv +echo "" + +WIRE_IP=$(cat /var/lib/waagent/WireServerEndpoint 2>/dev/null || echo '168.63.129.16' | tr -d '[:space:]') +if command -v wget >/dev/null 2>&1; then + wget --tries=3 "http://$WIRE_IP/?comp=versions" --timeout=5 -O "/tmp/wire-versions-$USER.xml" +else + curl --retry 3 --retry-delay 5 --connect-timeout 5 "http://$WIRE_IP/?comp=versions" -o "/tmp/wire-versions-$USER.xml" +fi +WIRE_EC=$? 
+echo "ExitCode: $WIRE_EC" + +if [[ "$USER" != "root" && "$WIRE_EC" == 0 ]]; then + echo "Wireserver should not be accessible for non-root user ($USER)" +fi + +if [[ "$USER" != "root" ]]; then +echo "" +echo "checking tcp traffic to wireserver port 53 for non-root user ($USER)" +echo -n 2>/dev/null < /dev/tcp/$WIRE_IP/53 && echo 0 || echo 1 # Establish network connection for port 53 +TCP_EC=$? +echo "TCP 53 Connection ExitCode: $TCP_EC" +fi \ No newline at end of file diff --git a/tests_e2e/tests/scripts/agent_persist_firewall-test_setup b/tests_e2e/tests/scripts/agent_persist_firewall-test_setup new file mode 100755 index 0000000000..a157e58cbe --- /dev/null +++ b/tests_e2e/tests/scripts/agent_persist_firewall-test_setup @@ -0,0 +1,30 @@ +#!/usr/bin/env bash + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# +# Script adds cron job on reboot to make sure iptables rules are added to allow access to Wireserver and also, enable the firewall config flag +# + +if [[ $# -ne 1 ]]; then + echo "Usage: agent_persist_firewall-test_setup " + exit 1 +fi + +echo "@reboot /home/$1/bin/agent_persist_firewall-access_wireserver > /tmp/reboot-cron-root.log 2>&1" | crontab -u root - +echo "@reboot /home/$1/bin/agent_persist_firewall-access_wireserver > /tmp/reboot-cron-$1.log 2>&1" | crontab -u $1 - +update-waagent-conf OS.EnableFirewall=y \ No newline at end of file diff --git a/tests_e2e/tests/scripts/agent_persist_firewall-verify_firewall_rules_on_boot.py b/tests_e2e/tests/scripts/agent_persist_firewall-verify_firewall_rules_on_boot.py new file mode 100755 index 0000000000..549e368b25 --- /dev/null +++ b/tests_e2e/tests/scripts/agent_persist_firewall-verify_firewall_rules_on_boot.py @@ -0,0 +1,176 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# This script checks firewall rules are set on boot through cron job logs.And also capture the logs for debugging purposes. 
+# +import argparse +import os +import re +import shutil + +from assertpy import fail + +from azurelinuxagent.common.utils import shellutil +from tests_e2e.tests.lib.firewall_helpers import verify_all_rules_exist +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.retry import retry + + +def move_cron_logs_to_var_log(): + # Move the cron logs to /var/log + log.info("Moving cron logs to /var/log for debugging purposes") + for cron_log in [ROOT_CRON_LOG, NON_ROOT_CRON_LOG, NON_ROOT_WIRE_XML, ROOT_WIRE_XML]: + try: + shutil.move(src=cron_log, dst=os.path.join("/var", "log", + "{0}.{1}".format(os.path.basename(cron_log), + BOOT_NAME))) + except Exception as e: + log.info("Unable to move cron log to /var/log; {0}".format(e)) + + +def check_wireserver_versions_file_exist(wire_version_file): + log.info("Checking wire-versions file exist: {0}".format(wire_version_file)) + if not os.path.exists(wire_version_file): + log.info("File: {0} not found".format(wire_version_file)) + return False + + if os.stat(wire_version_file).st_size > 0: + return True + + return False + + +def verify_data_in_cron_logs(cron_log, verify, err_msg): + log.info("Verifying Cron logs") + + def cron_log_checks(): + + if not os.path.exists(cron_log): + raise Exception("Cron log file not found: {0}".format(cron_log)) + with open(cron_log) as f: + cron_logs_lines = list(map(lambda _: _.strip(), f.readlines())) + if not cron_logs_lines: + raise Exception("Empty cron file, looks like cronjob didnt run") + + if any("Unable to connect to network, exiting now" in line for line in cron_logs_lines): + raise Exception("VM was unable to connect to network on startup. Skipping test validation") + + if not any("ExitCode" in line for line in cron_logs_lines): + raise Exception("Cron logs still incomplete, will try again in a minute") + + if not any(verify(line) for line in cron_logs_lines): + fail("Verification failed! (UNEXPECTED): {0}".format(err_msg)) + + log.info("Verification succeeded. 
Cron logs as expected") + + retry(cron_log_checks) + + +def verify_wireserver_ip_reachable_for_root(): + """ + For root logs - + Ensure the /var/log/wire-versions-root.xml is not-empty (generated by the cron job) + Ensure the exit code in the /var/log/reboot-cron-root.log file is 0 + """ + log.info("Verifying Wireserver IP is reachable from root user") + + def check_exit_code(line): + match = re.match("ExitCode:\\s(\\d+)", line) + return match is not None and int(match.groups()[0]) == 0 + + verify_data_in_cron_logs(cron_log=ROOT_CRON_LOG, verify=check_exit_code, + err_msg="Exit Code should be 0 for root based cron job!") + + if not check_wireserver_versions_file_exist(ROOT_WIRE_XML): + fail("Wire version file should not be empty for root user!") + + +def verify_wireserver_ip_unreachable_for_non_root(): + """ + For non-root - + Ensure the /tmp/wire-versions-non-root.xml is empty (generated by the cron job) + Ensure the exit code in the /tmp/reboot-cron-non-root.log file is non-0 + """ + log.info("Verifying WireServer IP is unreachable from non-root user") + + def check_exit_code(line): + match = re.match("ExitCode:\\s(\\d+)", line) + return match is not None and int(match.groups()[0]) != 0 + + verify_data_in_cron_logs(cron_log=NON_ROOT_CRON_LOG, verify=check_exit_code, + err_msg="Exit Code should be non-0 for non-root cron job!") + + if check_wireserver_versions_file_exist(NON_ROOT_WIRE_XML): + fail("Wire version file should be empty for non-root user!") + + +def verify_tcp_connection_to_wireserver_for_non_root(): + """ + For non-root - + Ensure the TCP 53 Connection exit code in the /tmp/reboot-cron-non-root.log file is 0 + """ + log.info("Verifying TCP connection to Wireserver port for non-root user") + + def check_exit_code(line): + match = re.match("TCP 53 Connection ExitCode:\\s(\\d+)", line) + return match is not None and int(match.groups()[0]) == 0 + + verify_data_in_cron_logs(cron_log=NON_ROOT_CRON_LOG, verify=check_exit_code, + err_msg="TCP 53 Connection 
Exit Code should be 0 for non-root cron job!") + + +def generate_svg(): + """ + This is a good to have, but not must have. Not failing tests if we're unable to generate a SVG + """ + log.info("Running systemd-analyze plot command to get the svg for boot execution order") + dest_dir = os.path.join("/var", "log", "svgs") + if not os.path.exists(dest_dir): + os.makedirs(dest_dir) + svg_name = os.path.join(dest_dir, "{0}.svg".format(BOOT_NAME)) + cmd = ["systemd-analyze plot > {0}".format(svg_name)] + err_code, stdout = shellutil.run_get_output(cmd) + if err_code != 0: + log.info("Unable to generate svg: {0}".format(stdout)) + log.info("SVG generated successfully") + + +def main(): + try: + # Verify firewall rules are set on boot through cron job logs + verify_wireserver_ip_unreachable_for_non_root() + verify_wireserver_ip_reachable_for_root() + verify_tcp_connection_to_wireserver_for_non_root() + verify_all_rules_exist() + finally: + # save the logs to /var/log to capture by collect-logs, this might be useful for debugging + move_cron_logs_to_var_log() + generate_svg() + + +parser = argparse.ArgumentParser() +parser.add_argument('-u', '--user', required=True, help="Non root user") +parser.add_argument('-bn', '--boot_name', required=True, help="Boot Name") +args = parser.parse_args() +NON_ROOT_USER = args.user +BOOT_NAME = args.boot_name +ROOT_CRON_LOG = "/tmp/reboot-cron-root.log" +NON_ROOT_CRON_LOG = f"/tmp/reboot-cron-{NON_ROOT_USER}.log" +NON_ROOT_WIRE_XML = f"/tmp/wire-versions-{NON_ROOT_USER}.xml" +ROOT_WIRE_XML = "/tmp/wire-versions-root.xml" +main() diff --git a/tests_e2e/tests/scripts/agent_persist_firewall-verify_firewalld_rules_readded.py b/tests_e2e/tests/scripts/agent_persist_firewall-verify_firewalld_rules_readded.py new file mode 100755 index 0000000000..5cec654a16 --- /dev/null +++ b/tests_e2e/tests/scripts/agent_persist_firewall-verify_firewalld_rules_readded.py @@ -0,0 +1,170 @@ +#!/usr/bin/env pypy3 +# Microsoft Azure Linux Agent +# +# Copyright 2018 
Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# This script deleting the firewalld rules and ensure deleted rules added back to the firewalld rule set after agent start +# + +from azurelinuxagent.common.osutil import get_osutil +from tests_e2e.tests.lib.firewall_helpers import firewalld_service_running, print_current_firewalld_rules, \ + get_non_root_accept_tcp_firewalld_rule, get_all_firewalld_rule_commands, FirewalldRules, execute_cmd, \ + check_if_firewalld_rule_is_available, verify_all_firewalld_rules_exist, get_root_accept_firewalld_rule, \ + get_non_root_drop_firewalld_rule +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.retry import retry + + +def delete_firewalld_rules(commands=None): + """ + This function is used to delete the provided rule or all(if not specified) from the firewalld rules + """ + if commands is None: + commands = [] + if not commands: + root_accept, non_root_accept, non_root_drop = get_all_firewalld_rule_commands(FirewalldRules.REMOVE_PASSTHROUGH) + commands.extend([root_accept, non_root_accept, non_root_drop]) + + log.info("Deleting firewalld rules \n %s", commands) + + try: + cmd = None + for command in commands: + cmd = command + retry(lambda: execute_cmd(cmd=cmd), attempts=3) + except Exception as e: + raise Exception("Error -- Failed to Delete the firewalld rule set {0}".format(e)) + + log.info("Success --Deletion of firewalld rule") + + +def 
verify_rules_deleted_successfully(commands=None): + """ + This function is used to verify if provided rule or all(if not specified) rules are deleted successfully. + """ + log.info("Verifying requested rules deleted successfully") + + if commands is None: + commands = [] + + if not commands: + root_accept, non_root_accept, non_root_drop = get_all_firewalld_rule_commands(FirewalldRules.QUERY_PASSTHROUGH) + commands.extend([root_accept, non_root_accept, non_root_drop]) + + # "--QUERY-PASSTHROUGH" return error code 1 when not available which is expected after deletion + for command in commands: + if not check_if_firewalld_rule_is_available(command): + pass + else: + raise Exception("Deletion of firewalld rules not successful\n.Current firewalld rules:\n" + print_current_firewalld_rules()) + + log.info("firewalld rules deleted successfully \n %s", commands) + + +def verify_non_root_accept_rule(): + """ + This function verifies the non root accept rule and make sure it is re added by agent after deletion + """ + log.info("verifying non root accept rule") + agent_name = get_osutil().get_service_name() + # stop the agent, so that it won't re-add rules while checking + log.info("stop the agent, so that it won't re-add rules while checking") + cmd = ["systemctl", "stop", agent_name] + execute_cmd(cmd) + + # deleting tcp rule + accept_tcp_rule_with_delete = get_non_root_accept_tcp_firewalld_rule(FirewalldRules.REMOVE_PASSTHROUGH) + delete_firewalld_rules([accept_tcp_rule_with_delete]) + + # verifying deletion successful + accept_tcp_rule_with_check = get_non_root_accept_tcp_firewalld_rule(FirewalldRules.QUERY_PASSTHROUGH) + verify_rules_deleted_successfully([accept_tcp_rule_with_check]) + + # restart the agent to re-add the deleted rules + log.info("restart the agent to re-add the deleted rules") + cmd = ["systemctl", "restart", agent_name] + execute_cmd(cmd=cmd) + + verify_all_firewalld_rules_exist() + + +def verify_root_accept_rule(): + """ + This function verifies the 
root accept rule and make sure it is re added by agent after deletion + """ + log.info("Verifying root accept rule") + agent_name = get_osutil().get_service_name() + # stop the agent, so that it won't re-add rules while checking + log.info("stop the agent, so that it won't re-add rules while checking") + cmd = ["systemctl", "stop", agent_name] + execute_cmd(cmd) + + # deleting root accept rule + root_accept_rule_with_delete = get_root_accept_firewalld_rule(FirewalldRules.REMOVE_PASSTHROUGH) + delete_firewalld_rules([root_accept_rule_with_delete]) + + # verifying deletion successful + root_accept_rule_with_check = get_root_accept_firewalld_rule(FirewalldRules.QUERY_PASSTHROUGH) + verify_rules_deleted_successfully([root_accept_rule_with_check]) + + # restart the agent to re-add the deleted rules + log.info("restart the agent to re-add the deleted rules") + cmd = ["systemctl", "restart", agent_name] + execute_cmd(cmd=cmd) + + verify_all_firewalld_rules_exist() + + +def verify_non_root_drop_rule(): + """ + This function verifies drop rule and make sure it is re added by agent after deletion + """ + log.info("Verifying non root drop rule") + agent_name = get_osutil().get_service_name() + # stop the agent, so that it won't re-add rules while checking + log.info("stop the agent, so that it won't re-add rules while checking") + cmd = ["systemctl", "stop", agent_name] + execute_cmd(cmd) + + # deleting non-root drop rule + non_root_drop_with_delete = get_non_root_drop_firewalld_rule(FirewalldRules.REMOVE_PASSTHROUGH) + delete_firewalld_rules([non_root_drop_with_delete]) + + # verifying deletion successful + non_root_drop_with_check = get_non_root_drop_firewalld_rule(FirewalldRules.QUERY_PASSTHROUGH) + verify_rules_deleted_successfully([non_root_drop_with_check]) + + # restart the agent to re-add the deleted rules + log.info("restart the agent to re-add the deleted rules") + cmd = ["systemctl", "restart", agent_name] + execute_cmd(cmd=cmd) + + 
verify_all_firewalld_rules_exist() + + +def main(): + + if firewalld_service_running(): + log.info("Displaying current firewalld rules") + print_current_firewalld_rules() + verify_non_root_accept_rule() + verify_root_accept_rule() + verify_non_root_drop_rule() + else: + log.info("firewalld.service is not running and skipping test") + + +if __name__ == "__main__": + main() diff --git a/tests_e2e/tests/scripts/agent_persist_firewall-verify_persist_firewall_service_running.py b/tests_e2e/tests/scripts/agent_persist_firewall-verify_persist_firewall_service_running.py new file mode 100755 index 0000000000..87e1e29e1d --- /dev/null +++ b/tests_e2e/tests/scripts/agent_persist_firewall-verify_persist_firewall_service_running.py @@ -0,0 +1,70 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# This script verifies firewalld rules set on the vm if firewalld service is running and if it's not running, it verifies network-setup service is enabled by the agent +# +from assertpy import fail + +from azurelinuxagent.common.osutil import get_osutil +from azurelinuxagent.common.utils import shellutil +from tests_e2e.tests.lib.firewall_helpers import execute_cmd_return_err_code, \ + firewalld_service_running, verify_all_firewalld_rules_exist +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.retry import retry_if_false + + +def verify_network_setup_service_enabled(): + """ + Checks if network-setup service is enabled in the vm + """ + agent_name = get_osutil().get_service_name() + service_name = "{0}-network-setup.service".format(agent_name) + cmd = ["systemctl", "is-enabled", service_name] + + def op(cmd): + exit_code, output = execute_cmd_return_err_code(cmd) + return exit_code == 0 and output.rstrip() == "enabled" + + try: + status = retry_if_false(lambda: op(cmd), attempts=5, delay=30) + except Exception as e: + log.warning("Error -- while checking network.service is-enabled status {0}".format(e)) + status = False + if not status: + cmd = ["systemctl", "status", service_name] + fail("network-setup.service is not enabled!. 
Current status: {0}".format(shellutil.run_command(cmd))) + + log.info("network-setup.service is enabled") + + +def verify_firewall_service_running(): + log.info("Ensure test agent initialize the firewalld/network service setup") + + # Check if firewall active on the Vm + log.info("Checking if firewall service is active on the VM") + if firewalld_service_running(): + # Checking if firewalld rules are present in rule set if firewall service is active + verify_all_firewalld_rules_exist() + else: + # Checking if network-setup service is enabled if firewall service is not active + log.info("Checking if network-setup service is enabled by the agent since firewall service is not active") + verify_network_setup_service_enabled() + + +if __name__ == "__main__": + verify_firewall_service_running() From 56543ed6e3ed8ccede91fb537773977d282f55da Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Thu, 14 Dec 2023 12:14:28 -0800 Subject: [PATCH 110/240] GA versioning refactor plus fetch new rsm properties. (#2974) * GA versioning refactor * added comment * added abstract decorator * undo abstract change * update names * addressed comments * pylint * agent family * state name * address comments * conf change --- azurelinuxagent/common/conf.py | 14 +- ...sions_goal_state_from_extensions_config.py | 9 +- .../extensions_goal_state_from_vm_settings.py | 9 +- azurelinuxagent/common/protocol/restapi.py | 24 +- azurelinuxagent/ga/agent_update_handler.py | 418 ++++++------------ azurelinuxagent/ga/ga_version_updater.py | 156 +++++++ azurelinuxagent/ga/guestagent.py | 11 - azurelinuxagent/ga/rsm_version_updater.py | 147 ++++++ .../ga/self_update_version_updater.py | 186 ++++++++ azurelinuxagent/ga/update.py | 7 +- ...sions_goal_state_from_extensions_config.py | 40 ++ ..._extensions_goal_state_from_vm_settings.py | 56 ++- ....xml => ext_conf-agent_family_version.xml} | 4 + .../ext_conf-rsm_version_properties_false.xml | 152 +++++++ ... 
=> vm_settings-agent_family_version.json} | 4 + ...gs-requested_version_properties_false.json | 145 ++++++ ...d_version.xml => ext_conf_rsm_version.xml} | 4 + ...t_conf_version_missing_in_agent_family.xml | 31 ++ ... ext_conf_version_missing_in_manifest.xml} | 4 + .../wire/ext_conf_version_not_from_rsm.xml | 33 ++ ...t_conf_vm_not_enabled_for_rsm_upgrades.xml | 33 ++ tests/ga/test_agent_update_handler.py | 302 +++++++------ tests/ga/test_update.py | 146 +++--- tests/lib/wire_protocol_data.py | 5 +- .../scripts/agent_update-wait_for_rsm_gs.py | 4 +- 25 files changed, 1423 insertions(+), 521 deletions(-) create mode 100644 azurelinuxagent/ga/ga_version_updater.py create mode 100644 azurelinuxagent/ga/rsm_version_updater.py create mode 100644 azurelinuxagent/ga/self_update_version_updater.py rename tests/data/hostgaplugin/{ext_conf-requested_version.xml => ext_conf-agent_family_version.xml} (97%) create mode 100644 tests/data/hostgaplugin/ext_conf-rsm_version_properties_false.xml rename tests/data/hostgaplugin/{vm_settings-requested_version.json => vm_settings-agent_family_version.json} (97%) create mode 100644 tests/data/hostgaplugin/vm_settings-requested_version_properties_false.json rename tests/data/wire/{ext_conf_requested_version.xml => ext_conf_rsm_version.xml} (89%) create mode 100644 tests/data/wire/ext_conf_version_missing_in_agent_family.xml rename tests/data/wire/{ext_conf_requested_version_missing_in_manifest.xml => ext_conf_version_missing_in_manifest.xml} (89%) create mode 100644 tests/data/wire/ext_conf_version_not_from_rsm.xml create mode 100644 tests/data/wire/ext_conf_vm_not_enabled_for_rsm_upgrades.xml diff --git a/azurelinuxagent/common/conf.py b/azurelinuxagent/common/conf.py index 167d520d09..57d6c9d280 100644 --- a/azurelinuxagent/common/conf.py +++ b/azurelinuxagent/common/conf.py @@ -622,20 +622,20 @@ def get_etp_collection_period(conf=__conf__): return conf.get_int("Debug.EtpCollectionPeriod", 300) -def 
get_hotfix_upgrade_frequency(conf=__conf__): +def get_self_update_hotfix_frequency(conf=__conf__): """ - Determines the frequency to check for Hotfix upgrades (. version changed in new upgrades). + Determines the frequency to check for Hotfix upgrades ( version changed in new upgrades). NOTE: This option is experimental and may be removed in later versions of the Agent. """ - return conf.get_int("Debug.AutoUpdateHotfixFrequency", 4 * 60 * 60) + return conf.get_int("Debug.SelfUpdateHotfixFrequency", 4 * 60 * 60) -def get_normal_upgrade_frequency(conf=__conf__): +def get_self_update_regular_frequency(conf=__conf__): """ - Determines the frequency to check for Normal upgrades (. version changed in new upgrades). + Determines the frequency to check for regular upgrades (.. version changed in new upgrades). NOTE: This option is experimental and may be removed in later versions of the Agent. """ - return conf.get_int("Debug.AutoUpdateNormalFrequency", 24 * 60 * 60) + return conf.get_int("Debug.SelfUpdateRegularFrequency", 24 * 60 * 60) def get_enable_ga_versioning(conf=__conf__): @@ -643,7 +643,7 @@ def get_enable_ga_versioning(conf=__conf__): If True, the agent looks for rsm updates(checking requested version in GS) otherwise it will fall back to self-update and finds the highest version from PIR. NOTE: This option is experimental and may be removed in later versions of the Agent. 
""" - return conf.get_switch("Debug.EnableGAVersioning", False) + return conf.get_switch("Debug.EnableGAVersioning", True) def get_firewall_rules_log_period(conf=__conf__): diff --git a/azurelinuxagent/common/protocol/extensions_goal_state_from_extensions_config.py b/azurelinuxagent/common/protocol/extensions_goal_state_from_extensions_config.py index a8bfa25054..2b98819a2a 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state_from_extensions_config.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state_from_extensions_config.py @@ -61,9 +61,16 @@ def _parse_extensions_config(self, xml_text, wire_client): for ga_family in ga_families: name = findtext(ga_family, "Name") version = findtext(ga_family, "Version") + is_version_from_rsm = findtext(ga_family, "IsVersionFromRSM") + is_vm_enabled_for_rsm_upgrades = findtext(ga_family, "IsVMEnabledForRSMUpgrades") uris_list = find(ga_family, "Uris") uris = findall(uris_list, "Uri") - family = VMAgentFamily(name, version) + family = VMAgentFamily(name) + family.version = version + if is_version_from_rsm is not None: # checking None because converting string to lowercase + family.is_version_from_rsm = is_version_from_rsm.lower() == "true" + if is_vm_enabled_for_rsm_upgrades is not None: # checking None because converting string to lowercase + family.is_vm_enabled_for_rsm_upgrades = is_vm_enabled_for_rsm_upgrades.lower() == "true" for uri in uris: family.uris.append(gettext(uri)) self._agent_families.append(family) diff --git a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py index 664d1e868f..041ddedcda 100644 --- a/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py +++ b/azurelinuxagent/common/protocol/extensions_goal_state_from_vm_settings.py @@ -243,6 +243,8 @@ def _parse_agent_manifests(self, vm_settings): # { # "name": "Prod", # "version": "9.9.9.9", + # "isVersionFromRSM": true, 
+ # "isVMEnabledForRSMUpgrades": true, # "uris": [ # "https://zrdfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml", # "https://ardfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml" @@ -267,10 +269,15 @@ def _parse_agent_manifests(self, vm_settings): for family in families: name = family["name"] version = family.get("version") + is_version_from_rsm = family.get("isVersionFromRSM") + is_vm_enabled_for_rsm_upgrades = family.get("isVMEnabledForRSMUpgrades") uris = family.get("uris") if uris is None: uris = [] - agent_family = VMAgentFamily(name, version) + agent_family = VMAgentFamily(name) + agent_family.version = version + agent_family.is_version_from_rsm = is_version_from_rsm + agent_family.is_vm_enabled_for_rsm_upgrades = is_vm_enabled_for_rsm_upgrades for u in uris: agent_family.uris.append(u) self._agent_families.append(agent_family) diff --git a/azurelinuxagent/common/protocol/restapi.py b/azurelinuxagent/common/protocol/restapi.py index 725e2d7bb4..35b40cf13c 100644 --- a/azurelinuxagent/common/protocol/restapi.py +++ b/azurelinuxagent/common/protocol/restapi.py @@ -22,7 +22,6 @@ from azurelinuxagent.common.datacontract import DataContract, DataContractList from azurelinuxagent.common.future import ustr -from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.utils.textutil import getattrib from azurelinuxagent.common.version import DISTRO_VERSION, DISTRO_NAME, CURRENT_VERSION @@ -69,23 +68,16 @@ def __init__(self): class VMAgentFamily(object): - def __init__(self, name, version=None): + def __init__(self, name): self.name = name - # This is the Requested version as specified by the Goal State, it defaults to 0.0.0.0 if not specified in GS - self.requested_version_string = VERSION_0 if version is None else version - self.uris = [] - - @property - def 
requested_version(self): - return FlexibleVersion(self.requested_version_string) + # Two-state: None, string. Set to None if version not specified in the GS + self.version = None + # Tri-state: None, True, False. Set to None if this property not specified in the GS. + self.is_version_from_rsm = None + # Tri-state: None, True, False. Set to None if this property not specified in the GS. + self.is_vm_enabled_for_rsm_upgrades = None - @property - def is_requested_version_specified(self): - """ - If we don't get any requested_version from the GS, we default it to 0.0.0.0. - This property identifies if a requested Version was passed in the GS or not. - """ - return self.requested_version > FlexibleVersion(VERSION_0) + self.uris = [] def __repr__(self): return self.__str__() diff --git a/azurelinuxagent/ga/agent_update_handler.py b/azurelinuxagent/ga/agent_update_handler.py index a650f110ac..ed157bdf5b 100644 --- a/azurelinuxagent/ga/agent_update_handler.py +++ b/azurelinuxagent/ga/agent_update_handler.py @@ -1,146 +1,125 @@ +# Microsoft Azure Linux Agent +# +# Copyright 2020 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Requires Python 2.6+ and Openssl 1.0+ import datetime -import glob import os -import shutil from azurelinuxagent.common import conf, logger from azurelinuxagent.common.event import add_event, WALAEventOperation from azurelinuxagent.common.exception import AgentUpgradeExitException, AgentUpdateError from azurelinuxagent.common.future import ustr -from azurelinuxagent.common.logger import LogLevel -from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateSource from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatuses, VMAgentUpdateStatus, VERSION_0 -from azurelinuxagent.common.utils import fileutil, textutil +from azurelinuxagent.common.utils import textutil from azurelinuxagent.common.utils.flexible_version import FlexibleVersion -from azurelinuxagent.common.version import get_daemon_version, CURRENT_VERSION, AGENT_NAME, AGENT_DIR_PATTERN -from azurelinuxagent.ga.guestagent import GuestAgent, GAUpdateReportState +from azurelinuxagent.common.version import get_daemon_version +from azurelinuxagent.ga.ga_version_updater import RSMUpdates +from azurelinuxagent.ga.rsm_version_updater import RSMVersionUpdater +from azurelinuxagent.ga.self_update_version_updater import SelfUpdateVersionUpdater def get_agent_update_handler(protocol): return AgentUpdateHandler(protocol) -class AgentUpgradeType(object): - """ - Enum for different modes of Agent Upgrade - """ - Hotfix = "Hotfix" - Normal = "Normal" - - -class AgentUpdateHandlerUpdateState(object): +class AgentUpdateHandler(object): """ - This class is primarily used to maintain the in-memory persistent state for the agent updates. - This state will be persisted throughout the current service run. + This class handles two type of agent updates. Handler initializes the updater to SelfUpdateVersionUpdater and switch to appropriate updater based on below conditions: + RSM update: This is the update requested by RSM. 
The contract between CRP and agent is we get following properties in the goal state: + version: it will have what version to update + isVersionFromRSM: True if the version is from RSM deployment. + isVMEnabledForRSMUpgrades: True if the VM is enabled for RSM upgrades. + if vm enabled for RSM upgrades, we use RSM update path. But if requested update is not by rsm deployment + we ignore the update. + Self update: We fallback to this if above is condition not met. This update to the largest version available in the manifest + Note: Self-update don't support downgrade. + + Handler keeps the rsm state of last update is with RSM or not on every new goal state. Once handler decides which updater to use, then + does following steps: + 1. Retrieve the agent version from the goal state. + 2. Check if we allowed to update for that version. + 3. Log the update message. + 4. Purge the extra agents from disk. + 5. Download the new agent. + 6. Proceed with update. + + [Note: 1.0.8.147 is the minimum supported version of HGPA which will have the isVersionFromRSM and isVMEnabledForRSMUpgrades properties in vmsettings.] 
""" - def __init__(self): - self.last_attempted_requested_version_update_time = datetime.datetime.min - self.last_attempted_hotfix_update_time = datetime.datetime.min - self.last_attempted_normal_update_time = datetime.datetime.min - self.last_attempted_manifest_download_time = datetime.datetime.min - - -class AgentUpdateHandler(object): - def __init__(self, protocol): self._protocol = protocol - self._ga_family = conf.get_autoupdate_gafamily() - self._autoupdate_enabled = conf.get_autoupdate_enabled() self._gs_id = "unknown" - self._is_requested_version_update = True # This is to track the current update type(requested version or self update) - self.update_state = AgentUpdateHandlerUpdateState() - - def __check_if_agent_update_allowed_and_update_next_upgrade_times(self, requested_version): - """ - requested version update: - update is allowed once per (as specified in the conf.get_autoupdate_frequency()) - return false when we don't allow updates. - largest version update(self-update): - update is allowed once per (as specified in the conf.get_hotfix_upgrade_frequency() or conf.get_normal_upgrade_frequency()) - return false when we don't allow updates. - """ - now = datetime.datetime.now() - - if self._is_requested_version_update: - if self.update_state.last_attempted_requested_version_update_time != datetime.datetime.min: - next_attempt_time = self.update_state.last_attempted_requested_version_update_time + datetime.timedelta(seconds=conf.get_autoupdate_frequency()) - else: - next_attempt_time = now - - if next_attempt_time > now: - return False - # The time limit elapsed for us to allow updates. 
- self.update_state.last_attempted_requested_version_update_time = now - return True - else: - next_hotfix_time, next_normal_time = self.__get_next_upgrade_times(now) - upgrade_type = self.__get_agent_upgrade_type(requested_version) - - if (upgrade_type == AgentUpgradeType.Hotfix and next_hotfix_time <= now) or ( - upgrade_type == AgentUpgradeType.Normal and next_normal_time <= now): - # Update the last upgrade check time even if no new agent is available for upgrade - self.update_state.last_attempted_hotfix_update_time = now - self.update_state.last_attempted_normal_update_time = now - return True - return False - - def __should_agent_attempt_manifest_download(self): - """ - The agent should attempt to download the manifest if - the agent has not attempted to download the manifest in the last 1 hour - """ - now = datetime.datetime.now() + self._ga_family_type = conf.get_autoupdate_gafamily() + self._daemon_version = self._get_daemon_version_for_update() + self._last_attempted_update_error_msg = "" - if self.update_state.last_attempted_manifest_download_time != datetime.datetime.min: - next_attempt_time = self.update_state.last_attempted_manifest_download_time + datetime.timedelta(seconds=conf.get_autoupdate_frequency()) + # restore the state of rsm update. Default to self-update if last update is not with RSM. + if not self._get_is_last_update_with_rsm(): + self._updater = SelfUpdateVersionUpdater(self._gs_id, datetime.datetime.min) else: - next_attempt_time = now - - if next_attempt_time > now: - return False - self.update_state.last_attempted_manifest_download_time = now - return True - - @staticmethod - def __get_agent_upgrade_type(requested_version): - # We follow semantic versioning for the agent, if .. is same, then has changed. - # In this case, we consider it as a Hotfix upgrade. Else we consider it a Normal upgrade. 
- if requested_version.major == CURRENT_VERSION.major and requested_version.minor == CURRENT_VERSION.minor and requested_version.patch == CURRENT_VERSION.patch: - return AgentUpgradeType.Hotfix - return AgentUpgradeType.Normal + self._updater = RSMVersionUpdater(self._gs_id, self._daemon_version) @staticmethod - def __get_daemon_version_for_update(): + def _get_daemon_version_for_update(): daemon_version = get_daemon_version() if daemon_version != FlexibleVersion(VERSION_0): return daemon_version - # We return 0.0.0.0 if we failed to retrieve daemon version. In that case, + # We return 0.0.0.0 if daemon version is not specified. In that case, # use the min version as 2.2.53 as we started setting the daemon version starting 2.2.53. return FlexibleVersion("2.2.53") - def __get_next_upgrade_times(self, now): + @staticmethod + def _get_rsm_update_state_file(): """ - Get the next upgrade times - return: Next Hotfix Upgrade Time, Next Normal Upgrade Time + This file keeps if last attempted update is rsm or not. 
""" + return os.path.join(conf.get_lib_dir(), "rsm_update.json") - def get_next_process_time(last_val, frequency): - return now if last_val == datetime.datetime.min else last_val + datetime.timedelta(seconds=frequency) + def _save_rsm_update_state(self): + """ + Save the rsm state empty file when we switch to RSM + """ + try: + with open(self._get_rsm_update_state_file(), "w"): + pass + except Exception as e: + logger.warn("Error creating the RSM state ({0}): {1}", self._get_rsm_update_state_file(), ustr(e)) - next_hotfix_time = get_next_process_time(self.update_state.last_attempted_hotfix_update_time, - conf.get_hotfix_upgrade_frequency()) - next_normal_time = get_next_process_time(self.update_state.last_attempted_normal_update_time, - conf.get_normal_upgrade_frequency()) + def _remove_rsm_update_state(self): + """ + Remove the rsm state file when we switch to self-update + """ + try: + if os.path.exists(self._get_rsm_update_state_file()): + os.remove(self._get_rsm_update_state_file()) + except Exception as e: + logger.warn("Error removing the RSM state ({0}): {1}", self._get_rsm_update_state_file(), ustr(e)) - return next_hotfix_time, next_normal_time + def _get_is_last_update_with_rsm(self): + """ + Returns True if state file exists as this consider as last update with RSM is true + """ + return os.path.exists(self._get_rsm_update_state_file()) - def __get_agent_family_manifests(self, goal_state): + def _get_agent_family_manifest(self, goal_state): """ Get the agent_family from last GS for the given family Returns: first entry of Manifest Exception if no manifests found in the last GS """ - family = self._ga_family + family = self._ga_family_type agent_families = goal_state.extensions_goal_state.agent_families family_found = False agent_family_manifests = [] @@ -151,211 +130,57 @@ def __get_agent_family_manifests(self, goal_state): agent_family_manifests.append(m) if not family_found: - raise AgentUpdateError(u"Agent family: {0} not found in the goal state, 
skipping agent update".format(family)) + raise AgentUpdateError(u"Agent family: {0} not found in the goal state: {1}, skipping agent update".format(family, self._gs_id)) if len(agent_family_manifests) == 0: raise AgentUpdateError( - u"No manifest links found for agent family: {0} for incarnation: {1}, skipping agent update".format( - self._ga_family, self._gs_id)) + u"No manifest links found for agent family: {0} for goal state: {1}, skipping agent update".format( + family, self._gs_id)) return agent_family_manifests[0] - @staticmethod - def __get_requested_version(agent_family): - """ - Get the requested version from agent family - Returns: Requested version if supported and available in the GS - None if requested version missing or GA versioning not enabled - """ - if conf.get_enable_ga_versioning() and agent_family.is_requested_version_specified: - if agent_family.requested_version is not None: - return FlexibleVersion(agent_family.requested_version) - return None - - @staticmethod - def __get_largest_version(agent_manifest): - largest_version = FlexibleVersion("0.0.0.0") - for pkg in agent_manifest.pkg_list.versions: - pkg_version = FlexibleVersion(pkg.version) - if pkg_version > largest_version: - largest_version = pkg_version - return largest_version + def run(self, goal_state, ext_gs_updated): - def __download_and_get_agent(self, goal_state, agent_family, agent_manifest, requested_version): - """ - This function downloads the new agent(requested version) and returns the downloaded version. 
- """ - if agent_manifest is None: # Fetch agent manifest if it's not already done - agent_manifest = goal_state.fetch_agent_manifest(agent_family.name, agent_family.uris) - package_to_download = self.__get_agent_package_to_download(agent_manifest, requested_version) - is_fast_track_goal_state = goal_state.extensions_goal_state.source == GoalStateSource.FastTrack - agent = GuestAgent.from_agent_package(package_to_download, self._protocol, is_fast_track_goal_state) - return agent - - def __get_agent_package_to_download(self, agent_manifest, version): - """ - Returns the package of the given Version found in the manifest. If not found, returns exception - """ - for pkg in agent_manifest.pkg_list.versions: - if FlexibleVersion(pkg.version) == version: - # Found a matching package, only download that one - return pkg - - raise AgentUpdateError("No matching package found in the agent manifest for requested version: {0} in goal state incarnation: {1}, " - "skipping agent update".format(str(version), self._gs_id)) - - @staticmethod - def __purge_extra_agents_from_disk(current_version, known_agents): - """ - Remove from disk all directories and .zip files of unknown agents - (without removing the current, running agent). 
- """ - path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME)) - - known_versions = [agent.version for agent in known_agents] - known_versions.append(current_version) - - for agent_path in glob.iglob(path): - try: - name = fileutil.trim_ext(agent_path, "zip") - m = AGENT_DIR_PATTERN.match(name) - if m is not None and FlexibleVersion(m.group(1)) not in known_versions: - if os.path.isfile(agent_path): - logger.info(u"Purging outdated Agent file {0}", agent_path) - os.remove(agent_path) - else: - logger.info(u"Purging outdated Agent directory {0}", agent_path) - shutil.rmtree(agent_path) - except Exception as e: - logger.warn(u"Purging {0} raised exception: {1}", agent_path, ustr(e)) - - def __proceed_with_update(self, requested_version): - """ - If requested version is specified, upgrade/downgrade to the specified version. - Raises: AgentUpgradeExitException - """ - if requested_version < CURRENT_VERSION: - # In case of a downgrade, we mark the current agent as bad version to avoid starting it back up ever again - # (the expectation here being that if we get request to a downgrade, - # there's a good reason for not wanting the current version). 
- prefix = "downgrade" - try: - # We should always have an agent directory for the CURRENT_VERSION - agents_on_disk = AgentUpdateHandler.__get_available_agents_on_disk() - current_agent = next(agent for agent in agents_on_disk if agent.version == CURRENT_VERSION) - msg = "Marking the agent {0} as bad version since a downgrade was requested in the GoalState, " \ - "suggesting that we really don't want to execute any extensions using this version".format(CURRENT_VERSION) - self.__log_event(LogLevel.INFO, msg) - current_agent.mark_failure(is_fatal=True, reason=msg) - except StopIteration: - logger.warn( - "Could not find a matching agent with current version {0} to blacklist, skipping it".format( - CURRENT_VERSION)) - else: - # In case of an upgrade, we don't need to exclude anything as the daemon will automatically - # start the next available highest version which would be the target version - prefix = "upgrade" - raise AgentUpgradeExitException("Agent update found, exiting current process to {0} to the new Agent version {1}".format(prefix, requested_version)) - - @staticmethod - def __get_available_agents_on_disk(): - available_agents = [agent for agent in AgentUpdateHandler.__get_all_agents_on_disk() if agent.is_available] - return sorted(available_agents, key=lambda agent: agent.version, reverse=True) - - @staticmethod - def __get_all_agents_on_disk(): - path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME)) - return [GuestAgent.from_installed_agent(path=agent_dir) for agent_dir in glob.iglob(path) if os.path.isdir(agent_dir)] - - def __check_if_downgrade_is_requested_and_allowed(self, requested_version): - """ - Don't allow downgrades for self-update version - Note: The intention of this check is to keep the original behavior of self-update as it is. 
- """ - if not self._is_requested_version_update: - if requested_version < CURRENT_VERSION: - return False - return True - - @staticmethod - def __log_event(level, msg, success=True): - if level == LogLevel.INFO: - logger.info(msg) - elif level == LogLevel.WARNING: - logger.warn(msg) - elif level == LogLevel.ERROR: - logger.error(msg) - add_event(op=WALAEventOperation.AgentUpgrade, is_success=success, message=msg, log_event=False) - - def run(self, goal_state): try: # Ignore new agents if update is disabled. The latter flag only used in e2e tests. - if not self._autoupdate_enabled or not conf.get_download_new_agents(): + if not conf.get_autoupdate_enabled() or not conf.get_download_new_agents(): return - self._gs_id = goal_state.extensions_goal_state.id - agent_family = self.__get_agent_family_manifests(goal_state) - requested_version = self.__get_requested_version(agent_family) - agent_manifest = None # This is to make sure fetch agent manifest once per update - warn_msg = "" - if requested_version is None: - # Do not proceed with update if self-update needs to download the manifest again with in an hour - if not self.__should_agent_attempt_manifest_download(): - return - if conf.get_enable_ga_versioning(): # log the warning only when ga versioning is enabled - # TODO: Need to revisit this msg when version is missing in Goal state. 
We may need to handle better way to report the error - warn_msg = "Missing requested version in agent family: {0} for incarnation: {1}, fallback to largest version update".format(self._ga_family, self._gs_id) - GAUpdateReportState.report_error_msg = warn_msg - agent_manifest = goal_state.fetch_agent_manifest(agent_family.name, agent_family.uris) - requested_version = self.__get_largest_version(agent_manifest) - self._is_requested_version_update = False - else: - self._is_requested_version_update = True - # Save the requested version to report back - GAUpdateReportState.report_expected_version = requested_version - # Remove the missing requested version warning once requested version becomes available - if "Missing requested version" in GAUpdateReportState.report_error_msg: - GAUpdateReportState.report_error_msg = "" - - # Check if an update is allowed and update next upgrade times even if no new agent is available for upgrade - if not self.__check_if_agent_update_allowed_and_update_next_upgrade_times(requested_version): + # verify if agent update is allowed this time (RSM checks new goal state; self-update checks manifest download interval) + if not self._updater.is_update_allowed_this_time(ext_gs_updated): return - if requested_version == CURRENT_VERSION: - return + self._gs_id = goal_state.extensions_goal_state.id + agent_family = self._get_agent_family_manifest(goal_state) - if warn_msg != "": - self.__log_event(LogLevel.WARNING, warn_msg) + # updater will return RSM enabled or disabled if we need to switch to self-update or rsm update + updater_mode = self._updater.check_and_switch_updater_if_changed(agent_family, self._gs_id, ext_gs_updated) - # Downgrades are not allowed for self-update version - if not self.__check_if_downgrade_is_requested_and_allowed(requested_version): - return + if updater_mode == RSMUpdates.Disabled: + msg = "VM not enabled for RSM updates, switching to self-update mode" + logger.info(msg) + add_event(op=WALAEventOperation.AgentUpgrade, 
message=msg, log_event=False) + self._updater = SelfUpdateVersionUpdater(self._gs_id, datetime.datetime.now()) + self._remove_rsm_update_state() - daemon_version = self.__get_daemon_version_for_update() - if requested_version < daemon_version: - # Don't process the update if the requested version is less than daemon version, - # as historically we don't support downgrades below daemon versions. So daemon will not pickup that requested version rather start with - # installed latest version again. When that happens agent go into loop of downloading the requested version, exiting and start again with same version. - # - raise AgentUpdateError("The Agent received a request to downgrade to version {0}, but downgrading to a version less than " - "the Agent installed on the image ({1}) is not supported. Skipping downgrade.".format(requested_version, daemon_version)) + if updater_mode == RSMUpdates.Enabled: + msg = "VM enabled for RSM updates, switching to RSM update mode" + logger.info(msg) + add_event(op=WALAEventOperation.AgentUpgrade, message=msg, log_event=False) + self._updater = RSMVersionUpdater(self._gs_id, self._daemon_version) + self._save_rsm_update_state() - # Todo: Need to update the message when we fix RSM stuff - msg = "Self-update discovered new agent version:{0} in agent manifest for goal state {1}, will update the agent before processing the goal state.".format( - str(requested_version), self._gs_id) - self.__log_event(LogLevel.INFO, msg) - - agent = self.__download_and_get_agent(goal_state, agent_family, agent_manifest, requested_version) + self._updater.retrieve_agent_version(agent_family, goal_state) + if not self._updater.is_retrieved_version_allowed_to_update(agent_family): + return + self._updater.log_new_agent_update_message() + self._updater.purge_extra_agents_from_disk() + agent = self._updater.download_and_get_new_agent(self._protocol, agent_family, goal_state) if agent.is_blacklisted or not agent.is_downloaded: msg = "Downloaded agent version 
is in bad state : {0} , skipping agent update".format( str(agent.version)) - self.__log_event(LogLevel.WARNING, msg) - return - - # We delete the directory and the zip package from the filesystem except current version and target version - self.__purge_extra_agents_from_disk(CURRENT_VERSION, known_agents=[agent]) - self.__proceed_with_update(requested_version) - + raise AgentUpdateError(msg) + self._updater.proceed_with_update() except Exception as err: if isinstance(err, AgentUpgradeExitException): @@ -364,25 +189,28 @@ def run(self, goal_state): error_msg = ustr(err) else: error_msg = "Unable to update Agent: {0}".format(textutil.format_exception(err)) - self.__log_event(LogLevel.WARNING, error_msg, success=False) - if "Missing requested version" not in GAUpdateReportState.report_error_msg: - GAUpdateReportState.report_error_msg = error_msg + logger.warn(error_msg) + add_event(op=WALAEventOperation.AgentUpgrade, is_success=False, message=error_msg, log_event=False) + self._last_attempted_update_error_msg = error_msg def get_vmagent_update_status(self): """ This function gets the VMAgent update status as per the last attempted update. - Returns: None if fail to report or update never attempted with requested version + Returns: None if fail to report or update never attempted with rsm version specified in GS + Note: We send the status regardless of updater type. Since we call this main loop, want to avoid fetching agent family to decide and send only if + vm enabled for rsm updates. 
""" try: if conf.get_enable_ga_versioning(): - if not GAUpdateReportState.report_error_msg: + if not self._last_attempted_update_error_msg: status = VMAgentUpdateStatuses.Success code = 0 else: status = VMAgentUpdateStatuses.Error code = 1 - return VMAgentUpdateStatus(expected_version=str(GAUpdateReportState.report_expected_version), status=status, code=code, message=GAUpdateReportState.report_error_msg) + return VMAgentUpdateStatus(expected_version=str(self._updater.version), status=status, code=code, message=self._last_attempted_update_error_msg) except Exception as err: - self.__log_event(LogLevel.WARNING, "Unable to report agent update status: {0}".format( - textutil.format_exception(err)), success=False) + msg = "Unable to report agent update status: {0}".format(textutil.format_exception(err)) + logger.warn(msg) + add_event(op=WALAEventOperation.AgentUpgrade, is_success=False, message=msg, log_event=True) return None diff --git a/azurelinuxagent/ga/ga_version_updater.py b/azurelinuxagent/ga/ga_version_updater.py new file mode 100644 index 0000000000..0d3f639f25 --- /dev/null +++ b/azurelinuxagent/ga/ga_version_updater.py @@ -0,0 +1,156 @@ +# Microsoft Azure Linux Agent +# +# Copyright 2020 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Requires Python 2.6+ and Openssl 1.0+ + +import glob +import os +import shutil + +from azurelinuxagent.common import conf, logger +from azurelinuxagent.common.exception import AgentUpdateError +from azurelinuxagent.common.future import ustr +from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateSource +from azurelinuxagent.common.utils import fileutil +from azurelinuxagent.common.utils.flexible_version import FlexibleVersion +from azurelinuxagent.common.version import AGENT_NAME, AGENT_DIR_PATTERN +from azurelinuxagent.ga.guestagent import GuestAgent + + +class RSMUpdates(object): + """ + Enum for switching between RSM updates and self updates + """ + Enabled = "Enabled" + Disabled = "Disabled" + + +class GAVersionUpdater(object): + + def __init__(self, gs_id): + self._gs_id = gs_id + self._version = FlexibleVersion("0.0.0.0") # Initialize to zero and retrieve from goal state later stage + self._agent_manifest = None # Initialize to None and fetch from goal state at different stage for different updater + + def is_update_allowed_this_time(self, ext_gs_updated): + """ + This function checks if we allowed to update the agent. + @param ext_gs_updated: True if extension goal state updated else False + @return false when we don't allow updates. 
+ """ + raise NotImplementedError + + def check_and_switch_updater_if_changed(self, agent_family, gs_id, ext_gs_updated): + """ + checks and raise the updater exception if we need to switch to self-update from rsm update or vice versa + @param agent_family: agent family + @param gs_id: incarnation of the goal state + @param ext_gs_updated: True if extension goal state updated else False + @return: RSMUpdates.Disabled: return when agent need to stop rsm updates and switch to self-update + RSMUpdates.Enabled: return when agent need to switch to rsm update + None: return when no need to switch + """ + raise NotImplementedError + + def retrieve_agent_version(self, agent_family, goal_state): + """ + This function fetches the agent version from the goal state for the given family. + @param agent_family: agent family + @param goal_state: goal state + """ + raise NotImplementedError + + def is_retrieved_version_allowed_to_update(self, agent_family): + """ + Checks all base condition if new version allow to update. + @param agent_family: agent family + @return: True if allowed to update else False + """ + raise NotImplementedError + + def log_new_agent_update_message(self): + """ + This function logs the update message after we check agent allowed to update. + """ + raise NotImplementedError + + def purge_extra_agents_from_disk(self): + """ + Method remove the extra agents from disk. + """ + raise NotImplementedError + + def proceed_with_update(self): + """ + performs upgrade/downgrade + @return: AgentUpgradeExitException + """ + raise NotImplementedError + + @property + def version(self): + """ + Return version + """ + return self._version + + def download_and_get_new_agent(self, protocol, agent_family, goal_state): + """ + Function downloads the new agent and returns the downloaded version. 
+ @param protocol: protocol object + @param agent_family: agent family + @param goal_state: goal state + @return: GuestAgent: downloaded agent + """ + if self._agent_manifest is None: # Fetch agent manifest if it's not already done + self._agent_manifest = goal_state.fetch_agent_manifest(agent_family.name, agent_family.uris) + package_to_download = self._get_agent_package_to_download(self._agent_manifest, self._version) + is_fast_track_goal_state = goal_state.extensions_goal_state.source == GoalStateSource.FastTrack + agent = GuestAgent.from_agent_package(package_to_download, protocol, is_fast_track_goal_state) + return agent + + def _get_agent_package_to_download(self, agent_manifest, version): + """ + Returns the package of the given Version found in the manifest. If not found, returns exception + """ + for pkg in agent_manifest.pkg_list.versions: + if FlexibleVersion(pkg.version) == version: + # Found a matching package, only download that one + return pkg + + raise AgentUpdateError("No matching package found in the agent manifest for version: {0} in goal state incarnation: {1}, " + "skipping agent update".format(str(version), self._gs_id)) + + @staticmethod + def _purge_unknown_agents_from_disk(known_agents): + """ + Remove from disk all directories and .zip files of unknown agents + """ + path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME)) + + for agent_path in glob.iglob(path): + try: + name = fileutil.trim_ext(agent_path, "zip") + m = AGENT_DIR_PATTERN.match(name) + if m is not None and FlexibleVersion(m.group(1)) not in known_agents: + if os.path.isfile(agent_path): + logger.info(u"Purging outdated Agent file {0}", agent_path) + os.remove(agent_path) + else: + logger.info(u"Purging outdated Agent directory {0}", agent_path) + shutil.rmtree(agent_path) + except Exception as e: + logger.warn(u"Purging {0} raised exception: {1}", agent_path, ustr(e)) diff --git a/azurelinuxagent/ga/guestagent.py b/azurelinuxagent/ga/guestagent.py index 
56f3142447..35472c74a6 100644 --- a/azurelinuxagent/ga/guestagent.py +++ b/azurelinuxagent/ga/guestagent.py @@ -18,15 +18,6 @@ MAX_FAILURE = 3 # Max failure allowed for agent before declare bad agent -class GAUpdateReportState(object): - """ - This class is primarily used to maintain the in-memory persistent state for the agent updates. - This state will be persisted throughout the current service run and might be modified by external classes. - """ - report_error_msg = "" - report_expected_version = FlexibleVersion("0.0.0.0") - - class GuestAgent(object): def __init__(self, path, pkg, protocol, is_fast_track_goal_state): """ @@ -74,8 +65,6 @@ def __init__(self, path, pkg, protocol, is_fast_track_goal_state): msg = u"Agent {0} install failed with exception:".format( self.name) detailed_msg = '{0} {1}'.format(msg, textutil.format_exception(e)) - if "Missing requested version" not in GAUpdateReportState.report_error_msg: - GAUpdateReportState.report_error_msg = detailed_msg # capture the download errors to report back add_event( AGENT_NAME, version=self.version, diff --git a/azurelinuxagent/ga/rsm_version_updater.py b/azurelinuxagent/ga/rsm_version_updater.py new file mode 100644 index 0000000000..dc972c1c7a --- /dev/null +++ b/azurelinuxagent/ga/rsm_version_updater.py @@ -0,0 +1,147 @@ +# Microsoft Azure Linux Agent +# +# Copyright 2020 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Requires Python 2.6+ and Openssl 1.0+ + +import glob +import os + +from azurelinuxagent.common import conf, logger +from azurelinuxagent.common.event import add_event, WALAEventOperation +from azurelinuxagent.common.exception import AgentUpgradeExitException, AgentUpdateError +from azurelinuxagent.common.utils.flexible_version import FlexibleVersion +from azurelinuxagent.common.version import CURRENT_VERSION, AGENT_NAME +from azurelinuxagent.ga.ga_version_updater import GAVersionUpdater, RSMUpdates +from azurelinuxagent.ga.guestagent import GuestAgent + + +class RSMVersionUpdater(GAVersionUpdater): + def __init__(self, gs_id, daemon_version): + super(RSMVersionUpdater, self).__init__(gs_id) + self._daemon_version = daemon_version + + @staticmethod + def _get_all_agents_on_disk(): + path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME)) + return [GuestAgent.from_installed_agent(path=agent_dir) for agent_dir in glob.iglob(path) if + os.path.isdir(agent_dir)] + + def _get_available_agents_on_disk(self): + available_agents = [agent for agent in self._get_all_agents_on_disk() if agent.is_available] + return sorted(available_agents, key=lambda agent: agent.version, reverse=True) + + def is_update_allowed_this_time(self, ext_gs_updated): + """ + RSM update allowed if we have a new goal state + """ + return ext_gs_updated + + def check_and_switch_updater_if_changed(self, agent_family, gs_id, ext_gs_updated): + """ + Checks if there is a new goal state and decide if we need to continue with rsm update or switch to self-update. + Firstly it checks agent supports GA versioning or not. If not, we return rsm updates disabled to switch to self-update. + if vm is enabled for RSM updates and continue with rsm update, otherwise we return rsm updates disabled to switch to self-update. + if either isVersionFromRSM or isVMEnabledForRSMUpgrades or version is missing in the goal state, we ignore the update as we consider it as invalid goal state. 
+ """ + if ext_gs_updated: + self._gs_id = gs_id + if not conf.get_enable_ga_versioning(): + return RSMUpdates.Disabled + + if agent_family.is_vm_enabled_for_rsm_upgrades is None: + raise AgentUpdateError( + "Received invalid goal state:{0}, missing isVMEnabledForRSMUpgrades property. So, skipping agent update".format( + self._gs_id)) + elif not agent_family.is_vm_enabled_for_rsm_upgrades: + return RSMUpdates.Disabled + else: + if agent_family.is_version_from_rsm is None: + raise AgentUpdateError( + "Received invalid goal state:{0}, missing isVersionFromRSM property. So, skipping agent update".format( + self._gs_id)) + if agent_family.version is None: + raise AgentUpdateError( + "Received invalid goal state:{0}, missing version property. So, skipping agent update".format( + self._gs_id)) + + return None + + def retrieve_agent_version(self, agent_family, goal_state): + """ + Get the agent version from the goal state + """ + self._version = FlexibleVersion(agent_family.version) + + def is_retrieved_version_allowed_to_update(self, agent_family): + """ + Once version retrieved from goal state, we check if we allowed to update for that version + allow update If new version not same as current version, not below than daemon version and if version is from rsm request + """ + + if not agent_family.is_version_from_rsm or self._version < self._daemon_version or self._version == CURRENT_VERSION: + return False + + return True + + def log_new_agent_update_message(self): + """ + This function logs the update message after we check version allowed to update. + """ + msg = "New agent version:{0} requested by RSM in Goal state {1}, will update the agent before processing the goal state.".format( + str(self._version), self._gs_id) + logger.info(msg) + add_event(op=WALAEventOperation.AgentUpgrade, message=msg, log_event=False) + + def purge_extra_agents_from_disk(self): + """ + Remove the agents( including rsm version if exists) from disk except current version. 
There is a chance that rsm version could exist and/or blacklisted + on previous update attempts. So we should remove it from disk in order to honor current rsm version update. + """ + known_agents = [CURRENT_VERSION] + self._purge_unknown_agents_from_disk(known_agents) + + def proceed_with_update(self): + """ + upgrade/downgrade to the new version. + Raises: AgentUpgradeExitException + """ + if self._version < CURRENT_VERSION: + # In case of a downgrade, we mark the current agent as bad version to avoid starting it back up ever again + # (the expectation here being that if we get request to a downgrade, + # there's a good reason for not wanting the current version). + prefix = "downgrade" + try: + # We should always have an agent directory for the CURRENT_VERSION + agents_on_disk = self._get_available_agents_on_disk() + current_agent = next(agent for agent in agents_on_disk if agent.version == CURRENT_VERSION) + msg = "Marking the agent {0} as bad version since a downgrade was requested in the GoalState, " \ + "suggesting that we really don't want to execute any extensions using this version".format( + CURRENT_VERSION) + logger.info(msg) + add_event(op=WALAEventOperation.AgentUpgrade, message=msg, log_event=False) + current_agent.mark_failure(is_fatal=True, reason=msg) + except StopIteration: + logger.warn( + "Could not find a matching agent with current version {0} to blacklist, skipping it".format( + CURRENT_VERSION)) + else: + # In case of an upgrade, we don't need to exclude anything as the daemon will automatically + # start the next available highest version which would be the target version + prefix = "upgrade" + raise AgentUpgradeExitException( + "Agent completed all update checks, exiting current process to {0} to the new Agent version {1}".format( + prefix, + self._version)) diff --git a/azurelinuxagent/ga/self_update_version_updater.py b/azurelinuxagent/ga/self_update_version_updater.py new file mode 100644 index 0000000000..2b04fd5c22 --- /dev/null +++ 
b/azurelinuxagent/ga/self_update_version_updater.py @@ -0,0 +1,186 @@ +# Microsoft Azure Linux Agent +# +# Copyright 2020 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Requires Python 2.6+ and Openssl 1.0+ + +import datetime + +from azurelinuxagent.common import conf, logger +from azurelinuxagent.common.event import add_event, WALAEventOperation +from azurelinuxagent.common.exception import AgentUpgradeExitException, AgentUpdateError +from azurelinuxagent.common.utils.flexible_version import FlexibleVersion +from azurelinuxagent.common.version import CURRENT_VERSION +from azurelinuxagent.ga.ga_version_updater import GAVersionUpdater, RSMUpdates + + +class SelfUpdateType(object): + """ + Enum for different modes of Self updates + """ + Hotfix = "Hotfix" + Regular = "Regular" + + +class SelfUpdateVersionUpdater(GAVersionUpdater): + def __init__(self, gs_id, last_attempted_manifest_download_time): + super(SelfUpdateVersionUpdater, self).__init__(gs_id) + self._last_attempted_manifest_download_time = last_attempted_manifest_download_time + self._last_attempted_self_update_time = datetime.datetime.min + + @staticmethod + def _get_largest_version(agent_manifest): + """ + Get the largest version from the agent manifest + """ + largest_version = FlexibleVersion("0.0.0.0") + for pkg in agent_manifest.pkg_list.versions: + pkg_version = FlexibleVersion(pkg.version) + if pkg_version > largest_version: + largest_version = pkg_version + return 
largest_version + + @staticmethod + def _get_agent_upgrade_type(version): + # We follow semantic versioning for the agent, if .. is same, then has changed. + # In this case, we consider it as a Hotfix upgrade. Else we consider it a Regular upgrade. + if version.major == CURRENT_VERSION.major and version.minor == CURRENT_VERSION.minor and version.patch == CURRENT_VERSION.patch: + return SelfUpdateType.Hotfix + return SelfUpdateType.Regular + + @staticmethod + def _get_next_process_time(last_val, frequency, now): + """ + Get the next upgrade time + """ + return now if last_val == datetime.datetime.min else last_val + datetime.timedelta(seconds=frequency) + + def _is_new_agent_allowed_update(self): + """ + This method ensure that update is allowed only once per (hotfix/Regular) upgrade frequency + """ + now = datetime.datetime.now() + upgrade_type = self._get_agent_upgrade_type(self._version) + if upgrade_type == SelfUpdateType.Hotfix: + next_update_time = self._get_next_process_time(self._last_attempted_self_update_time, + conf.get_self_update_hotfix_frequency(), now) + else: + next_update_time = self._get_next_process_time(self._last_attempted_self_update_time, + conf.get_self_update_regular_frequency(), now) + + if next_update_time <= now: + # Update the last upgrade check time even if no new agent is available for upgrade + self._last_attempted_self_update_time = now + return True + return False + + def _should_agent_attempt_manifest_download(self): + """ + The agent should attempt to download the manifest if + the agent has not attempted to download the manifest in the last 1 hour + If we allow update, we update the last attempted manifest download time + """ + now = datetime.datetime.now() + + if self._last_attempted_manifest_download_time != datetime.datetime.min: + next_attempt_time = self._last_attempted_manifest_download_time + datetime.timedelta( + seconds=conf.get_autoupdate_frequency()) + else: + next_attempt_time = now + + if next_attempt_time > now: + 
return False + self._last_attempted_manifest_download_time = now + return True + + def is_update_allowed_this_time(self, ext_gs_updated): + """ + Checks if we allowed download manifest as per manifest download frequency + """ + if not self._should_agent_attempt_manifest_download(): + return False + return True + + def check_and_switch_updater_if_changed(self, agent_family, gs_id, ext_gs_updated): + """ + Checks if there is a new goal state and decide if we need to continue with self-update or switch to rsm update. + if vm is not enabled for RSM updates or agent not supports GA versioning then we continue with self update, otherwise we rsm enabled to switch to rsm update. + if isVersionFromRSM is missing but isVMEnabledForRSMUpgrades is present in the goal state, we ignore the update as we consider it as invalid goal state. + """ + if ext_gs_updated: + self._gs_id = gs_id + if conf.get_enable_ga_versioning() and agent_family.is_vm_enabled_for_rsm_upgrades is not None and agent_family.is_vm_enabled_for_rsm_upgrades: + if agent_family.is_version_from_rsm is None: + raise AgentUpdateError( + "Received invalid goal state:{0}, missing isVersionFromRSM property. So, skipping agent update".format( + self._gs_id)) + else: + if agent_family.version is None: + raise AgentUpdateError( + "Received invalid goal state:{0}, missing version property. 
So, skipping agent update".format( + self._gs_id)) + return RSMUpdates.Enabled + + return None + + def retrieve_agent_version(self, agent_family, goal_state): + """ + Get the largest version from the agent manifest + """ + self._agent_manifest = goal_state.fetch_agent_manifest(agent_family.name, agent_family.uris) + largest_version = self._get_largest_version(self._agent_manifest) + self._version = largest_version + + def is_retrieved_version_allowed_to_update(self, agent_family): + """ + checks update is spread per (as specified in the conf.get_self_update_hotfix_frequency() or conf.get_self_update_regular_frequency()) + or if version below than current version + return false when we don't allow updates. + """ + if not self._is_new_agent_allowed_update(): + return False + + if self._version <= CURRENT_VERSION: + return False + + return True + + def log_new_agent_update_message(self): + """ + This function logs the update message after we check version allowed to update. + """ + msg = "Self-update discovered new agent version:{0} in agent manifest for goal state {1}, will update the agent before processing the goal state.".format( + str(self._version), self._gs_id) + logger.info(msg) + add_event(op=WALAEventOperation.AgentUpgrade, message=msg, log_event=False) + + def purge_extra_agents_from_disk(self): + """ + Remove the agents from disk except current version and new agent version if exists + """ + known_agents = [CURRENT_VERSION, self._version] + self._purge_unknown_agents_from_disk(known_agents) + + def proceed_with_update(self): + """ + upgrade to largest version. Downgrade is not supported. 
+ Raises: AgentUpgradeExitException + """ + if self._version > CURRENT_VERSION: + # In case of an upgrade, we don't need to exclude anything as the daemon will automatically + # start the next available highest version which would be the target version + raise AgentUpgradeExitException( + "Agent completed all update checks, exiting current process to upgrade to the new Agent version {0}".format( + self._version)) diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 6e7b5b917c..88267b75e2 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -547,21 +547,20 @@ def _processing_new_extensions_goal_state(self): """ True if we are currently processing a new extensions goal state """ - egs = self._goal_state.extensions_goal_state - return self._goal_state is not None and egs.id != self._last_extensions_gs_id and not egs.is_outdated + return self._goal_state is not None and self._goal_state.extensions_goal_state.id != self._last_extensions_gs_id and not self._goal_state.extensions_goal_state.is_outdated def _process_goal_state(self, exthandlers_handler, remote_access_handler, agent_update_handler): protocol = exthandlers_handler.protocol # update self._goal_state if not self._try_update_goal_state(protocol): - agent_update_handler.run(self._goal_state) + agent_update_handler.run(self._goal_state, self._processing_new_extensions_goal_state()) # status reporting should be done even when the goal state is not updated self._report_status(exthandlers_handler, agent_update_handler) return # check for agent updates - agent_update_handler.run(self._goal_state) + agent_update_handler.run(self._goal_state, self._processing_new_extensions_goal_state()) try: if self._processing_new_extensions_goal_state(): diff --git a/tests/common/protocol/test_extensions_goal_state_from_extensions_config.py b/tests/common/protocol/test_extensions_goal_state_from_extensions_config.py index 61380a46f7..2a9acff659 100644 --- 
a/tests/common/protocol/test_extensions_goal_state_from_extensions_config.py +++ b/tests/common/protocol/test_extensions_goal_state_from_extensions_config.py @@ -60,3 +60,43 @@ def test_its_source_channel_should_be_wire_server(self): extensions_goal_state = protocol.get_goal_state().extensions_goal_state self.assertEqual(GoalStateChannel.WireServer, extensions_goal_state.channel, "The channel is incorrect") + + def test_it_should_parse_is_version_from_rsm_properly(self): + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: + agent_families = protocol.get_goal_state().extensions_goal_state.agent_families + for family in agent_families: + self.assertIsNone(family.is_version_from_rsm, "is_version_from_rsm should be None") + + data_file = wire_protocol_data.DATA_FILE.copy() + data_file["ext_conf"] = "hostgaplugin/ext_conf-agent_family_version.xml" + with mock_wire_protocol(data_file) as protocol: + agent_families = protocol.get_goal_state().extensions_goal_state.agent_families + for family in agent_families: + self.assertTrue(family.is_version_from_rsm, "is_version_from_rsm should be True") + + data_file = wire_protocol_data.DATA_FILE.copy() + data_file["ext_conf"] = "hostgaplugin/ext_conf-rsm_version_properties_false.xml" + with mock_wire_protocol(data_file) as protocol: + agent_families = protocol.get_goal_state().extensions_goal_state.agent_families + for family in agent_families: + self.assertFalse(family.is_version_from_rsm, "is_version_from_rsm should be False") + + def test_it_should_parse_is_vm_enabled_for_rsm_upgrades(self): + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: + agent_families = protocol.get_goal_state().extensions_goal_state.agent_families + for family in agent_families: + self.assertIsNone(family.is_vm_enabled_for_rsm_upgrades, "is_vm_enabled_for_rsm_upgrades should be None") + + data_file = wire_protocol_data.DATA_FILE.copy() + data_file["ext_conf"] = "hostgaplugin/ext_conf-agent_family_version.xml" + with 
mock_wire_protocol(data_file) as protocol: + agent_families = protocol.get_goal_state().extensions_goal_state.agent_families + for family in agent_families: + self.assertTrue(family.is_vm_enabled_for_rsm_upgrades, "is_vm_enabled_for_rsm_upgrades should be True") + + data_file = wire_protocol_data.DATA_FILE.copy() + data_file["ext_conf"] = "hostgaplugin/ext_conf-rsm_version_properties_false.xml" + with mock_wire_protocol(data_file) as protocol: + agent_families = protocol.get_goal_state().extensions_goal_state.agent_families + for family in agent_families: + self.assertFalse(family.is_vm_enabled_for_rsm_upgrades, "is_vm_enabled_for_rsm_upgrades should be False") diff --git a/tests/common/protocol/test_extensions_goal_state_from_vm_settings.py b/tests/common/protocol/test_extensions_goal_state_from_vm_settings.py index bea1063f70..771fa22068 100644 --- a/tests/common/protocol/test_extensions_goal_state_from_vm_settings.py +++ b/tests/common/protocol/test_extensions_goal_state_from_vm_settings.py @@ -53,16 +53,66 @@ def test_it_should_parse_requested_version_properly(self): goal_state = GoalState(protocol.client) families = goal_state.extensions_goal_state.agent_families for family in families: - self.assertEqual(family.requested_version_string, "0.0.0.0", "Version should be None") + self.assertIsNone(family.version, "Version should be None") data_file = wire_protocol_data.DATA_FILE_VM_SETTINGS.copy() - data_file["vm_settings"] = "hostgaplugin/vm_settings-requested_version.json" + data_file["vm_settings"] = "hostgaplugin/vm_settings-agent_family_version.json" with mock_wire_protocol(data_file) as protocol: protocol.mock_wire_data.set_etag(888) goal_state = GoalState(protocol.client) families = goal_state.extensions_goal_state.agent_families for family in families: - self.assertEqual(family.requested_version_string, "9.9.9.9", "Version should be 9.9.9.9") + self.assertEqual(family.version, "9.9.9.9", "Version should be 9.9.9.9") + + def 
test_it_should_parse_is_version_from_rsm_properly(self): + with mock_wire_protocol(wire_protocol_data.DATA_FILE_VM_SETTINGS) as protocol: + goal_state = GoalState(protocol.client) + families = goal_state.extensions_goal_state.agent_families + for family in families: + self.assertIsNone(family.is_version_from_rsm, "is_version_from_rsm should be None") + + data_file = wire_protocol_data.DATA_FILE_VM_SETTINGS.copy() + data_file["vm_settings"] = "hostgaplugin/vm_settings-agent_family_version.json" + with mock_wire_protocol(data_file) as protocol: + protocol.mock_wire_data.set_etag(888) + goal_state = GoalState(protocol.client) + families = goal_state.extensions_goal_state.agent_families + for family in families: + self.assertTrue(family.is_version_from_rsm, "is_version_from_rsm should be True") + + data_file = wire_protocol_data.DATA_FILE_VM_SETTINGS.copy() + data_file["vm_settings"] = "hostgaplugin/vm_settings-requested_version_properties_false.json" + with mock_wire_protocol(data_file) as protocol: + protocol.mock_wire_data.set_etag(888) + goal_state = GoalState(protocol.client) + families = goal_state.extensions_goal_state.agent_families + for family in families: + self.assertFalse(family.is_version_from_rsm, "is_version_from_rsm should be False") + + def test_it_should_parse_is_vm_enabled_for_rsm_upgrades_properly(self): + with mock_wire_protocol(wire_protocol_data.DATA_FILE_VM_SETTINGS) as protocol: + goal_state = GoalState(protocol.client) + families = goal_state.extensions_goal_state.agent_families + for family in families: + self.assertIsNone(family.is_vm_enabled_for_rsm_upgrades, "is_vm_enabled_for_rsm_upgrades should be None") + + data_file = wire_protocol_data.DATA_FILE_VM_SETTINGS.copy() + data_file["vm_settings"] = "hostgaplugin/vm_settings-agent_family_version.json" + with mock_wire_protocol(data_file) as protocol: + protocol.mock_wire_data.set_etag(888) + goal_state = GoalState(protocol.client) + families = goal_state.extensions_goal_state.agent_families 
+ for family in families: + self.assertTrue(family.is_vm_enabled_for_rsm_upgrades, "is_vm_enabled_for_rsm_upgrades should be True") + + data_file = wire_protocol_data.DATA_FILE_VM_SETTINGS.copy() + data_file["vm_settings"] = "hostgaplugin/vm_settings-requested_version_properties_false.json" + with mock_wire_protocol(data_file) as protocol: + protocol.mock_wire_data.set_etag(888) + goal_state = GoalState(protocol.client) + families = goal_state.extensions_goal_state.agent_families + for family in families: + self.assertFalse(family.is_vm_enabled_for_rsm_upgrades, "is_vm_enabled_for_rsm_upgrades should be False") def test_it_should_parse_missing_status_upload_blob_as_none(self): data_file = wire_protocol_data.DATA_FILE_VM_SETTINGS.copy() diff --git a/tests/data/hostgaplugin/ext_conf-requested_version.xml b/tests/data/hostgaplugin/ext_conf-agent_family_version.xml similarity index 97% rename from tests/data/hostgaplugin/ext_conf-requested_version.xml rename to tests/data/hostgaplugin/ext_conf-agent_family_version.xml index 48cc95cc9f..5c9e0028fe 100644 --- a/tests/data/hostgaplugin/ext_conf-requested_version.xml +++ b/tests/data/hostgaplugin/ext_conf-agent_family_version.xml @@ -4,6 +4,8 @@ Prod 9.9.9.10 + true + true https://zrdfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml https://ardfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml @@ -12,6 +14,8 @@ Test 9.9.9.10 + true + true https://zrdfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_uscentraleuap_manifest.xml https://ardfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_uscentraleuap_manifest.xml diff --git a/tests/data/hostgaplugin/ext_conf-rsm_version_properties_false.xml 
b/tests/data/hostgaplugin/ext_conf-rsm_version_properties_false.xml new file mode 100644 index 0000000000..e1f1d6ba8c --- /dev/null +++ b/tests/data/hostgaplugin/ext_conf-rsm_version_properties_false.xml @@ -0,0 +1,152 @@ + + + + + Prod + 9.9.9.10 + false + false + + https://zrdfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml + https://ardfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml + + + + Test + 9.9.9.10 + false + false + + https://zrdfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_uscentraleuap_manifest.xml + https://ardfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_uscentraleuap_manifest.xml + + + + CentralUSEUAP + CRP + + + + MultipleExtensionsPerHandler + + +https://dcrcl3a0xs.blob.core.windows.net/$system/edp0plkw2b.86f4ae0a-61f8-48ae-9199-40f402d56864.status?sv=2018-03-28&sr=b&sk=system-1&sig=KNWgC2%3d&se=9999-01-01T00%3a00%3a00Z&sp=w + + + + https://zrdfepirv2cbn09pr02a.blob.core.windows.net/a47f0806d764480a8d989d009c75007d/Microsoft.Azure.Monitor_AzureMonitorLinuxAgent_useast2euap_manifest.xml + + + + + https://zrdfepirv2cbn06prdstr01a.blob.core.windows.net/4ef06ad957494df49c807a5334f2b5d2/Microsoft.Azure.Security.Monitoring_AzureSecurityLinuxAgent_useast2euap_manifest.xml + + + + + https://umsanh4b5rfz0q0p4pwm.blob.core.windows.net/5237dd14-0aad-f051-0fad-1e33e1b63091/5237dd14-0aad-f051-0fad-1e33e1b63091_manifest.xml + + + + + https://umsawqtlsshtn5v2nfgh.blob.core.windows.net/f4086d41-69f9-3103-78e0-8a2c7e789d0f/f4086d41-69f9-3103-78e0-8a2c7e789d0f_manifest.xml + + + + + https://umsah3cwjlctnmhsvzqv.blob.core.windows.net/2bbece4f-0283-d415-b034-cc0adc6997a1/2bbece4f-0283-d415-b034-cc0adc6997a1_manifest.xml + + + + + + { + "runtimeSettings": [ + { + 
"handlerSettings": { + "protectedSettingsCertThumbprint": "BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F", + "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", + "publicSettings": {"GCS_AUTO_CONFIG":true} + } + } + ] +} + + + { + "runtimeSettings": [ + { + "handlerSettings": { + "protectedSettingsCertThumbprint": "BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F", + "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", + "publicSettings": {"enableGenevaUpload":true} + } + } + ] +} + + + + + + { + "runtimeSettings": [ + { + "handlerSettings": { + "publicSettings": {"commandToExecute":"echo 'cee174d4-4daa-4b07-9958-53b9649445c2'"} + } + } + ] +} + + + + + + + + + + { + "runtimeSettings": [ + { + "handlerSettings": { + "publicSettings": {"source":{"script":"echo '4abb1e88-f349-41f8-8442-247d9fdfcac5'"}} + } + } + ] +} + { + "runtimeSettings": [ + { + "handlerSettings": { + 
"publicSettings": {"source":{"script":"echo 'e865c9bc-a7b3-42c6-9a79-cfa98a1ee8b3'"}} + } + } + ] +} + { + "runtimeSettings": [ + { + "handlerSettings": { + "publicSettings": {"source":{"script":"echo 'f923e416-0340-485c-9243-8b84fb9930c6'"}} + } + } + ] +} + + + { + "runtimeSettings": [ + { + "handlerSettings": { + "protectedSettingsCertThumbprint": "59A10F50FFE2A0408D3F03FE336C8FD5716CF25C", + "protectedSettings": "*** REDACTED ***" + } + } + ] +} + + +https://dcrcl3a0xs.blob.core.windows.net/$system/edp0plkw2b.86f4ae0a-61f8-48ae-9199-40f402d56864.vmSettings?sv=2018-03-28&sr=b&sk=system-1&sig=PaiLic%3d&se=9999-01-01T00%3a00%3a00Z&sp=r + diff --git a/tests/data/hostgaplugin/vm_settings-requested_version.json b/tests/data/hostgaplugin/vm_settings-agent_family_version.json similarity index 97% rename from tests/data/hostgaplugin/vm_settings-requested_version.json rename to tests/data/hostgaplugin/vm_settings-agent_family_version.json index 0f73cb255e..734cc8147b 100644 --- a/tests/data/hostgaplugin/vm_settings-requested_version.json +++ b/tests/data/hostgaplugin/vm_settings-agent_family_version.json @@ -29,6 +29,8 @@ { "name": "Prod", "version": "9.9.9.9", + "isVersionFromRSM": true, + "isVMEnabledForRSMUpgrades": true, "uris": [ "https://zrdfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml", "https://ardfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml" @@ -37,6 +39,8 @@ { "name": "Test", "version": "9.9.9.9", + "isVersionFromRSM": true, + "isVMEnabledForRSMUpgrades": true, "uris": [ "https://zrdfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_uscentraleuap_manifest.xml", "https://ardfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_uscentraleuap_manifest.xml" diff --git 
a/tests/data/hostgaplugin/vm_settings-requested_version_properties_false.json b/tests/data/hostgaplugin/vm_settings-requested_version_properties_false.json new file mode 100644 index 0000000000..3a6eb8b1a5 --- /dev/null +++ b/tests/data/hostgaplugin/vm_settings-requested_version_properties_false.json @@ -0,0 +1,145 @@ +{ + "hostGAPluginVersion": "1.0.8.133", + "vmSettingsSchemaVersion": "0.0", + "activityId": "a33f6f53-43d6-4625-b322-1a39651a00c9", + "correlationId": "9a47a2a2-e740-4bfc-b11b-4f2f7cfe7d2e", + "inSvdSeqNo": 1, + "extensionsLastModifiedTickCount": 637726699999999999, + "extensionGoalStatesSource": "FastTrack", + "onHold": true, + "statusUploadBlob": { + "statusBlobType": "BlockBlob", + "value": "https://dcrcl3a0xs.blob.core.windows.net/$system/edp0plkw2b.86f4ae0a-61f8-48ae-9199-40f402d56864.status?sv=2018-03-28&sr=b&sk=system-1&sig=KNWgC2%3d&se=9999-01-01T00%3a00%3a00Z&sp=w" + }, + "inVMMetadata": { + "subscriptionId": "8e037ad4-618f-4466-8bc8-5099d41ac15b", + "resourceGroupName": "rg-dc-86fjzhp", + "vmName": "edp0plkw2b", + "location": "CentralUSEUAP", + "vmId": "86f4ae0a-61f8-48ae-9199-40f402d56864", + "vmSize": "Standard_B2s", + "osType": "Linux" + }, + "requiredFeatures": [ + { + "name": "MultipleExtensionsPerHandler" + } + ], + "gaFamilies": [ + { + "name": "Prod", + "version": "9.9.9.9", + "isVersionFromRSM": false, + "isVMEnabledForRSMUpgrades": false, + "uris": [ + "https://zrdfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml", + "https://ardfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Prod_uscentraleuap_manifest.xml" + ] + }, + { + "name": "Test", + "version": "9.9.9.9", + "isVersionFromRSM": false, + "isVMEnabledForRSMUpgrades": false, + "uris": [ + "https://zrdfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_uscentraleuap_manifest.xml", + 
"https://ardfepirv2cdm03prdstr01a.blob.core.windows.net/7d89d439b79f4452950452399add2c90/Microsoft.OSTCLinuxAgent_Test_uscentraleuap_manifest.xml" + ] + } + ], + "extensionGoalStates": [ + { + "name": "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent", + "version": "1.9.1", + "location": "https://zrdfepirv2cbn04prdstr01a.blob.core.windows.net/a47f0806d764480a8d989d009c75007d/Microsoft.Azure.Monitor_AzureMonitorLinuxAgent_useast2euap_manifest.xml", + "state": "enabled", + "autoUpgrade": true, + "runAsStartupTask": false, + "isJson": true, + "useExactVersion": true, + "settingsSeqNo": 0, + "settings": [ + { + "protectedSettingsCertThumbprint": "BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F", + "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", + "publicSettings": "{\"GCS_AUTO_CONFIG\":true}" + } + ] + }, + { + "name": "Microsoft.Azure.Security.Monitoring.AzureSecurityLinuxAgent", + "version": "2.15.112", + "location": "https://zrdfepirv2cbn04prdstr01a.blob.core.windows.net/4ef06ad957494df49c807a5334f2b5d2/Microsoft.Azure.Security.Monitoring_AzureSecurityLinuxAgent_useast2euap_manifest.xml", + "state": "enabled", + "autoUpgrade": true, + "runAsStartupTask": false, + "isJson": true, + "useExactVersion": true, + "settingsSeqNo": 0, + "settings": [ + { + "protectedSettingsCertThumbprint": "BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F", + "protectedSettings": 
"MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", + "publicSettings": "{\"enableGenevaUpload\":true}" + } + ] + }, + { + "name": "Microsoft.Azure.Extensions.CustomScript", + "version": "2.1.6", + "location": "https://umsavwggj2v40kvqhc0w.blob.core.windows.net/5237dd14-0aad-f051-0fad-1e33e1b63091/5237dd14-0aad-f051-0fad-1e33e1b63091_manifest.xml", + "failoverlocation": "https://umsafwzhkbm1rfrhl0ws.blob.core.windows.net/5237dd14-0aad-f051-0fad-1e33e1b63091/5237dd14-0aad-f051-0fad-1e33e1b63091_manifest.xml", + "additionalLocations": [ + "https://umsanh4b5rfz0q0p4pwm.blob.core.windows.net/5237dd14-0aad-f051-0fad-1e33e1b63091/5237dd14-0aad-f051-0fad-1e33e1b63091_manifest.xml" + ], + "state": "enabled", + "autoUpgrade": true, + "runAsStartupTask": false, + "isJson": true, + "useExactVersion": true, + "settingsSeqNo": 0, + "isMultiConfig": false, + "settings": [ + { + "publicSettings": "{\"commandToExecute\":\"echo 'cee174d4-4daa-4b07-9958-53b9649445c2'\"}" + } + ] + }, + { + "name": "Microsoft.CPlat.Core.RunCommandHandlerLinux", + "version": "1.2.0", + "location": "https://umsavbvncrpzbnxmxzmr.blob.core.windows.net/f4086d41-69f9-3103-78e0-8a2c7e789d0f/f4086d41-69f9-3103-78e0-8a2c7e789d0f_manifest.xml", + "failoverlocation": "https://umsajbjtqrb3zqjvgb2z.blob.core.windows.net/f4086d41-69f9-3103-78e0-8a2c7e789d0f/f4086d41-69f9-3103-78e0-8a2c7e789d0f_manifest.xml", + "additionalLocations": [ + 
"https://umsawqtlsshtn5v2nfgh.blob.core.windows.net/f4086d41-69f9-3103-78e0-8a2c7e789d0f/f4086d41-69f9-3103-78e0-8a2c7e789d0f_manifest.xml" + ], + "state": "enabled", + "autoUpgrade": true, + "runAsStartupTask": false, + "isJson": true, + "useExactVersion": true, + "settingsSeqNo": 0, + "isMultiConfig": true, + "settings": [ + { + "publicSettings": "{\"source\":{\"script\":\"echo '4abb1e88-f349-41f8-8442-247d9fdfcac5'\"}}", + "seqNo": 0, + "extensionName": "MCExt1", + "extensionState": "enabled" + }, + { + "publicSettings": "{\"source\":{\"script\":\"echo 'e865c9bc-a7b3-42c6-9a79-cfa98a1ee8b3'\"}}", + "seqNo": 0, + "extensionName": "MCExt2", + "extensionState": "enabled" + }, + { + "publicSettings": "{\"source\":{\"script\":\"echo 'f923e416-0340-485c-9243-8b84fb9930c6'\"}}", + "seqNo": 0, + "extensionName": "MCExt3", + "extensionState": "enabled" + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/data/wire/ext_conf_requested_version.xml b/tests/data/wire/ext_conf_rsm_version.xml similarity index 89% rename from tests/data/wire/ext_conf_requested_version.xml rename to tests/data/wire/ext_conf_rsm_version.xml index d12352c297..806063541a 100644 --- a/tests/data/wire/ext_conf_requested_version.xml +++ b/tests/data/wire/ext_conf_rsm_version.xml @@ -3,6 +3,8 @@ Prod 9.9.9.10 + True + True http://mock-goal-state/manifest_of_ga.xml @@ -10,6 +12,8 @@ Test 9.9.9.10 + True + True http://mock-goal-state/manifest_of_ga.xml diff --git a/tests/data/wire/ext_conf_version_missing_in_agent_family.xml b/tests/data/wire/ext_conf_version_missing_in_agent_family.xml new file mode 100644 index 0000000000..3f81ed1195 --- /dev/null +++ b/tests/data/wire/ext_conf_version_missing_in_agent_family.xml @@ -0,0 +1,31 @@ + + + + Prod + True + True + + http://mock-goal-state/manifest_of_ga.xml + + + + Test + True + True + + http://mock-goal-state/manifest_of_ga.xml + + + + + + + + + + 
{"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + + + +https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo + diff --git a/tests/data/wire/ext_conf_requested_version_missing_in_manifest.xml b/tests/data/wire/ext_conf_version_missing_in_manifest.xml similarity index 89% rename from tests/data/wire/ext_conf_requested_version_missing_in_manifest.xml rename to tests/data/wire/ext_conf_version_missing_in_manifest.xml index 84043e2d75..c750d5d3a2 100644 --- a/tests/data/wire/ext_conf_requested_version_missing_in_manifest.xml +++ b/tests/data/wire/ext_conf_version_missing_in_manifest.xml @@ -4,6 +4,8 @@ Prod 5.2.1.0 + True + True http://mock-goal-state/manifest_of_ga.xml @@ -11,6 +13,8 @@ Test 5.2.1.0 + True + True http://mock-goal-state/manifest_of_ga.xml diff --git a/tests/data/wire/ext_conf_version_not_from_rsm.xml b/tests/data/wire/ext_conf_version_not_from_rsm.xml new file mode 100644 index 0000000000..9da8f5da72 --- /dev/null +++ b/tests/data/wire/ext_conf_version_not_from_rsm.xml @@ -0,0 +1,33 @@ + + + + Prod + 9.9.9.10 + False + True + + http://mock-goal-state/manifest_of_ga.xml + + + + Test + 9.9.9.10 + False + True + + http://mock-goal-state/manifest_of_ga.xml + + + + + + + + + + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + + + +https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo + diff --git a/tests/data/wire/ext_conf_vm_not_enabled_for_rsm_upgrades.xml b/tests/data/wire/ext_conf_vm_not_enabled_for_rsm_upgrades.xml new file mode 100644 index 
0000000000..384723f461 --- /dev/null +++ b/tests/data/wire/ext_conf_vm_not_enabled_for_rsm_upgrades.xml @@ -0,0 +1,33 @@ + + + + Prod + 9.9.9.10 + False + False + + http://mock-goal-state/manifest_of_ga.xml + + + + Test + 9.9.9.10 + False + False + + http://mock-goal-state/manifest_of_ga.xml + + + + + + + + + + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + + + +https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo + diff --git a/tests/ga/test_agent_update_handler.py b/tests/ga/test_agent_update_handler.py index d91cbb8019..6c069bfc84 100644 --- a/tests/ga/test_agent_update_handler.py +++ b/tests/ga/test_agent_update_handler.py @@ -11,7 +11,6 @@ from azurelinuxagent.common.protocol.util import ProtocolUtil from azurelinuxagent.common.version import CURRENT_VERSION from azurelinuxagent.ga.agent_update_handler import get_agent_update_handler -from azurelinuxagent.ga.guestagent import GAUpdateReportState from tests.ga.test_update import UpdateTestCase from tests.lib.http_request_predicates import HttpRequestPredicates from tests.lib.mock_wire_protocol import mock_wire_protocol, MockHttpResponse @@ -28,7 +27,7 @@ def setUp(self): clear_singleton_instances(ProtocolUtil) @contextlib.contextmanager - def __get_agent_update_handler(self, test_data=None, autoupdate_frequency=0.001, autoupdate_enabled=True, protocol_get_error=False): + def _get_agent_update_handler(self, test_data=None, autoupdate_frequency=0.001, autoupdate_enabled=True, protocol_get_error=False): # Default to DATA_FILE of test_data parameter raises the pylint warning # W0102: Dangerous default value DATA_FILE (builtins.dict) as argument (dangerous-default-value) test_data = DATA_FILE if test_data is None else test_data @@ -58,74 +57,84 @@ def 
put_handler(url, *args, **_): with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=autoupdate_frequency): with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"): with patch("azurelinuxagent.common.conf.get_enable_ga_versioning", return_value=True): - with patch("azurelinuxagent.ga.agent_update_handler.add_event") as mock_telemetry: + with patch("azurelinuxagent.common.event.EventLogger.add_event") as mock_telemetry: agent_update_handler = get_agent_update_handler(protocol) agent_update_handler._protocol = protocol yield agent_update_handler, mock_telemetry - - def __assert_agent_directories_available(self, versions): + def _assert_agent_directories_available(self, versions): for version in versions: self.assertTrue(os.path.exists(self.agent_dir(version)), "Agent directory {0} not found".format(version)) - def __assert_agent_directories_exist_and_others_dont_exist(self, versions): - self.__assert_agent_directories_available(versions=versions) + def _assert_agent_directories_exist_and_others_dont_exist(self, versions): + self._assert_agent_directories_available(versions=versions) other_agents = [agent_dir for agent_dir in self.agent_dirs() if agent_dir not in [self.agent_dir(version) for version in versions]] self.assertFalse(any(other_agents), "All other agents should be purged from agent dir: {0}".format(other_agents)) - def __assert_agent_requested_version_in_goal_state(self, mock_telemetry, inc=1, version="9.9.9.10"): + def _assert_agent_rsm_version_in_goal_state(self, mock_telemetry, inc=1, version="9.9.9.10"): upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if - 'discovered new agent version:{0} in agent manifest for goal state incarnation_{1}'.format(version, inc) in kwarg['message'] and kwarg[ + 'New agent version:{0} requested by RSM in Goal state incarnation_{1}'.format(version, inc) in kwarg['message'] and kwarg[ 'op'] == WALAEventOperation.AgentUpgrade] 
self.assertEqual(1, len(upgrade_event_msgs), "Did not find the event indicating that the agent requested version found. Got: {0}".format( mock_telemetry.call_args_list)) - def __assert_no_agent_package_telemetry_emitted(self, mock_telemetry, version="9.9.9.10"): + def _assert_update_discovered_from_agent_manifest(self, mock_telemetry, inc=1, version="9.9.9.10"): + upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + 'Self-update discovered new agent version:{0} in agent manifest for goal state incarnation_{1}'.format(version, inc) in kwarg['message'] and kwarg[ + 'op'] == WALAEventOperation.AgentUpgrade] + self.assertEqual(1, len(upgrade_event_msgs), + "Did not find the event indicating that the new version found. Got: {0}".format( + mock_telemetry.call_args_list)) + + def _assert_no_agent_package_telemetry_emitted(self, mock_telemetry, version="9.9.9.10"): upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if - 'No matching package found in the agent manifest for requested version: {0}'.format(version) in kwarg['message'] and kwarg[ + 'No matching package found in the agent manifest for version: {0}'.format(version) in kwarg['message'] and kwarg[ 'op'] == WALAEventOperation.AgentUpgrade] self.assertEqual(1, len(upgrade_event_msgs), "Did not find the event indicating that the agent package not found. 
Got: {0}".format( mock_telemetry.call_args_list)) + def _assert_agent_exit_process_telemetry_emitted(self, message): + self.assertIn("Agent completed all update checks, exiting current process", message) + def test_it_should_not_update_when_autoupdate_disabled(self): self.prepare_agents(count=1) - with self.__get_agent_update_handler(autoupdate_enabled=False) as (agent_update_handler, mock_telemetry): - agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) - self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)]) + with self._get_agent_update_handler(autoupdate_enabled=False) as (agent_update_handler, mock_telemetry): + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) + self._assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)]) self.assertEqual(0, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if "requesting a new agent version" in kwarg['message'] and kwarg[ - 'op'] == WALAEventOperation.AgentUpgrade]), "should not check for requested version") + 'op'] == WALAEventOperation.AgentUpgrade]), "should not check for rsm version") def test_it_should_update_to_largest_version_if_ga_versioning_disabled(self): self.prepare_agents(count=1) data_file = DATA_FILE.copy() - data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" - with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + data_file["ext_conf"] = "wire/ext_conf_rsm_version.xml" + with self._get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): with patch.object(conf, "get_enable_ga_versioning", return_value=False): with self.assertRaises(AgentUpgradeExitException) as context: - agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) - self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version="99999.0.0.0") - 
self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"]) - self.assertIn("Agent update found, exiting current process", ustr(context.exception.reason)) + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) + self._assert_update_discovered_from_agent_manifest(mock_telemetry, version="99999.0.0.0") + self._assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"]) + self._assert_agent_exit_process_telemetry_emitted(ustr(context.exception.reason)) - def test_it_should_update_to_largest_version_if_time_window_not_elapsed(self): + def test_it_should_not_update_to_largest_version_if_time_window_not_elapsed(self): self.prepare_agents(count=1) data_file = DATA_FILE.copy() data_file["ga_manifest"] = "wire/ga_manifest_no_uris.xml" - with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, _): - agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + with self._get_agent_update_handler(test_data=data_file, autoupdate_frequency=10) as (agent_update_handler, _): + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), "New agent directory should not be found") agent_update_handler._protocol.mock_wire_data.set_ga_manifest("wire/ga_manifest.xml") agent_update_handler._protocol.mock_wire_data.set_incarnation(2) agent_update_handler._protocol.client.update_goal_state() - agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), "New agent directory should not be found") @@ -134,68 +143,50 @@ def test_it_should_update_to_largest_version_if_time_window_elapsed(self): data_file = DATA_FILE.copy() data_file["ga_manifest"] = "wire/ga_manifest_no_uris.xml" - with 
patch("azurelinuxagent.common.conf.get_hotfix_upgrade_frequency", return_value=0.001): - with patch("azurelinuxagent.common.conf.get_normal_upgrade_frequency", return_value=0.001): - with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + with patch("azurelinuxagent.common.conf.get_self_update_hotfix_frequency", return_value=0.001): + with patch("azurelinuxagent.common.conf.get_self_update_regular_frequency", return_value=0.001): + with self._get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): with self.assertRaises(AgentUpgradeExitException) as context: - agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), "New agent directory should not be found") agent_update_handler._protocol.mock_wire_data.set_ga_manifest("wire/ga_manifest.xml") agent_update_handler._protocol.mock_wire_data.set_incarnation(2) agent_update_handler._protocol.client.update_goal_state() - agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) - self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version="99999.0.0.0") - self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"]) - self.assertIn("Agent update found, exiting current process", ustr(context.exception.reason)) + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) + self._assert_update_discovered_from_agent_manifest(mock_telemetry, inc=2, version="99999.0.0.0") + self._assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"]) + self._assert_agent_exit_process_telemetry_emitted(ustr(context.exception.reason)) def test_it_should_not_allow_update_if_largest_version_below_current_version(self): self.prepare_agents(count=1) data_file = 
DATA_FILE.copy() data_file["ga_manifest"] = "wire/ga_manifest_no_upgrade.xml" - with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, _): - agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) - self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)]) - - def test_it_should_not_agent_update_if_last_attempted_update_time_not_elapsed(self): - self.prepare_agents(count=1) - data_file = DATA_FILE.copy() - data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" - version = "5.2.0.1" - with self.__get_agent_update_handler(test_data=data_file, autoupdate_frequency=10) as (agent_update_handler, mock_telemetry): - agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version(version) - agent_update_handler._protocol.mock_wire_data.set_incarnation(2) - agent_update_handler._protocol.client.update_goal_state() - agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) - - self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version=version) - self.__assert_no_agent_package_telemetry_emitted(mock_telemetry, version=version) - # Now we shouldn't check for download if update not allowed.This run should not add new logs - agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) - self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version=version) - self.__assert_no_agent_package_telemetry_emitted(mock_telemetry, version=version) + with self._get_agent_update_handler(test_data=data_file) as (agent_update_handler, _): + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) + self._assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)]) - def test_it_should_update_to_largest_version_if_requested_version_not_available(self): + def test_it_should_update_to_largest_version_if_rsm_version_not_available(self): self.prepare_agents(count=1) 
data_file = DATA_FILE.copy() data_file['ext_conf'] = "wire/ext_conf.xml" - with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + with self._get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): with self.assertRaises(AgentUpgradeExitException) as context: - agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) - self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version="99999.0.0.0") - self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"]) - self.assertIn("Agent update found, exiting current process", ustr(context.exception.reason)) + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) + self._assert_update_discovered_from_agent_manifest(mock_telemetry, version="99999.0.0.0") + self._assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"]) + self._assert_agent_exit_process_telemetry_emitted(ustr(context.exception.reason)) def test_it_should_not_download_manifest_again_if_last_attempted_download_time_not_elapsed(self): self.prepare_agents(count=1) data_file = DATA_FILE.copy() data_file['ext_conf'] = "wire/ext_conf.xml" - with self.__get_agent_update_handler(test_data=data_file, autoupdate_frequency=10, protocol_get_error=True) as (agent_update_handler, _): + with self._get_agent_update_handler(test_data=data_file, autoupdate_frequency=10, protocol_get_error=True) as (agent_update_handler, _): # making multiple agent update attempts - agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) - agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) - agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) 
+ agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) mock_wire_data = agent_update_handler._protocol.mock_wire_data self.assertEqual(1, mock_wire_data.call_counts['manifest_of_ga.xml'], "Agent manifest should not be downloaded again") @@ -205,53 +196,53 @@ def test_it_should_download_manifest_if_last_attempted_download_time_is_elapsed( data_file = DATA_FILE.copy() data_file['ext_conf'] = "wire/ext_conf.xml" - with self.__get_agent_update_handler(test_data=data_file, autoupdate_frequency=0.00001, protocol_get_error=True) as (agent_update_handler, _): + with self._get_agent_update_handler(test_data=data_file, autoupdate_frequency=0.00001, protocol_get_error=True) as (agent_update_handler, _): # making multiple agent update attempts - agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) - agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) - agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) mock_wire_data = agent_update_handler._protocol.mock_wire_data self.assertEqual(3, mock_wire_data.call_counts['manifest_of_ga.xml'], "Agent manifest should be downloaded in all attempts") - def test_it_should_not_agent_update_if_requested_version_is_same_as_current_version(self): + def test_it_should_not_agent_update_if_rsm_version_is_same_as_current_version(self): data_file = DATA_FILE.copy() - data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + data_file["ext_conf"] = "wire/ext_conf_rsm_version.xml" # Set the test environment by adding 20 random agents to the agent directory self.prepare_agents() self.assertEqual(20, self.agent_count(), "Agent directories not set properly") - with self.__get_agent_update_handler(test_data=data_file) 
as (agent_update_handler, mock_telemetry): - agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version( + with self._get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + agent_update_handler._protocol.mock_wire_data.set_version_in_agent_family( str(CURRENT_VERSION)) agent_update_handler._protocol.mock_wire_data.set_incarnation(2) agent_update_handler._protocol.client.update_goal_state() - agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) self.assertEqual(0, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if "requesting a new agent version" in kwarg['message'] and kwarg[ - 'op'] == WALAEventOperation.AgentUpgrade]), "requested version should be same as current version") + 'op'] == WALAEventOperation.AgentUpgrade]), "rsm version should be same as current version") self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), "New agent directory should not be found") - def test_it_should_upgrade_agent_if_requested_version_is_available_greater_than_current_version(self): + def test_it_should_upgrade_agent_if_rsm_version_is_available_greater_than_current_version(self): data_file = DATA_FILE.copy() - data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + data_file["ext_conf"] = "wire/ext_conf_rsm_version.xml" # Set the test environment by adding 20 random agents to the agent directory self.prepare_agents() self.assertEqual(20, self.agent_count(), "Agent directories not set properly") - with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + with self._get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): with self.assertRaises(AgentUpgradeExitException) as context: - agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) - 
self.__assert_agent_requested_version_in_goal_state(mock_telemetry, version="9.9.9.10") - self.__assert_agent_directories_exist_and_others_dont_exist(versions=["9.9.9.10", str(CURRENT_VERSION)]) - self.assertIn("Agent update found, exiting current process", ustr(context.exception.reason)) + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) + self._assert_agent_rsm_version_in_goal_state(mock_telemetry, version="9.9.9.10") + self._assert_agent_directories_exist_and_others_dont_exist(versions=["9.9.9.10", str(CURRENT_VERSION)]) + self._assert_agent_exit_process_telemetry_emitted(ustr(context.exception.reason)) - def test_it_should_downgrade_agent_if_requested_version_is_available_less_than_current_version(self): + def test_it_should_downgrade_agent_if_rsm_version_is_available_less_than_current_version(self): data_file = DATA_FILE.copy() - data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + data_file["ext_conf"] = "wire/ext_conf_rsm_version.xml" # Set the test environment by adding 20 random agents to the agent directory self.prepare_agents() @@ -259,20 +250,38 @@ def test_it_should_downgrade_agent_if_requested_version_is_available_less_than_c downgraded_version = "2.5.0" - with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): - agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version(downgraded_version) + with self._get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + agent_update_handler._protocol.mock_wire_data.set_version_in_agent_family(downgraded_version) agent_update_handler._protocol.mock_wire_data.set_incarnation(2) agent_update_handler._protocol.client.update_goal_state() with self.assertRaises(AgentUpgradeExitException) as context: - agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) - self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, 
version=downgraded_version) - self.__assert_agent_directories_exist_and_others_dont_exist( + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) + self._assert_agent_rsm_version_in_goal_state(mock_telemetry, inc=2, version=downgraded_version) + self._assert_agent_directories_exist_and_others_dont_exist( versions=[downgraded_version, str(CURRENT_VERSION)]) - self.assertIn("Agent update found, exiting current process", ustr(context.exception.reason)) + self._assert_agent_exit_process_telemetry_emitted(ustr(context.exception.reason)) + + def test_it_should_not_do_rsm_update_if_gs_not_updated_in_next_attempt(self): + self.prepare_agents(count=1) + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_rsm_version.xml" + version = "5.2.0.1" + with self._get_agent_update_handler(test_data=data_file, autoupdate_frequency=10) as (agent_update_handler, mock_telemetry): + agent_update_handler._protocol.mock_wire_data.set_version_in_agent_family(version) + agent_update_handler._protocol.mock_wire_data.set_incarnation(2) + agent_update_handler._protocol.client.update_goal_state() + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) + + self._assert_agent_rsm_version_in_goal_state(mock_telemetry, inc=2, version=version) + self._assert_no_agent_package_telemetry_emitted(mock_telemetry, version=version) + # Now we shouldn't check for download if update not allowed(GS not updated).This run should not add new logs + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), False) + self._assert_agent_rsm_version_in_goal_state(mock_telemetry, inc=2, version=version) + self._assert_no_agent_package_telemetry_emitted(mock_telemetry, version=version) def test_it_should_not_downgrade_below_daemon_version(self): data_file = DATA_FILE.copy() - data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + data_file["ext_conf"] = "wire/ext_conf_rsm_version.xml" # Set the test environment by adding 20 
random agents to the agent directory self.prepare_agents() @@ -280,21 +289,45 @@ def test_it_should_not_downgrade_below_daemon_version(self): downgraded_version = "1.2.0" - with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): - agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version(downgraded_version) + with self._get_agent_update_handler(test_data=data_file) as (agent_update_handler, _): + agent_update_handler._protocol.mock_wire_data.set_version_in_agent_family(downgraded_version) agent_update_handler._protocol.mock_wire_data.set_incarnation(2) agent_update_handler._protocol.client.update_goal_state() - agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) self.assertFalse(os.path.exists(self.agent_dir(downgraded_version)), "New agent directory should not be found") - self.assertEqual(1, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if - "The Agent received a request to downgrade to version" in kwarg[ - 'message'] and kwarg[ - 'op'] == WALAEventOperation.AgentUpgrade]), "We should allow downgrade above daemon version") - def test_handles_if_requested_version_not_found_in_pkgs_to_download(self): + def test_it_should_update_to_largest_version_if_vm_not_enabled_for_rsm_upgrades(self): + self.prepare_agents(count=1) + + data_file = DATA_FILE.copy() + data_file['ext_conf'] = "wire/ext_conf_vm_not_enabled_for_rsm_upgrades.xml" + with self._get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + with self.assertRaises(AgentUpgradeExitException) as context: + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) + self._assert_update_discovered_from_agent_manifest(mock_telemetry, version="99999.0.0.0") + self._assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"]) + 
self._assert_agent_exit_process_telemetry_emitted(ustr(context.exception.reason)) + + def test_it_should_not_update_to_version_if_version_not_from_rsm(self): + self.prepare_agents(count=1) data_file = DATA_FILE.copy() - data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + data_file["ext_conf"] = "wire/ext_conf_version_not_from_rsm.xml" + downgraded_version = "2.5.0" + + with self._get_agent_update_handler(test_data=data_file) as (agent_update_handler, _): + agent_update_handler._protocol.mock_wire_data.set_version_in_agent_family(downgraded_version) + agent_update_handler._protocol.mock_wire_data.set_incarnation(2) + agent_update_handler._protocol.client.update_goal_state() + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) + self._assert_agent_directories_exist_and_others_dont_exist( + versions=[str(CURRENT_VERSION)]) + self.assertFalse(os.path.exists(self.agent_dir(downgraded_version)), + "New agent directory should not be found") + + def test_handles_if_rsm_version_not_found_in_pkgs_to_download(self): + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_rsm_version.xml" # Set the test environment by adding 20 random agents to the agent directory self.prepare_agents() @@ -302,17 +335,17 @@ def test_handles_if_requested_version_not_found_in_pkgs_to_download(self): version = "5.2.0.4" - with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): - agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version(version) + with self._get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + agent_update_handler._protocol.mock_wire_data.set_version_in_agent_family(version) agent_update_handler._protocol.mock_wire_data.set_incarnation(2) agent_update_handler._protocol.client.update_goal_state() - agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + 
agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) - self.__assert_agent_requested_version_in_goal_state(mock_telemetry, inc=2, version=version) + self._assert_agent_rsm_version_in_goal_state(mock_telemetry, inc=2, version=version) self.assertFalse(os.path.exists(self.agent_dir(version)), "New agent directory should not be found") - self.__assert_no_agent_package_telemetry_emitted(mock_telemetry, version=version) + self._assert_no_agent_package_telemetry_emitted(mock_telemetry, version=version) def test_handles_missing_agent_family(self): data_file = DATA_FILE.copy() @@ -322,8 +355,8 @@ def test_handles_missing_agent_family(self): self.prepare_agents() self.assertEqual(20, self.agent_count(), "Agent directories not set properly") - with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): - agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + with self._get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), "New agent directory should not be found") @@ -335,15 +368,14 @@ def test_handles_missing_agent_family(self): def test_it_should_report_update_status_with_success(self): data_file = DATA_FILE.copy() - data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + data_file["ext_conf"] = "wire/ext_conf_rsm_version.xml" - with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, _): - GAUpdateReportState.report_error_msg = "" - agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version( + with self._get_agent_update_handler(test_data=data_file) as (agent_update_handler, _): + agent_update_handler._protocol.mock_wire_data.set_version_in_agent_family( str(CURRENT_VERSION)) agent_update_handler._protocol.mock_wire_data.set_incarnation(2) 
agent_update_handler._protocol.client.update_goal_state() - agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) vm_agent_update_status = agent_update_handler.get_vmagent_update_status() self.assertEqual(VMAgentUpdateStatuses.Success, vm_agent_update_status.status) self.assertEqual(0, vm_agent_update_status.code) @@ -351,28 +383,26 @@ def test_it_should_report_update_status_with_success(self): def test_it_should_report_update_status_with_error_on_download_fail(self): data_file = DATA_FILE.copy() - data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + data_file["ext_conf"] = "wire/ext_conf_rsm_version.xml" - with self.__get_agent_update_handler(test_data=data_file, protocol_get_error=True) as (agent_update_handler, _): - GAUpdateReportState.report_error_msg = "" - agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + with self._get_agent_update_handler(test_data=data_file, protocol_get_error=True) as (agent_update_handler, _): + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) vm_agent_update_status = agent_update_handler.get_vmagent_update_status() self.assertEqual(VMAgentUpdateStatuses.Error, vm_agent_update_status.status) self.assertEqual(1, vm_agent_update_status.code) self.assertEqual("9.9.9.10", vm_agent_update_status.expected_version) - self.assertIn("Unable to download Agent", vm_agent_update_status.message) + self.assertIn("Downloaded agent version is in bad state", vm_agent_update_status.message) - def test_it_should_report_update_status_with_missing_requested_version_error(self): + def test_it_should_report_update_status_with_missing_rsm_version_error(self): data_file = DATA_FILE.copy() - data_file['ext_conf'] = "wire/ext_conf.xml" + data_file['ext_conf'] = "wire/ext_conf_version_missing_in_agent_family.xml" - with self.__get_agent_update_handler(test_data=data_file, protocol_get_error=True) 
as (agent_update_handler, _): - GAUpdateReportState.report_error_msg = "" - agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + with self._get_agent_update_handler(test_data=data_file, protocol_get_error=True) as (agent_update_handler, _): + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) vm_agent_update_status = agent_update_handler.get_vmagent_update_status() self.assertEqual(VMAgentUpdateStatuses.Error, vm_agent_update_status.status) self.assertEqual(1, vm_agent_update_status.code) - self.assertIn("Missing requested version", vm_agent_update_status.message) + self.assertIn("missing version property. So, skipping agent update", vm_agent_update_status.message) def test_it_should_not_log_same_error_next_hours(self): data_file = DATA_FILE.copy() @@ -382,8 +412,8 @@ def test_it_should_not_log_same_error_next_hours(self): self.prepare_agents() self.assertEqual(20, self.agent_count(), "Agent directories not set properly") - with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): - agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + with self._get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), "New agent directory should not be found") @@ -393,9 +423,29 @@ def test_it_should_not_log_same_error_next_hours(self): 'message'] and kwarg[ 'op'] == WALAEventOperation.AgentUpgrade]), "Agent manifest should not be in GS") - agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) self.assertEqual(1, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if "No manifest links found for agent family" in kwarg[ 'message'] and kwarg[ - 'op'] == 
WALAEventOperation.AgentUpgrade]), "Agent manifest should not be in GS") \ No newline at end of file + 'op'] == WALAEventOperation.AgentUpgrade]), "Agent manifest should not be in GS") + + def test_it_should_save_rsm_state_of_the_most_recent_goal_state(self): + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_rsm_version.xml" + + with self._get_agent_update_handler(test_data=data_file) as (agent_update_handler, _): + with self.assertRaises(AgentUpgradeExitException): + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) + + state_file = os.path.join(conf.get_lib_dir(), "rsm_update.json") + self.assertTrue(os.path.exists(state_file), "The rsm state file was not saved (can't find {0})".format(state_file)) + + # check if state gets updated if most recent goal state has different values + agent_update_handler._protocol.mock_wire_data.set_extension_config_is_vm_enabled_for_rsm_upgrades("False") + agent_update_handler._protocol.mock_wire_data.set_incarnation(2) + agent_update_handler._protocol.client.update_goal_state() + with self.assertRaises(AgentUpgradeExitException): + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) + + self.assertFalse(os.path.exists(state_file), "The rsm file should be removed (file: {0})".format(state_file)) diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 8bac67746c..286dfb0b70 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -21,7 +21,7 @@ from datetime import datetime, timedelta from threading import current_thread from azurelinuxagent.ga.guestagent import GuestAgent, GuestAgentError, \ - AGENT_ERROR_FILE, GAUpdateReportState + AGENT_ERROR_FILE from tests.common.osutil.test_default import TestOSUtil import azurelinuxagent.common.osutil.default as osutil @@ -1268,12 +1268,11 @@ def put_handler(url, *args, **_): protocol.aggregate_status = json.loads(args[0]) return MockHttpResponse(status=201) - def 
update_goal_state_and_run_handler(autoupdate_enabled = True): + def update_goal_state_and_run_handler(autoupdate_enabled=True): protocol.incarnation += 1 protocol.mock_wire_data.set_incarnation(protocol.incarnation) self._add_write_permission_to_goal_state_files() with _get_update_handler(iterations=1, protocol=protocol, autoupdate_enabled=autoupdate_enabled) as (update_handler, _): - GAUpdateReportState.report_error_msg = "" update_handler.run(debug=True) self.assertEqual(0, update_handler.get_exit_code(), "Exit code should be 0; List of all warnings logged by the agent: {0}".format( @@ -1281,20 +1280,19 @@ def update_goal_state_and_run_handler(autoupdate_enabled = True): protocol.set_http_handlers(http_get_handler=get_handler, http_put_handler=put_handler) - # Case 1: Requested version removed in GS; report missing requested version errr - protocol.mock_wire_data.set_extension_config("wire/ext_conf.xml") - protocol.mock_wire_data.reload() + # Case 1: rsm version missing in GS when vm opt-in for rsm upgrades; report missing rsm version error + protocol.mock_wire_data.set_extension_config("wire/ext_conf_version_missing_in_agent_family.xml") update_goal_state_and_run_handler() self.assertTrue("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], "updateStatus should be reported") update_status = protocol.aggregate_status['aggregateStatus']['guestAgentStatus']["updateStatus"] self.assertEqual(VMAgentUpdateStatuses.Error, update_status['status'], "Status should be an error") self.assertEqual(update_status['code'], 1, "incorrect code reported") - self.assertIn("Missing requested version", update_status['formattedMessage']['message'], "incorrect message reported") + self.assertIn("missing version property. 
So, skipping agent update", update_status['formattedMessage']['message'], "incorrect message reported") - # Case 2: Requested version in GS == Current Version; updateStatus should be Success - protocol.mock_wire_data.set_extension_config("wire/ext_conf_requested_version.xml") - protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) + # Case 2: rsm version in GS == Current Version; updateStatus should be Success + protocol.mock_wire_data.set_extension_config("wire/ext_conf_rsm_version.xml") + protocol.mock_wire_data.set_version_in_agent_family(str(CURRENT_VERSION)) update_goal_state_and_run_handler() self.assertTrue("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], "updateStatus should be reported if asked in GS") @@ -1303,9 +1301,9 @@ def update_goal_state_and_run_handler(autoupdate_enabled = True): self.assertEqual(update_status['expectedVersion'], str(CURRENT_VERSION), "incorrect version reported") self.assertEqual(update_status['code'], 0, "incorrect code reported") - # Case 3: Requested version in GS != Current Version; update fail and report error - protocol.mock_wire_data.set_extension_config("wire/ext_conf_requested_version.xml") - protocol.mock_wire_data.set_extension_config_requested_version("5.2.0.1") + # Case 3: rsm version in GS != Current Version; update fail and report error + protocol.mock_wire_data.set_extension_config("wire/ext_conf_rsm_version.xml") + protocol.mock_wire_data.set_version_in_agent_family("5.2.0.1") update_goal_state_and_run_handler() self.assertTrue("updateStatus" in protocol.aggregate_status['aggregateStatus']['guestAgentStatus'], "updateStatus should be in status blob. 
Warns: {0}".format(patch_warn.call_args_list)) @@ -1436,8 +1434,8 @@ def create_conf_mocks(self, autoupdate_frequency, hotfix_frequency, normal_frequ # Disabling extension processing to speed up tests as this class deals with testing agent upgrades with patch("azurelinuxagent.common.conf.get_extensions_enabled", return_value=False): with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=autoupdate_frequency): - with patch("azurelinuxagent.common.conf.get_hotfix_upgrade_frequency", return_value=hotfix_frequency): - with patch("azurelinuxagent.common.conf.get_normal_upgrade_frequency", return_value=normal_frequency): + with patch("azurelinuxagent.common.conf.get_self_update_hotfix_frequency", return_value=hotfix_frequency): + with patch("azurelinuxagent.common.conf.get_self_update_regular_frequency", return_value=normal_frequency): with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"): with patch("azurelinuxagent.common.conf.get_enable_ga_versioning", return_value=True): yield @@ -1480,7 +1478,7 @@ def __assert_exit_code_successful(self, update_handler): def __assert_upgrade_telemetry_emitted(self, mock_telemetry, upgrade=True, version="9.9.9.10"): upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if - 'Agent update found, exiting current process to {0} to the new Agent version {1}'.format( + 'Agent completed all update checks, exiting current process to {0} to the new Agent version {1}'.format( "upgrade" if upgrade else "downgrade", version) in kwarg['message'] and kwarg[ 'op'] == WALAEventOperation.AgentUpgrade] self.assertEqual(1, len(upgrade_event_msgs), @@ -1507,7 +1505,7 @@ def __assert_ga_version_in_status(self, aggregate_status, version=str(CURRENT_VE def test_it_should_upgrade_agent_on_process_start_if_auto_upgrade_enabled(self): data_file = wire_protocol_data.DATA_FILE.copy() - data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + data_file["ext_conf"] = 
"wire/ext_conf_rsm_version.xml" with self.__get_update_handler(test_data=data_file, iterations=10) as (update_handler, mock_telemetry): update_handler.run(debug=True) @@ -1516,16 +1514,17 @@ def test_it_should_upgrade_agent_on_process_start_if_auto_upgrade_enabled(self): self.__assert_agent_directories_available(versions=["9.9.9.10"]) self.__assert_upgrade_telemetry_emitted(mock_telemetry) - def test_it_should_not_update_agent_if_last_update_time_not_permitted(self): + def test_it_should_not_update_agent_with_rsm_if_gs_not_updated_in_next_attempts(self): no_of_iterations = 10 data_file = DATA_FILE.copy() - data_file['ext_conf'] = "wire/ext_conf_requested_version.xml" + data_file['ext_conf'] = "wire/ext_conf_rsm_version.xml" self.prepare_agents(1) test_frequency = 10 with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, autoupdate_frequency=test_frequency) as (update_handler, _): - update_handler._protocol.mock_wire_data.set_ga_manifest_version_version("5.2.0.1") + # Given version which will fail on first attempt, then rsm shouldn't make any futher attempts since GS is not updated + update_handler._protocol.mock_wire_data.set_version_in_agent_family("5.2.1.0") update_handler._protocol.mock_wire_data.set_incarnation(2) update_handler.run(debug=True) @@ -1533,6 +1532,8 @@ def test_it_should_not_update_agent_if_last_update_time_not_permitted(self): self.assertEqual(no_of_iterations, update_handler.get_iterations(), "Update handler should've run its course") self.assertFalse(os.path.exists(self.agent_dir("5.2.0.1")), "New agent directory should not be found") + self.assertGreaterEqual(update_handler._protocol.mock_wire_data.call_counts["manifest_of_ga.xml"], 1, + "only 1 agent manifest call should've been made - 1 per incarnation") def test_it_should_not_auto_upgrade_if_auto_update_disabled(self): with self.__get_update_handler(iterations=10) as (update_handler, _): @@ -1544,9 +1545,9 @@ def 
test_it_should_not_auto_upgrade_if_auto_update_disabled(self): self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), "New agent directory should not be found") - def test_it_should_download_only_requested_version_if_available(self): + def test_it_should_download_only_rsm_version_if_available(self): data_file = wire_protocol_data.DATA_FILE.copy() - data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + data_file["ext_conf"] = "wire/ext_conf_rsm_version.xml" with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): update_handler.run(debug=True) @@ -1556,7 +1557,7 @@ def test_it_should_download_only_requested_version_if_available(self): def test_it_should_download_largest_version_if_ga_versioning_disabled(self): data_file = wire_protocol_data.DATA_FILE.copy() - data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + data_file["ext_conf"] = "wire/ext_conf_rsm_version.xml" with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): with patch.object(conf, "get_enable_ga_versioning", return_value=False): update_handler.run(debug=True) @@ -1565,9 +1566,9 @@ def test_it_should_download_largest_version_if_ga_versioning_disabled(self): self.__assert_upgrade_telemetry_emitted(mock_telemetry, version="99999.0.0.0") self.__assert_agent_directories_exist_and_others_dont_exist(versions=["99999.0.0.0"]) - def test_it_should_cleanup_all_agents_except_requested_version_and_current_version(self): + def test_it_should_cleanup_all_agents_except_rsm_version_and_current_version(self): data_file = wire_protocol_data.DATA_FILE.copy() - data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + data_file["ext_conf"] = "wire/ext_conf_rsm_version.xml" # Set the test environment by adding 20 random agents to the agent directory self.prepare_agents() @@ -1580,10 +1581,10 @@ def test_it_should_cleanup_all_agents_except_requested_version_and_current_versi 
self.__assert_upgrade_telemetry_emitted(mock_telemetry, version="9.9.9.10") self.__assert_agent_directories_exist_and_others_dont_exist(versions=["9.9.9.10", str(CURRENT_VERSION)]) - def test_it_should_not_update_if_requested_version_not_found_in_manifest(self): + def test_it_should_not_update_if_rsm_version_not_found_in_manifest(self): self.prepare_agents(1) data_file = wire_protocol_data.DATA_FILE.copy() - data_file["ext_conf"] = "wire/ext_conf_requested_version_missing_in_manifest.xml" + data_file["ext_conf"] = "wire/ext_conf_version_missing_in_manifest.xml" with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): update_handler.run(debug=True) @@ -1592,18 +1593,18 @@ def test_it_should_not_update_if_requested_version_not_found_in_manifest(self): agent_msgs = [kwarg for _, kwarg in mock_telemetry.call_args_list if kwarg['op'] in (WALAEventOperation.AgentUpgrade, WALAEventOperation.Download)] # This will throw if corresponding message not found so not asserting on that - requested_version_found = next(kwarg for kwarg in agent_msgs if - "discovered new agent version:5.2.1.0 in agent manifest for goal state incarnation_1, will update the agent before processing the goal state" in kwarg['message']) - self.assertTrue(requested_version_found['is_success'], - "The requested version found op should be reported as a success") + rsm_version_found = next(kwarg for kwarg in agent_msgs if + "New agent version:5.2.1.0 requested by RSM in Goal state incarnation_1, will update the agent before processing the goal state" in kwarg['message']) + self.assertTrue(rsm_version_found['is_success'], + "The rsm version found op should be reported as a success") skipping_update = next(kwarg for kwarg in agent_msgs if - "No matching package found in the agent manifest for requested version: 5.2.1.0 in goal state incarnation: incarnation_1, skipping agent update" in kwarg['message']) + "No matching package found in the agent manifest for version: 5.2.1.0 in 
goal state incarnation: incarnation_1, skipping agent update" in kwarg['message']) self.assertEqual(skipping_update['version'], str(CURRENT_VERSION), "The not found message should be reported from current agent version") self.assertFalse(skipping_update['is_success'], "The not found op should be reported as a failure") - def test_it_should_try_downloading_requested_version_on_new_incarnation(self): + def test_it_should_try_downloading_rsm_version_on_new_incarnation(self): no_of_iterations = 1000 # Set the test environment by adding 20 random agents to the agent directory @@ -1620,8 +1621,8 @@ def reload_conf(url, protocol): # Ensure we didn't try to download any agents except during the incarnation change self.__assert_agent_directories_available(versions=[str(CURRENT_VERSION)]) - # Update the requested version to "99999.0.0.0" - update_handler._protocol.mock_wire_data.set_extension_config_requested_version("99999.0.0.0") + # Update the rsm version to "99999.0.0.0" + update_handler._protocol.mock_wire_data.set_version_in_agent_family("99999.0.0.0") reload_conf.call_count += 1 self._add_write_permission_to_goal_state_files() reload_conf.incarnation += 1 @@ -1631,9 +1632,9 @@ def reload_conf(url, protocol): reload_conf.incarnation = 2 data_file = wire_protocol_data.DATA_FILE.copy() - data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + data_file["ext_conf"] = "wire/ext_conf_rsm_version.xml" with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf) as (update_handler, mock_telemetry): - update_handler._protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) + update_handler._protocol.mock_wire_data.set_version_in_agent_family(str(CURRENT_VERSION)) update_handler._protocol.mock_wire_data.set_incarnation(2) update_handler.run(debug=True) @@ -1646,7 +1647,7 @@ def reload_conf(url, protocol): 
self.assertGreaterEqual(update_handler._protocol.mock_wire_data.call_counts["manifest_of_ga.xml"], 1, "only 1 agent manifest call should've been made - 1 per incarnation") - def test_it_should_update_to_largest_version_if_requested_version_not_available(self): + def test_it_should_update_to_largest_version_if_rsm_version_not_available(self): no_of_iterations = 100 # Set the test environment by adding 20 random agents to the agent directory @@ -1661,12 +1662,12 @@ def reload_conf(url, protocol): "goalstate"] >= 5: reload_conf.call_count += 1 - # By this point, the GS with requested version should've been executed. Verify that + # By this point, the GS with rsm version should've been executed. Verify that self.__assert_agent_directories_available(versions=[str(CURRENT_VERSION)]) - # Update the ext-conf and incarnation and remove requested versions from GS, - # this should download all versions requested in config - mock_wire_data.data_files["ext_conf"] = "wire/ext_conf.xml" + # Update the ga_manifest and incarnation to send largest version manifest + # this should download largest version requested in config + mock_wire_data.data_files["ga_manifest"] = "wire/ga_manifest.xml" mock_wire_data.reload() self._add_write_permission_to_goal_state_files() reload_conf.incarnation += 1 @@ -1676,9 +1677,9 @@ def reload_conf(url, protocol): reload_conf.incarnation = 2 data_file = wire_protocol_data.DATA_FILE.copy() - data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + data_file["ext_conf"] = "wire/ext_conf.xml" + data_file["ga_manifest"] = "wire/ga_manifest_no_upgrade.xml" with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf) as (update_handler, mock_telemetry): - update_handler._protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) update_handler._protocol.mock_wire_data.set_incarnation(2) update_handler.run(debug=True) @@ -1766,16 +1767,16 @@ def reload_conf(url, protocol): 
self.__assert_upgrade_telemetry_emitted(mock_telemetry, version="99999.0.0.0") self.__assert_agent_directories_exist_and_others_dont_exist(versions=["99999.0.0.0", str(CURRENT_VERSION)]) - def test_it_should_not_download_anything_if_requested_version_is_current_version(self): + def test_it_should_not_download_anything_if_rsm_version_is_current_version(self): data_file = wire_protocol_data.DATA_FILE.copy() - data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + data_file["ext_conf"] = "wire/ext_conf_rsm_version.xml" # Set the test environment by adding 20 random agents to the agent directory self.prepare_agents() self.assertEqual(20, self.agent_count(), "Agent directories not set properly") with self.__get_update_handler(test_data=data_file) as (update_handler, _): - update_handler._protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION)) + update_handler._protocol.mock_wire_data.set_version_in_agent_family(str(CURRENT_VERSION)) update_handler._protocol.mock_wire_data.set_incarnation(2) update_handler.run(debug=True) @@ -1783,7 +1784,7 @@ def test_it_should_not_download_anything_if_requested_version_is_current_version self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), "New agent directory should not be found") - def test_it_should_skip_wait_to_update_if_requested_version_available(self): + def test_it_should_skip_wait_to_update_if_rsm_version_available(self): no_of_iterations = 100 def reload_conf(url, protocol): @@ -1796,8 +1797,8 @@ def reload_conf(url, protocol): # Assert GA version from status to ensure agent is running fine from the current version self.__assert_ga_version_in_status(protocol.aggregate_status) - # Update the ext-conf and incarnation and add requested version from GS - mock_wire_data.data_files["ext_conf"] = "wire/ext_conf_requested_version.xml" + # Update the ext-conf and incarnation and add rsm version from GS + mock_wire_data.data_files["ext_conf"] = "wire/ext_conf_rsm_version.xml" 
data_file['ga_manifest'] = "wire/ga_manifest.xml" mock_wire_data.reload() self._add_write_permission_to_goal_state_files() @@ -1814,7 +1815,7 @@ def reload_conf(url, protocol): self.assertGreater(reload_conf.call_count, 0, "Reload conf not updated") self.assertLess(update_handler.get_iterations(), no_of_iterations, - "The code should've exited as soon as requested version was found") + "The code should've exited as soon as rsm version was found") self.__assert_exit_code_successful(update_handler) self.__assert_upgrade_telemetry_emitted(mock_telemetry, version="9.9.9.10") @@ -1827,9 +1828,9 @@ def test_it_should_mark_current_agent_as_bad_version_on_downgrade(self): downgraded_version = "2.5.0" data_file = wire_protocol_data.DATA_FILE.copy() - data_file["ext_conf"] = "wire/ext_conf_requested_version.xml" + data_file["ext_conf"] = "wire/ext_conf_rsm_version.xml" with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry): - update_handler._protocol.mock_wire_data.set_extension_config_requested_version(downgraded_version) + update_handler._protocol.mock_wire_data.set_version_in_agent_family(downgraded_version) update_handler._protocol.mock_wire_data.set_incarnation(2) update_handler.run(debug=True) @@ -1843,6 +1844,43 @@ def test_it_should_mark_current_agent_as_bad_version_on_downgrade(self): "Invalid reason specified for blacklisting agent") self.__assert_agent_directories_exist_and_others_dont_exist(versions=[downgraded_version, str(CURRENT_VERSION)]) + def test_it_should_do_self_update_if_vm_opt_out_rsm_upgrades_later(self): + no_of_iterations = 100 + + # Set the test environment by adding 20 random agents to the agent directory + self.prepare_agents() + self.assertEqual(20, self.agent_count(), "Agent directories not set properly") + def reload_conf(url, protocol): + mock_wire_data = protocol.mock_wire_data + + # This function reloads the conf mid-run to mimic an actual customer scenario + if 
HttpRequestPredicates.is_goal_state_request(url) and mock_wire_data.call_counts["goalstate"] >= 5: + reload_conf.call_count += 1 + + # Assert GA version from status to ensure agent is running fine from the current version + self.__assert_ga_version_in_status(protocol.aggregate_status) + + # Update is_vm_enabled_for_rsm_upgrades flag to False + update_handler._protocol.mock_wire_data.set_extension_config_is_vm_enabled_for_rsm_upgrades("False") + self._add_write_permission_to_goal_state_files() + mock_wire_data.set_incarnation(2) + + reload_conf.call_count = 0 + + data_file = wire_protocol_data.DATA_FILE.copy() + data_file['ext_conf'] = "wire/ext_conf_rsm_version.xml" + with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf) as (update_handler, mock_telemetry): + update_handler._protocol.mock_wire_data.set_version_in_agent_family(str(CURRENT_VERSION)) + update_handler._protocol.mock_wire_data.set_incarnation(20) + update_handler.run(debug=True) + + self.assertGreater(reload_conf.call_count, 0, "Reload conf not updated") + self.assertLess(update_handler.get_iterations(), no_of_iterations, + "The code should've exited as soon as version was found") + self.__assert_exit_code_successful(update_handler) + self.__assert_upgrade_telemetry_emitted(mock_telemetry, version="99999.0.0.0") + self.__assert_agent_directories_exist_and_others_dont_exist(versions=["99999.0.0.0", str(CURRENT_VERSION)]) + @patch('azurelinuxagent.ga.update.get_collect_telemetry_events_handler') @patch('azurelinuxagent.ga.update.get_send_telemetry_events_handler') @@ -2539,4 +2577,4 @@ def test_inequality_operator_should_return_false_on_items_with_same_value(self): if __name__ == '__main__': - unittest.main() + unittest.main() \ No newline at end of file diff --git a/tests/lib/wire_protocol_data.py b/tests/lib/wire_protocol_data.py index 2bc18e34f1..9502a64133 100644 --- a/tests/lib/wire_protocol_data.py +++ b/tests/lib/wire_protocol_data.py @@ -463,8 
+463,11 @@ def set_extension_config(self, ext_conf_file): def set_ga_manifest(self, ga_manifest): self.ga_manifest = load_data(ga_manifest) - def set_extension_config_requested_version(self, version): + def set_version_in_agent_family(self, version): self.ext_conf = WireProtocolData.replace_xml_element_value(self.ext_conf, "Version", version) + def set_extension_config_is_vm_enabled_for_rsm_upgrades(self, is_vm_enabled_for_rsm_upgrades): + self.ext_conf = WireProtocolData.replace_xml_element_value(self.ext_conf, "IsVMEnabledForRSMUpgrades", is_vm_enabled_for_rsm_upgrades) + def set_ga_manifest_version_version(self, version): self.ga_manifest = WireProtocolData.replace_xml_element_value(self.ga_manifest, "Version", version) diff --git a/tests_e2e/tests/scripts/agent_update-wait_for_rsm_gs.py b/tests_e2e/tests/scripts/agent_update-wait_for_rsm_gs.py index 016bcd8c62..c65047903a 100755 --- a/tests_e2e/tests/scripts/agent_update-wait_for_rsm_gs.py +++ b/tests_e2e/tests/scripts/agent_update-wait_for_rsm_gs.py @@ -35,8 +35,8 @@ def get_requested_version(gs: GoalState) -> str: raise Exception( u"No manifest links found for agent family Test, skipping agent update verification") manifest = agent_family_manifests[0] - if manifest.is_requested_version_specified and manifest.requested_version is not None: - return str(manifest.requested_version) + if manifest.is_requested_version_specified and manifest.version is not None: + return str(manifest.version) return "" From 2848dad7f49c8f21a434f04556b142391273c199 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Wed, 20 Dec 2023 11:56:24 -0800 Subject: [PATCH 111/240] Run remote date command to get test case start time (#2993) * Run remote date command to get test case start time * Remove unused import --- .../tests/agent_ext_workflow/extension_workflow.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git 
a/tests_e2e/tests/agent_ext_workflow/extension_workflow.py b/tests_e2e/tests/agent_ext_workflow/extension_workflow.py index 98f14e2832..b5a377e726 100644 --- a/tests_e2e/tests/agent_ext_workflow/extension_workflow.py +++ b/tests_e2e/tests/agent_ext_workflow/extension_workflow.py @@ -19,7 +19,6 @@ from azure.mgmt.compute.models import VirtualMachineExtensionInstanceView from assertpy import assert_that, soft_assertions -from datetime import datetime from random import choice import uuid @@ -175,7 +174,7 @@ def run(self): log.info("*******Verifying the extension install scenario*******") # Record the time we start the test - start_time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") + start_time = self._ssh_client.run_command("date '+%Y-%m-%dT%TZ'").rstrip() # Create DcrTestExtension with version 1.1.5 dcr_test_ext_id_1_1 = VmExtensionIdentifier( @@ -224,7 +223,7 @@ def run(self): log.info("*******Verifying the extension enable scenario*******") # Record the time we start the test - start_time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") + start_time = self._ssh_client.run_command("date '+%Y-%m-%dT%TZ'").rstrip() # Enable test extension on the VM dcr_ext.modify_ext_settings_and_enable() @@ -281,7 +280,7 @@ def run(self): log.info("*******Verifying the extension uninstall scenario*******") # Record the time we start the test - start_time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") + start_time = self._ssh_client.run_command("date '+%Y-%m-%dT%TZ'").rstrip() # Remove the test extension on the VM log.info("Delete %s from VM", dcr_test_ext_client) @@ -306,7 +305,7 @@ def run(self): log.info("*******Verifying the extension update with install scenario*******") # Record the time we start the test - start_time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") + start_time = self._ssh_client.run_command("date '+%Y-%m-%dT%TZ'").rstrip() # Version 1.2.0 of the test extension has the same functionalities as 1.1.5 with # "updateMode": "UpdateWithInstall" in 
HandlerManifest.json to test update case @@ -373,7 +372,7 @@ def run(self): log.info("*******Verifying the extension update without install scenario*******") # Record the time we start the test - start_time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") + start_time = self._ssh_client.run_command("date '+%Y-%m-%dT%TZ'").rstrip() # Version 1.3.0 of the test extension has the same functionalities as 1.1.5 with # "updateMode": "UpdateWithoutInstall" in HandlerManifest.json to test update case From e74677b3f2a7559cfd132f2f0390d76640050a71 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Fri, 22 Dec 2023 09:13:53 -0800 Subject: [PATCH 112/240] ext_sequencing scenario: get enable time from extension status files (#2992) * Get enable time from extension status files * Check for empty array * add status example in comments --- .../tests/ext_sequencing/ext_sequencing.py | 7 +- .../ext_sequencing-get_ext_enable_time.py | 92 ++++++++++--------- 2 files changed, 52 insertions(+), 47 deletions(-) diff --git a/tests_e2e/tests/ext_sequencing/ext_sequencing.py b/tests_e2e/tests/ext_sequencing/ext_sequencing.py index 3af9e64fe6..042ae8a99f 100644 --- a/tests_e2e/tests/ext_sequencing/ext_sequencing.py +++ b/tests_e2e/tests/ext_sequencing/ext_sequencing.py @@ -95,11 +95,14 @@ def _get_sorted_extension_names(extensions: List[VirtualMachineScaleSetVMExtensi for ext in extensions: # Only check extensions which succeeded provisioning if "succeeded" in ext.statuses_summary[0].code: - enabled_time = ssh_client.run_command(f"ext_sequencing-get_ext_enable_time.py --ext_type '{extension_full_names[ext.name]}' --start_time '{str(test_case_start)}'", use_sudo=True) + enabled_time = ssh_client.run_command(f"ext_sequencing-get_ext_enable_time.py --ext '{extension_full_names[ext.name]}'", use_sudo=True) + formatted_time = datetime.strptime(enabled_time.strip(), u'%Y-%m-%dT%H:%M:%SZ') + if formatted_time < test_case_start: + fail("Extension {0} was 
not enabled".format(extension_full_names[ext.name])) enabled_times.append( { "name": ext.name, - "enabled_time": datetime.strptime(enabled_time.strip(), u'%Y-%m-%d %H:%M:%S') + "enabled_time": formatted_time } ) diff --git a/tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py b/tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py index b9b2c66cb5..f65da676be 100755 --- a/tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py +++ b/tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py @@ -20,10 +20,10 @@ # import argparse -import re +import json +import os import sys -from datetime import datetime from pathlib import Path @@ -32,55 +32,57 @@ def main(): Returns the timestamp of when the provided extension was enabled """ parser = argparse.ArgumentParser() - parser.add_argument("--ext_type", dest='ext_type', required=True) - parser.add_argument("--start_time", dest='start_time', required=True) + parser.add_argument("--ext", dest='ext', required=True) args, _ = parser.parse_known_args() - # Extension enabled time is in extension CommandExecution.log - command_exec_log_path = Path('/var/log/azure/' + args.ext_type + '/CommandExecution.log') - command_exec_log = open(command_exec_log_path, 'r') - enabled_match = None - for line in command_exec_log.readlines(): - line = line.rstrip() - if args.ext_type == "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent": - # AMA logs enable succeeded and its timestamp to the command execution log: - # 2023-11-01T23:22:53.124603Z INFO ExtHandler [Microsoft.Azure.Monitor.AzureMonitorLinuxAgent-1.28.11] Command: ./shim.sh -enable - # [stdout] - # 2023/09/26 04:07:33 [Microsoft.Azure.Monitor.AzureMonitorLinuxAgent-1.28.5] Enable,success,0,Enable succeeded - enable_pattern = r'.*(?P\d{4}\/\d{2}\/\d{2} \d{2}:\d{2}:\d{2}) \[Microsoft\.Azure\.Monitor\.AzureMonitorLinuxAgent\-.*] .*Enable succeeded.*' - match = re.match(enable_pattern, line) - if match: - enabled_match = match - else: - # For RC and CSE, we can 
determine when enable succeeded from the stdout of the enable command execution from - # the command execution log: - # 2023-09-26T04:07:39.042948Z INFO ExtHandler [Microsoft.CPlat.Core.RunCommandLinux-1.0.5] Command: bin/run-command-shim enable - # [stdout] - # ... - # time=2023-09-26T04:07:37Z version=v1.0.4/git@b3be41d-dirty operation=enable seq=0 event=enabledevent=enabled - enable_pattern = r'time=(?P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z).*event=enabled' - match = re.match(enable_pattern, line) - if match: - enabled_match = match - - if not enabled_match: - # Try to get enabled time from extension command execution logs - print("Agent log does not show extension was enabled", file=sys.stderr) + # Extension enabled time is in extension extension status file + ext_dirs = [item for item in os.listdir(Path('/var/lib/waagent')) if item.startswith(args.ext)] + if not ext_dirs: + print("Extension {0} directory does not exist".format(args.ext), file=sys.stderr) sys.exit(1) - - if args.ext_type == "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent": - enable_time = datetime.strptime(enabled_match.group('timestamp'), u'%Y/%m/%d %H:%M:%S') - else: - enable_time = datetime.strptime(enabled_match.group('timestamp'), u'%Y-%m-%dT%H:%M:%SZ') - - start_time = datetime.strptime(args.start_time, u'%Y-%m-%d %H:%M:%S.%f') - if enable_time < start_time: - print("Agent log does not show extension was enabled after this test case started. Last enabled time was {0}. 
This test case started at {1}".format(enable_time, start_time), file=sys.stderr) + ext_status_path = Path('/var/lib/waagent/' + ext_dirs[0] + '/status') + ext_status_files = os.listdir(ext_status_path) + ext_status_files.sort() + if not ext_status_files: + # Extension did not report a status + print("Extension {0} did not report a status".format(args.ext), file=sys.stderr) sys.exit(1) + latest_ext_status_path = os.path.join(ext_status_path, ext_status_files[-1]) + ext_status_file = open(latest_ext_status_path, 'r') + ext_status = json.loads(ext_status_file.read()) + + # Example status file + # [ + # { + # "status": { + # "status": "success", + # "formattedMessage": { + # "lang": "en-US", + # "message": "Enable succeeded" + # }, + # "operation": "Enable", + # "code": "0", + # "name": "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent" + # }, + # "version": "1.0", + # "timestampUTC": "2023-12-12T23:14:45Z" + # } + # ] + msg = "" + if len(ext_status) == 0 or not ext_status[0]['status']: + msg = "Extension {0} did not report a status".format(args.ext) + elif not ext_status[0]['status']['operation'] or ext_status[0]['status']['operation'] != 'Enable': + msg = "Extension {0} did not report a status for enable operation".format(args.ext) + elif ext_status[0]['status']['status'] != 'success': + msg = "Extension {0} did not report success for the enable operation".format(args.ext) + elif not ext_status[0]['timestampUTC']: + msg = "Extension {0} did not report the time the enable operation succeeded".format(args.ext) else: - print(enable_time) + print(ext_status[0]['timestampUTC']) + sys.exit(0) - sys.exit(0) + print(msg, file=sys.stderr) + sys.exit(1) if __name__ == "__main__": From 847bb08f6079c53458b5059899b701bd506a46fc Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Wed, 27 Dec 2023 09:39:59 -0800 Subject: [PATCH 113/240] ssh connection retry on restarts (#3001) --- tests_e2e/tests/lib/virtual_machine_client.py | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_e2e/tests/lib/virtual_machine_client.py b/tests_e2e/tests/lib/virtual_machine_client.py index bc38b1b35a..5b1f18aff3 100644 --- a/tests_e2e/tests/lib/virtual_machine_client.py +++ b/tests_e2e/tests/lib/virtual_machine_client.py @@ -174,7 +174,7 @@ def restart( return log.info("The VM has not rebooted yet. Restart time: %s. Boot time: %s", before_restart, boot_time) except CommandError as e: - if (e.exit_code == 255 and "Connection refused" in str(e)) or "Unprivileged users are not permitted to log in yet" in str(e): + if (e.exit_code == 255 and ("Connection refused" in str(e) or "Connection timed out" in str(e))) or "Unprivileged users are not permitted to log in yet" in str(e): log.info("VM %s is not yet accepting SSH connections", self) else: raise From 284fbd5ac28041af0b1e8ba51915a6accf9b371f Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Fri, 29 Dec 2023 12:35:23 -0800 Subject: [PATCH 114/240] Add e2e test scenario for hostname monitoring (#3003) * Validate hostname is published * Run on distro without known issues * Add comment about debugging network down * Create e2e scenario for hostname monitoring * Remove unused import * Increase timeout for hostname change * Add password to VM and check for agent status if ssh fails * run scenario on all endorsed distros * Use getdistro() to check distro * Add comment to get_distro * Add publish_hostname to runbook * Make get_distro.py executable * Address first round of PR comments * Do not enable hostname monitoring on distros where it is disabled * Skip test on ubuntu * Update get-waagent-conf-value to remove unused variable --- tests_e2e/orchestrator/runbook.yml | 2 +- tests_e2e/test_suites/publish_hostname.yml | 8 + tests_e2e/tests/lib/virtual_machine_client.py | 12 + .../publish_hostname/publish_hostname.py | 207 ++++++++++++++++++ .../tests/scripts/get-waagent-conf-value | 41 ++++ 
tests_e2e/tests/scripts/get_distro.py | 35 +++ 6 files changed, 304 insertions(+), 1 deletion(-) create mode 100644 tests_e2e/test_suites/publish_hostname.yml create mode 100644 tests_e2e/tests/publish_hostname/publish_hostname.py create mode 100755 tests_e2e/tests/scripts/get-waagent-conf-value create mode 100755 tests_e2e/tests/scripts/get_distro.py diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index 5bc48a5dfb..3e5929f359 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -29,7 +29,7 @@ variable: # Test suites to execute # - name: test_suites - value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_status, multi_config_ext, agent_cgroups, ext_cgroups, agent_firewall, ext_telemetry_pipeline, ext_sequencing, agent_persist_firewall" + value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_status, multi_config_ext, agent_cgroups, ext_cgroups, agent_firewall, ext_telemetry_pipeline, ext_sequencing, agent_persist_firewall, publish_hostname" # # Parameters used to create test VMs diff --git a/tests_e2e/test_suites/publish_hostname.yml b/tests_e2e/test_suites/publish_hostname.yml new file mode 100644 index 0000000000..09864a4d66 --- /dev/null +++ b/tests_e2e/test_suites/publish_hostname.yml @@ -0,0 +1,8 @@ +# +# Changes hostname and checks that the agent published the updated hostname to dns. 
+# +name: "PublishHostname" +tests: + - "publish_hostname/publish_hostname.py" +images: + - "endorsed" \ No newline at end of file diff --git a/tests_e2e/tests/lib/virtual_machine_client.py b/tests_e2e/tests/lib/virtual_machine_client.py index 5b1f18aff3..5d6e471b9c 100644 --- a/tests_e2e/tests/lib/virtual_machine_client.py +++ b/tests_e2e/tests/lib/virtual_machine_client.py @@ -65,6 +65,18 @@ def get_ip_address(self) -> str: public_ip_address_name=nic.ip_configurations[0].public_ip_address.id.split('/')[-1]) # the name of the ip address is the last component of the id return public_ip.ip_address + def get_private_ip_address(self) -> str: + """ + Retrieves the private IP address of the virtual machine + """ + vm_model = self.get_model() + nic: NetworkInterface = self._network_client.network_interfaces.get( + resource_group_name=self.resource_group, + network_interface_name=vm_model.network_profile.network_interfaces[0].id.split('/')[ + -1]) # the name of the interface is the last component of the id + private_ip = nic.ip_configurations[0].private_ip_address + return private_ip + def get_model(self) -> VirtualMachine: """ Retrieves the model of the virtual machine. diff --git a/tests_e2e/tests/publish_hostname/publish_hostname.py b/tests_e2e/tests/publish_hostname/publish_hostname.py new file mode 100644 index 0000000000..cae1a2383e --- /dev/null +++ b/tests_e2e/tests/publish_hostname/publish_hostname.py @@ -0,0 +1,207 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This test updates the hostname and checks that the agent published the hostname to DNS. It also checks that the +# primary network is up after publishing the hostname. This test was added in response to a bug in publishing the +# hostname on fedora distros, where there was a race condition between NetworkManager restart and Network Interface +# restart which caused the primary interface to go down. +# + +import datetime +import re + +from assertpy import fail +from time import sleep + +from tests_e2e.tests.lib.shell import CommandError +from tests_e2e.tests.lib.agent_test import AgentVmTest, TestSkipped +from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext +from tests_e2e.tests.lib.logging import log + + +class PublishHostname(AgentVmTest): + def __init__(self, context: AgentVmTestContext): + super().__init__(context) + self._context = context + self._ssh_client = context.create_ssh_client() + self._private_ip = context.vm.get_private_ip_address() + self._vm_password = "" + + def add_vm_password(self): + # Add password to VM to help with debugging in case of failure + # REMOVE PWD FROM LOGS IF WE EVER MAKE THESE RUNS/LOGS PUBLIC + username = self._ssh_client.username + pwd = self._ssh_client.run_command("openssl rand -base64 32 | tr : .").rstrip() + self._vm_password = pwd + log.info("VM Username: {0}; VM Password: {1}".format(username, pwd)) + self._ssh_client.run_command("echo '{0}:{1}' | sudo -S chpasswd".format(username, pwd)) + + def check_and_install_dns_tools(self): + lookup_cmd = "dig -x {0}".format(self._private_ip) + dns_regex = r"[\S\s]*;; ANSWER SECTION:\s.*PTR\s*(?P.*).internal.cloudapp.net.[\S\s]*" + + # Not all distros come with dig. 
Install dig if not on machine + try: + self._ssh_client.run_command("dig -v") + except CommandError as e: + if "dig: command not found" in e.stderr: + distro = self._ssh_client.run_command("get_distro.py").rstrip().lower() + if "debian_9" in distro: + # Debian 9 hostname look up needs to be done with "host" instead of dig + lookup_cmd = "host {0}".format(self._private_ip) + dns_regex = r".*pointer\s(?P.*).internal.cloudapp.net." + elif "debian" in distro: + self._ssh_client.run_command("apt install -y dnsutils", use_sudo=True) + elif "alma" in distro or "rocky" in distro: + self._ssh_client.run_command("dnf install -y bind-utils", use_sudo=True) + else: + raise + else: + raise + + return lookup_cmd, dns_regex + + def check_agent_reports_status(self): + status_updated = False + last_agent_status_time = self._context.vm.get_instance_view().vm_agent.statuses[0].time + log.info("Agent reported status at {0}".format(last_agent_status_time)) + retries = 3 + + while retries > 0 and not status_updated: + agent_status_time = self._context.vm.get_instance_view().vm_agent.statuses[0].time + if agent_status_time != last_agent_status_time: + status_updated = True + log.info("Agent reported status at {0}".format(last_agent_status_time)) + else: + retries -= 1 + sleep(60) + + if not status_updated: + fail("Agent hasn't reported status since {0} and ssh connection failed. Use the serial console in portal " + "to check the contents of '/sys/class/net/eth0/operstate'. If the contents of this file are 'up', " + "no further action is needed. 
If contents are 'down', that indicates the network interface is down " + "and more debugging needs to be done to confirm this is not caused by the agent.\n VM: {1}\n RG: {2}" + "\nSubscriptionId: {3}\nUsername: {4}\nPassword: {5}".format(last_agent_status_time, + self._context.vm, + self._context.vm.resource_group, + self._context.vm.subscription, + self._context.username, + self._vm_password)) + + def retry_ssh_if_connection_reset(self, command: str, use_sudo=False): + # The agent may bring the network down and back up to publish the hostname, which can reset the ssh connection. + # Adding retry here for connection reset. + retries = 3 + while retries > 0: + try: + return self._ssh_client.run_command(command, use_sudo=use_sudo) + except CommandError as e: + retries -= 1 + retryable = e.exit_code == 255 and "Connection reset by peer" in e.stderr + if not retryable or retries == 0: + raise + log.warning("The SSH operation failed, retrying in 30 secs") + sleep(30) + + def run(self): + # TODO: Investigate why hostname is not being published on Ubuntu as expected + if "ubuntu" in self._ssh_client.run_command("get_distro.py").lower(): + raise TestSkipped("Known issue with hostname publishing on ubuntu. Will skip test until we continue " + "investigation.") + + # Add password to VM and log. This allows us to debug with serial console if necessary + self.add_vm_password() + + # This test looks up what hostname is published to dns. Check that the tools necessary to get hostname are + # installed, and if not install them. + lookup_cmd, dns_regex = self.check_and_install_dns_tools() + + # Check if this distro monitors hostname changes. If it does, we should check that the agent detects the change + # and publishes the host name. If it doesn't, we should check that the hostname is automatically published. 
+ monitors_hostname = self._ssh_client.run_command("get-waagent-conf-value Provisioning.MonitorHostName", use_sudo=True).rstrip().lower() + + hostname_change_ctr = 0 + # Update the hostname 3 times + while hostname_change_ctr < 3: + try: + hostname = "hostname-monitor-{0}".format(hostname_change_ctr) + log.info("Update hostname to {0}".format(hostname)) + self.retry_ssh_if_connection_reset("hostnamectl set-hostname {0}".format(hostname), use_sudo=True) + + # Wait for the agent to detect the hostname change for up to 2 minutes if hostname monitoring is enabled + if monitors_hostname == "y" or monitors_hostname == "yes": + log.info("Agent hostname monitoring is enabled") + timeout = datetime.datetime.now() + datetime.timedelta(minutes=2) + hostname_detected = "" + while datetime.datetime.now() <= timeout: + try: + hostname_detected = self.retry_ssh_if_connection_reset("grep -n 'Detected hostname change:.*-> {0}' /var/log/waagent.log".format(hostname), use_sudo=True) + if hostname_detected: + log.info("Agent detected hostname change: {0}".format(hostname_detected)) + break + except CommandError as e: + # Exit code 1 indicates grep did not find a match. Sleep if exit code is 1, otherwise raise. 
+ if e.exit_code != 1: + raise + sleep(15) + + if not hostname_detected: + fail("Agent did not detect hostname change: {0}".format(hostname)) + else: + log.info("Agent hostname monitoring is disabled") + + # Check that the expected hostname is published with 4 minute timeout + timeout = datetime.datetime.now() + datetime.timedelta(minutes=4) + published_hostname = "" + while datetime.datetime.now() <= timeout: + try: + dns_info = self.retry_ssh_if_connection_reset(lookup_cmd) + actual_hostname = re.match(dns_regex, dns_info) + if actual_hostname: + # Compare published hostname to expected hostname + published_hostname = actual_hostname.group('hostname') + if hostname == published_hostname: + log.info("SUCCESS Hostname {0} was published successfully".format(hostname)) + break + else: + log.info("Unable to parse the dns info: {0}".format(dns_info)) + except CommandError as e: + if "NXDOMAIN" in e.stdout: + log.info("DNS Lookup could not find domain. Will try again.") + else: + raise + sleep(30) + + if published_hostname == "" or published_hostname != hostname: + fail("Hostname {0} was not published successfully. Actual host name is: {1}".format(hostname, published_hostname)) + + hostname_change_ctr += 1 + + except CommandError as e: + # If failure is ssh issue, we should confirm that the VM did not lose network connectivity due to the + # agent's operations on the network. If agent reports status after this failure, then we know the + # network is up. 
+ if e.exit_code == 255 and ("Connection timed out" in e.stderr or "Connection refused" in e.stderr): + self.check_agent_reports_status() + raise + + +if __name__ == "__main__": + PublishHostname.run_from_command_line() diff --git a/tests_e2e/tests/scripts/get-waagent-conf-value b/tests_e2e/tests/scripts/get-waagent-conf-value new file mode 100755 index 0000000000..663ca18119 --- /dev/null +++ b/tests_e2e/tests/scripts/get-waagent-conf-value @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# Echos the value in waagent.conf for the specified setting if it exists. 
+# + +set -euo pipefail + +if [[ $# -lt 1 ]]; then + echo "Usage: get-waagent-conf-value " + exit 1 +fi + +PYTHON=$(get-agent-python) +waagent_conf=$($PYTHON -c 'from azurelinuxagent.common.osutil import get_osutil; print(get_osutil().agent_conf_file_path)') + +cat $waagent_conf | while read line +do + if [[ $line == $1* ]]; then + IFS='=' read -a values <<< "$line" + echo ${values[1]} + exit 0 + fi +done diff --git a/tests_e2e/tests/scripts/get_distro.py b/tests_e2e/tests/scripts/get_distro.py new file mode 100755 index 0000000000..e9151f6531 --- /dev/null +++ b/tests_e2e/tests/scripts/get_distro.py @@ -0,0 +1,35 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Prints the distro and version of the machine +# + +import sys + +from azurelinuxagent.common.version import get_distro + + +def main(): + # Prints '_' + distro = get_distro() + print(distro[0] + "_" + distro[1].replace('.', '')) + sys.exit(0) + + +if __name__ == "__main__": + main() From 5dcb2e14764cc3a21162deb2197f43d7f6cce5fd Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Fri, 29 Dec 2023 12:40:10 -0800 Subject: [PATCH 115/240] AMA is not supported on cbl-mariner 1.0 (#3002) * Cbl-mariner 1.0 is not supported by AMA * Use get distro to check distro * Add comment to get_distro --- tests_e2e/tests/agent_bvt/vm_access.py | 2 +- tests_e2e/tests/ext_sequencing/ext_sequencing.py | 2 +- .../tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py | 2 +- tests_e2e/tests/lib/vm_extension_identifier.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests_e2e/tests/agent_bvt/vm_access.py b/tests_e2e/tests/agent_bvt/vm_access.py index c36aef132b..1c231809ff 100755 --- a/tests_e2e/tests/agent_bvt/vm_access.py +++ b/tests_e2e/tests/agent_bvt/vm_access.py @@ -39,7 +39,7 @@ class VmAccessBvt(AgentVmTest): def run(self): ssh_client: SshClient = self._context.create_ssh_client() - if not VmExtensionIds.VmAccess.supports_distro(ssh_client.run_command("uname -a")): + if not VmExtensionIds.VmAccess.supports_distro(ssh_client.run_command("get_distro.py").rstrip()): raise TestSkipped("Currently VMAccess is not supported on this distro") # Try to use a unique username for each test run (note that we truncate to 32 chars to diff --git a/tests_e2e/tests/ext_sequencing/ext_sequencing.py b/tests_e2e/tests/ext_sequencing/ext_sequencing.py index 042ae8a99f..e50b0d6abc 100644 --- a/tests_e2e/tests/ext_sequencing/ext_sequencing.py +++ b/tests_e2e/tests/ext_sequencing/ext_sequencing.py @@ -159,7 +159,7 @@ def run(self): for instance in instances_ip_address: ssh_clients[instance.instance_name] = 
SshClient(ip_address=instance.ip_address, username=self._context.username, identity_file=self._context.identity_file) - if not VmExtensionIds.AzureMonitorLinuxAgent.supports_distro(next(iter(ssh_clients.values())).run_command("uname -a")): + if not VmExtensionIds.AzureMonitorLinuxAgent.supports_distro(next(iter(ssh_clients.values())).run_command("get_distro.py").rstrip()): raise TestSkipped("Currently AzureMonitorLinuxAgent is not supported on this distro") # This is the base ARM template that's used for deploying extensions for this scenario diff --git a/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py b/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py index 809c32f592..01a687f634 100755 --- a/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py +++ b/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py @@ -42,7 +42,7 @@ def run(self): # Extensions we will create events for extensions = ["Microsoft.Azure.Extensions.CustomScript"] - if VmExtensionIds.VmAccess.supports_distro(ssh_client.run_command("uname -a")): + if VmExtensionIds.VmAccess.supports_distro(ssh_client.run_command("get_distro.py").rstrip()): extensions.append("Microsoft.OSTCExtensions.VMAccessForLinux") # Set the etp collection period to 30 seconds instead of default 5 minutes diff --git a/tests_e2e/tests/lib/vm_extension_identifier.py b/tests_e2e/tests/lib/vm_extension_identifier.py index fa304cb766..afbee1e245 100644 --- a/tests_e2e/tests/lib/vm_extension_identifier.py +++ b/tests_e2e/tests/lib/vm_extension_identifier.py @@ -34,7 +34,7 @@ def __init__(self, publisher: str, ext_type: str, version: str): unsupported_distros: Dict[str, List[str]] = { "Microsoft.OSTCExtensions.VMAccessForLinux": ["flatcar"], - "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent": ["flatcar"] + "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent": ["flatcar", "mariner_1"] } def supports_distro(self, system_info: str) -> bool: From 0c03cb1e520b87c51f819a48b02d71be4068ca8f 
Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 2 Jan 2024 10:03:54 -0800 Subject: [PATCH 116/240] log update time for self updater (#3004) * add update time log * log new agent update time * fix tests --- azurelinuxagent/ga/self_update_version_updater.py | 12 +++++++++--- tests/ga/test_agent_update_handler.py | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/azurelinuxagent/ga/self_update_version_updater.py b/azurelinuxagent/ga/self_update_version_updater.py index 2b04fd5c22..2dc87bf857 100644 --- a/azurelinuxagent/ga/self_update_version_updater.py +++ b/azurelinuxagent/ga/self_update_version_updater.py @@ -71,7 +71,7 @@ def _is_new_agent_allowed_update(self): """ This method ensure that update is allowed only once per (hotfix/Regular) upgrade frequency """ - now = datetime.datetime.now() + now = datetime.datetime.utcnow() upgrade_type = self._get_agent_upgrade_type(self._version) if upgrade_type == SelfUpdateType.Hotfix: next_update_time = self._get_next_process_time(self._last_attempted_self_update_time, @@ -80,6 +80,12 @@ def _is_new_agent_allowed_update(self): next_update_time = self._get_next_process_time(self._last_attempted_self_update_time, conf.get_self_update_regular_frequency(), now) + if self._version > CURRENT_VERSION: + message = "Self-update discovered new {0} upgrade WALinuxAgent-{1}; Will upgrade on or after {2}".format( + upgrade_type, str(self._version), next_update_time.strftime(logger.Logger.LogTimeFormatInUTC)) + logger.info(message) + add_event(op=WALAEventOperation.AgentUpgrade, message=message, log_event=False) + if next_update_time <= now: # Update the last upgrade check time even if no new agent is available for upgrade self._last_attempted_self_update_time = now @@ -92,7 +98,7 @@ def _should_agent_attempt_manifest_download(self): the agent has not attempted to download the manifest in the last 1 hour If we allow update, we update the last attempted manifest 
download time """ - now = datetime.datetime.now() + now = datetime.datetime.utcnow() if self._last_attempted_manifest_download_time != datetime.datetime.min: next_attempt_time = self._last_attempted_manifest_download_time + datetime.timedelta( @@ -161,7 +167,7 @@ def log_new_agent_update_message(self): """ This function logs the update message after we check version allowed to update. """ - msg = "Self-update discovered new agent version:{0} in agent manifest for goal state {1}, will update the agent before processing the goal state.".format( + msg = "Self-update is ready to upgrade the new agent: {0} now before processing the goal state: {1}".format( str(self._version), self._gs_id) logger.info(msg) add_event(op=WALAEventOperation.AgentUpgrade, message=msg, log_event=False) diff --git a/tests/ga/test_agent_update_handler.py b/tests/ga/test_agent_update_handler.py index 6c069bfc84..561628974d 100644 --- a/tests/ga/test_agent_update_handler.py +++ b/tests/ga/test_agent_update_handler.py @@ -83,7 +83,7 @@ def _assert_agent_rsm_version_in_goal_state(self, mock_telemetry, inc=1, version def _assert_update_discovered_from_agent_manifest(self, mock_telemetry, inc=1, version="9.9.9.10"): upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if - 'Self-update discovered new agent version:{0} in agent manifest for goal state incarnation_{1}'.format(version, inc) in kwarg['message'] and kwarg[ + 'Self-update is ready to upgrade the new agent: {0} now before processing the goal state: incarnation_{1}'.format(version, inc) in kwarg['message'] and kwarg[ 'op'] == WALAEventOperation.AgentUpgrade] self.assertEqual(1, len(upgrade_event_msgs), "Did not find the event indicating that the new version found. 
Got: {0}".format( From 6b70af507b9aaf82ffe3b97e318a4bad7b552d34 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Tue, 2 Jan 2024 11:10:36 -0800 Subject: [PATCH 117/240] Fix publish hostname in china and gov clouds (#3005) * Fix regex to parse china/gov domain names * Improve regex * Improve regex --- tests_e2e/tests/publish_hostname/publish_hostname.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests_e2e/tests/publish_hostname/publish_hostname.py b/tests_e2e/tests/publish_hostname/publish_hostname.py index cae1a2383e..cc74a596b6 100644 --- a/tests_e2e/tests/publish_hostname/publish_hostname.py +++ b/tests_e2e/tests/publish_hostname/publish_hostname.py @@ -55,7 +55,7 @@ def add_vm_password(self): def check_and_install_dns_tools(self): lookup_cmd = "dig -x {0}".format(self._private_ip) - dns_regex = r"[\S\s]*;; ANSWER SECTION:\s.*PTR\s*(?P.*).internal.cloudapp.net.[\S\s]*" + dns_regex = r"[\S\s]*;; ANSWER SECTION:\s.*PTR\s*(?P.*)\.internal\.(cloudapp\.net|chinacloudapp\.cn|usgovcloudapp\.net).*[\S\s]*" # Not all distros come with dig. Install dig if not on machine try: @@ -66,7 +66,7 @@ def check_and_install_dns_tools(self): if "debian_9" in distro: # Debian 9 hostname look up needs to be done with "host" instead of dig lookup_cmd = "host {0}".format(self._private_ip) - dns_regex = r".*pointer\s(?P.*).internal.cloudapp.net." 
+ dns_regex = r".*pointer\s(?P.*)\.internal\.(cloudapp\.net|chinacloudapp\.cn|usgovcloudapp\.net).*" elif "debian" in distro: self._ssh_client.run_command("apt install -y dnsutils", use_sudo=True) elif "alma" in distro or "rocky" in distro: From bd4f12d8777418ea1a24a41d1266db8be487b5d1 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 2 Jan 2024 12:53:54 -0800 Subject: [PATCH 118/240] Self update e2e test (#3000) * self-update test * addressed comments * fix tests * log * added comment * merge conflicts --- azurelinuxagent/ga/rsm_version_updater.py | 5 +- .../ga/self_update_version_updater.py | 2 +- tests/ga/test_agent_update_handler.py | 2 +- tests/ga/test_update.py | 2 +- tests_e2e/orchestrator/runbook.yml | 2 +- tests_e2e/test_suites/agent_update.yml | 10 +- tests_e2e/tests/agent_update/self_update.py | 123 ++++++++++++++++++ .../scripts/agent_update-self_update_check.py | 62 +++++++++ ...agent_update-self_update_latest_version.py | 69 ++++++++++ .../agent_update-self_update_test_setup | 66 ++++++++++ 10 files changed, 334 insertions(+), 9 deletions(-) create mode 100644 tests_e2e/tests/agent_update/self_update.py create mode 100755 tests_e2e/tests/scripts/agent_update-self_update_check.py create mode 100755 tests_e2e/tests/scripts/agent_update-self_update_latest_version.py create mode 100755 tests_e2e/tests/scripts/agent_update-self_update_test_setup diff --git a/azurelinuxagent/ga/rsm_version_updater.py b/azurelinuxagent/ga/rsm_version_updater.py index dc972c1c7a..6df7b6e30e 100644 --- a/azurelinuxagent/ga/rsm_version_updater.py +++ b/azurelinuxagent/ga/rsm_version_updater.py @@ -142,6 +142,5 @@ def proceed_with_update(self): # start the next available highest version which would be the target version prefix = "upgrade" raise AgentUpgradeExitException( - "Agent completed all update checks, exiting current process to {0} to the new Agent version {1}".format( - prefix, - self._version)) + "Current Agent 
{0} completed all update checks, exiting current process to {1} to the new Agent version {2}".format(CURRENT_VERSION, + prefix, self._version)) diff --git a/azurelinuxagent/ga/self_update_version_updater.py b/azurelinuxagent/ga/self_update_version_updater.py index 2dc87bf857..ca27c4399a 100644 --- a/azurelinuxagent/ga/self_update_version_updater.py +++ b/azurelinuxagent/ga/self_update_version_updater.py @@ -188,5 +188,5 @@ def proceed_with_update(self): # In case of an upgrade, we don't need to exclude anything as the daemon will automatically # start the next available highest version which would be the target version raise AgentUpgradeExitException( - "Agent completed all update checks, exiting current process to upgrade to the new Agent version {0}".format( + "Current Agent {0} completed all update checks, exiting current process to upgrade to the new Agent version {1}".format(CURRENT_VERSION, self._version)) diff --git a/tests/ga/test_agent_update_handler.py b/tests/ga/test_agent_update_handler.py index 561628974d..0ac373a6b2 100644 --- a/tests/ga/test_agent_update_handler.py +++ b/tests/ga/test_agent_update_handler.py @@ -98,7 +98,7 @@ def _assert_no_agent_package_telemetry_emitted(self, mock_telemetry, version="9. 
mock_telemetry.call_args_list)) def _assert_agent_exit_process_telemetry_emitted(self, message): - self.assertIn("Agent completed all update checks, exiting current process", message) + self.assertIn("Current Agent {0} completed all update checks, exiting current process".format(CURRENT_VERSION), message) def test_it_should_not_update_when_autoupdate_disabled(self): self.prepare_agents(count=1) diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 286dfb0b70..37fb75796e 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -1478,7 +1478,7 @@ def __assert_exit_code_successful(self, update_handler): def __assert_upgrade_telemetry_emitted(self, mock_telemetry, upgrade=True, version="9.9.9.10"): upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if - 'Agent completed all update checks, exiting current process to {0} to the new Agent version {1}'.format( + 'Current Agent {0} completed all update checks, exiting current process to {1} to the new Agent version {2}'.format(CURRENT_VERSION, "upgrade" if upgrade else "downgrade", version) in kwarg['message'] and kwarg[ 'op'] == WALAEventOperation.AgentUpgrade] self.assertEqual(1, len(upgrade_event_msgs), diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index 3e5929f359..9f3007c723 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -29,7 +29,7 @@ variable: # Test suites to execute # - name: test_suites - value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_status, multi_config_ext, agent_cgroups, ext_cgroups, agent_firewall, ext_telemetry_pipeline, ext_sequencing, agent_persist_firewall, publish_hostname" + value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_status, multi_config_ext, agent_cgroups, ext_cgroups, agent_firewall, ext_telemetry_pipeline, 
ext_sequencing, agent_persist_firewall, publish_hostname, agent_update" # # Parameters used to create test VMs diff --git a/tests_e2e/test_suites/agent_update.yml b/tests_e2e/test_suites/agent_update.yml index 7ef477e00b..df25a92213 100644 --- a/tests_e2e/test_suites/agent_update.yml +++ b/tests_e2e/test_suites/agent_update.yml @@ -1,7 +1,13 @@ +# Scenario validates RSM and Self-updates paths +# RSM update: If vm enrolled into RSM, it will validate agent uses RSM to update to target version +# Self-update: If vm not enrolled into RSM, it will validate agent uses self-update to update to latest version published name: "AgentUpdate" tests: - - "agent_update/rsm_update.py" -images: "random(endorsed, 10)" +# - "agent_update/rsm_update.py" will enable this test once we have a new test version published + - "agent_update/self_update.py" +images: + - "random(endorsed, 10)" + - "random(endorsed-arm64, 2)" locations: "AzureCloud:eastus2euap" owns_vm: true skip_on_clouds: diff --git a/tests_e2e/tests/agent_update/self_update.py b/tests_e2e/tests/agent_update/self_update.py new file mode 100644 index 0000000000..65fec9df5d --- /dev/null +++ b/tests_e2e/tests/agent_update/self_update.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os +from pathlib import Path +from threading import RLock + +from assertpy import fail + +import azurelinuxagent +from tests_e2e.tests.lib.agent_test import AgentVmTest +from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.retry import retry_if_false +from tests_e2e.tests.lib.shell import run_command + + +class SelfUpdateBvt(AgentVmTest): + """ + This test case is to verify that the agent can update itself to the latest version using self-update path when vm not enrolled to RSM updates + """ + + def __init__(self, context: AgentVmTestContext): + super().__init__(context) + self._ssh_client = self._context.create_ssh_client() + self._test_version = "2.8.9.9" + self._test_pkg_name = f"WALinuxAgent-{self._test_version}.zip" + + _setup_lock = RLock() + + def run(self): + self._test_setup() + self._verify_agent_updated_to_latest_version() + + def _test_setup(self) -> None: + """ + Builds the custom test agent pkg as some lower version and installs it on the vm + """ + self._build_custom_test_agent() + self._ssh_client.run_command(f"agent_update-self_update_test_setup --package ~/tmp/{self._test_pkg_name} --version {self._test_version}", use_sudo=True) + + def _build_custom_test_agent(self) -> None: + """ + Builds the custom test pkg + """ + with self._setup_lock: + agent_source_path: Path = self._context.working_directory / "source" + source_pkg_path: Path = agent_source_path / "eggs" / f"{self._test_pkg_name}" + if source_pkg_path.exists(): + log.info("The test pkg already exists at %s, skipping build", source_pkg_path) + else: + if agent_source_path.exists(): + os.rmdir(agent_source_path) # Remove if partial build exists + source_directory: Path = Path(azurelinuxagent.__path__[0]).parent + copy_cmd: str = f"cp -r {source_directory} {agent_source_path}" + log.info("Copying agent source %s to %s", source_directory, agent_source_path) + run_command(copy_cmd, shell=True) + 
if not agent_source_path.exists(): + raise Exception( + f"The agent source was not copied to the expected path {agent_source_path}") + version_file: Path = agent_source_path / "azurelinuxagent" / "common" / "version.py" + version_cmd = rf"""sed -E -i "s/^AGENT_VERSION\s+=\s+'[0-9.]+'/AGENT_VERSION = '{self._test_version}'/g" {version_file}""" + log.info("Setting agent version to %s to build new pkg", self._test_version) + run_command(version_cmd, shell=True) + makepkg_file: Path = agent_source_path / "makepkg.py" + build_cmd: str = f"env PYTHONPATH={agent_source_path} python3 {makepkg_file} -o {agent_source_path}" + log.info("Building custom test agent pkg version %s", self._test_version) + run_command(build_cmd, shell=True) + if not source_pkg_path.exists(): + raise Exception( + f"The test pkg was not created at the expected path {source_pkg_path}") + target_path: Path = Path("~") / "tmp" + log.info("Copying %s to %s:%s", source_pkg_path, self._context.vm, target_path) + self._ssh_client.copy_to_node(source_pkg_path, target_path) + + def _verify_agent_updated_to_latest_version(self) -> None: + """ + Verifies the agent updated to latest version from custom test version. 
+ We retrieve latest version from goal state and compare with current agent version running as that latest version + """ + latest_version: str = self._ssh_client.run_command("agent_update-self_update_latest_version.py", use_sudo=True).rstrip() + self._verify_guest_agent_update(latest_version) + # Verify agent updated to latest version by custom test agent + self._ssh_client.run_command("agent_update-self_update_check.py --latest-version {0} --current-version {1}".format(latest_version, self._test_version)) + + def _verify_guest_agent_update(self, latest_version: str) -> None: + """ + Verify current agent version running on latest version + """ + + def _check_agent_version(latest_version: str) -> bool: + waagent_version: str = self._ssh_client.run_command("waagent-version", use_sudo=True) + expected_version = f"Goal state agent: {latest_version}" + if expected_version in waagent_version: + return True + else: + return False + + waagent_version: str = "" + log.info("Verifying agent updated to latest version: {0}".format(latest_version)) + success: bool = retry_if_false(lambda: _check_agent_version(latest_version), delay=60) + if not success: + fail("Guest agent didn't update to latest version {0} but found \n {1}".format( + latest_version, waagent_version)) + waagent_version: str = self._ssh_client.run_command("waagent-version", use_sudo=True) + log.info( + f"Successfully verified agent updated to latest version. Current agent version running:\n {waagent_version}") diff --git a/tests_e2e/tests/scripts/agent_update-self_update_check.py b/tests_e2e/tests/scripts/agent_update-self_update_check.py new file mode 100755 index 0000000000..b205c94ab4 --- /dev/null +++ b/tests_e2e/tests/scripts/agent_update-self_update_check.py @@ -0,0 +1,62 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Script verifies agent update was done by test agent +# +import argparse +import re + +from assertpy import fail + +from tests_e2e.tests.lib.agent_log import AgentLog +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.retry import retry_if_false + + +#2023-12-28T04:34:23.535652Z INFO ExtHandler ExtHandler Current Agent 2.8.9.9 completed all update checks, exiting current process to upgrade to the new Agent version 2.10.0.7 +_UPDATE_PATTERN = re.compile(r'Current Agent (\S*) completed all update checks, exiting current process to upgrade to the new Agent version (\S*)') + + +def verify_agent_update_from_log(latest_version, current_version) -> bool: + """ + Checks if the agent updated to the latest version from current version + """ + agentlog = AgentLog() + + for record in agentlog.read(): + update_match = re.match(_UPDATE_PATTERN, record.message) + if update_match: + log.info('found the agent update log: %s', record.text) + if update_match.groups()[0] == current_version and update_match.groups()[1] == latest_version: + return True + return False + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument('-l', '--latest-version', required=True) + parser.add_argument('-c', '--current-version', required=True) + args = parser.parse_args() + + found: bool = retry_if_false(lambda: verify_agent_update_from_log(args.latest_version, args.current_version)) + if not found: + fail('agent update was not found in the logs for latest version {0} from current version {1}'.format(args.latest_version, args.current_version)) + 
+ +if __name__ == "__main__": + main() diff --git a/tests_e2e/tests/scripts/agent_update-self_update_latest_version.py b/tests_e2e/tests/scripts/agent_update-self_update_latest_version.py new file mode 100755 index 0000000000..4be0f0dc3d --- /dev/null +++ b/tests_e2e/tests/scripts/agent_update-self_update_latest_version.py @@ -0,0 +1,69 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# returns the agent latest version published +# + +from azurelinuxagent.common.protocol.goal_state import GoalStateProperties +from azurelinuxagent.common.protocol.util import get_protocol_util +from azurelinuxagent.common.utils.flexible_version import FlexibleVersion +from tests_e2e.tests.lib.retry import retry + + +def get_agent_family_manifest(goal_state): + """ + Get the agent_family from last GS for Test Family + """ + agent_families = goal_state.extensions_goal_state.agent_families + agent_family_manifests = [] + for m in agent_families: + if m.name == 'Test': + if len(m.uris) > 0: + agent_family_manifests.append(m) + return agent_family_manifests[0] + + +def get_largest_version(agent_manifest): + """ + Get the largest version from the agent manifest + """ + largest_version = FlexibleVersion("0.0.0.0") + for pkg in agent_manifest.pkg_list.versions: + pkg_version = FlexibleVersion(pkg.version) + if pkg_version > largest_version: + largest_version = pkg_version + return 
largest_version + + +def main(): + + try: + protocol = get_protocol_util().get_protocol(init_goal_state=False) + retry(lambda: protocol.client.reset_goal_state( + goal_state_properties=GoalStateProperties.ExtensionsGoalState)) + goal_state = protocol.client.get_goal_state() + agent_family = get_agent_family_manifest(goal_state) + agent_manifest = goal_state.fetch_agent_manifest(agent_family.name, agent_family.uris) + largest_version = get_largest_version(agent_manifest) + print(str(largest_version)) + except Exception as e: + raise Exception("Unable to verify agent updated to latest version since test failed to get the which is the latest version from the agent manifest: {0}".format(e)) + + +if __name__ == "__main__": + main() diff --git a/tests_e2e/tests/scripts/agent_update-self_update_test_setup b/tests_e2e/tests/scripts/agent_update-self_update_test_setup new file mode 100755 index 0000000000..bc5a1d9155 --- /dev/null +++ b/tests_e2e/tests/scripts/agent_update-self_update_test_setup @@ -0,0 +1,66 @@ +#!/usr/bin/env bash + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# This script prepares the new agent and install it on the vm +# + +set -euo pipefail + +usage() ( + echo "Usage: agent_update-self_update_test_setup -p|--package -v|--version " + exit 1 +) + +while [[ $# -gt 0 ]]; do + case $1 in + -p|--package) + shift + if [ "$#" -lt 1 ]; then + usage + fi + package=$1 + shift + ;; + -v|--version) + shift + if [ "$#" -lt 1 ]; then + usage + fi + version=$1 + shift + ;; + *) + usage + esac +done +if [ "$#" -ne 0 ] || [ -z ${package+x} ] || [ -z ${version+x} ]; then + usage +fi + +echo "updating the related to self-update flags" +update-waagent-conf Debug.EnableGAVersioning=n Debug.SelfUpdateHotfixFrequency=120 Debug.SelfUpdateRegularFrequency=120 Autoupdate.Frequency=120 +agent-service stop +mv /var/log/waagent.log /var/log/waagent.$(date --iso-8601=seconds).log + +echo "Cleaning up the existing agents" +rm -rf /var/lib/waagent/WALinuxAgent-* + +echo "Installing $package as version $version..." +unzip.py $package /var/lib/waagent/WALinuxAgent-$version +agent-service restart + From c450fd8834283878d24ef3d82f952c7f3adb0b88 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Tue, 2 Jan 2024 13:12:04 -0800 Subject: [PATCH 119/240] Lisa should not cleanup failed environment if keep_environment=failed (#3006) * Throw exception for test suite if a test failure occurs * Remove unused import * Clean up * Add comment --- .../orchestrator/lib/agent_test_suite.py | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index bed6210763..271fbebacb 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -105,6 +105,14 @@ class KeepEnvironment(object): No = 'no' # Always delete resources created by the test suite +class TestFailedException(Exception): + def __init__(self, env_name: str, test_cases: List[str]): + msg 
= "Test suite {0} failed.".format(env_name) + if test_cases: + msg += " Failed tests: " + ','.join(test_cases) + super().__init__(msg) + + class _TestNode(object): """ Name and IP address of a test VM @@ -539,6 +547,7 @@ def _execute(self) -> None: log_path: Path = self._log_path / f"env-{self._environment_name}.log" with set_current_thread_log(log_path): start_time: datetime.datetime = datetime.datetime.now() + failed_cases = [] try: # Log the environment's name and the variables received from the runbook (note that we need to expand the names of the test suites) @@ -563,7 +572,10 @@ def _execute(self) -> None: for suite in self._test_suites: log.info("Executing test suite %s", suite.name) self._lisa_log.info("Executing Test Suite %s", suite.name) - test_suite_success = self._execute_test_suite(suite, test_context) and test_suite_success + case_success = self._execute_test_suite(suite, test_context) + test_suite_success = case_success and test_suite_success + if not case_success: + failed_cases.append(suite.name) finally: if self._collect_logs == CollectLogs.Always or self._collect_logs == CollectLogs.Failed and not test_suite_success: @@ -588,6 +600,13 @@ def _execute(self) -> None: if unexpected_error: self._mark_log_as_failed() + # Check if any test failures or unexpected errors occurred. If so, raise an Exception here so that + # lisa marks the environment as failed. Otherwise, lisa would mark this environment as passed and + # clean up regardless of the value of 'keep_environment'. This should be the last thing that + # happens during suite execution. + if not test_suite_success or unexpected_error: + raise TestFailedException(self._environment_name, failed_cases) + def _execute_test_suite(self, suite: TestSuiteInfo, test_context: AgentTestContext) -> bool: """ Executes the given test suite and returns True if all the tests in the suite succeeded. 
From 2a698557062ce9c0f000ead2d5407bb7874abb54 Mon Sep 17 00:00:00 2001 From: d1r3ct0r Date: Thu, 4 Jan 2024 21:25:37 +0300 Subject: [PATCH 120/240] fix(ubuntu): Point to correct dhcp lease files (#2979) From Ubuntu 18.04, the default dhcp client was systemd-networkd. However, WALA has been checking for the dhclient lease files. This PR seeks to correct this bug.Interestingly, it was already configuring systemd-networkd but checking for dhclient lease files. Co-authored-by: Norberto Arrieta --- azurelinuxagent/common/osutil/ubuntu.py | 26 ++++++++++++++++++++ azurelinuxagent/ga/logcollector_manifests.py | 1 + azurelinuxagent/pa/deprovision/default.py | 4 +++ tests/common/osutil/test_default.py | 16 +++++++++++- tests/data/2 | 14 +++++++++++ 5 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 tests/data/2 diff --git a/azurelinuxagent/common/osutil/ubuntu.py b/azurelinuxagent/common/osutil/ubuntu.py index 2b82d8ca31..2959464d04 100644 --- a/azurelinuxagent/common/osutil/ubuntu.py +++ b/azurelinuxagent/common/osutil/ubuntu.py @@ -16,6 +16,8 @@ # Requires Python 2.6+ and Openssl 1.0+ # +import glob +import textwrap import time import azurelinuxagent.common.logger as logger @@ -132,6 +134,30 @@ def start_agent_service(self): def stop_agent_service(self): return shellutil.run("systemctl stop {0}".format(self.service_name), chk_err=False) + def get_dhcp_lease_endpoint(self): + pathglob = "/run/systemd/netif/leases/*" + logger.info("looking for leases in path [{0}]".format(pathglob)) + endpoint = None + for lease_file in glob.glob(pathglob): + try: + with open(lease_file) as f: + lease = f.read() + for line in lease.splitlines(): + if line.startswith("OPTION_245"): + option_245 = line.split("=")[1] + options = [int(i, 16) for i in textwrap.wrap(option_245, 2)] + endpoint = "{0}.{1}.{2}.{3}".format(*options) + logger.info("found endpoint [{0}]".format(endpoint)) + except Exception as e: + logger.info( + "Failed to parse {0}: {1}".format(lease_file, str(e)) + 
) + if endpoint is not None: + logger.info("cached endpoint found [{0}]".format(endpoint)) + else: + logger.info("cached endpoint not found") + return endpoint + class UbuntuOSUtil(Ubuntu16OSUtil): def __init__(self): # pylint: disable=W0235 diff --git a/azurelinuxagent/ga/logcollector_manifests.py b/azurelinuxagent/ga/logcollector_manifests.py index b22316be95..fc240d30ca 100644 --- a/azurelinuxagent/ga/logcollector_manifests.py +++ b/azurelinuxagent/ga/logcollector_manifests.py @@ -83,6 +83,7 @@ copy,/var/lib/dhcp/dhclient.eth0.leases copy,/var/lib/dhclient/dhclient-eth0.leases copy,/var/lib/wicked/lease-eth0-dhcp-ipv4.xml +copy,/run/systemd/netif/leases/2 echo, echo,### Gathering Log Files ### diff --git a/azurelinuxagent/pa/deprovision/default.py b/azurelinuxagent/pa/deprovision/default.py index 286858f5a1..edf736811b 100644 --- a/azurelinuxagent/pa/deprovision/default.py +++ b/azurelinuxagent/pa/deprovision/default.py @@ -131,6 +131,10 @@ def del_dhcp_lease(self, warnings, actions): actions.append(DeprovisionAction(fileutil.rm_files, ["/var/lib/NetworkManager/dhclient-*.lease"])) + # For Ubuntu >= 18.04, using systemd-networkd + actions.append(DeprovisionAction(fileutil.rm_files, + ["/run/systemd/netif/leases/*"])) + def del_ext_handler_files(self, warnings, actions): # pylint: disable=W0613 ext_dirs = [d for d in os.listdir(conf.get_lib_dir()) if os.path.isdir(os.path.join(conf.get_lib_dir(), d)) diff --git a/tests/common/osutil/test_default.py b/tests/common/osutil/test_default.py index 794cd449ea..68bd282d70 100644 --- a/tests/common/osutil/test_default.py +++ b/tests/common/osutil/test_default.py @@ -298,7 +298,7 @@ def test_no_primary_does_not_throw(self): def test_dhcp_lease_default(self): self.assertTrue(osutil.DefaultOSUtil().get_dhcp_lease_endpoint() is None) - def test_dhcp_lease_ubuntu(self): + def test_dhcp_lease_older_ubuntu(self): with patch.object(glob, "glob", return_value=['/var/lib/dhcp/dhclient.eth0.leases']): with patch(open_patch(), 
mock.mock_open(read_data=load_data("dhcp.leases"))): endpoint = get_osutil(distro_name='ubuntu', distro_version='12.04').get_dhcp_lease_endpoint() # pylint: disable=assignment-from-none @@ -313,6 +313,20 @@ def test_dhcp_lease_ubuntu(self): self.assertTrue(endpoint is not None) self.assertEqual(endpoint, "168.63.129.16") + endpoint = get_osutil(distro_name='ubuntu', distro_version='18.04').get_dhcp_lease_endpoint() # pylint: disable=assignment-from-none + self.assertTrue(endpoint is None) + + def test_dhcp_lease_newer_ubuntu(self): + with patch.object(glob, "glob", return_value=['/run/systemd/netif/leases/2']): + with patch(open_patch(), mock.mock_open(read_data=load_data("2"))): + endpoint = get_osutil(distro_name='ubuntu', distro_version='18.04').get_dhcp_lease_endpoint() # pylint: disable=assignment-from-none + self.assertTrue(endpoint is not None) + self.assertEqual(endpoint, "168.63.129.16") + + endpoint = get_osutil(distro_name='ubuntu', distro_version='20.04').get_dhcp_lease_endpoint() # pylint: disable=assignment-from-none + self.assertTrue(endpoint is not None) + self.assertEqual(endpoint, "168.63.129.16") + def test_dhcp_lease_custom_dns(self): """ Validate that the wireserver address is coming from option 245 diff --git a/tests/data/2 b/tests/data/2 new file mode 100644 index 0000000000..38d8196691 --- /dev/null +++ b/tests/data/2 @@ -0,0 +1,14 @@ +# This is private data. Do not parse. 
+ADDRESS=10.0.0.69 +NETMASK=255.255.255.0 +ROUTER=10.0.0.1 +SERVER_ADDRESS=168.63.129.16 +NEXT_SERVER=168.63.129.16 +T1=4294967295 +T2=4294967295 +LIFETIME=4294967295 +DNS=168.63.129.16 +DOMAINNAME=2rdlxelcdvjkok2emfc.bx.internal.cloudapp.net +ROUTES=0.0.0.0/0,10.0.0.1 168.63.129.16/32,10.0.0.1 169.254.169.254/32,10.0.0.1 +CLIENTID=ff0406a3a3000201120dc9092eccd2344 +OPTION_245=a83f8110 From 860fbb441c302439f21faf6933580d5cce1e3e87 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Fri, 5 Jan 2024 09:49:58 -0800 Subject: [PATCH 121/240] Use self-hosted pool for automation runs (#3007) Co-authored-by: narrieta --- .../orchestrator/lib/agent_test_suite.py | 13 ++- .../lib/agent_test_suite_combinator.py | 7 +- .../lib/update_arm_template_hook.py | 15 ++-- tests_e2e/orchestrator/runbook.yml | 8 ++ tests_e2e/pipeline/pipeline.yml | 4 +- tests_e2e/pipeline/scripts/execute_tests.sh | 21 ++++- tests_e2e/pipeline/scripts/setup-agent.sh | 50 +++++++++++ ...rity_group.py => network_security_rule.py} | 90 +++++++++---------- tests_e2e/tests/lib/update_arm_template.py | 12 ++- .../deny_outbound_connections.py | 46 ++++------ 10 files changed, 169 insertions(+), 97 deletions(-) create mode 100755 tests_e2e/pipeline/scripts/setup-agent.sh rename tests_e2e/tests/lib/{add_network_security_group.py => network_security_rule.py} (77%) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index 271fbebacb..7b2becd593 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -47,17 +47,16 @@ import makepkg from azurelinuxagent.common.version import AGENT_VERSION -from tests_e2e.tests.lib.add_network_security_group import AddNetworkSecurityGroup from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient from tests_e2e.tests.lib.virtual_machine_scale_set_client import VirtualMachineScaleSetClient import tests_e2e from 
tests_e2e.orchestrator.lib.agent_test_loader import TestSuiteInfo -from tests_e2e.tests.lib.agent_log import AgentLog +from tests_e2e.tests.lib.agent_log import AgentLog, AgentLogRecord from tests_e2e.tests.lib.agent_test import TestSkipped, RemoteTestError from tests_e2e.tests.lib.agent_test_context import AgentTestContext, AgentVmTestContext, AgentVmssTestContext from tests_e2e.tests.lib.logging import log, set_thread_name, set_current_thread_log -from tests_e2e.tests.lib.agent_log import AgentLogRecord +from tests_e2e.tests.lib.network_security_rule import NetworkSecurityRule from tests_e2e.tests.lib.resource_group_client import ResourceGroupClient from tests_e2e.tests.lib.shell import run_command, CommandError from tests_e2e.tests.lib.ssh_client import SshClient @@ -161,6 +160,9 @@ def __init__(self, metadata: TestSuiteMetadata) -> None: self._user: str self._identity_file: str + # If not empty, adds a Network Security Rule allowing SSH access from the specified IP address to any test VMs created by the test suite. 
+ self._allow_ssh: str + self._skip_setup: bool # If True, skip the setup of the test VMs self._collect_logs: str # Whether to collect logs from the test VMs (one of 'always', 'failed', or 'no') self._keep_environment: str # Whether to skip deletion of the resources created by the test suite (one of 'always', 'failed', or 'no') @@ -218,6 +220,8 @@ def _initialize(self, environment: Environment, variables: Dict[str, Any], lisa_ self._user = variables["user"] self._identity_file = variables["identity_file"] + self._allow_ssh = variables["allow_ssh"] + self._skip_setup = variables["skip_setup"] self._keep_environment = variables["keep_environment"] self._collect_logs = variables["collect_logs"] @@ -923,7 +927,8 @@ def read_file(path: str) -> str: "publisher": "[parameters('publisher')]" } - AddNetworkSecurityGroup().update(template, is_lisa_template=False) + if self._allow_ssh != '': + NetworkSecurityRule(template, is_lisa_template=False).add_allow_ssh_rule(self._allow_ssh) return template, { "username": {"value": self._user}, diff --git a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py index ad25151b5f..fbe53a1bdc 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py @@ -37,6 +37,7 @@ class AgentTestSuitesCombinatorSchema(schema.Combinator): The runbook is a static document and always passes all these parameters to the combinator, so they are all marked as required. Optional parameters can pass an empty value to indicate that they are not specified. 
""" + allow_ssh: str = field(default_factory=str, metadata=field_metadata(required=True)) cloud: str = field(default_factory=str, metadata=field_metadata(required=True)) identity_file: str = field(default_factory=str, metadata=field_metadata(required=True)) image: str = field(default_factory=str, metadata=field_metadata(required=True)) @@ -332,14 +333,16 @@ def create_existing_vmss_environment(self) -> Dict[str, Any]: def create_vm_environment(self, env_name: str, marketplace_image: str, vhd: str, location: str, vm_size: str, test_suite_info: TestSuiteInfo) -> Dict[str, Any]: # # Custom ARM templates (to create the test VMs) require special handling. These templates are processed by the azure_update_arm_template - # hook, which does not have access to the runbook variables. Instead, we use a dummy VM tag named "template" and pass the + # hook, which does not have access to the runbook variables. Instead, we use a dummy VM tag named "templates" and pass the # names of the custom templates in its value. The hook can then retrieve the value from the Platform object (see wiki for more details). # We also use a dummy item, "vm_tags" in the environment dictionary in order to concatenate templates from multiple test suites when they - # share the same test environment. + # share the same test environment. Similarly, we use a dummy VM tag named "allow_ssh" to pass the value of the "allow_ssh" runbook parameter. 
# vm_tags = {} if test_suite_info.template != '': vm_tags["templates"] = test_suite_info.template + if self.runbook.allow_ssh != '': + vm_tags["allow_ssh"] = self.runbook.allow_ssh return { "c_platform": [ { diff --git a/tests_e2e/orchestrator/lib/update_arm_template_hook.py b/tests_e2e/orchestrator/lib/update_arm_template_hook.py index fee943de14..801583ff7b 100644 --- a/tests_e2e/orchestrator/lib/update_arm_template_hook.py +++ b/tests_e2e/orchestrator/lib/update_arm_template_hook.py @@ -30,7 +30,7 @@ # pylint: enable=E0401 import tests_e2e -from tests_e2e.tests.lib.add_network_security_group import AddNetworkSecurityGroup +from tests_e2e.tests.lib.network_security_rule import NetworkSecurityRule from tests_e2e.tests.lib.update_arm_template import UpdateArmTemplate @@ -42,17 +42,20 @@ class UpdateArmTemplateHook: def azure_update_arm_template(self, template: Any, environment: Environment) -> None: log: logging.Logger = logging.getLogger("lisa") + azure_runbook: AzurePlatformSchema = environment.platform.runbook.get_extended_runbook(AzurePlatformSchema) + vm_tags = azure_runbook.vm_tags + # - # Add the network security group for the test VM. This group includes a rule allowing SSH access from the current machine. + # Add the allow SSH security rule if requested by the runbook # - log.info("******** Waagent: Adding network security rule to the ARM template") - AddNetworkSecurityGroup().update(template, is_lisa_template=True) + allow_ssh: str = vm_tags.get("allow_ssh") + if allow_ssh is not None: + log.info("******** Waagent: Adding network security rule to allow SSH connections from %s", allow_ssh) + NetworkSecurityRule(template, is_lisa_template=True).add_allow_ssh_rule(allow_ssh) # # Apply any template customizations provided by the tests. 
# - azure_runbook: AzurePlatformSchema = environment.platform.runbook.get_extended_runbook(AzurePlatformSchema) - vm_tags = azure_runbook.vm_tags # The "templates" tag is a comma-separated list of the template customizations provided by the tests test_templates = vm_tags.get("templates") if test_templates is not None: diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index 9f3007c723..9181e9189c 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -102,6 +102,13 @@ variable: value: false is_case_visible: true + # + # Takes an IP address as value; if not empty, it adds a Network Security Rule allowing SSH access from the specified IP address to any test VMs created by the runbook execution. + # + - name: allow_ssh + value: "" + is_case_visible: true + # # These variables are handled by LISA to use an SSH proxy when executing the runbook # @@ -177,6 +184,7 @@ platform: $(c_platform) combinator: type: agent_test_suites + allow_ssh: $(allow_ssh) cloud: $(cloud) identity_file: $(identity_file) image: $(image) diff --git a/tests_e2e/pipeline/pipeline.yml b/tests_e2e/pipeline/pipeline.yml index 9dc1062751..4ea7565276 100644 --- a/tests_e2e/pipeline/pipeline.yml +++ b/tests_e2e/pipeline/pipeline.yml @@ -46,7 +46,7 @@ parameters: - name: collect_lisa_logs displayName: Collect LISA logs type: boolean - default: true + default: false - name: keep_environment displayName: Keep the test VMs (do not delete them) @@ -58,7 +58,7 @@ parameters: - no pool: - vmImage: ubuntu-latest + name: waagent-pool jobs: - job: "ExecuteTests" diff --git a/tests_e2e/pipeline/scripts/execute_tests.sh b/tests_e2e/pipeline/scripts/execute_tests.sh index a822e2dd94..9c185b333c 100755 --- a/tests_e2e/pipeline/scripts/execute_tests.sh +++ b/tests_e2e/pipeline/scripts/execute_tests.sh @@ -2,6 +2,9 @@ set -euxo pipefail +echo "Hostname: $(hostname)" +echo "\$USER: $USER" + # # UID of 'waagent' in the Docker container # @@ -10,7 
+13,7 @@ WAAGENT_UID=1000 # # Set the correct mode and owner for the private SSH key and generate the public key. # -cd "$HOME" +cd "$AGENT_TEMPDIRECTORY" mkdir ssh cp "$DOWNLOADSSHKEY_SECUREFILEPATH" ssh chmod 700 ssh/id_rsa @@ -26,11 +29,17 @@ chmod a+w "$BUILD_SOURCESDIRECTORY" # # Create the directory where the Docker container will create the test logs and give ownership to 'waagent' # -LOGS_DIRECTORY="$HOME/logs" +LOGS_DIRECTORY="$AGENT_TEMPDIRECTORY/logs" echo "##vso[task.setvariable variable=logs_directory]$LOGS_DIRECTORY" mkdir "$LOGS_DIRECTORY" sudo chown "$WAAGENT_UID" "$LOGS_DIRECTORY" +# +# Give the current user access to the Docker daemon +# +sudo usermod -aG docker $USER +newgrp docker < /dev/null + # # Pull the container image used to execute the tests # @@ -55,9 +64,14 @@ if [[ $VM_SIZE == "-" ]]; then VM_SIZE="" fi +# +# Get the external IP address of the VM. +# +IP_ADDRESS=$(curl -4 ifconfig.io/ip) + docker run --rm \ --volume "$BUILD_SOURCESDIRECTORY:/home/waagent/WALinuxAgent" \ - --volume "$HOME"/ssh:/home/waagent/.ssh \ + --volume "$AGENT_TEMPDIRECTORY"/ssh:/home/waagent/.ssh \ --volume "$LOGS_DIRECTORY":/home/waagent/logs \ --env AZURE_CLIENT_ID \ --env AZURE_CLIENT_SECRET \ @@ -77,4 +91,5 @@ docker run --rm \ -v image:\"$IMAGE\" \ -v location:\"$LOCATION\" \ -v vm_size:\"$VM_SIZE\" \ + -v allow_ssh:\"$IP_ADDRESS\" \ $TEST_SUITES" diff --git a/tests_e2e/pipeline/scripts/setup-agent.sh b/tests_e2e/pipeline/scripts/setup-agent.sh new file mode 100755 index 0000000000..5990ada0ac --- /dev/null +++ b/tests_e2e/pipeline/scripts/setup-agent.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# Script to setup the agent VM for the Azure Pipelines agent pool; it simply installs the Azure CLI and the Docker Engine. +# + +set -euox pipefail + +# Add delay per Azure Pipelines documentation +sleep 30 + +# Install Azure CLI +curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash + +# Add Docker's official GPG key: +sudo apt-get update +sudo apt-get install ca-certificates curl gnupg +sudo install -m 0755 -d /etc/apt/keyrings +curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg +sudo chmod a+r /etc/apt/keyrings/docker.gpg + +# Add the repository to Apt sources: +echo \ +"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \ +$(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \ +sudo tee /etc/apt/sources.list.d/docker.list > /dev/null +sudo apt-get update + +# Install Docker Engine +sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin + +# Verify that Docker Engine is installed correctly by running the hello-world image. 
+sudo docker run hello-world diff --git a/tests_e2e/tests/lib/add_network_security_group.py b/tests_e2e/tests/lib/network_security_rule.py similarity index 77% rename from tests_e2e/tests/lib/add_network_security_group.py rename to tests_e2e/tests/lib/network_security_rule.py index 4d46cf7ca5..8df51b2048 100644 --- a/tests_e2e/tests/lib/add_network_security_group.py +++ b/tests_e2e/tests/lib/network_security_rule.py @@ -19,59 +19,71 @@ from typing import Any, Dict, List -from azurelinuxagent.common.utils import shellutil -from tests_e2e.tests.lib.logging import log -from tests_e2e.tests.lib.retry import retry from tests_e2e.tests.lib.update_arm_template import UpdateArmTemplate -# Name of the security group added by this class -NETWORK_SECURITY_GROUP: str = "waagent-nsg" - -class AddNetworkSecurityGroup(UpdateArmTemplate): +class NetworkSecurityRule: """ - Updates the ARM template to add a network security group allowing SSH access from the current machine. + Provides methods to add network security rules to the given ARM template. + + The security rules are added under _NETWORK_SECURITY_GROUP, which is also added to the template. 
""" - def update(self, template: Dict[str, Any], is_lisa_template: bool) -> None: - resources: List[Dict[str, Any]] = template["resources"] + def __init__(self, template: Dict[str, Any], is_lisa_template: bool): + self._template = template + self._is_lisa_template = is_lisa_template - # Append the NSG to the list of resources - network_security_group = json.loads(f"""{{ - "type": "Microsoft.Network/networkSecurityGroups", - "name": "{NETWORK_SECURITY_GROUP}", - "location": "[resourceGroup().location]", - "apiVersion": "2020-05-01", - "properties": {{ - "securityRules": [] - }} - }}""") - resources.append(network_security_group) + _NETWORK_SECURITY_GROUP: str = "waagent-nsg" - # Add the SSH rule, but if anything fails just go ahead without it - try: - network_security_group["properties"]["securityRules"].append(json.loads(f"""{{ + def add_allow_ssh_rule(self, ip_address: str) -> None: + self.add_security_rule( + json.loads(f"""{{ "name": "waagent-ssh", "properties": {{ "description": "Allows inbound SSH connections from the orchestrator machine.", "protocol": "Tcp", "sourcePortRange": "*", "destinationPortRange": "22", - "sourceAddressPrefix": "{self._my_ip_address}", + "sourceAddressPrefix": "{ip_address}", "destinationAddressPrefix": "*", "access": "Allow", "priority": 100, "direction": "Inbound" }} }}""")) - except Exception as e: - log.warning("******** Waagent: Failed to create Allow security rule for SSH, skipping rule: %s", e) + + def add_security_rule(self, security_rule: Dict[str, Any]) -> None: + self._get_network_security_group()["properties"]["securityRules"].append(security_rule) + + def _get_network_security_group(self) -> Dict[str, Any]: + resources: List[Dict[str, Any]] = self._template["resources"] + # + # If the NSG already exists, just return it + # + try: + return UpdateArmTemplate.get_resource_by_name(resources, self._NETWORK_SECURITY_GROUP, "Microsoft.Network/networkSecurityGroups") + except KeyError: + pass + + # + # Otherwise, create it and 
append it to the list of resources + # + network_security_group = json.loads(f"""{{ + "type": "Microsoft.Network/networkSecurityGroups", + "name": "{self._NETWORK_SECURITY_GROUP}", + "location": "[resourceGroup().location]", + "apiVersion": "2020-05-01", + "properties": {{ + "securityRules": [] + }} + }}""") + resources.append(network_security_group) # # Add a dependency on the NSG to the virtual network # - network_resource = self._get_resource(resources, "Microsoft.Network/virtualNetworks") + network_resource = UpdateArmTemplate.get_resource(resources, "Microsoft.Network/virtualNetworks") network_resource_dependencies = network_resource.get("dependsOn") - nsg_reference = f"[resourceId('Microsoft.Network/networkSecurityGroups', '{NETWORK_SECURITY_GROUP}')]" + nsg_reference = f"[resourceId('Microsoft.Network/networkSecurityGroups', '{self._NETWORK_SECURITY_GROUP}')]" if network_resource_dependencies is None: network_resource["dependsOn"] = [nsg_reference] else: @@ -82,11 +94,11 @@ def update(self, template: Dict[str, Any], is_lisa_template: bool) -> None: # nsg_reference = json.loads(f"""{{ "networkSecurityGroup": {{ - "id": "[resourceId('Microsoft.Network/networkSecurityGroups', '{NETWORK_SECURITY_GROUP}')]" + "id": "[resourceId('Microsoft.Network/networkSecurityGroups', '{self._NETWORK_SECURITY_GROUP}')]" }} }}""") - if is_lisa_template: + if self._is_lisa_template: # The subnets are a copy property of the virtual network in LISA's ARM template: # # { @@ -167,18 +179,4 @@ def update(self, template: Dict[str, Any], is_lisa_template: bool) -> None: else: subnets_properties.update(nsg_reference) - @property - def _my_ip_address(self) -> str: - """ - Gets the IP address of the current machine. 
- """ - if self.__my_ip_address is None: - def get_my_address(): - # Forcing -4 option to fetch the ipv4 address - cmd = ["curl", "-4", "ifconfig.io/ip"] - stdout = shellutil.run_command(cmd) - return stdout.strip() - self.__my_ip_address = retry(get_my_address, attempts=3, delay=10) - return self.__my_ip_address - - __my_ip_address: str = None + return network_security_group diff --git a/tests_e2e/tests/lib/update_arm_template.py b/tests_e2e/tests/lib/update_arm_template.py index c50f7b74c7..af69fba048 100644 --- a/tests_e2e/tests/lib/update_arm_template.py +++ b/tests_e2e/tests/lib/update_arm_template.py @@ -32,23 +32,27 @@ def update(self, template: Dict[str, Any], is_lisa_template: bool) -> None: """ @staticmethod - def _get_resource(resources: List[Dict[str, Any]], type_name: str) -> Any: + def get_resource(resources: List[Dict[str, Any]], type_name: str) -> Any: """ Returns the first resource of the specified type in the given 'resources' list. + + Raises KeyError if no resource of the specified type is found. """ for item in resources: if item["type"] == type_name: return item - raise Exception(f"Cannot find a resource of type {type_name} in the ARM template") + raise KeyError(f"Cannot find a resource of type {type_name} in the ARM template") @staticmethod - def _get_resource_by_name(resources: List[Dict[str, Any]], resource_name: str, type_name: str) -> Any: + def get_resource_by_name(resources: List[Dict[str, Any]], resource_name: str, type_name: str) -> Any: """ Returns the first resource of the specified type and name in the given 'resources' list. + + Raises KeyError if no resource of the specified type and name is found. 
""" for item in resources: if item["type"] == type_name and item["name"] == resource_name: return item - raise Exception(f"Cannot find a resource {resource_name} of type {type_name} in the ARM template") + raise KeyError(f"Cannot find a resource {resource_name} of type {type_name} in the ARM template") diff --git a/tests_e2e/tests/no_outbound_connections/deny_outbound_connections.py b/tests_e2e/tests/no_outbound_connections/deny_outbound_connections.py index 838082d345..b7cc878866 100755 --- a/tests_e2e/tests/no_outbound_connections/deny_outbound_connections.py +++ b/tests_e2e/tests/no_outbound_connections/deny_outbound_connections.py @@ -21,7 +21,7 @@ from typing import Any, Dict -from tests_e2e.tests.lib.add_network_security_group import NETWORK_SECURITY_GROUP +from tests_e2e.tests.lib.network_security_rule import NetworkSecurityRule from tests_e2e.tests.lib.update_arm_template import UpdateArmTemplate @@ -30,32 +30,18 @@ class DenyOutboundConnections(UpdateArmTemplate): Updates the ARM template to add a security rule that denies all outbound connections. 
""" def update(self, template: Dict[str, Any], is_lisa_template: bool) -> None: - if not is_lisa_template: - raise Exception('This test can only customize LISA ARM templates.') - - resources = template["resources"] - nsg = self._get_resource_by_name(resources, NETWORK_SECURITY_GROUP, "Microsoft.Network/networkSecurityGroups") - properties = nsg.get("properties") - - if properties is None: - raise Exception("Cannot find the properties of the Network Security Group in the ARM template") - - security_rules = properties.get("securityRules") - if security_rules is None: - raise Exception("Cannot find the security rules of the Network Security Group in the ARM template") - - security_rules.append(json.loads("""{ - "name": "waagent-no-outbound", - "properties": { - "description": "Denies all outbound connections.", - "protocol": "*", - "sourcePortRange": "*", - "destinationPortRange": "*", - "sourceAddressPrefix": "*", - "destinationAddressPrefix": "Internet", - "access": "Deny", - "priority": 200, - "direction": "Outbound" - } - }""")) - + NetworkSecurityRule(template, is_lisa_template).add_security_rule( + json.loads("""{ + "name": "waagent-no-outbound", + "properties": { + "description": "Denies all outbound connections.", + "protocol": "*", + "sourcePortRange": "*", + "destinationPortRange": "*", + "sourceAddressPrefix": "*", + "destinationAddressPrefix": "Internet", + "access": "Deny", + "priority": 200, + "direction": "Outbound" + } + }""")) From ddfa35f4c576f8e9482412c67f9ca2944ac04899 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Fri, 5 Jan 2024 14:38:28 -0800 Subject: [PATCH 122/240] Add distros which use Python 2.6 (for reference only) (#3009) Co-authored-by: narrieta --- tests_e2e/test_suites/images.yml | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/tests_e2e/test_suites/images.yml b/tests_e2e/test_suites/images.yml index 02392b375b..a04de7449b 100644 --- a/tests_e2e/test_suites/images.yml +++ 
b/tests_e2e/test_suites/images.yml @@ -5,9 +5,8 @@ image-sets: # Endorsed distros that are tested on the daily runs endorsed: # -# TODO: Add CentOS 6.10 and Debian 8 +# TODO: Add Debian 8 # -# - "centos_610" # - "debian_8" # - "alma_9" @@ -46,6 +45,12 @@ image-sets: - "ubuntu_1804" - "ubuntu_2004" + # These distros use Python 2.6. Currently they are not tested on the daily runs; this image set is here just for reference. + python-26: + - "centos_610" + - "oracle_610" + - "rhel_610" + # # An image can be specified by a string giving its urn, as in # @@ -115,13 +120,8 @@ images: locations: AzureChinaCloud: [] AzureUSGovernment: [] - rocky_9: - urn: "erockyenterprisesoftwarefoundationinc1653071250513 rockylinux-9 rockylinux-9 latest" - locations: - AzureChinaCloud: [] - AzureUSGovernment: [] - suse_12: "SUSE sles-12-sp5-basic gen1 latest" - suse_15: "SUSE sles-15-sp2-basic gen2 latest" + oracle_610: "Oracle Oracle-Linux 6.10 latest" + rhel_610: "RedHat RHEL 6.10 latest" rhel_79: urn: "RedHat RHEL 7_9 latest" locations: @@ -142,6 +142,13 @@ images: locations: AzureChinaCloud: [] AzureUSGovernment: [] + rocky_9: + urn: "erockyenterprisesoftwarefoundationinc1653071250513 rockylinux-9 rockylinux-9 latest" + locations: + AzureChinaCloud: [] + AzureUSGovernment: [] + suse_12: "SUSE sles-12-sp5-basic gen1 latest" + suse_15: "SUSE sles-15-sp2-basic gen2 latest" ubuntu_1604: "Canonical UbuntuServer 16.04-LTS latest" ubuntu_1804: "Canonical UbuntuServer 18.04-LTS latest" ubuntu_2004: "Canonical 0001-com-ubuntu-server-focal 20_04-lts latest" From 3e8d18048d951699acdc3469653d1301349c5925 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Fri, 5 Jan 2024 14:44:40 -0800 Subject: [PATCH 123/240] Move cleanup pipeline to self-hosted pool (#3010) Co-authored-by: narrieta --- tests_e2e/pipeline/pipeline-cleanup.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_e2e/pipeline/pipeline-cleanup.yml b/tests_e2e/pipeline/pipeline-cleanup.yml index 
c679d29724..c800c1c6ab 100644 --- a/tests_e2e/pipeline/pipeline-cleanup.yml +++ b/tests_e2e/pipeline/pipeline-cleanup.yml @@ -23,7 +23,7 @@ parameters: - azuremanagement.government pool: - vmImage: ubuntu-latest + name: waagent-pool steps: - ${{ each service_connection in parameters.service_connections }}: From 5b9219ef484adc79f51108193f6c2662548a27c4 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Tue, 9 Jan 2024 10:35:13 -0800 Subject: [PATCH 124/240] NM should not be restarted during hostname publish if NM_CONTROLLED=y (#3008) * Only restart NM if NM_controlled=n * Clean up code * Clean up code * improve logging * Make check on NM_CONTROLLED value sctrict --- azurelinuxagent/common/osutil/redhat.py | 32 +++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/azurelinuxagent/common/osutil/redhat.py b/azurelinuxagent/common/osutil/redhat.py index 312dd16084..05a4b659df 100644 --- a/azurelinuxagent/common/osutil/redhat.py +++ b/azurelinuxagent/common/osutil/redhat.py @@ -117,11 +117,39 @@ def set_hostname(self, hostname): logger.warn("[{0}] failed, attempting fallback".format(' '.join(hostnamectl_cmd))) DefaultOSUtil.set_hostname(self, hostname) + def get_nm_controlled(self): + ifname = self.get_if_name() + filepath = "/etc/sysconfig/network-scripts/ifcfg-{0}".format(ifname) + nm_controlled_cmd = ['grep', 'NM_CONTROLLED=', filepath] + try: + result = shellutil.run_command(nm_controlled_cmd, log_error=False, encode_output=False).rstrip() + + if result and len(result.split('=')) > 1: + # Remove trailing white space and ' or " characters + value = result.split('=')[1].replace("'", '').replace('"', '').rstrip() + if value == "n" or value == "no": + return False + except shellutil.CommandError as e: + # Command might fail because NM_CONTROLLED value is not in interface config file (exit code 1). + # Log warning for any other exit code. + # NM_CONTROLLED=y by default if not specified. 
+ if e.returncode != 1: + logger.warn("[{0}] failed: {1}.\nAgent will continue to publish hostname without NetworkManager restart".format(' '.join(nm_controlled_cmd), e)) + except Exception as e: + logger.warn("Unexpected error while retrieving value of NM_CONTROLLED in {0}: {1}.\nAgent will continue to publish hostname without NetworkManager restart".format(filepath, e)) + + return True + def publish_hostname(self, hostname): """ - Restart NetworkManager first before publishing hostname + Restart NetworkManager first before publishing hostname, only if the network interface is not controlled by the + NetworkManager service (as determined by NM_CONTROLLED=n in the interface configuration). If the NetworkManager + service is restarted before the agent publishes the hostname, and NM_controlled=y, a race condition may happen + between the NetworkManager service and the Guest Agent making changes to the network interface configuration + simultaneously. """ - shellutil.run("service NetworkManager restart") + if not self.get_nm_controlled(): + shellutil.run("service NetworkManager restart") super(RedhatOSUtil, self).publish_hostname(hostname) def register_agent_service(self): From 70e2f548aa70041075aefb841e268979948f429a Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Tue, 9 Jan 2024 10:42:26 -0800 Subject: [PATCH 125/240] Install missing dependency (jq) on Azure Pipeline Agents (#3013) * Install missing dependency (jq) on Azure Pipeline Agents * use if statement * remove if statement --------- Co-authored-by: narrieta --- tests_e2e/pipeline/pipeline-cleanup.yml | 8 ++++---- tests_e2e/pipeline/scripts/setup-agent.sh | 6 +++++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/tests_e2e/pipeline/pipeline-cleanup.yml b/tests_e2e/pipeline/pipeline-cleanup.yml index c800c1c6ab..7f9f57a6c6 100644 --- a/tests_e2e/pipeline/pipeline-cleanup.yml +++ b/tests_e2e/pipeline/pipeline-cleanup.yml @@ -45,12 +45,12 @@ steps: rest_endpoint=$(az cloud show --query 
"endpoints.resourceManager" -o tsv) + pattern="${{ parameters.name_pattern }}" + az rest --method GET \ --url "${rest_endpoint}/subscriptions/${subscription_id}/resourcegroups" \ --url-parameters api-version=2021-04-01 \$expand=createdTime \ --output json \ --query value \ - | jq --arg date "$date" '.[] | select (.createdTime < $date).name' \ - | grep -i '${{ parameters.name_pattern }}' \ - | xargs -l -t -r az group delete --no-wait -y -n \ - || echo "No resource groups found to delete" + | jq --arg date "$date" '.[] | select (.createdTime < $date).name | match("'${pattern}'"; "g").string' \ + | xargs -l -t -r az group delete --subscription "${subscription_id}" --no-wait -y -n diff --git a/tests_e2e/pipeline/scripts/setup-agent.sh b/tests_e2e/pipeline/scripts/setup-agent.sh index 5990ada0ac..9b1316059a 100755 --- a/tests_e2e/pipeline/scripts/setup-agent.sh +++ b/tests_e2e/pipeline/scripts/setup-agent.sh @@ -18,7 +18,7 @@ # # -# Script to setup the agent VM for the Azure Pipelines agent pool; it simply installs the Azure CLI and the Docker Engine. +# Script to setup the agent VM for the Azure Pipelines agent pool; it simply installs the Azure CLI, the Docker Engine and jq. # set -euox pipefail @@ -48,3 +48,7 @@ sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plug # Verify that Docker Engine is installed correctly by running the hello-world image. 
sudo docker run hello-world + +# Install jq; it is used by the cleanup pipeline to parse the JSON output of the Azure CLI +sudo apt-get install -y jq + From 5d2da888dae8c33954d83b5c9841145f79dadff3 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 10 Jan 2024 15:07:07 -0800 Subject: [PATCH 126/240] Do not reset the mode of a extension's log directory (#3014) Co-authored-by: narrieta --- azurelinuxagent/common/utils/fileutil.py | 12 +++++++----- azurelinuxagent/ga/exthandlers.py | 2 +- tests/ga/test_exthandlers.py | 22 ++++++++++++++++++++++ 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/azurelinuxagent/common/utils/fileutil.py b/azurelinuxagent/common/utils/fileutil.py index 03090a427d..94eb5cf1bf 100644 --- a/azurelinuxagent/common/utils/fileutil.py +++ b/azurelinuxagent/common/utils/fileutil.py @@ -99,13 +99,15 @@ def get_line_startingwith(prefix, filepath): return None -def mkdir(dirpath, mode=None, owner=None): +def mkdir(dirpath, mode=None, owner=None, reset_mode_and_owner=True): if not os.path.isdir(dirpath): os.makedirs(dirpath) - if mode is not None: - chmod(dirpath, mode) - if owner is not None: - chowner(dirpath, owner) + reset_mode_and_owner = True # force setting the mode and owner + if reset_mode_and_owner: + if mode is not None: + chmod(dirpath, mode) + if owner is not None: + chowner(dirpath, owner) def chowner(path, owner): diff --git a/azurelinuxagent/ga/exthandlers.py b/azurelinuxagent/ga/exthandlers.py index a09a81e96c..fcb14d22b9 100644 --- a/azurelinuxagent/ga/exthandlers.py +++ b/azurelinuxagent/ga/exthandlers.py @@ -1051,7 +1051,7 @@ def get_extension_full_name(self, extension=None): def __set_command_execution_log(self, extension, execution_log_max_size): try: - fileutil.mkdir(self.get_log_dir(), mode=0o755) + fileutil.mkdir(self.get_log_dir(), mode=0o755, reset_mode_and_owner=False) except IOError as e: self.logger.error(u"Failed to create extension log dir: {0}", e) else: diff --git a/tests/ga/test_exthandlers.py 
b/tests/ga/test_exthandlers.py index 2f03396599..f56ebce14b 100644 --- a/tests/ga/test_exthandlers.py +++ b/tests/ga/test_exthandlers.py @@ -287,6 +287,28 @@ def test_command_extension_log_truncates_correctly(self, mock_log_dir): with open(log_file_path) as truncated_log_file: self.assertEqual(truncated_log_file.read(), "{second_line}\n".format(second_line=second_line)) + def test_set_logger_should_not_reset_the_mode_of_the_log_directory(self): + ext_log_dir = os.path.join(self.tmp_dir, "log_directory") + + with patch("azurelinuxagent.common.conf.get_ext_log_dir", return_value=ext_log_dir): + ext_handler = Extension(name='foo') + ext_handler.version = "1.2.3" + ext_handler_instance = ExtHandlerInstance(ext_handler=ext_handler, protocol=None) + ext_handler_log_dir = os.path.join(ext_log_dir, ext_handler.name) + + # Double-check the initial mode + get_mode = lambda f: os.stat(f).st_mode & 0o777 + mode = get_mode(ext_handler_log_dir) + if mode != 0o755: + raise Exception("The initial mode of the log directory should be 0o755, got 0{0:o}".format(mode)) + + new_mode = 0o700 + os.chmod(ext_handler_log_dir, new_mode) + ext_handler_instance.set_logger() + + mode = get_mode(ext_handler_log_dir) + self.assertEqual(new_mode, mode, "The mode of the log directory should not have changed") + def test_it_should_report_the_message_in_the_hearbeat(self): def heartbeat_with_message(): return {'code': 0, 'formattedMessage': {'lang': 'en-US', 'message': 'This is a heartbeat message'}, From 150de9b657091d3c775458215194bd505ee56a95 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Thu, 11 Jan 2024 09:59:59 -0800 Subject: [PATCH 127/240] Daemon should remove stale published_hostname file and log useful warning (#3016) * Daemon should remove published_hostname file and log useful warning * Clean up fast track file if vm id has changed * Clean up initial_goal_state file if vm id has changed * Clean up rsm_update file if vm id has changed --- 
azurelinuxagent/pa/deprovision/default.py | 6 +++++- azurelinuxagent/pa/provision/default.py | 8 +++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/azurelinuxagent/pa/deprovision/default.py b/azurelinuxagent/pa/deprovision/default.py index edf736811b..35b4ae82ed 100644 --- a/azurelinuxagent/pa/deprovision/default.py +++ b/azurelinuxagent/pa/deprovision/default.py @@ -158,7 +158,11 @@ def del_lib_dir_files(self, warnings, actions): # pylint: disable=W0613 'partition', 'Protocol', 'SharedConfig.xml', - 'WireServerEndpoint' + 'WireServerEndpoint', + 'published_hostname', + 'fast_track.json', + 'initial_goal_state', + 'rsm_update.json' ] known_files_glob = [ 'Extensions.*.xml', diff --git a/azurelinuxagent/pa/provision/default.py b/azurelinuxagent/pa/provision/default.py index 91fe04edab..a872d70fd4 100644 --- a/azurelinuxagent/pa/provision/default.py +++ b/azurelinuxagent/pa/provision/default.py @@ -172,9 +172,11 @@ def check_provisioned_file(self): s = fileutil.read_file(ProvisionHandler.provisioned_file_path()).strip() if not self.osutil.is_current_instance_id(s): if len(s) > 0: - logger.warn("VM is provisioned, " - "but the VM unique identifier has changed -- " - "clearing cached state") + msg = "VM is provisioned, but the VM unique identifier has changed. 
This indicates the VM may be " \ + "created from an image that was not properly deprovisioned or generalized, which can result in " \ + "unexpected behavior from the guest agent -- clearing cached state" + logger.warn(msg) + self.report_event(msg) from azurelinuxagent.pa.deprovision \ import get_deprovision_handler deprovision_handler = get_deprovision_handler() From 24b9f5aff7eace97643f7c57cf4e5a313b6bd45f Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Fri, 12 Jan 2024 07:35:00 -0800 Subject: [PATCH 128/240] Do not report TestFailedException in test results (#3019) Co-authored-by: narrieta --- tests_e2e/orchestrator/lib/agent_junit.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests_e2e/orchestrator/lib/agent_junit.py b/tests_e2e/orchestrator/lib/agent_junit.py index 78b7e35845..47a5e7d697 100644 --- a/tests_e2e/orchestrator/lib/agent_junit.py +++ b/tests_e2e/orchestrator/lib/agent_junit.py @@ -55,6 +55,9 @@ def _received_message(self, message: MessageBase) -> None: if "Unexpected error in AgentTestSuite" in message.message: # Ignore these errors, they are already reported as AgentTestResultMessages return + if "TestFailedException" in message.message: + # Ignore these errors, they are already reported as test failures + return # Change the suite name to "_Runbook_" for LISA messages in order to separate them # from actual test results. 
message.suite_full_name = "_Runbook_" From ddfb24e1d99a168c6b64af6409134b33ce598d50 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Fri, 12 Jan 2024 10:47:09 -0800 Subject: [PATCH 129/240] skip agent update run on arm64 distros (#3018) --- tests_e2e/test_suites/agent_update.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests_e2e/test_suites/agent_update.yml b/tests_e2e/test_suites/agent_update.yml index df25a92213..b78f4109f5 100644 --- a/tests_e2e/test_suites/agent_update.yml +++ b/tests_e2e/test_suites/agent_update.yml @@ -3,11 +3,11 @@ # Self-update: If vm not enrolled into RSM, it will validate agent uses self-update to update to latest version published name: "AgentUpdate" tests: -# - "agent_update/rsm_update.py" will enable this test once we have a new test version published +# - "agent_update/rsm_update.py" TODO: will enable this test once we have a new test version published - "agent_update/self_update.py" images: - "random(endorsed, 10)" - - "random(endorsed-arm64, 2)" +# - "random(endorsed-arm64, 2)" TODO: HGPA not deployed on some arm64 hosts(so agent stuck on Vmesttings calls as per contract) and will enable once HGPA deployed there locations: "AzureCloud:eastus2euap" owns_vm: true skip_on_clouds: From c24a9b6d5b53c8afbd6f8385124c3d48b66f809f Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Fri, 12 Jan 2024 11:21:12 -0800 Subject: [PATCH 130/240] Clean test VMs older than 12 hours (#3021) Co-authored-by: narrieta --- tests_e2e/pipeline/pipeline-cleanup.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_e2e/pipeline/pipeline-cleanup.yml b/tests_e2e/pipeline/pipeline-cleanup.yml index 7f9f57a6c6..69e929be50 100644 --- a/tests_e2e/pipeline/pipeline-cleanup.yml +++ b/tests_e2e/pipeline/pipeline-cleanup.yml @@ -13,7 +13,7 @@ parameters: - name: older_than displayName: Delete resources older than (use the syntax of the "date -d" command) type: 
string - default: 1 day ago + default: 12 hours ago - name: service_connections type: object From c7757023668aa2ebce07c3ae42f353c490320b2a Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 16 Jan 2024 15:59:58 -0800 Subject: [PATCH 131/240] honor rsm update with no time when agent receives new GS (#3015) * honor rsm update immediately * pylint * improve msg * address comments * address comments * address comments * added verbose logging --- azurelinuxagent/common/exception.py | 9 ++++ azurelinuxagent/ga/agent_update_handler.py | 52 ++++++++++++------- azurelinuxagent/ga/ga_version_updater.py | 25 ++++----- azurelinuxagent/ga/rsm_version_updater.py | 15 +++--- .../ga/self_update_version_updater.py | 15 +++--- tests/ga/test_agent_update_handler.py | 10 ++++ tests/ga/test_update.py | 6 ++- 7 files changed, 81 insertions(+), 51 deletions(-) diff --git a/azurelinuxagent/common/exception.py b/azurelinuxagent/common/exception.py index 603ed1aa21..42170db854 100644 --- a/azurelinuxagent/common/exception.py +++ b/azurelinuxagent/common/exception.py @@ -84,6 +84,15 @@ def __init__(self, msg=None, inner=None): super(AgentUpdateError, self).__init__(msg, inner) +class AgentFamilyMissingError(AgentError): + """ + When agent family is missing. + """ + + def __init__(self, msg=None, inner=None): + super(AgentFamilyMissingError, self).__init__(msg, inner) + + class CGroupsException(AgentError): """ Exception to classify any cgroups related issue. diff --git a/azurelinuxagent/ga/agent_update_handler.py b/azurelinuxagent/ga/agent_update_handler.py index ed157bdf5b..f342357028 100644 --- a/azurelinuxagent/ga/agent_update_handler.py +++ b/azurelinuxagent/ga/agent_update_handler.py @@ -15,18 +15,16 @@ # limitations under the License. 
# # Requires Python 2.6+ and Openssl 1.0+ -import datetime import os from azurelinuxagent.common import conf, logger from azurelinuxagent.common.event import add_event, WALAEventOperation -from azurelinuxagent.common.exception import AgentUpgradeExitException, AgentUpdateError +from azurelinuxagent.common.exception import AgentUpgradeExitException, AgentUpdateError, AgentFamilyMissingError from azurelinuxagent.common.future import ustr from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatuses, VMAgentUpdateStatus, VERSION_0 from azurelinuxagent.common.utils import textutil from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.version import get_daemon_version -from azurelinuxagent.ga.ga_version_updater import RSMUpdates from azurelinuxagent.ga.rsm_version_updater import RSMVersionUpdater from azurelinuxagent.ga.self_update_version_updater import SelfUpdateVersionUpdater @@ -67,7 +65,7 @@ def __init__(self, protocol): # restore the state of rsm update. Default to self-update if last update is not with RSM. 
if not self._get_is_last_update_with_rsm(): - self._updater = SelfUpdateVersionUpdater(self._gs_id, datetime.datetime.min) + self._updater = SelfUpdateVersionUpdater(self._gs_id) else: self._updater = RSMVersionUpdater(self._gs_id, self._daemon_version) @@ -117,7 +115,7 @@ def _get_agent_family_manifest(self, goal_state): """ Get the agent_family from last GS for the given family Returns: first entry of Manifest - Exception if no manifests found in the last GS + Exception if no manifests found in the last GS and log it only on new goal state """ family = self._ga_family_type agent_families = goal_state.extensions_goal_state.agent_families @@ -130,11 +128,13 @@ def _get_agent_family_manifest(self, goal_state): agent_family_manifests.append(m) if not family_found: - raise AgentUpdateError(u"Agent family: {0} not found in the goal state: {1}, skipping agent update".format(family, self._gs_id)) + raise AgentFamilyMissingError(u"Agent family: {0} not found in the goal state: {1}, skipping agent update \n" + u"[Note: This error is permanent for this goal state and Will not log same error until we receive new goal state]".format(family, self._gs_id)) if len(agent_family_manifests) == 0: - raise AgentUpdateError( - u"No manifest links found for agent family: {0} for goal state: {1}, skipping agent update".format( + raise AgentFamilyMissingError( + u"No manifest links found for agent family: {0} for goal state: {1}, skipping agent update \n" + u"[Note: This error is permanent for this goal state and will not log same error until we receive new goal state]".format( family, self._gs_id)) return agent_family_manifests[0] @@ -145,30 +145,38 @@ def run(self, goal_state, ext_gs_updated): if not conf.get_autoupdate_enabled() or not conf.get_download_new_agents(): return - # verify if agent update is allowed this time (RSM checks new goal state; self-update checks manifest download interval) - if not self._updater.is_update_allowed_this_time(ext_gs_updated): - return + # Update the 
state only on new goal state + if ext_gs_updated: + self._gs_id = goal_state.extensions_goal_state.id + self._updater.sync_new_gs_id(self._gs_id) - self._gs_id = goal_state.extensions_goal_state.id agent_family = self._get_agent_family_manifest(goal_state) - # updater will return RSM enabled or disabled if we need to switch to self-update or rsm update - updater_mode = self._updater.check_and_switch_updater_if_changed(agent_family, self._gs_id, ext_gs_updated) + # Updater will return True or False if we need to switch the updater + # If self-updater receives RSM update enabled, it will switch to RSM updater + # If RSM updater receives RSM update disabled, it will switch to self-update + # No change in updater if GS not updated + is_rsm_update_enabled = self._updater.is_rsm_update_enabled(agent_family, ext_gs_updated) - if updater_mode == RSMUpdates.Disabled: + if not is_rsm_update_enabled and isinstance(self._updater, RSMVersionUpdater): msg = "VM not enabled for RSM updates, switching to self-update mode" logger.info(msg) add_event(op=WALAEventOperation.AgentUpgrade, message=msg, log_event=False) - self._updater = SelfUpdateVersionUpdater(self._gs_id, datetime.datetime.now()) + self._updater = SelfUpdateVersionUpdater(self._gs_id) self._remove_rsm_update_state() - if updater_mode == RSMUpdates.Enabled: + if is_rsm_update_enabled and isinstance(self._updater, SelfUpdateVersionUpdater): msg = "VM enabled for RSM updates, switching to RSM update mode" logger.info(msg) add_event(op=WALAEventOperation.AgentUpgrade, message=msg, log_event=False) self._updater = RSMVersionUpdater(self._gs_id, self._daemon_version) self._save_rsm_update_state() + # If updater is changed in previous step, we allow update as it consider as first attempt. 
If not, it checks below condition + # RSM checks new goal state; self-update checks manifest download interval + if not self._updater.is_update_allowed_this_time(ext_gs_updated): + return + self._updater.retrieve_agent_version(agent_family, goal_state) if not self._updater.is_retrieved_version_allowed_to_update(agent_family): @@ -183,14 +191,20 @@ def run(self, goal_state, ext_gs_updated): self._updater.proceed_with_update() except Exception as err: + log_error = True if isinstance(err, AgentUpgradeExitException): raise err elif isinstance(err, AgentUpdateError): error_msg = ustr(err) + elif isinstance(err, AgentFamilyMissingError): + error_msg = ustr(err) + # Agent family missing error is permanent in the given goal state, so we don't want to log it on every iteration of main loop if there is no new goal state + log_error = ext_gs_updated else: error_msg = "Unable to update Agent: {0}".format(textutil.format_exception(err)) - logger.warn(error_msg) - add_event(op=WALAEventOperation.AgentUpgrade, is_success=False, message=error_msg, log_event=False) + if log_error: + logger.warn(error_msg) + add_event(op=WALAEventOperation.AgentUpgrade, is_success=False, message=error_msg, log_event=False) self._last_attempted_update_error_msg = error_msg def get_vmagent_update_status(self): diff --git a/azurelinuxagent/ga/ga_version_updater.py b/azurelinuxagent/ga/ga_version_updater.py index 0d3f639f25..46ae1f31f1 100644 --- a/azurelinuxagent/ga/ga_version_updater.py +++ b/azurelinuxagent/ga/ga_version_updater.py @@ -30,14 +30,6 @@ from azurelinuxagent.ga.guestagent import GuestAgent -class RSMUpdates(object): - """ - Enum for switching between RSM updates and self updates - """ - Enabled = "Enabled" - Disabled = "Disabled" - - class GAVersionUpdater(object): def __init__(self, gs_id): @@ -53,15 +45,13 @@ def is_update_allowed_this_time(self, ext_gs_updated): """ raise NotImplementedError - def check_and_switch_updater_if_changed(self, agent_family, gs_id, ext_gs_updated): + def 
is_rsm_update_enabled(self, agent_family, ext_gs_updated): """ - checks and raise the updater exception if we need to switch to self-update from rsm update or vice versa + return True if we need to switch to RSM-update from self-update and vice versa. @param agent_family: agent family - @param gs_id: incarnation of the goal state @param ext_gs_updated: True if extension goal state updated else False - @return: RSMUpdates.Disabled: return when agent need to stop rsm updates and switch to self-update - RSMUpdates.Enabled: return when agent need to switch to rsm update - None: return when no need to switch + @return: False when agent need to stop rsm updates + True: when agent need to switch to rsm update """ raise NotImplementedError @@ -107,6 +97,13 @@ def version(self): """ return self._version + def sync_new_gs_id(self, gs_id): + """ + Update gs_id + @param gs_id: goal state id + """ + self._gs_id = gs_id + def download_and_get_new_agent(self, protocol, agent_family, goal_state): """ Function downloads the new agent and returns the downloaded version. 
diff --git a/azurelinuxagent/ga/rsm_version_updater.py b/azurelinuxagent/ga/rsm_version_updater.py index 6df7b6e30e..a7a8bd97d1 100644 --- a/azurelinuxagent/ga/rsm_version_updater.py +++ b/azurelinuxagent/ga/rsm_version_updater.py @@ -24,7 +24,7 @@ from azurelinuxagent.common.exception import AgentUpgradeExitException, AgentUpdateError from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.version import CURRENT_VERSION, AGENT_NAME -from azurelinuxagent.ga.ga_version_updater import GAVersionUpdater, RSMUpdates +from azurelinuxagent.ga.ga_version_updater import GAVersionUpdater from azurelinuxagent.ga.guestagent import GuestAgent @@ -49,24 +49,23 @@ def is_update_allowed_this_time(self, ext_gs_updated): """ return ext_gs_updated - def check_and_switch_updater_if_changed(self, agent_family, gs_id, ext_gs_updated): + def is_rsm_update_enabled(self, agent_family, ext_gs_updated): """ Checks if there is a new goal state and decide if we need to continue with rsm update or switch to self-update. - Firstly it checks agent supports GA versioning or not. If not, we return rsm updates disabled to switch to self-update. - if vm is enabled for RSM updates and continue with rsm update, otherwise we return rsm updates disabled to switch to self-update. + Firstly it checks agent supports GA versioning or not. If not, we return false to switch to self-update. + if vm is enabled for RSM updates and continue with rsm update, otherwise we return false to switch to self-update. if either isVersionFromRSM or isVMEnabledForRSMUpgrades or version is missing in the goal state, we ignore the update as we consider it as invalid goal state. """ if ext_gs_updated: - self._gs_id = gs_id if not conf.get_enable_ga_versioning(): - return RSMUpdates.Disabled + return False if agent_family.is_vm_enabled_for_rsm_upgrades is None: raise AgentUpdateError( "Received invalid goal state:{0}, missing isVMEnabledForRSMUpgrades property. 
So, skipping agent update".format( self._gs_id)) elif not agent_family.is_vm_enabled_for_rsm_upgrades: - return RSMUpdates.Disabled + return False else: if agent_family.is_version_from_rsm is None: raise AgentUpdateError( @@ -77,7 +76,7 @@ def check_and_switch_updater_if_changed(self, agent_family, gs_id, ext_gs_update "Received invalid goal state:{0}, missing version property. So, skipping agent update".format( self._gs_id)) - return None + return True def retrieve_agent_version(self, agent_family, goal_state): """ diff --git a/azurelinuxagent/ga/self_update_version_updater.py b/azurelinuxagent/ga/self_update_version_updater.py index ca27c4399a..6605a28eb3 100644 --- a/azurelinuxagent/ga/self_update_version_updater.py +++ b/azurelinuxagent/ga/self_update_version_updater.py @@ -23,7 +23,7 @@ from azurelinuxagent.common.exception import AgentUpgradeExitException, AgentUpdateError from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.version import CURRENT_VERSION -from azurelinuxagent.ga.ga_version_updater import GAVersionUpdater, RSMUpdates +from azurelinuxagent.ga.ga_version_updater import GAVersionUpdater class SelfUpdateType(object): @@ -35,9 +35,9 @@ class SelfUpdateType(object): class SelfUpdateVersionUpdater(GAVersionUpdater): - def __init__(self, gs_id, last_attempted_manifest_download_time): + def __init__(self, gs_id): super(SelfUpdateVersionUpdater, self).__init__(gs_id) - self._last_attempted_manifest_download_time = last_attempted_manifest_download_time + self._last_attempted_manifest_download_time = datetime.datetime.min self._last_attempted_self_update_time = datetime.datetime.min @staticmethod @@ -119,14 +119,13 @@ def is_update_allowed_this_time(self, ext_gs_updated): return False return True - def check_and_switch_updater_if_changed(self, agent_family, gs_id, ext_gs_updated): + def is_rsm_update_enabled(self, agent_family, ext_gs_updated): """ Checks if there is a new goal state and decide if we need 
to continue with self-update or switch to rsm update. - if vm is not enabled for RSM updates or agent not supports GA versioning then we continue with self update, otherwise we rsm enabled to switch to rsm update. + if vm is not enabled for RSM updates or agent not supports GA versioning then we continue with self update, otherwise we return true to switch to rsm update. if isVersionFromRSM is missing but isVMEnabledForRSMUpgrades is present in the goal state, we ignore the update as we consider it as invalid goal state. """ if ext_gs_updated: - self._gs_id = gs_id if conf.get_enable_ga_versioning() and agent_family.is_vm_enabled_for_rsm_upgrades is not None and agent_family.is_vm_enabled_for_rsm_upgrades: if agent_family.is_version_from_rsm is None: raise AgentUpdateError( @@ -137,9 +136,9 @@ def check_and_switch_updater_if_changed(self, agent_family, gs_id, ext_gs_update raise AgentUpdateError( "Received invalid goal state:{0}, missing version property. So, skipping agent update".format( self._gs_id)) - return RSMUpdates.Enabled + return True - return None + return False def retrieve_agent_version(self, agent_family, goal_state): """ diff --git a/tests/ga/test_agent_update_handler.py b/tests/ga/test_agent_update_handler.py index 0ac373a6b2..5ba7f3c70f 100644 --- a/tests/ga/test_agent_update_handler.py +++ b/tests/ga/test_agent_update_handler.py @@ -366,6 +366,16 @@ def test_handles_missing_agent_family(self): 'message'] and kwarg[ 'op'] == WALAEventOperation.AgentUpgrade]), "Agent manifest should not be in GS") + # making multiple agent update attempts and assert only one time logged + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), False) + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), False) + + self.assertEqual(1, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + "No manifest links found for agent family" in kwarg[ + 'message'] and kwarg[ + 'op'] == WALAEventOperation.AgentUpgrade]), 
+ "Agent manifest error should be logged once if it's same goal state") + def test_it_should_report_update_status_with_success(self): data_file = DATA_FILE.copy() data_file["ext_conf"] = "wire/ext_conf_rsm_version.xml" diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 37fb75796e..c25585f143 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -1784,13 +1784,14 @@ def test_it_should_not_download_anything_if_rsm_version_is_current_version(self) self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), "New agent directory should not be found") - def test_it_should_skip_wait_to_update_if_rsm_version_available(self): + def test_it_should_skip_wait_to_update_immediately_if_rsm_version_available(self): no_of_iterations = 100 def reload_conf(url, protocol): mock_wire_data = protocol.mock_wire_data # This function reloads the conf mid-run to mimic an actual customer scenario + # Setting the rsm request to be sent after some iterations if HttpRequestPredicates.is_goal_state_request(url) and mock_wire_data.call_counts["goalstate"] >= 5: reload_conf.call_count += 1 @@ -1808,7 +1809,8 @@ def reload_conf(url, protocol): data_file = wire_protocol_data.DATA_FILE.copy() data_file['ga_manifest'] = "wire/ga_manifest_no_upgrade.xml" - with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf) as (update_handler, mock_telemetry): + # Setting the prod frequency to mimic a real scenario + with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf, autoupdate_frequency=6000) as (update_handler, mock_telemetry): update_handler._protocol.mock_wire_data.set_ga_manifest_version_version(str(CURRENT_VERSION)) update_handler._protocol.mock_wire_data.set_incarnation(20) update_handler.run(debug=True) From 5b4166c98f820349d3f5a5904ab2b476b126b2b8 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Tue, 16 Jan 2024 17:59:38 -0800 Subject: [PATCH 132/240] Don't 
check Agent log from the top after each test suite (#3022) * Don't check Agent log from the top after each test suite * fix initialization of override --------- Co-authored-by: narrieta --- .../orchestrator/lib/agent_test_suite.py | 47 +++++++++---------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index 7b2becd593..2a7241d78b 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -573,10 +573,12 @@ def _execute(self) -> None: test_suite_success = False raise + check_log_start_time = datetime.datetime.min + for suite in self._test_suites: log.info("Executing test suite %s", suite.name) self._lisa_log.info("Executing Test Suite %s", suite.name) - case_success = self._execute_test_suite(suite, test_context) + case_success, check_log_start_time = self._execute_test_suite(suite, test_context, check_log_start_time) test_suite_success = case_success and test_suite_success if not case_success: failed_cases.append(suite.name) @@ -611,13 +613,15 @@ def _execute(self) -> None: if not test_suite_success or unexpected_error: raise TestFailedException(self._environment_name, failed_cases) - def _execute_test_suite(self, suite: TestSuiteInfo, test_context: AgentTestContext) -> bool: + def _execute_test_suite(self, suite: TestSuiteInfo, test_context: AgentTestContext, check_log_start_time: datetime.datetime) -> Tuple[bool, datetime.datetime]: """ - Executes the given test suite and returns True if all the tests in the suite succeeded. + Executes the given test suite and returns a tuple of a bool indicating whether all the tests in the suite succeeded, and the timestamp that should be used + for the next check of the agent log. 
""" suite_name = suite.name suite_full_name = f"{suite_name}-{self._environment_name}" suite_start_time: datetime.datetime = datetime.datetime.now() + check_log_start_time_override = datetime.datetime.max # tests can override the timestamp for the agent log check with the get_ignore_errors_before_timestamp() method with set_thread_name(suite_full_name): # The thread name is added to the LISA log log_path: Path = self._log_path / f"{suite_full_name}.log" @@ -631,7 +635,6 @@ def _execute_test_suite(self, suite: TestSuiteInfo, test_context: AgentTestConte summary: List[str] = [] ignore_error_rules: List[Dict[str, Any]] = [] - before_timestamp = datetime.datetime.min for test in suite.tests: test_full_name = f"{suite_name}-{test.name}" @@ -705,13 +708,11 @@ def _execute_test_suite(self, suite: TestSuiteInfo, test_context: AgentTestConte ignore_error_rules.extend(test_instance.get_ignore_error_rules()) - # If the test has a timestamp before which errors should be ignored in the agent log, use that timestamp - # if multiple tests have this setting, use the earliest timestamp - if test_instance.get_ignore_errors_before_timestamp() != datetime.datetime.min: - if before_timestamp != datetime.datetime.min: - before_timestamp = min(before_timestamp, test_instance.get_ignore_errors_before_timestamp()) - else: - before_timestamp = test_instance.get_ignore_errors_before_timestamp() + # Check if the test is requesting to override the timestamp for the agent log check. + # Note that if multiple tests in the suite provide an override, we'll use the earliest timestamp. + test_check_log_start_time = test_instance.get_ignore_errors_before_timestamp() + if test_check_log_start_time != datetime.datetime.min: + check_log_start_time_override = min(check_log_start_time_override, test_check_log_start_time) if not test_success and test.blocks_suite: log.warning("%s failed and blocks the suite. 
Stopping suite execution.", test.name) @@ -737,11 +738,12 @@ def _execute_test_suite(self, suite: TestSuiteInfo, test_context: AgentTestConte if not suite_success: self._mark_log_as_failed() - suite_success = suite_success and self._check_agent_log_on_test_nodes(ignore_error_rules, before_timestamp) + next_check_log_start_time = datetime.datetime.utcnow() + suite_success = suite_success and self._check_agent_log_on_test_nodes(ignore_error_rules, check_log_start_time_override if check_log_start_time_override != datetime.datetime.max else check_log_start_time) - return suite_success + return suite_success, next_check_log_start_time - def _check_agent_log_on_test_nodes(self, ignore_error_rules: List[Dict[str, Any]], before_timestamp: datetime) -> bool: + def _check_agent_log_on_test_nodes(self, ignore_error_rules: List[Dict[str, Any]], check_log_start_time: datetime.datetime) -> bool: """ Checks the agent log on the test nodes for errors; returns true on success (no errors in the logs) """ @@ -759,22 +761,15 @@ def _check_agent_log_on_test_nodes(self, ignore_error_rules: List[Dict[str, Any] start_time: datetime.datetime = datetime.datetime.now() try: - self._lisa_log.info("Checking agent log on the test node %s", node_name) - log.info("Checking agent log on the test node %s", node_name) + message = f"Checking agent log on test node {node_name}, starting at {check_log_start_time.strftime('%Y-%m-%dT%H:%M:%S.%fZ')}" + self._lisa_log.info(message) + log.info(message) output = ssh_client.run_command("check-agent-log.py -j") errors = json.loads(output, object_hook=AgentLogRecord.from_dictionary) - # Individual tests may have rules to ignore known errors; filter those out - if len(ignore_error_rules) > 0: - new = [] - for e in errors: - # Ignore errors that occurred before the timestamp - if e.timestamp < before_timestamp: - continue - if not AgentLog.matches_ignore_rule(e, ignore_error_rules): - new.append(e) - errors = new + # Filter out errors that occurred before the 
starting timestamp or that match an ignore rule + errors = [e for e in errors if e.timestamp >= check_log_start_time and (len(ignore_error_rules) == 0 or not AgentLog.matches_ignore_rule(e, ignore_error_rules))] if len(errors) == 0: # If no errors, we are done; don't create a log or test result. From bf3738b0c18a9c86af1389f222758d78471df92e Mon Sep 17 00:00:00 2001 From: Zhidong Peng Date: Fri, 19 Jan 2024 12:58:23 -0800 Subject: [PATCH 133/240] update the proxy agenet log folder for logcollector (#3028) --- azurelinuxagent/ga/logcollector_manifests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/ga/logcollector_manifests.py b/azurelinuxagent/ga/logcollector_manifests.py index fc240d30ca..3548de4fc4 100644 --- a/azurelinuxagent/ga/logcollector_manifests.py +++ b/azurelinuxagent/ga/logcollector_manifests.py @@ -122,6 +122,6 @@ diskinfo, echo,### Gathering Guest ProxyAgent Log Files ### -copy,/var/log/proxyagent/* +copy,/var/log/azure-proxy-agent/* echo, """ From d8beb1af1840c366b526f988f1240ba33c1abb54 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Mon, 22 Jan 2024 11:08:12 -0800 Subject: [PATCH 134/240] Log instance view before asserting (#3029) --- tests_e2e/tests/lib/virtual_machine_extension_client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests_e2e/tests/lib/virtual_machine_extension_client.py b/tests_e2e/tests/lib/virtual_machine_extension_client.py index 699ed7cb4a..d54f881d05 100644 --- a/tests_e2e/tests/lib/virtual_machine_extension_client.py +++ b/tests_e2e/tests/lib/virtual_machine_extension_client.py @@ -135,6 +135,7 @@ def assert_instance_view( additional validations. 
""" instance_view = self.get_instance_view() + log.info("Instance view:\n%s", instance_view.serialize()) with soft_assertions(): if expected_version is not None: From 3c32d45177a44ca1fb83baffefc6b23780001575 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Thu, 1 Feb 2024 09:47:31 -0800 Subject: [PATCH 135/240] Add config parameter to wait for cloud-init (Extensions.WaitForCloudInit) (#3031) * Add config parameter to wait for cloud-init (Extensions.WaitForCloudInit) --------- Co-authored-by: narrieta --- README.md | 24 +++++ azurelinuxagent/common/conf.py | 10 ++ azurelinuxagent/common/event.py | 1 + azurelinuxagent/common/utils/shellutil.py | 39 +++++++- azurelinuxagent/ga/update.py | 20 ++++ tests/common/test_conf.py | 2 + tests/common/utils/test_shell_util.py | 9 +- tests/ga/test_update.py | 61 ++++++++++++- tests/lib/mock_update_handler.py | 4 +- tests/test_agent.py | 2 + .../lib/agent_test_suite_combinator.py | 74 +++++++++++---- tests_e2e/orchestrator/runbook.yml | 2 +- .../test_suites/agent_wait_for_cloud_init.yml | 13 +++ .../disable_agent_provisioning.py | 43 ++------- .../add_cloud_init_script.py | 63 +++++++++++++ .../agent_wait_for_cloud_init.py | 91 +++++++++++++++++++ tests_e2e/tests/lib/update_arm_template.py | 83 +++++++++++++++++ 17 files changed, 476 insertions(+), 65 deletions(-) create mode 100644 tests_e2e/test_suites/agent_wait_for_cloud_init.yml create mode 100755 tests_e2e/tests/agent_wait_for_cloud_init/add_cloud_init_script.py create mode 100755 tests_e2e/tests/agent_wait_for_cloud_init/agent_wait_for_cloud_init.py diff --git a/README.md b/README.md index 3d3a824e1f..6d0296bfcc 100644 --- a/README.md +++ b/README.md @@ -261,6 +261,30 @@ without the agent. In order to do that, the `provisionVMAgent` flag must be set provisioning time, via whichever API is being used. We will provide more details on this on our wiki when it is generally available. 
+#### __Extensions.WaitForCloudInit__ + +_Type: Boolean_ +_Default: n_ + +Waits for cloud-init to complete (cloud-init status --wait) before executing VM extensions. + +Both cloud-init and VM extensions are common ways to customize a VM during initial deployment. By +default, the agent will start executing extensions while cloud-init may still be in the 'config' +stage and won't wait for the 'final' stage to complete. Cloud-init and extensions may execute operations +that conflict with each other (for example, both of them may try to install packages). Setting this option +to 'y' ensures that VM extensions are executed only after cloud-init has completed all its stages. + +Note that using this option requires creating a custom image with the value of this option set to 'y', in +order to ensure that the wait is performed during the initial deployment of the VM. + +#### __Extensions.WaitForCloudInitTimeout__ + +_Type: Integer_ +_Default: 3600_ + +Timeout in seconds for the Agent to wait on cloud-init. If the timeout elapses, the Agent will continue +executing VM extensions. See Extensions.WaitForCloudInit for more details. 
+ #### __Extensions.GoalStatePeriod__ _Type: Integer_ diff --git a/azurelinuxagent/common/conf.py b/azurelinuxagent/common/conf.py index 57d6c9d280..a13f333576 100644 --- a/azurelinuxagent/common/conf.py +++ b/azurelinuxagent/common/conf.py @@ -117,6 +117,7 @@ def load_conf_from_file(conf_file_path, conf=__conf__): "Logs.Console": True, "Logs.Collect": True, "Extensions.Enabled": True, + "Extensions.WaitForCloudInit": False, "Provisioning.AllowResetSysUser": False, "Provisioning.RegenerateSshHostKeyPair": False, "Provisioning.DeleteRootPassword": False, @@ -170,6 +171,7 @@ def load_conf_from_file(conf_file_path, conf=__conf__): __INTEGER_OPTIONS__ = { "Extensions.GoalStatePeriod": 6, "Extensions.InitialGoalStatePeriod": 6, + "Extensions.WaitForCloudInitTimeout": 3600, "OS.EnableFirewallPeriod": 300, "OS.RemovePersistentNetRulesPeriod": 30, "OS.RootDeviceScsiTimeoutPeriod": 30, @@ -372,6 +374,14 @@ def get_extensions_enabled(conf=__conf__): return conf.get_switch("Extensions.Enabled", True) +def get_wait_for_cloud_init(conf=__conf__): + return conf.get_switch("Extensions.WaitForCloudInit", False) + + +def get_wait_for_cloud_init_timeout(conf=__conf__): + return conf.get_switch("Extensions.WaitForCloudInitTimeout", 3600) + + def get_goal_state_period(conf=__conf__): return conf.get_int("Extensions.GoalStatePeriod", 6) diff --git a/azurelinuxagent/common/event.py b/azurelinuxagent/common/event.py index fe313968fe..b010583808 100644 --- a/azurelinuxagent/common/event.py +++ b/azurelinuxagent/common/event.py @@ -75,6 +75,7 @@ class WALAEventOperation: CGroupsCleanUp = "CGroupsCleanUp" CGroupsDisabled = "CGroupsDisabled" CGroupsInfo = "CGroupsInfo" + CloudInit = "CloudInit" CollectEventErrors = "CollectEventErrors" CollectEventUnicodeErrors = "CollectEventUnicodeErrors" ConfigurationChange = "ConfigurationChange" diff --git a/azurelinuxagent/common/utils/shellutil.py b/azurelinuxagent/common/utils/shellutil.py index 50fd4592f1..d2bfd787ed 100644 --- 
a/azurelinuxagent/common/utils/shellutil.py +++ b/azurelinuxagent/common/utils/shellutil.py @@ -18,9 +18,17 @@ # import os import subprocess +import sys import tempfile import threading +if sys.version_info[0] == 2: + # TimeoutExpired was introduced on Python 3; define a dummy class for Python 2 + class TimeoutExpired(Exception): + pass +else: + from subprocess import TimeoutExpired + import azurelinuxagent.common.logger as logger from azurelinuxagent.common.future import ustr @@ -206,7 +214,7 @@ def __run_command(command_action, command, log_error, encode_output): # W0622: Redefining built-in 'input' -- disabled: the parameter name mimics subprocess.communicate() -def run_command(command, input=None, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, log_error=False, encode_input=True, encode_output=True, track_process=True): # pylint:disable=W0622 +def run_command(command, input=None, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, log_error=False, encode_input=True, encode_output=True, track_process=True, timeout=None): # pylint:disable=W0622 """ Executes the given command and returns its stdout. @@ -227,7 +235,9 @@ def run_command(command, input=None, stdin=None, stdout=subprocess.PIPE, stderr= value for these parameters is anything other than the default (subprocess.PIPE)), then the corresponding values returned by this function or the CommandError exception will be empty strings. - Note: This is the preferred method to execute shell commands over `azurelinuxagent.common.utils.shellutil.run` function. + NOTE: The 'timeout' parameter is ignored on Python 2 + + NOTE: This is the preferred method to execute shell commands over `azurelinuxagent.common.utils.shellutil.run` function. 
""" if input is not None and stdin is not None: raise ValueError("The input and stdin arguments are mutually exclusive") @@ -246,7 +256,30 @@ def command_action(): else: process = subprocess.Popen(command, stdin=popen_stdin, stdout=stdout, stderr=stderr, shell=False) - command_stdout, command_stderr = process.communicate(input=communicate_input) + try: + if sys.version_info[0] == 2: # communicate() doesn't support timeout on Python 2 + command_stdout, command_stderr = process.communicate(input=communicate_input) + else: + command_stdout, command_stderr = process.communicate(input=communicate_input, timeout=timeout) + except TimeoutExpired: + if log_error: + logger.error(u"Command [{0}] timed out", __format_command(command)) + + command_stdout, command_stderr = '', '' + + try: + process.kill() + # try to get any output from the command, but ignore any errors if we can't + try: + command_stdout, command_stderr = process.communicate() + # W0702: No exception type(s) specified (bare-except) + except: # pylint: disable=W0702 + pass + except Exception as exception: + if log_error: + logger.error(u"Can't terminate timed out process: {0}", ustr(exception)) + raise CommandError(command=__format_command(command), return_code=-1, stdout=command_stdout, stderr="command timeout\n{0}".format(command_stderr)) + if track_process: _on_command_completed(process.pid) diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 88267b75e2..1a0e362407 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -149,6 +149,8 @@ def __init__(self): self._last_check_memory_usage_time = time.time() self._check_memory_usage_last_error_report = datetime.min + self._cloud_init_completed = False # Only used when Extensions.WaitForCloudInit is enabled; note that this variable is always reset on service start. + # VM Size is reported via the heartbeat, default it here. 
self._vm_size = None @@ -458,6 +460,22 @@ def _initialize_goal_state(self, protocol): logger.info("The current Fabric goal state is older than the most recent FastTrack goal state; will skip it.\nFabric: {0}\nFastTrack: {1}", egs.created_on_timestamp, last_fast_track_timestamp) + def _wait_for_cloud_init(self): + if conf.get_wait_for_cloud_init() and not self._cloud_init_completed: + message = "Waiting for cloud-init to complete..." + logger.info(message) + add_event(op=WALAEventOperation.CloudInit, message=message) + try: + output = shellutil.run_command(["cloud-init", "status", "--wait"], timeout=conf.get_wait_for_cloud_init_timeout()) + message = "cloud-init completed\n{0}".format(output) + logger.info(message) + add_event(op=WALAEventOperation.CloudInit, message=message) + except Exception as e: + message = "An error occurred while waiting for cloud-init; will proceed to execute VM extensions. Extensions that have conflicts with cloud-init may fail.\n{0}".format(ustr(e)) + logger.error(message) + add_event(op=WALAEventOperation.CloudInit, message=message, is_success=False, log_event=False) + self._cloud_init_completed = True # Mark as completed even on error since we will proceed to execute extensions + def _get_vm_size(self, protocol): """ Including VMSize is meant to capture the architecture of the VM (i.e. 
arm64 VMs will @@ -562,6 +580,8 @@ def _process_goal_state(self, exthandlers_handler, remote_access_handler, agent_ # check for agent updates agent_update_handler.run(self._goal_state, self._processing_new_extensions_goal_state()) + self._wait_for_cloud_init() + try: if self._processing_new_extensions_goal_state(): if not self._extensions_summary.converged: diff --git a/tests/common/test_conf.py b/tests/common/test_conf.py index 972b289a79..1ae951bf9f 100644 --- a/tests/common/test_conf.py +++ b/tests/common/test_conf.py @@ -27,6 +27,8 @@ class TestConf(AgentTestCase): # -- These values *MUST* match those from data/test_waagent.conf EXPECTED_CONFIGURATION = { "Extensions.Enabled": True, + "Extensions.WaitForCloudInit": False, + "Extensions.WaitForCloudInitTimeout": 3600, "Provisioning.Agent": "auto", "Provisioning.DeleteRootPassword": True, "Provisioning.RegenerateSshHostKeyPair": True, diff --git a/tests/common/utils/test_shell_util.py b/tests/common/utils/test_shell_util.py index 3c6afc60e6..5eb5a83a6d 100644 --- a/tests/common/utils/test_shell_util.py +++ b/tests/common/utils/test_shell_util.py @@ -18,13 +18,14 @@ import os import signal import subprocess +import sys import tempfile import threading import unittest from azurelinuxagent.common.future import ustr import azurelinuxagent.common.utils.shellutil as shellutil -from tests.lib.tools import AgentTestCase, patch +from tests.lib.tools import AgentTestCase, patch, skip_if_predicate_true from tests.lib.miscellaneous_tools import wait_for, format_processes @@ -225,6 +226,12 @@ def test_run_command_should_raise_an_exception_when_it_cannot_execute_the_comman self.__it_should_raise_an_exception_when_it_cannot_execute_the_command( lambda: shellutil.run_command("nonexistent_command")) + @skip_if_predicate_true(lambda: sys.version_info[0] == 2, "Timeouts are not supported on Python 2") + def test_run_command_should_raise_an_exception_when_the_command_times_out(self): + with self.assertRaises(shellutil.CommandError) 
as context: + shellutil.run_command(["sleep", "5"], timeout=1) + self.assertIn("command timeout", context.exception.stderr, "The command did not time out") + def test_run_pipe_should_raise_an_exception_when_it_cannot_execute_the_pipe(self): self.__it_should_raise_an_exception_when_it_cannot_execute_the_command( lambda: shellutil.run_pipe([["ls", "-ld", "."], ["nonexistent_command"], ["wc", "-l"]])) diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index c25585f143..aa39ccb55a 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -38,7 +38,7 @@ ExtHandlerPackage, ExtHandlerPackageList, Extension, VMStatus, ExtHandlerStatus, ExtensionStatus, \ VMAgentUpdateStatuses from azurelinuxagent.common.protocol.util import ProtocolUtil -from azurelinuxagent.common.utils import fileutil, textutil, timeutil +from azurelinuxagent.common.utils import fileutil, textutil, timeutil, shellutil from azurelinuxagent.common.utils.archive import ARCHIVE_DIRECTORY_NAME, AGENT_STATUS_FILE from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.utils.networkutil import FirewallCmdDirectCommands, AddFirewallRules @@ -980,7 +980,6 @@ def match_expected_info(): match_unexpected_errors() # Match on errors first, they can provide more info. 
match_expected_info() - def test_it_should_recreate_handler_env_on_service_startup(self): iterations = 5 @@ -1361,6 +1360,64 @@ def test_it_should_reset_legacy_blacklisted_agents_on_process_start(self): self.assertFalse(agent.is_blacklisted, "Legacy Agent should not be blacklisted") +class TestUpdateWaitForCloudInit(AgentTestCase): + @staticmethod + @contextlib.contextmanager + def create_mock_run_command(delay=None): + def run_command_mock(cmd, *args, **kwargs): + if cmd == ["cloud-init", "status", "--wait"]: + if delay is not None: + original_run_command(['sleep', str(delay)], *args, **kwargs) + return "cloud-init completed" + return original_run_command(cmd, *args, **kwargs) + original_run_command = shellutil.run_command + + with patch("azurelinuxagent.ga.update.shellutil.run_command", side_effect=run_command_mock) as run_command_patch: + yield run_command_patch + + def test_it_should_not_wait_for_cloud_init_by_default(self): + update_handler = UpdateHandler() + with self.create_mock_run_command() as run_command_patch: + update_handler._wait_for_cloud_init() + self.assertTrue(run_command_patch.call_count == 0, "'cloud-init status --wait' should not be called by default") + + def test_it_should_wait_for_cloud_init_when_requested(self): + update_handler = UpdateHandler() + with patch("azurelinuxagent.ga.update.conf.get_wait_for_cloud_init", return_value=True): + with self.create_mock_run_command() as run_command_patch: + update_handler._wait_for_cloud_init() + self.assertEqual(1, run_command_patch.call_count, "'cloud-init status --wait' should have be called once") + + @skip_if_predicate_true(lambda: sys.version_info[0] == 2, "Timeouts are not supported on Python 2") + def test_it_should_enforce_timeout_waiting_for_cloud_init(self): + update_handler = UpdateHandler() + with patch("azurelinuxagent.ga.update.conf.get_wait_for_cloud_init", return_value=True): + with patch("azurelinuxagent.ga.update.conf.get_wait_for_cloud_init_timeout", return_value=1): + with 
self.create_mock_run_command(delay=5): + with patch("azurelinuxagent.ga.update.logger.error") as mock_logger: + update_handler._wait_for_cloud_init() + call_args = [args for args, _ in mock_logger.call_args_list if "An error occurred while waiting for cloud-init" in args[0]] + self.assertTrue( + len(call_args) == 1 and len(call_args[0]) == 1 and "command timeout" in call_args[0][0], + "Expected a timeout waiting for cloud-init. Log calls: {0}".format(mock_logger.call_args_list)) + + def test_update_handler_should_wait_for_cloud_init_after_agent_update_and_before_extension_processing(self): + method_calls = [] + + agent_update_handler = Mock() + agent_update_handler.run = lambda *_, **__: method_calls.append("AgentUpdateHandler.run()") + + exthandlers_handler = Mock() + exthandlers_handler.run = lambda *_, **__: method_calls.append("ExtHandlersHandler.run()") + + with mock_wire_protocol(DATA_FILE) as protocol: + with mock_update_handler(protocol, iterations=1, agent_update_handler=agent_update_handler, exthandlers_handler=exthandlers_handler) as update_handler: + with patch('azurelinuxagent.ga.update.UpdateHandler._wait_for_cloud_init', side_effect=lambda *_, **__: method_calls.append("UpdateHandler._wait_for_cloud_init()")): + update_handler.run() + + self.assertListEqual(["AgentUpdateHandler.run()", "UpdateHandler._wait_for_cloud_init()", "ExtHandlersHandler.run()"], method_calls, "Wait for cloud-init should happen after agent update and before extension processing") + + class UpdateHandlerRunTestCase(AgentTestCase): def _test_run(self, autoupdate_enabled=False, check_daemon_running=False, expected_exit_code=0, emit_restart_event=None): fileutil.write_file(conf.get_agent_pid_file_path(), ustr(42)) diff --git a/tests/lib/mock_update_handler.py b/tests/lib/mock_update_handler.py index f0b311abe2..03d7a44521 100644 --- a/tests/lib/mock_update_handler.py +++ b/tests/lib/mock_update_handler.py @@ -86,9 +86,9 @@ def patch_object(target, attribute): try: with 
patch("azurelinuxagent.ga.exthandlers.get_exthandlers_handler", return_value=exthandlers_handler): - with patch("azurelinuxagent.ga.agent_update_handler.get_agent_update_handler", return_value=agent_update_handler): + with patch("azurelinuxagent.ga.update.get_agent_update_handler", return_value=agent_update_handler): with patch("azurelinuxagent.ga.remoteaccess.get_remote_access_handler", return_value=remote_access_handler): - with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=autoupdate_enabled): + with patch("azurelinuxagent.ga.update.conf.get_autoupdate_enabled", return_value=autoupdate_enabled): with patch.object(UpdateHandler, "is_running", PropertyMock(side_effect=is_running)): with patch('azurelinuxagent.ga.update.time.sleep', side_effect=lambda _: mock_sleep(0.001)) as sleep: with patch('sys.exit', side_effect=lambda _: 0) as mock_exit: diff --git a/tests/test_agent.py b/tests/test_agent.py index 414faa7266..0da6a2a853 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -53,6 +53,8 @@ Extensions.Enabled = True Extensions.GoalStatePeriod = 6 Extensions.InitialGoalStatePeriod = 6 +Extensions.WaitForCloudInit = False +Extensions.WaitForCloudInitTimeout = 3600 HttpProxy.Host = None HttpProxy.Port = None Lib.Dir = /var/lib/waagent diff --git a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py index fbe53a1bdc..4b650e8641 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py @@ -159,15 +159,25 @@ def create_environment_list(self) -> List[Dict[str, Any]]: for image in images_info: if image in skip_images_info: continue - # 'image.urn' can actually be the URL to a VHD if the runbook provided it in the 'image' parameter + # 'image.urn' can actually be the URL to a VHD or an image from a gallery if the runbook provided it in the 'image' parameter if self._is_vhd(image.urn): 
marketplace_image = "" vhd = image.urn image_name = urllib.parse.urlparse(vhd).path.split('/')[-1] # take the last fragment of the URL's path (e.g. "RHEL_8_Standard-8.3.202006170423.vhd") + shared_gallery = "" + elif self._is_image_from_gallery(image.urn): + marketplace_image = "" + vhd = "" + image_name = self._get_name_of_image_from_gallery(image.urn) + shared_gallery = image.urn else: marketplace_image = image.urn vhd = "" image_name = self._get_image_name(image.urn) + shared_gallery = "" + + if test_suite_info.executes_on_scale_set and (vhd != "" or shared_gallery != ""): + raise Exception("VHDS and images from galleries are currently not supported on scale sets.") location: str = self._get_location(test_suite_info, image) if location is None: @@ -194,6 +204,7 @@ def create_environment_list(self) -> List[Dict[str, Any]]: env_name=f"{image_name}-{test_suite_info.name}", marketplace_image=marketplace_image, vhd=vhd, + shared_gallery=shared_gallery, location=location, vm_size=vm_size, test_suite_info=test_suite_info) @@ -206,9 +217,6 @@ def create_environment_list(self) -> List[Dict[str, Any]]: env["c_test_suites"].append(test_suite_info) else: if test_suite_info.executes_on_scale_set: - # TODO: Add support for VHDs - if vhd != "": - raise Exception("VHDS are currently not supported on scale sets.") env = self.create_vmss_environment( env_name=env_name, marketplace_image=marketplace_image, @@ -220,18 +228,18 @@ def create_environment_list(self) -> List[Dict[str, Any]]: env_name=env_name, marketplace_image=marketplace_image, vhd=vhd, + shared_gallery=shared_gallery, location=location, vm_size=vm_size, test_suite_info=test_suite_info) shared_environments[env_name] = env - if test_suite_info.template != '': - vm_tags = env.get("vm_tags") - if vm_tags is not None: - if "templates" not in vm_tags: - vm_tags["templates"] = test_suite_info.template - else: - vm_tags["templates"] += "," + test_suite_info.template + if test_suite_info.template != '': + vm_tags = 
env["vm_tags"] + if "templates" not in vm_tags: + vm_tags["templates"] = test_suite_info.template + else: + vm_tags["templates"] += "," + test_suite_info.template environments.extend(shared_environments.values()) @@ -330,7 +338,7 @@ def create_existing_vmss_environment(self) -> Dict[str, Any]: "c_test_suites": loader.test_suites, } - def create_vm_environment(self, env_name: str, marketplace_image: str, vhd: str, location: str, vm_size: str, test_suite_info: TestSuiteInfo) -> Dict[str, Any]: + def create_vm_environment(self, env_name: str, marketplace_image: str, vhd: str, shared_gallery: str, location: str, vm_size: str, test_suite_info: TestSuiteInfo) -> Dict[str, Any]: # # Custom ARM templates (to create the test VMs) require special handling. These templates are processed by the azure_update_arm_template # hook, which does not have access to the runbook variables. Instead, we use a dummy VM tag named "templates" and pass the @@ -339,11 +347,9 @@ def create_vm_environment(self, env_name: str, marketplace_image: str, vhd: str, # share the same test environment. Similarly, we use a dummy VM tag named "allow_ssh" to pass the value of the "allow_ssh" runbook parameter. 
# vm_tags = {} - if test_suite_info.template != '': - vm_tags["templates"] = test_suite_info.template if self.runbook.allow_ssh != '': vm_tags["allow_ssh"] = self.runbook.allow_ssh - return { + environment = { "c_platform": [ { "type": "azure", @@ -366,6 +372,7 @@ def create_vm_environment(self, env_name: str, marketplace_image: str, vhd: str, "azure": { "marketplace": marketplace_image, "vhd": vhd, + "shared_gallery": shared_gallery, "location": location, "vm_size": vm_size } @@ -383,6 +390,18 @@ def create_vm_environment(self, env_name: str, marketplace_image: str, vhd: str, "vm_tags": vm_tags } + if shared_gallery != '': + # Currently all the images in our shared gallery require secure boot + environment['c_platform'][0]['requirement']["features"] = { + "items": [ + { + "type": "Security_Profile", + "security_profile": "secureboot" + } + ] + } + return environment + def create_vmss_environment(self, env_name: str, marketplace_image: str, location: str, vm_size: str, test_suite_info: TestSuiteInfo) -> Dict[str, Any]: return { "c_platform": [ @@ -406,7 +425,8 @@ def create_vmss_environment(self, env_name: str, marketplace_image: str, locatio "c_location": location, "c_image": marketplace_image, "c_is_vhd": False, - "c_vm_size": vm_size + "c_vm_size": vm_size, + "vm_tags": {} } def _get_runbook_images(self, loader: AgentTestLoader) -> List[VmImageInfo]: @@ -420,12 +440,12 @@ def _get_runbook_images(self, loader: AgentTestLoader) -> List[VmImageInfo]: if images is not None: return images - # If it is not image or image set, it must be a URN or VHD - if not self._is_urn(self.runbook.image) and not self._is_vhd(self.runbook.image): - raise Exception(f"The 'image' parameter must be an image, an image set name, a urn, or a vhd: {self.runbook.image}") + # If it is not image or image set, it must be a URN, VHD, or an image from a gallery + if not self._is_urn(self.runbook.image) and not self._is_vhd(self.runbook.image) and not 
self._is_image_from_gallery(self.runbook.image): + raise Exception(f"The 'image' parameter must be an image, image set name, urn, vhd, or an image from a shared gallery: {self.runbook.image}") i = VmImageInfo() - i.urn = self.runbook.image # Note that this could be a URN or the URI for a VHD + i.urn = self.runbook.image # Note that this could be a URN or the URI for a VHD, or an image from a shared gallery i.locations = [] i.vm_sizes = [] @@ -536,6 +556,20 @@ def _is_vhd(vhd: str) -> bool: parsed = urllib.parse.urlparse(vhd) return parsed.scheme == 'https' and parsed.netloc != "" and parsed.path != "" + # Images from a gallery are given as "//". + _IMAGE_FROM_GALLERY = re.compile(r"(?P[^/]+)/(?P[^/]+)/(?P[^/]+)") + + @staticmethod + def _is_image_from_gallery(image: str) -> bool: + return AgentTestSuitesCombinator._IMAGE_FROM_GALLERY.match(image) is not None + + @staticmethod + def _get_name_of_image_from_gallery(image: str) -> bool: + match = AgentTestSuitesCombinator._IMAGE_FROM_GALLERY.match(image) + if match is None: + raise Exception(f"Invalid image from gallery: {image}") + return match.group('image') + @staticmethod def _report_test_result( suite_name: str, diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index 9181e9189c..ed0b816b12 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -29,7 +29,7 @@ variable: # Test suites to execute # - name: test_suites - value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_status, multi_config_ext, agent_cgroups, ext_cgroups, agent_firewall, ext_telemetry_pipeline, ext_sequencing, agent_persist_firewall, publish_hostname, agent_update" + value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_status, multi_config_ext, agent_cgroups, ext_cgroups, agent_firewall, ext_telemetry_pipeline, ext_sequencing, 
agent_persist_firewall, publish_hostname, agent_update, agent_wait_for_cloud_init" # # Parameters used to create test VMs diff --git a/tests_e2e/test_suites/agent_wait_for_cloud_init.yml b/tests_e2e/test_suites/agent_wait_for_cloud_init.yml new file mode 100644 index 0000000000..727803811e --- /dev/null +++ b/tests_e2e/test_suites/agent_wait_for_cloud_init.yml @@ -0,0 +1,13 @@ +# +# This test verifies that the Agent waits for cloud-init to complete before it starts processing extensions. +# +# NOTE: This test is not fully automated. It requires a custom image where the test Agent has been installed and Extensions.WaitForCloudInit is enabled in waagent.conf. +# To execute it manually, create a custom image and use the 'image' runbook parameter, for example: "-v: image:gallery/wait-cloud-init/1.0.1". +# +name: "AgentWaitForCloudInit" +tests: + - "agent_wait_for_cloud_init/agent_wait_for_cloud_init.py" +template: "agent_wait_for_cloud_init/add_cloud_init_script.py" +install_test_agent: false +# Dummy image, since the parameter is required. The actual image needs to be passed as a parameter to the runbook. +images: "ubuntu_2204" diff --git a/tests_e2e/tests/agent_not_provisioned/disable_agent_provisioning.py b/tests_e2e/tests/agent_not_provisioned/disable_agent_provisioning.py index 6f0a562cd2..af3bc738a5 100755 --- a/tests_e2e/tests/agent_not_provisioned/disable_agent_provisioning.py +++ b/tests_e2e/tests/agent_not_provisioned/disable_agent_provisioning.py @@ -32,18 +32,11 @@ def update(self, template: Dict[str, Any], is_lisa_template: bool) -> None: # # NOTE: LISA's template uses this function to generate the value for osProfile.linuxConfiguration. The function is - # under the 'lisa' namespace. + # under the 'lisa' namespace. We set 'provisionVMAgent' to False. # # "getLinuxConfiguration": { # "parameters": [ - # { - # "name": "keyPath", - # "type": "string" - # }, - # { - # "name": "publicKeyData", - # "type": "string" - # } + # ... 
# ], # "output": { # "type": "object", @@ -62,31 +55,9 @@ def update(self, template: Dict[str, Any], is_lisa_template: bool) -> None: # } # } # - # The code below sets template['functions'][i]['members']['getLinuxConfiguration']['output']['value']['provisionVMAgent'] to True, - # where template['functions'][i] is the 'lisa' namespace. - # - functions = template.get("functions") - if functions is None: - raise Exception('Cannot find "functions" in the LISA template.') - for namespace in functions: - name = namespace.get("namespace") - if name is None: - raise Exception(f'Cannot find "namespace" in the LISA template: {namespace}') - if name == "lisa": - members = namespace.get('members') - if members is None: - raise Exception(f'Cannot find the members of the lisa namespace in the LISA template: {namespace}') - get_linux_configuration = members.get('getLinuxConfiguration') - if get_linux_configuration is None: - raise Exception(f'Cannot find the "getLinuxConfiguration" function the lisa namespace in the LISA template: {namespace}') - output = get_linux_configuration.get('output') - if output is None: - raise Exception(f'Cannot find the "output" of the getLinuxConfiguration function in the LISA template: {get_linux_configuration}') - value = output.get('value') - if value is None: - raise Exception(f"Cannot find the output's value of the getLinuxConfiguration function in the LISA template: {get_linux_configuration}") - value['provisionVMAgent'] = False - break - else: - raise Exception(f'Cannot find the "lisa" namespace in the LISA template: {functions}') + get_linux_configuration = self.get_lisa_function(template, 'getLinuxConfiguration') + output = self.get_function_output(get_linux_configuration) + if output.get('customData') is not None: + raise Exception(f"The getOSProfile function already has a 'customData'. Won't override it. 
Definition: {get_linux_configuration}") + output['provisionVMAgent'] = False diff --git a/tests_e2e/tests/agent_wait_for_cloud_init/add_cloud_init_script.py b/tests_e2e/tests/agent_wait_for_cloud_init/add_cloud_init_script.py new file mode 100755 index 0000000000..1fbc60adc4 --- /dev/null +++ b/tests_e2e/tests/agent_wait_for_cloud_init/add_cloud_init_script.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import base64 + +from typing import Any, Dict + +from tests_e2e.tests.agent_wait_for_cloud_init.agent_wait_for_cloud_init import AgentWaitForCloudInit +from tests_e2e.tests.lib.update_arm_template import UpdateArmTemplate + + +class AddCloudInitScript(UpdateArmTemplate): + """ + Adds AgentWaitForCloudInit.CloudInitScript to the ARM template as osProfile.customData. + """ + def update(self, template: Dict[str, Any], is_lisa_template: bool) -> None: + if not is_lisa_template: + raise Exception('This test can only customize LISA ARM templates.') + + # + # cloud-init configuration needs to be added in the osProfile.customData property as a base64-encoded string. + # + # LISA uses the getOSProfile function to generate the value for osProfile; add customData to its output, checking that we do not + # override any existing value (the current LISA template does not have any). + # + # "getOSProfile": { + # "parameters": [ + # ... 
+ # ], + # "output": { + # "type": "object", + # "value": { + # "computername": "[parameters('computername')]", + # "adminUsername": "[parameters('admin_username')]", + # "adminPassword": "[if(parameters('has_password'), parameters('admin_password'), json('null'))]", + # "linuxConfiguration": "[if(parameters('has_linux_configuration'), parameters('linux_configuration'), json('null'))]" + # } + # } + # } + # + encoded_script = base64.b64encode(AgentWaitForCloudInit.CloudInitScript.encode('utf-8')).decode('utf-8') + + get_os_profile = self.get_lisa_function(template, 'getOSProfile') + output = self.get_function_output(get_os_profile) + if output.get('customData') is not None: + raise Exception(f"The getOSProfile function already has a 'customData'. Won't override it. Definition: {get_os_profile}") + output['customData'] = encoded_script + diff --git a/tests_e2e/tests/agent_wait_for_cloud_init/agent_wait_for_cloud_init.py b/tests_e2e/tests/agent_wait_for_cloud_init/agent_wait_for_cloud_init.py new file mode 100755 index 0000000000..d9b4ecaef1 --- /dev/null +++ b/tests_e2e/tests/agent_wait_for_cloud_init/agent_wait_for_cloud_init.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import time + +from assertpy import fail + +from tests_e2e.tests.lib.agent_test import AgentVmTest +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.shell import CommandError +from tests_e2e.tests.lib.ssh_client import SshClient + + +class AgentWaitForCloudInit(AgentVmTest): + """ + This test verifies that the Agent waits for cloud-init to complete before it starts processing extensions. + + To do this, it adds 'CloudInitScript' in cloud-init's custom data. The script ensures first that the Agent + is waiting for cloud-init, and then sleeps for a couple of minutes before completing. The script appends + a set of known messages to waagent.log, and the test simply verifies that the messages are present in the + log in the expected order, and that they occur before the Agent reports that it is processing extensions. + """ + CloudInitScript = """#!/usr/bin/env bash + set -euox pipefail + + echo ">>> $(date) cloud-init script begin" >> /var/log/waagent.log + while ! 
grep 'Waiting for cloud-init to complete' /var/log/waagent.log; do + sleep 15 + done + echo ">>> $(date) The Agent is waiting for cloud-init, will pause for a couple of minutes" >> /var/log/waagent.log + sleep 120 + echo ">>> $(date) cloud-init script end" >> /var/log/waagent.log + """ + + def run(self): + ssh_client: SshClient = self._context.create_ssh_client() + + log.info("Waiting for Agent to start processing extensions") + for _ in range(15): + try: + ssh_client.run_command("grep 'ProcessExtensionsGoalState started' /var/log/waagent.log") + break + except CommandError: + log.info("The Agent has not started to process extensions, will check again after a short delay") + time.sleep(60) + else: + raise Exception("Timeout while waiting for the Agent to start processing extensions") + + log.info("The Agent has started to process extensions") + + output = ssh_client.run_command( + "grep -E '^>>>|" + + "INFO ExtHandler ExtHandler cloud-init completed|" + + "INFO ExtHandler ExtHandler ProcessExtensionsGoalState started' /var/log/waagent.log") + + output = output.rstrip().splitlines() + + expected = [ + 'cloud-init script begin', + 'The Agent is waiting for cloud-init, will pause for a couple of minutes', + 'cloud-init script end', + 'cloud-init completed', + 'ProcessExtensionsGoalState started' + ] + + indent = lambda lines: "\n".join([f" {ln}" for ln in lines]) + if len(output) == len(expected) and all([expected[i] in output[i] for i in range(len(expected))]): + log.info("The Agent waited for cloud-init before processing extensions.\nLog messages:\n%s", indent(output)) + else: + fail(f"The Agent did not wait for cloud-init before processing extensions.\nExpected:\n{indent(expected)}\nActual:\n{indent(output)}") + + +if __name__ == "__main__": + AgentWaitForCloudInit.run_from_command_line() + diff --git a/tests_e2e/tests/lib/update_arm_template.py b/tests_e2e/tests/lib/update_arm_template.py index af69fba048..010178ab9c 100644 --- 
a/tests_e2e/tests/lib/update_arm_template.py +++ b/tests_e2e/tests/lib/update_arm_template.py @@ -55,4 +55,87 @@ def get_resource_by_name(resources: List[Dict[str, Any]], resource_name: str, ty return item raise KeyError(f"Cannot find a resource {resource_name} of type {type_name} in the ARM template") + @staticmethod + def get_lisa_function(template: Dict[str, Any], function_name: str) -> Dict[str, Any]: + """ + Looks for the given function name in the LISA namespace and returns its definition. Raises KeyError if the function is not found. + """ + # + # NOTE: LISA's functions are in the "lisa" namespace, for example: + # + # "functions": [ + # { + # "namespace": "lisa", + # "members": { + # "getOSProfile": { + # "parameters": [ + # { + # "name": "computername", + # "type": "string" + # }, + # etc. + # ], + # "output": { + # "type": "object", + # "value": { + # "computername": "[parameters('computername')]", + # "adminUsername": "[parameters('admin_username')]", + # "adminPassword": "[if(parameters('has_password'), parameters('admin_password'), json('null'))]", + # "linuxConfiguration": "[if(parameters('has_linux_configuration'), parameters('linux_configuration'), json('null'))]" + # } + # } + # }, + # } + # } + # ] + functions = template.get("functions") + if functions is None: + raise Exception('Cannot find "functions" in the LISA template.') + for namespace in functions: + name = namespace.get("namespace") + if name is None: + raise Exception(f'Cannot find "namespace" in the LISA template: {namespace}') + if name == "lisa": + lisa_functions = namespace.get('members') + if lisa_functions is None: + raise Exception(f'Cannot find the members of the lisa namespace in the LISA template: {namespace}') + function_definition = lisa_functions.get(function_name) + if function_definition is None: + raise KeyError(f'Cannot find function {function_name} in the lisa namespace in the LISA template: {namespace}') + return function_definition + raise Exception(f'Cannot find the 
"lisa" namespace in the LISA template: {functions}') + + @staticmethod + def get_function_output(function: Dict[str, Any]) -> Dict[str, Any]: + """ + Returns the "value" property of the output for the given function. + + Sample function: + + { + "parameters": [ + { + "name": "computername", + "type": "string" + }, + etc. + ], + "output": { + "type": "object", + "value": { + "computername": "[parameters('computername')]", + "adminUsername": "[parameters('admin_username')]", + "adminPassword": "[if(parameters('has_password'), parameters('admin_password'), json('null'))]", + "linuxConfiguration": "[if(parameters('has_linux_configuration'), parameters('linux_configuration'), json('null'))]" + } + } + } + """ + output = function.get('output') + if output is None: + raise Exception(f'Cannot find the "output" of the given function: {function}') + value = output.get('value') + if value is None: + raise Exception(f"Cannot find the output's value of the given function: {function}") + return value From cc689f5b8a7c51385e5fa3bd4859500147b5d0cf Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Fri, 2 Feb 2024 11:34:24 -0800 Subject: [PATCH 136/240] Revert changes to publish_hostname in RedhatOSModernUtil (#3032) * Revert changes to publish_hostname in RedhatOSModernUtil * Fix pylint bad-super-call --- azurelinuxagent/common/osutil/redhat.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/azurelinuxagent/common/osutil/redhat.py b/azurelinuxagent/common/osutil/redhat.py index 05a4b659df..2d8ff3d1e5 100644 --- a/azurelinuxagent/common/osutil/redhat.py +++ b/azurelinuxagent/common/osutil/redhat.py @@ -192,3 +192,10 @@ def restart_if(self, ifname, retries=3, wait=5): time.sleep(wait) else: logger.warn("exceeded restart retries") + + def publish_hostname(self, hostname): + # RedhatOSUtil was updated to conditionally run NetworkManager restart in response to a race condition between + # NetworkManager restart and the agent 
restarting the network interface during publish_hostname. Keeping the + # NetworkManager restart in RedhatOSModernUtil because the issue was not reproduced on these versions. + shellutil.run("service NetworkManager restart") + DefaultOSUtil.publish_hostname(self, hostname) From 20f06702462cf56cb6a96e6ec866deccb92f1cd3 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Fri, 2 Feb 2024 14:36:30 -0800 Subject: [PATCH 137/240] Remove agent_wait_for_cloud_init from automated runs (#3034) Co-authored-by: narrieta --- tests_e2e/orchestrator/runbook.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index ed0b816b12..9181e9189c 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -29,7 +29,7 @@ variable: # Test suites to execute # - name: test_suites - value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_status, multi_config_ext, agent_cgroups, ext_cgroups, agent_firewall, ext_telemetry_pipeline, ext_sequencing, agent_persist_firewall, publish_hostname, agent_update, agent_wait_for_cloud_init" + value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_status, multi_config_ext, agent_cgroups, ext_cgroups, agent_firewall, ext_telemetry_pipeline, ext_sequencing, agent_persist_firewall, publish_hostname, agent_update" # # Parameters used to create test VMs From 4b484b8b9d3fd62eb3dc8ed9e591c66bd8b232e6 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Mon, 5 Feb 2024 10:09:38 -0800 Subject: [PATCH 138/240] Adding AutoUpdate.UpdateToLatestVersion new flag support (#3020) * support new flag * address comments * added more info * updated * address comments * resolving comment * updated --- README.md | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 
deletions(-) diff --git a/README.md b/README.md index 6d0296bfcc..5a5b126f2f 100644 --- a/README.md +++ b/README.md @@ -297,19 +297,38 @@ _Note_: setting up this parameter to more than a few minutes can make the state the VM be reported as unresponsive/unavailable on the Azure portal. Also, this setting affects how fast the agent starts executing extensions. -#### __AutoUpdate.Enabled__ +#### __AutoUpdate.UpdateToLatestVersion__ -_Type: Boolean_ +_Type: Boolean_ _Default: y_ -Enables auto-update of the Extension Handler. The Extension Handler is responsible +Enables auto-update of the Extension Handler. The Extension Handler is responsible for managing extensions and reporting VM status. The core functionality of the agent -is contained in the Extension Handler, and we encourage users to enable this option +is contained in the Extension Handler, and we encourage users to enable this option in order to maintain an up to date version. + +When this option is enabled, the Agent will install new versions when they become +available. When disabled, the Agent will not install any new versions, but it will use +the most recent version already installed on the VM. -On most distros the default value is 'y'. +_Notes_: +1. This option was added on version 2.10.0.8 of the Agent. For previous versions, see AutoUpdate.Enabled. +2. If both options are specified in waagent.conf, AutoUpdate.UpdateToLatestVersion overrides the value set for AutoUpdate.Enabled. +3. Changing config option requires a service restart to pick up the updated setting. + +For more information on the agent version, see our [FAQ](https://github.com/Azure/WALinuxAgent/wiki/FAQ#what-does-goal-state-agent-mean-in-waagent---version-output).
+For more information on the agent update, see our [FAQ](https://github.com/Azure/WALinuxAgent/wiki/FAQ#how-auto-update-works-for-extension-handler).
+For more information on the AutoUpdate.UpdateToLatestVersion vs AutoUpdate.Enabled, see our [FAQ](https://github.com/Azure/WALinuxAgent/wiki/FAQ#autoupdateenabled-vs-autoupdateupdatetolatestversion).
+ +#### __AutoUpdate.Enabled__ -For more information on the agent version, see our [FAQ](https://github.com/Azure/WALinuxAgent/wiki/FAQ#what-does-goal-state-agent-mean-in-waagent---version-output). +_Type: Boolean_ +_Default: y_ + +Enables auto-update of the Extension Handler. This flag is supported for legacy reasons and we strongly recommend using AutoUpdate.UpdateToLatestVersion instead. +The difference between these 2 flags is that, when set to 'n', AutoUpdate.Enabled will use the version of the Extension Handler that is pre-installed on the image, while AutoUpdate.UpdateToLatestVersion will use the most recent version that has already been installed on the VM (via auto-update). + +On most distros the default value is 'y'. #### __Provisioning.Agent__ From b846a11efab9d33c162f4cbb0a88e505a27dec59 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Mon, 5 Feb 2024 11:42:49 -0800 Subject: [PATCH 139/240] Retry get instance view if only name property is present (#3036) * Retry get instance view if incomplete during assertions * Retry getting instance view if only name property is present --- .../tests/lib/virtual_machine_extension_client.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests_e2e/tests/lib/virtual_machine_extension_client.py b/tests_e2e/tests/lib/virtual_machine_extension_client.py index d54f881d05..d1f3e61a15 100644 --- a/tests_e2e/tests/lib/virtual_machine_extension_client.py +++ b/tests_e2e/tests/lib/virtual_machine_extension_client.py @@ -18,7 +18,7 @@ # # This module includes facilities to execute VM extension operations (enable, remove, etc). # - +import json import uuid from assertpy import assert_that, soft_assertions @@ -134,8 +134,15 @@ def assert_instance_view( If 'assert_function' is provided, it is invoked passing as parameter the instance view. This function can be used to perform additional validations. 
""" + # Sometimes we get incomplete instance view with only 'name' property which causes issues during assertions. + # Retry attempt to get instance view if only 'name' property is populated. + attempt = 1 instance_view = self.get_instance_view() - log.info("Instance view:\n%s", instance_view.serialize()) + while instance_view.name is not None and instance_view.type_handler_version is None and instance_view.statuses is None and attempt < 3: + log.info("Instance view is incomplete: %s\nRetrying attempt to get instance view...", instance_view.serialize()) + instance_view = self.get_instance_view() + attempt += 1 + log.info("Instance view:\n%s", json.dumps(instance_view.serialize(), indent=4)) with soft_assertions(): if expected_version is not None: From 25d71034507c93dc453984320d9d013fa5f8c6ff Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Mon, 5 Feb 2024 14:18:59 -0800 Subject: [PATCH 140/240] Fix regex in agent extension workflow (#3035) --- ...low-validate_no_lag_between_agent_start_and_gs_processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_e2e/tests/scripts/agent_ext_workflow-validate_no_lag_between_agent_start_and_gs_processing.py b/tests_e2e/tests/scripts/agent_ext_workflow-validate_no_lag_between_agent_start_and_gs_processing.py index 0b492d8153..7f328398b8 100755 --- a/tests_e2e/tests/scripts/agent_ext_workflow-validate_no_lag_between_agent_start_and_gs_processing.py +++ b/tests_e2e/tests/scripts/agent_ext_workflow-validate_no_lag_between_agent_start_and_gs_processing.py @@ -43,7 +43,7 @@ def main(): # Example: Agent WALinuxAgent-2.2.47.2 is running as the goal state agent agent_started_regex = r"Azure Linux Agent \(Goal State Agent version [0-9.]+\)" - gs_completed_regex = r"ProcessExtensionsGoalState completed\s\[(?P[a-z_\d]{13,14})\s(?P\d+)\sms\]" + gs_completed_regex = r"ProcessExtensionsGoalState completed\s\[(?P[a-z]+_\d+)\s(?P\d+)\sms\]" verified_atleast_one_log_line = 
False verified_atleast_one_agent_started_log_line = False From 0cd86173d40fc97a8eb21e199c2409d2d334fee4 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Mon, 5 Feb 2024 15:52:44 -0800 Subject: [PATCH 141/240] Recover primary nic if down after publishing hostname in RedhatOSUtil (#3024) * Check nic state and recover if down: * Fix typo * Fix state comparison * Fix pylint errors * Fix string comparison * Report publish hostname failure in calling thread * Add todo to check nic state for all distros where we reset network * Update detection to check connection state and separate recover from publish * Pylint unused argument * refactor recover_nic argument * Network interface e2e test * e2e test for recovering the network interface on redhat distros * Only run scenario on distros which use RedhatOSUtil * Fix call to parent publish_hostname to include recover_nic arg * Update comments in default os util * Remove comment * Fix comment * Do not do detection/recover on RedhatOSMOdernUtil * Resolve PR comments * Make script executable * Revert pypy change * Fix publish hostname paramters --- azurelinuxagent/common/event.py | 1 + azurelinuxagent/common/osutil/default.py | 11 +- azurelinuxagent/common/osutil/gaia.py | 2 +- azurelinuxagent/common/osutil/iosxe.py | 4 +- azurelinuxagent/common/osutil/redhat.py | 89 ++++++++++- azurelinuxagent/common/osutil/suse.py | 2 +- azurelinuxagent/ga/env.py | 6 +- tests_e2e/test_suites/images.yml | 8 + .../test_suites/recover_network_interface.yml | 17 +++ .../recover_network_interface.py | 143 ++++++++++++++++++ ...ver_network_interface-get_nm_controlled.py | 39 +++++ 11 files changed, 308 insertions(+), 14 deletions(-) create mode 100644 tests_e2e/test_suites/recover_network_interface.yml create mode 100644 tests_e2e/tests/recover_network_interface/recover_network_interface.py create mode 100755 tests_e2e/tests/scripts/recover_network_interface-get_nm_controlled.py diff --git 
a/azurelinuxagent/common/event.py b/azurelinuxagent/common/event.py index b010583808..435a95e270 100644 --- a/azurelinuxagent/common/event.py +++ b/azurelinuxagent/common/event.py @@ -95,6 +95,7 @@ class WALAEventOperation: HealthCheck = "HealthCheck" HealthObservation = "HealthObservation" HeartBeat = "HeartBeat" + HostnamePublishing = "HostnamePublishing" HostPlugin = "HostPlugin" HostPluginHeartbeat = "HostPluginHeartbeat" HostPluginHeartbeatExtended = "HostPluginHeartbeatExtended" diff --git a/azurelinuxagent/common/osutil/default.py b/azurelinuxagent/common/osutil/default.py index 69e20bea7a..c52146ca7c 100644 --- a/azurelinuxagent/common/osutil/default.py +++ b/azurelinuxagent/common/osutil/default.py @@ -1190,11 +1190,20 @@ def restart_if(self, ifname, retries=3, wait=5): else: logger.warn("exceeded restart retries") - def publish_hostname(self, hostname): + def check_and_recover_nic_state(self, ifname): + # TODO: This should be implemented for all distros where we reset the network during publishing hostname. Currently it is only implemented in RedhatOSUtil. + pass + + def publish_hostname(self, hostname, recover_nic=False): + """ + Publishes the provided hostname. 
+ """ self.set_dhcp_hostname(hostname) self.set_hostname_record(hostname) ifname = self.get_if_name() self.restart_if(ifname) + if recover_nic: + self.check_and_recover_nic_state(ifname) def set_scsi_disks_timeout(self, timeout): for dev in os.listdir("/sys/block"): diff --git a/azurelinuxagent/common/osutil/gaia.py b/azurelinuxagent/common/osutil/gaia.py index 8271163c2d..849d5d1fa1 100644 --- a/azurelinuxagent/common/osutil/gaia.py +++ b/azurelinuxagent/common/osutil/gaia.py @@ -202,7 +202,7 @@ def set_hostname(self, hostname): def set_dhcp_hostname(self, hostname): logger.warn('set_dhcp_hostname is ignored on GAiA') - def publish_hostname(self, hostname): + def publish_hostname(self, hostname, recover_nic=False): logger.warn('publish_hostname is ignored on GAiA') def del_account(self, username): diff --git a/azurelinuxagent/common/osutil/iosxe.py b/azurelinuxagent/common/osutil/iosxe.py index ace28f073c..4ff2b9d974 100644 --- a/azurelinuxagent/common/osutil/iosxe.py +++ b/azurelinuxagent/common/osutil/iosxe.py @@ -58,12 +58,12 @@ def set_hostname(self, hostname): logger.warn("[{0}] failed with error: {1}, attempting fallback".format(' '.join(hostnamectl_cmd), ustr(e))) DefaultOSUtil.set_hostname(self, hostname) - def publish_hostname(self, hostname): + def publish_hostname(self, hostname, recover_nic=False): """ Restart NetworkManager first before publishing hostname """ shellutil.run("service NetworkManager restart") - super(IosxeOSUtil, self).publish_hostname(hostname) + super(IosxeOSUtil, self).publish_hostname(hostname, recover_nic) def register_agent_service(self): return shellutil.run("systemctl enable waagent", chk_err=False) diff --git a/azurelinuxagent/common/osutil/redhat.py b/azurelinuxagent/common/osutil/redhat.py index 2d8ff3d1e5..a9a103477b 100644 --- a/azurelinuxagent/common/osutil/redhat.py +++ b/azurelinuxagent/common/osutil/redhat.py @@ -117,12 +117,11 @@ def set_hostname(self, hostname): logger.warn("[{0}] failed, attempting fallback".format(' 
'.join(hostnamectl_cmd))) DefaultOSUtil.set_hostname(self, hostname) - def get_nm_controlled(self): - ifname = self.get_if_name() + def get_nm_controlled(self, ifname): filepath = "/etc/sysconfig/network-scripts/ifcfg-{0}".format(ifname) nm_controlled_cmd = ['grep', 'NM_CONTROLLED=', filepath] try: - result = shellutil.run_command(nm_controlled_cmd, log_error=False, encode_output=False).rstrip() + result = shellutil.run_command(nm_controlled_cmd, log_error=False).rstrip() if result and len(result.split('=')) > 1: # Remove trailing white space and ' or " characters @@ -140,17 +139,87 @@ def get_nm_controlled(self): return True - def publish_hostname(self, hostname): + def get_nic_operational_and_general_states(self, ifname): + """ + Checks the contents of /sys/class/net/{ifname}/operstate and the results of 'nmcli -g general.state device show {ifname}' to determine the state of the provided interface. + Raises an exception if the network interface state cannot be determined. + """ + filepath = "/sys/class/net/{0}/operstate".format(ifname) + nic_general_state_cmd = ['nmcli', '-g', 'general.state', 'device', 'show', ifname] + if not os.path.isfile(filepath): + msg = "Unable to determine primary network interface {0} state, because state file does not exist: {1}".format(ifname, filepath) + logger.warn(msg) + raise Exception(msg) + + try: + nic_oper_state = fileutil.read_file(filepath).rstrip().lower() + nic_general_state = shellutil.run_command(nic_general_state_cmd, log_error=True).rstrip().lower() + if nic_oper_state != "up": + logger.warn("The primary network interface {0} operational state is '{1}'.".format(ifname, nic_oper_state)) + else: + logger.info("The primary network interface {0} operational state is '{1}'.".format(ifname, nic_oper_state)) + if nic_general_state != "100 (connected)": + logger.warn("The primary network interface {0} general state is '{1}'.".format(ifname, nic_general_state)) + else: + logger.info("The primary network interface {0} general 
state is '{1}'.".format(ifname, nic_general_state)) + return nic_oper_state, nic_general_state + except Exception as e: + msg = "Unexpected error while determining the primary network interface state: {0}".format(e) + logger.warn(msg) + raise Exception(msg) + + def check_and_recover_nic_state(self, ifname): + """ + Checks if the provided network interface is in an 'up' state. If the network interface is in a 'down' state, + attempt to recover the interface by restarting the Network Manager service. + + Raises an exception if an attempt to bring the interface into an 'up' state fails, or if the state + of the network interface cannot be determined. + """ + nic_operstate, nic_general_state = self.get_nic_operational_and_general_states(ifname) + if nic_operstate == "down" or "disconnected" in nic_general_state: + logger.info("Restarting the Network Manager service to recover network interface {0}".format(ifname)) + self.restart_network_manager() + # Interface does not come up immediately after NetworkManager restart. Wait 5 seconds before checking + # network interface state. + time.sleep(5) + nic_operstate, nic_general_state = self.get_nic_operational_and_general_states(ifname) + # It is possible for network interface to be in an unknown or unmanaged state. Log warning if state is not + # down, disconnected, up, or connected + if nic_operstate != "up" or nic_general_state != "100 (connected)": + msg = "Network Manager restart failed to bring network interface {0} into 'up' and 'connected' state".format(ifname) + logger.warn(msg) + raise Exception(msg) + else: + logger.info("Network Manager restart successfully brought the network interface {0} into 'up' and 'connected' state".format(ifname)) + elif nic_operstate != "up" or nic_general_state != "100 (connected)": + # We already logged a warning with the network interface state in get_nic_operstate(). Raise an exception + # for the env thread to send to telemetry. 
+ raise Exception("The primary network interface {0} operational state is '{1}' and general state is '{2}'.".format(ifname, nic_operstate, nic_general_state)) + + def restart_network_manager(self): + shellutil.run("service NetworkManager restart") + + def publish_hostname(self, hostname, recover_nic=False): """ Restart NetworkManager first before publishing hostname, only if the network interface is not controlled by the NetworkManager service (as determined by NM_CONTROLLED=n in the interface configuration). If the NetworkManager service is restarted before the agent publishes the hostname, and NM_controlled=y, a race condition may happen between the NetworkManager service and the Guest Agent making changes to the network interface configuration simultaneously. + + Note: check_and_recover_nic_state(ifname) raises an Exception if an attempt to recover the network interface + fails, or if the network interface state cannot be determined. Callers should handle this exception by sending + an event to telemetry. + + TODO: Improve failure reporting and add success reporting to telemetry for hostname changes. Right now we are only reporting failures to telemetry by raising an Exception in publish_hostname for the calling thread to handle by reporting the failure to telemetry. """ - if not self.get_nm_controlled(): - shellutil.run("service NetworkManager restart") - super(RedhatOSUtil, self).publish_hostname(hostname) + ifname = self.get_if_name() + nm_controlled = self.get_nm_controlled(ifname) + if not nm_controlled: + self.restart_network_manager() + # TODO: Current recover logic is only effective when the NetworkManager manages the network interface. 
Update the recover logic so it is effective even when NM_CONTROLLED=n + super(RedhatOSUtil, self).publish_hostname(hostname, recover_nic and nm_controlled) def register_agent_service(self): return shellutil.run("systemctl enable {0}".format(self.service_name), chk_err=False) @@ -193,7 +262,11 @@ def restart_if(self, ifname, retries=3, wait=5): else: logger.warn("exceeded restart retries") - def publish_hostname(self, hostname): + def check_and_recover_nic_state(self, ifname): + # TODO: Implement and test a way to recover the network interface for RedhatOSModernUtil + pass + + def publish_hostname(self, hostname, recover_nic=False): # RedhatOSUtil was updated to conditionally run NetworkManager restart in response to a race condition between # NetworkManager restart and the agent restarting the network interface during publish_hostname. Keeping the # NetworkManager restart in RedhatOSModernUtil because the issue was not reproduced on these versions. diff --git a/azurelinuxagent/common/osutil/suse.py b/azurelinuxagent/common/osutil/suse.py index 52fd3ce565..ced0113dc8 100644 --- a/azurelinuxagent/common/osutil/suse.py +++ b/azurelinuxagent/common/osutil/suse.py @@ -72,7 +72,7 @@ def __init__(self): super(SUSEOSUtil, self).__init__() self.dhclient_name = 'wickedd-dhcp4' - def publish_hostname(self, hostname): + def publish_hostname(self, hostname, recover_nic=False): self.set_dhcp_hostname(hostname) self.set_hostname_record(hostname) ifname = self.get_if_name() diff --git a/azurelinuxagent/ga/env.py b/azurelinuxagent/ga/env.py index 76d9ee73f1..0e73e7d3ec 100644 --- a/azurelinuxagent/ga/env.py +++ b/azurelinuxagent/ga/env.py @@ -171,7 +171,11 @@ def _operation(self): self._hostname, curr_hostname) self._osutil.set_hostname(curr_hostname) - self._osutil.publish_hostname(curr_hostname) + try: + self._osutil.publish_hostname(curr_hostname, recover_nic=True) + except Exception as e: + msg = "Error while publishing the hostname: {0}".format(e) + add_event(AGENT_NAME, 
op=WALAEventOperation.HostnamePublishing, is_success=False, message=msg, log_event=False) self._hostname = curr_hostname diff --git a/tests_e2e/test_suites/images.yml b/tests_e2e/test_suites/images.yml index a04de7449b..2892bd0fda 100644 --- a/tests_e2e/test_suites/images.yml +++ b/tests_e2e/test_suites/images.yml @@ -85,6 +85,7 @@ images: locations: AzureChinaCloud: [] centos_610: "OpenLogic CentOS 6.10 latest" + centos_75: "OpenLogic CentOS 7.5 latest" centos_79: "OpenLogic CentOS 7_9 latest" centos_82: urn: "OpenLogic CentOS 8_2 latest" @@ -121,7 +122,14 @@ images: AzureChinaCloud: [] AzureUSGovernment: [] oracle_610: "Oracle Oracle-Linux 6.10 latest" + oracle_75: "Oracle Oracle-Linux 7.5 latest" + oracle_79: "Oracle Oracle-Linux ol79-gen2 latest" + oracle_82: "Oracle Oracle-Linux ol82-gen2 latest" rhel_610: "RedHat RHEL 6.10 latest" + rhel_75: + urn: "RedHat RHEL 7.5 latest" + locations: + AzureChinaCloud: [] rhel_79: urn: "RedHat RHEL 7_9 latest" locations: diff --git a/tests_e2e/test_suites/recover_network_interface.yml b/tests_e2e/test_suites/recover_network_interface.yml new file mode 100644 index 0000000000..3021013d2b --- /dev/null +++ b/tests_e2e/test_suites/recover_network_interface.yml @@ -0,0 +1,17 @@ +# +# Brings the primary network interface down and checks that the agent can recover the network. +# +name: "RecoverNetworkInterface" +tests: + - "recover_network_interface/recover_network_interface.py" +images: +# TODO: This scenario should be run on all distros which bring the network interface down to publish hostname. Currently, only RedhatOSUtil attempts to recover the network interface if down after hostname publishing. 
+ - "centos_79" + - "centos_75" + - "centos_82" + - "rhel_75" + - "rhel_79" + - "rhel_82" + - "oracle_75" + - "oracle_79" + - "oracle_82" \ No newline at end of file diff --git a/tests_e2e/tests/recover_network_interface/recover_network_interface.py b/tests_e2e/tests/recover_network_interface/recover_network_interface.py new file mode 100644 index 0000000000..bbc5e6c4b9 --- /dev/null +++ b/tests_e2e/tests/recover_network_interface/recover_network_interface.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This test uses CSE to bring the network down and call check_and_recover_nic_state to bring the network back into an +# 'up' and 'connected' state. The intention of the test is to alert us if there is some change in newer distros which +# affects this logic. 
+# + +import json +from typing import List, Dict, Any + +from assertpy import fail, assert_that +from time import sleep + +from tests_e2e.tests.lib.agent_test import AgentVmTest, TestSkipped +from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient +from tests_e2e.tests.lib.vm_extension_identifier import VmExtensionIds + + +class RecoverNetworkInterface(AgentVmTest): + def __init__(self, context: AgentVmTestContext): + super().__init__(context) + self._context = context + self._ssh_client = context.create_ssh_client() + self._private_ip = context.vm.get_private_ip_address() + self._vm_password = "" + + def add_vm_password(self): + # Add password to VM to help with debugging in case of failure + # REMOVE PWD FROM LOGS IF WE EVER MAKE THESE RUNS/LOGS PUBLIC + username = self._ssh_client.username + pwd = self._ssh_client.run_command("openssl rand -base64 32 | tr : .").rstrip() + self._vm_password = pwd + log.info("VM Username: {0}; VM Password: {1}".format(username, pwd)) + self._ssh_client.run_command("echo '{0}:{1}' | sudo -S chpasswd".format(username, pwd)) + + def check_agent_reports_status(self): + status_updated = False + last_agent_status_time = self._context.vm.get_instance_view().vm_agent.statuses[0].time + log.info("Agent reported status at {0}".format(last_agent_status_time)) + retries = 3 + + while retries > 0 and not status_updated: + agent_status_time = self._context.vm.get_instance_view().vm_agent.statuses[0].time + if agent_status_time != last_agent_status_time: + status_updated = True + log.info("Agent reported status at {0}".format(last_agent_status_time)) + else: + retries -= 1 + sleep(60) + + if not status_updated: + fail("Agent hasn't reported status since {0} and ssh connection failed. 
Use the serial console in portal " + "to debug".format(last_agent_status_time)) + + def run(self): + # Add password to VM and log. This allows us to debug with serial console if necessary + log.info("") + log.info("Adding password to the VM to use for debugging in case necessary...") + self.add_vm_password() + + # Skip the test if NM_CONTROLLED=n. The current recover logic does not work in this case + result = self._ssh_client.run_command("recover_network_interface-get_nm_controlled.py", use_sudo=True) + if "Interface is NOT NM controlled" in result: + raise TestSkipped("Current recover method will not work on interfaces where NM_Controlled=n") + + # Get the primary network interface name + ifname = self._ssh_client.run_command("pypy3 -c 'from azurelinuxagent.common.osutil.redhat import RedhatOSUtil; print(RedhatOSUtil().get_if_name())'").rstrip() + # The interface name needs to be in double quotes for the pypy portion of the script + formatted_ifname = f'"{ifname}"' + + # The script should bring the primary network interface down and use the agent to recover the interface. These + # commands will bring the network down, so they should be executed on the machine using CSE instead of ssh. + script = f""" + set -euxo pipefail + ifdown {ifname}; + nic_state=$(nmcli -g general.state device show {ifname}) + echo Primary network interface state before recovering: $nic_state + source /home/{self._context.username}/bin/set-agent-env; + pypy3 -c 'from azurelinuxagent.common.osutil.redhat import RedhatOSUtil; RedhatOSUtil().check_and_recover_nic_state({formatted_ifname})'; + nic_state=$(nmcli -g general.state device show {ifname}); + echo Primary network interface state after recovering: $nic_state + """ + log.info("") + log.info("Using CSE to bring the primary network interface down and call the OSUtil to bring the interface back up. 
Command to execute: {0}".format(script)) + custom_script = VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.CustomScript, resource_name="CustomScript") + custom_script.enable( + protected_settings={ + 'commandToExecute': script + } + ) + + # Check that the interface was down and brought back up in instance view + log.info("") + log.info("Checking the instance view to confirm the primary network interface was brought down and successfully recovered by the agent...") + instance_view = custom_script.get_instance_view() + log.info("Instance view for custom script after enable is: {0}".format(json.dumps(instance_view.serialize(), indent=4))) + assert_that(len(instance_view.statuses)).described_as("Instance view should have a status for CustomScript").is_greater_than(0) + assert_that(instance_view.statuses[0].message).described_as("The primary network interface should be in a disconnected state before the attempt to recover").contains("Primary network interface state before recovering: 30 (disconnected)") + assert_that(instance_view.statuses[0].message).described_as("The primary network interface should be in a connected state after the attempt to recover").contains("Primary network interface state after recovering: 100 (connected)") + + # Check that the agent is successfully reporting status after recovering the network + log.info("") + log.info("Checking that the agent is reporting status after recovering the network...") + self.check_agent_reports_status() + + log.info("") + log.info("The primary network interface was successfully recovered by the agent.") + + def get_ignore_error_rules(self) -> List[Dict[str, Any]]: + ignore_rules = [ + # + # We may see temporary network unreachable warnings since we are bringing the network interface down + # 2024-02-01T23:40:03.563499Z ERROR ExtHandler ExtHandler Error fetching the goal state: [ProtocolError] GET vmSettings [correlation ID: ac21bdd7-1a7a-4bba-b307-b9d5bc30da33 eTag: 941323814975149980]: Request 
failed: [Errno 101] Network is unreachable + # + { + 'message': r"Error fetching the goal state: \[ProtocolError\] GET vmSettings.*Request failed: \[Errno 101\] Network is unreachable" + } + ] + return ignore_rules + + +if __name__ == "__main__": + RecoverNetworkInterface.run_from_command_line() diff --git a/tests_e2e/tests/scripts/recover_network_interface-get_nm_controlled.py b/tests_e2e/tests/scripts/recover_network_interface-get_nm_controlled.py new file mode 100755 index 0000000000..32ca378d83 --- /dev/null +++ b/tests_e2e/tests/scripts/recover_network_interface-get_nm_controlled.py @@ -0,0 +1,39 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# +import sys + +from azurelinuxagent.common.osutil import get_osutil + + +def main(): + os_util = get_osutil() + ifname = os_util.get_if_name() + nm_controlled = os_util.get_nm_controlled(ifname) + + if nm_controlled: + print("Interface is NM controlled") + else: + print("Interface is NOT NM controlled") + + sys.exit(0) + + +if __name__ == "__main__": + main() From 0c7b12d35e5fe5910c00a433f840dcd7c7678d4e Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Mon, 5 Feb 2024 16:16:38 -0800 Subject: [PATCH 142/240] Add recover_network_interface scenario to runbook (#3037) --- tests_e2e/orchestrator/runbook.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index 9181e9189c..8b0ef37ec7 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -29,7 +29,7 @@ variable: # Test suites to execute # - name: test_suites - value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_status, multi_config_ext, agent_cgroups, ext_cgroups, agent_firewall, ext_telemetry_pipeline, ext_sequencing, agent_persist_firewall, publish_hostname, agent_update" + value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_status, multi_config_ext, agent_cgroups, ext_cgroups, agent_firewall, ext_telemetry_pipeline, ext_sequencing, agent_persist_firewall, publish_hostname, agent_update, recover_network_interface" # # Parameters used to create test VMs From 176ffbf1efb1c908715ef8d82f22009f3c2f70fa Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 6 Feb 2024 13:40:02 -0800 Subject: [PATCH 143/240] Implementation of new conf flag AutoUpdate.UpdateToLatestVersion support (#3027) * GA update to latest version flag * address comments * resloving comments * 
added TODO * ignore warning * resolving comment * address comments * config present check * added a comment --- .../common/agent_supported_feature.py | 4 +- azurelinuxagent/common/conf.py | 33 ++++++++--- azurelinuxagent/ga/agent_update_handler.py | 4 +- azurelinuxagent/ga/update.py | 6 ++ config/alpine/waagent.conf | 6 +- config/arch/waagent.conf | 5 +- config/bigip/waagent.conf | 5 +- config/clearlinux/waagent.conf | 8 ++- config/coreos/waagent.conf | 5 +- config/debian/waagent.conf | 5 +- config/devuan/waagent.conf | 5 +- config/freebsd/waagent.conf | 5 +- config/gaia/waagent.conf | 7 +++ config/iosxe/waagent.conf | 5 +- config/mariner/waagent.conf | 8 ++- config/nsbsd/waagent.conf | 7 +++ config/openbsd/waagent.conf | 5 +- config/photonos/waagent.conf | 8 ++- config/suse/waagent.conf | 5 +- config/ubuntu/waagent.conf | 5 +- config/waagent.conf | 5 +- tests/common/test_conf.py | 54 ++++++++++++++++++ .../config/waagent_auto_update_disabled.conf | 11 ++++ ...led_update_to_latest_version_disabled.conf | 11 ++++ ...bled_update_to_latest_version_enabled.conf | 11 ++++ .../config/waagent_auto_update_enabled.conf | 11 ++++ ...led_update_to_latest_version_disabled.conf | 11 ++++ ...bled_update_to_latest_version_enabled.conf | 11 ++++ ...ent_update_to_latest_version_disabled.conf | 11 ++++ ...gent_update_to_latest_version_enabled.conf | 11 ++++ tests/data/test_waagent.conf | 6 +- tests/ga/test_agent_update_handler.py | 11 ++++ tests/ga/test_update.py | 16 +++--- tests/lib/tools.py | 1 - tests/test_agent.py | 2 +- tests_e2e/orchestrator/scripts/collect-logs | 5 +- tests_e2e/orchestrator/scripts/install-agent | 4 +- tests_e2e/tests/agent_update/self_update.py | 55 ++++++++++++++++++- tests_e2e/tests/lib/agent_log.py | 8 +++ .../agent_update-self_update_test_setup | 12 +++- 40 files changed, 360 insertions(+), 48 deletions(-) create mode 100644 tests/data/config/waagent_auto_update_disabled.conf create mode 100644 
tests/data/config/waagent_auto_update_disabled_update_to_latest_version_disabled.conf create mode 100644 tests/data/config/waagent_auto_update_disabled_update_to_latest_version_enabled.conf create mode 100644 tests/data/config/waagent_auto_update_enabled.conf create mode 100644 tests/data/config/waagent_auto_update_enabled_update_to_latest_version_disabled.conf create mode 100644 tests/data/config/waagent_auto_update_enabled_update_to_latest_version_enabled.conf create mode 100644 tests/data/config/waagent_update_to_latest_version_disabled.conf create mode 100644 tests/data/config/waagent_update_to_latest_version_enabled.conf diff --git a/azurelinuxagent/common/agent_supported_feature.py b/azurelinuxagent/common/agent_supported_feature.py index c3e83c5142..694c636391 100644 --- a/azurelinuxagent/common/agent_supported_feature.py +++ b/azurelinuxagent/common/agent_supported_feature.py @@ -76,7 +76,7 @@ def __init__(self): class _GAVersioningGovernanceFeature(AgentSupportedFeature): """ - CRP would drive the RSM upgrade version if agent reports that it does support RSM upgrades with this flag otherwise CRP fallback to largest version. + CRP would drive the RSM update if agent reports that it does support RSM upgrades with this flag otherwise CRP fallback to largest version. Agent doesn't report supported feature flag if auto update is disabled or old version of agent running that doesn't understand GA versioning. Note: Especially Windows need this flag to report to CRP that GA doesn't support the updates. So linux adopted same flag to have a common solution. 
@@ -84,7 +84,7 @@ class _GAVersioningGovernanceFeature(AgentSupportedFeature): __NAME = SupportedFeatureNames.GAVersioningGovernance __VERSION = "1.0" - __SUPPORTED = conf.get_autoupdate_enabled() + __SUPPORTED = conf.get_auto_update_to_latest_version() def __init__(self): super(_GAVersioningGovernanceFeature, self).__init__(name=self.__NAME, diff --git a/azurelinuxagent/common/conf.py b/azurelinuxagent/common/conf.py index a13f333576..a94220ecf7 100644 --- a/azurelinuxagent/common/conf.py +++ b/azurelinuxagent/common/conf.py @@ -87,6 +87,12 @@ def get_int(self, key, default_value): except ValueError: return self._get_default(default_value) + def is_present(self, key): + """ + Returns True if the given flag present in the configuration file, False otherwise. + """ + return self.values.get(key) is not None + __conf__ = ConfigurationProvider() @@ -129,6 +135,7 @@ def load_conf_from_file(conf_file_path, conf=__conf__): "ResourceDisk.EnableSwap": False, "ResourceDisk.EnableSwapEncryption": False, "AutoUpdate.Enabled": True, + "AutoUpdate.UpdateToLatestVersion": True, "EnableOverProvisioning": True, # # "Debug" options are experimental and may be removed in later @@ -137,7 +144,6 @@ def load_conf_from_file(conf_file_path, conf=__conf__): "Debug.CgroupLogMetrics": False, "Debug.CgroupDisableOnProcessCheckFailure": True, "Debug.CgroupDisableOnQuotaCheckFailure": True, - "Debug.DownloadNewAgents": True, "Debug.EnableAgentMemoryUsageCheck": False, "Debug.EnableFastTrack": True, "Debug.EnableGAVersioning": True @@ -230,6 +236,13 @@ def get_switch_default_value(option): raise ValueError("{0} is not a valid configuration parameter.".format(option)) +def is_present(key, conf=__conf__): + """ + Returns True if the given flag present in the configuration file, False otherwise. 
+ """ + return conf.is_present(key) + + def enable_firewall(conf=__conf__): return conf.get_switch("OS.EnableFirewall", False) @@ -513,15 +526,19 @@ def get_monitor_network_configuration_changes(conf=__conf__): return conf.get_switch("Monitor.NetworkConfigurationChanges", False) -def get_download_new_agents(conf=__conf__): +def get_auto_update_to_latest_version(conf=__conf__): """ - If True, the agent go through update logic to look for new agents to download otherwise it will stop agent updates. - NOTE: AutoUpdate.Enabled controls whether the Agent downloads new update and also whether any downloaded updates are started or not, while DownloadNewAgents controls only the former. - AutoUpdate.Enabled == false -> Agent preinstalled on the image will process extensions and will not update (regardless of DownloadNewAgents flag) - AutoUpdate.Enabled == true and DownloadNewAgents == true, any update already downloaded will be started, and agent look for future updates - AutoUpdate.Enabled == true and DownloadNewAgents == false, any update already downloaded will be started, but the agent will not look for future updates + If set to True, agent will update to the latest version + NOTE: + when both turned on, both AutoUpdate.Enabled and AutoUpdate.UpdateToLatestVersion same meaning: update to latest version + when turned off, AutoUpdate.Enabled: reverts to pre-installed agent, AutoUpdate.UpdateToLatestVersion: uses latest version already installed on the vm and does not download new agents + Even we are deprecating AutoUpdate.Enabled, we still need to support if users explicitly setting it instead new flag. + If AutoUpdate.UpdateToLatestVersion is present, it overrides any value set for AutoUpdate.Enabled (if present). + If AutoUpdate.UpdateToLatestVersion is not present but AutoUpdate.Enabled is present and set to 'n', we adhere to AutoUpdate.Enabled flag's behavior + if both not present, we default to True. 
""" - return conf.get_switch("Debug.DownloadNewAgents", True) + default = get_autoupdate_enabled(conf=conf) + return conf.get_switch("AutoUpdate.UpdateToLatestVersion", default) def get_cgroup_check_period(conf=__conf__): diff --git a/azurelinuxagent/ga/agent_update_handler.py b/azurelinuxagent/ga/agent_update_handler.py index f342357028..02549fa5a9 100644 --- a/azurelinuxagent/ga/agent_update_handler.py +++ b/azurelinuxagent/ga/agent_update_handler.py @@ -141,8 +141,8 @@ def _get_agent_family_manifest(self, goal_state): def run(self, goal_state, ext_gs_updated): try: - # Ignore new agents if update is disabled. The latter flag only used in e2e tests. - if not conf.get_autoupdate_enabled() or not conf.get_download_new_agents(): + # If auto update is disabled, we don't proceed with update + if not conf.get_auto_update_to_latest_version(): return # Update the state only on new goal state diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 1a0e362407..f2c20b0f75 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -772,6 +772,12 @@ def log_if_agent_versioning_feature_disabled(): log_if_op_disabled("OS.EnableFirewall", conf.enable_firewall()) log_if_op_disabled("Extensions.Enabled", conf.get_extensions_enabled()) log_if_op_disabled("AutoUpdate.Enabled", conf.get_autoupdate_enabled()) + log_if_op_disabled("AutoUpdate.UpdateToLatestVersion", conf.get_auto_update_to_latest_version()) + + if conf.is_present("AutoUpdate.Enabled") and conf.get_autoupdate_enabled() != conf.get_auto_update_to_latest_version(): + msg = "AutoUpdate.Enabled property is **Deprecated** now but it's set to different value from AutoUpdate.UpdateToLatestVersion. 
Please consider removing it if added by mistake" + logger.warn(msg) + add_event(AGENT_NAME, op=WALAEventOperation.ConfigurationChange, message=msg) if conf.enable_firewall(): log_if_int_changed_from_default("OS.EnableFirewallPeriod", conf.get_enable_firewall_period()) diff --git a/config/alpine/waagent.conf b/config/alpine/waagent.conf index d813ee5cae..a8620b5c4c 100644 --- a/config/alpine/waagent.conf +++ b/config/alpine/waagent.conf @@ -75,7 +75,11 @@ OS.OpensslPath=None OS.SshDir=/etc/ssh # Enable or disable goal state processing auto-update, default is enabled -# AutoUpdate.Enabled=y +# When turned off, it remains on latest version installed on the vm +# Added this new option AutoUpdate.UpdateToLatestVersion in place of AutoUpdate.Enabled, and encourage users to transition to this new option +# See wiki[https://github.com/Azure/WALinuxAgent/wiki/FAQ#autoupdateenabled-vs-autoupdateupdatetolatestversion] for more details +# TODO: Update the wiki link and point to readme page or public facing doc +# AutoUpdate.UpdateToLatestVersion=y # Determine the update family, this should not be changed # AutoUpdate.GAFamily=Prod diff --git a/config/arch/waagent.conf b/config/arch/waagent.conf index ef914e9f8c..947da9ba6e 100644 --- a/config/arch/waagent.conf +++ b/config/arch/waagent.conf @@ -100,7 +100,10 @@ OS.SshDir=/etc/ssh # OS.EnableRDMA=y # Enable or disable goal state processing auto-update, default is enabled -# AutoUpdate.Enabled=y +# When turned off, it remains on latest version installed on the vm +# Added this new option AutoUpdate.UpdateToLatestVersion in place of AutoUpdate.Enabled, and encourage users to transition to this new option +# See wiki[https://github.com/Azure/WALinuxAgent/wiki/FAQ#autoupdateenabled-vs-autoupdateupdatetolatestversion] for more details +# AutoUpdate.UpdateToLatestVersion=y # Determine the update family, this should not be changed # AutoUpdate.GAFamily=Prod diff --git a/config/bigip/waagent.conf b/config/bigip/waagent.conf index 
fe56c4d589..2bed138b9a 100644 --- a/config/bigip/waagent.conf +++ b/config/bigip/waagent.conf @@ -82,7 +82,10 @@ OS.SshdConfigPath=/config/ssh/sshd_config OS.EnableRDMA=n # Enable or disable goal state processing auto-update, default is enabled -AutoUpdate.Enabled=y +# When turned off, it remains on latest version installed on the vm +# Added this new option AutoUpdate.UpdateToLatestVersion in place of AutoUpdate.Enabled, and encourage users to transition to this new option +# See wiki[https://github.com/Azure/WALinuxAgent/wiki/FAQ#autoupdateenabled-vs-autoupdateupdatetolatestversion] for more details +# AutoUpdate.UpdateToLatestVersion=y # Determine the update family, this should not be changed # AutoUpdate.GAFamily=Prod diff --git a/config/clearlinux/waagent.conf b/config/clearlinux/waagent.conf index 0b70d2621c..61d3830df8 100644 --- a/config/clearlinux/waagent.conf +++ b/config/clearlinux/waagent.conf @@ -73,8 +73,12 @@ OS.OpensslPath=None # Set the path to SSH keys and configuration files OS.SshDir=/etc/ssh -# Enable or disable self-update, default is enabled -AutoUpdate.Enabled=y +# Enable or disable goal state processing auto-update, default is enabled +# When turned off, it remains on latest version installed on the vm +# Added this new option AutoUpdate.UpdateToLatestVersion in place of AutoUpdate.Enabled, and encourage users to transition to this new option +# See wiki[https://github.com/Azure/WALinuxAgent/wiki/FAQ#autoupdateenabled-vs-autoupdateupdatetolatestversion] for more details +# AutoUpdate.UpdateToLatestVersion=y + AutoUpdate.GAFamily=Prod # Determine if the overprovisioning feature is enabled. 
If yes, hold extension diff --git a/config/coreos/waagent.conf b/config/coreos/waagent.conf index 003482ab08..a7b2174034 100644 --- a/config/coreos/waagent.conf +++ b/config/coreos/waagent.conf @@ -104,7 +104,10 @@ OS.OpensslPath=None # OS.EnableRDMA=y # Enable or disable goal state processing auto-update, default is enabled -# AutoUpdate.Enabled=y +# When turned off, it remains on latest version installed on the vm +# Added this new option AutoUpdate.UpdateToLatestVersion in place of AutoUpdate.Enabled, and encourage users to transition to this new option +# See wiki[https://github.com/Azure/WALinuxAgent/wiki/FAQ#autoupdateenabled-vs-autoupdateupdatetolatestversion] for more details +# AutoUpdate.UpdateToLatestVersion=y # Determine the update family, this should not be changed # AutoUpdate.GAFamily=Prod diff --git a/config/debian/waagent.conf b/config/debian/waagent.conf index dfd7afcd6d..40a92b92b4 100644 --- a/config/debian/waagent.conf +++ b/config/debian/waagent.conf @@ -110,7 +110,10 @@ OS.SshDir=/etc/ssh # OS.EnableRDMA=y # Enable or disable goal state processing auto-update, default is enabled -# AutoUpdate.Enabled=y +# When turned off, it remains on latest version installed on the vm +# Added this new option AutoUpdate.UpdateToLatestVersion in place of AutoUpdate.Enabled, and encourage users to transition to this new option +# See wiki[https://github.com/Azure/WALinuxAgent/wiki/FAQ#autoupdateenabled-vs-autoupdateupdatetolatestversion] for more details +# AutoUpdate.UpdateToLatestVersion=y # Determine the update family, this should not be changed # AutoUpdate.GAFamily=Prod diff --git a/config/devuan/waagent.conf b/config/devuan/waagent.conf index be80edbd42..53b0a85bfa 100644 --- a/config/devuan/waagent.conf +++ b/config/devuan/waagent.conf @@ -104,7 +104,10 @@ OS.SshDir=/etc/ssh # OS.EnableRDMA=y # Enable or disable goal state processing auto-update, default is enabled -# AutoUpdate.Enabled=y +# When turned off, it remains on latest version installed on 
the vm +# Added this new option AutoUpdate.UpdateToLatestVersion in place of AutoUpdate.Enabled, and encourage users to transition to this new option +# See wiki[https://github.com/Azure/WALinuxAgent/wiki/FAQ#autoupdateenabled-vs-autoupdateupdatetolatestversion] for more details +# AutoUpdate.UpdateToLatestVersion=y # Determine the update family, this should not be changed # AutoUpdate.GAFamily=Prod diff --git a/config/freebsd/waagent.conf b/config/freebsd/waagent.conf index c917d16c5d..6774b8fdd3 100644 --- a/config/freebsd/waagent.conf +++ b/config/freebsd/waagent.conf @@ -102,7 +102,10 @@ OS.SudoersDir=/usr/local/etc/sudoers.d # OS.EnableRDMA=y # Enable or disable goal state processing auto-update, default is enabled -# AutoUpdate.Enabled=y +# When turned off, it remains on latest version installed on the vm +# Added this new option AutoUpdate.UpdateToLatestVersion in place of AutoUpdate.Enabled, and encourage users to transition to this new option +# See wiki[https://github.com/Azure/WALinuxAgent/wiki/FAQ#autoupdateenabled-vs-autoupdateupdatetolatestversion] for more details +# AutoUpdate.UpdateToLatestVersion=y # Determine the update family, this should not be changed # AutoUpdate.GAFamily=Prod diff --git a/config/gaia/waagent.conf b/config/gaia/waagent.conf index 0e171d28b9..fa915f7228 100644 --- a/config/gaia/waagent.conf +++ b/config/gaia/waagent.conf @@ -101,8 +101,15 @@ OS.SshDir=/etc/ssh OS.EnableRDMA=n # Enable or disable goal state processing auto-update, default is enabled +# When turned off, it reverts to the pre-installed agent that comes with image +# AutoUpdate.Enabled is a legacy parameter used only for backwards compatibility. 
We encourage users to transition to new option AutoUpdate.UpdateToLatestVersion +# See wiki[https://github.com/Azure/WALinuxAgent/wiki/FAQ#autoupdateenabled-vs-autoupdateupdatetolatestversion] for more details AutoUpdate.Enabled=n +# Enable or disable goal state processing auto-update, default is enabled +# When turned off, it remains on latest version installed on the vm +# AutoUpdate.UpdateToLatestVersion=y + # Determine the update family, this should not be changed # AutoUpdate.GAFamily=Prod diff --git a/config/iosxe/waagent.conf b/config/iosxe/waagent.conf index 764058986f..88ed14c473 100644 --- a/config/iosxe/waagent.conf +++ b/config/iosxe/waagent.conf @@ -100,7 +100,10 @@ OS.SshDir=/etc/ssh # OS.EnableRDMA=y # Enable or disable goal state processing auto-update, default is enabled -AutoUpdate.Enabled=y +# When turned off, it remains on latest version installed on the vm +# Added this new option AutoUpdate.UpdateToLatestVersion in place of AutoUpdate.Enabled, and encourage users to transition to this new option +# See wiki[https://github.com/Azure/WALinuxAgent/wiki/FAQ#autoupdateenabled-vs-autoupdateupdatetolatestversion] for more details +# AutoUpdate.UpdateToLatestVersion=y # Determine the update family, this should not be changed # AutoUpdate.GAFamily=Prod diff --git a/config/mariner/waagent.conf b/config/mariner/waagent.conf index dbd9e14a8b..05eb129f0e 100644 --- a/config/mariner/waagent.conf +++ b/config/mariner/waagent.conf @@ -75,8 +75,12 @@ OS.OpensslPath=None # Set the path to SSH keys and configuration files OS.SshDir=/etc/ssh -# Enable or disable self-update, default is enabled -AutoUpdate.Enabled=y +# Enable or disable goal state processing auto-update, default is enabled +# When turned off, it remains on latest version installed on the vm +# Added this new option AutoUpdate.UpdateToLatestVersion in place of AutoUpdate.Enabled, and encourage users to transition to this new option +# See 
wiki[https://github.com/Azure/WALinuxAgent/wiki/FAQ#autoupdateenabled-vs-autoupdateupdatetolatestversion] for more details +# AutoUpdate.UpdateToLatestVersion=y + AutoUpdate.GAFamily=Prod # Determine if the overprovisioning feature is enabled. If yes, hold extension diff --git a/config/nsbsd/waagent.conf b/config/nsbsd/waagent.conf index d7f6f27595..8b04a410ae 100644 --- a/config/nsbsd/waagent.conf +++ b/config/nsbsd/waagent.conf @@ -98,8 +98,15 @@ Extension.LogDir=/log/azure # OS.EnableRDMA=y # Enable or disable goal state processing auto-update, default is enabled +# When turned off, it reverts to the pre-installed agent that comes with image +# AutoUpdate.Enabled is a legacy parameter used only for backwards compatibility. We encourage users to transition to new option AutoUpdate.UpdateToLatestVersion +# See wiki[https://github.com/Azure/WALinuxAgent/wiki/FAQ#autoupdateenabled-vs-autoupdateupdatetolatestversion] for more details AutoUpdate.Enabled=n +# Enable or disable goal state processing auto-update, default is enabled +# When turned off, it remains on latest version installed on the vm +# AutoUpdate.UpdateToLatestVersion=y + # Determine the update family, this should not be changed # AutoUpdate.GAFamily=Prod diff --git a/config/openbsd/waagent.conf b/config/openbsd/waagent.conf index a644d5d69a..c0bc8ed14b 100644 --- a/config/openbsd/waagent.conf +++ b/config/openbsd/waagent.conf @@ -96,7 +96,10 @@ OS.PasswordPath=/etc/master.passwd # OS.EnableRDMA=y # Enable or disable goal state processing auto-update, default is enabled -# AutoUpdate.Enabled=y +# When turned off, it remains on latest version installed on the vm +# Added this new option AutoUpdate.UpdateToLatestVersion in place of AutoUpdate.Enabled, and encourage users to transition to this new option +# See wiki[https://github.com/Azure/WALinuxAgent/wiki/FAQ#autoupdateenabled-vs-autoupdateupdatetolatestversion] for more details +# AutoUpdate.UpdateToLatestVersion=y # Determine the update family, this 
should not be changed # AutoUpdate.GAFamily=Prod diff --git a/config/photonos/waagent.conf b/config/photonos/waagent.conf index 65da1313c6..05227f6bc4 100644 --- a/config/photonos/waagent.conf +++ b/config/photonos/waagent.conf @@ -70,8 +70,12 @@ OS.OpensslPath=None # Set the path to SSH keys and configuration files OS.SshDir=/etc/ssh -# Enable or disable self-update, default is enabled -AutoUpdate.Enabled=y +# Enable or disable goal state processing auto-update, default is enabled +# When turned off, it remains on latest version installed on the vm +# Added this new option AutoUpdate.UpdateToLatestVersion in place of AutoUpdate.Enabled, and encourage users to transition to this new option +# See wiki[https://github.com/Azure/WALinuxAgent/wiki/FAQ#autoupdateenabled-vs-autoupdateupdatetolatestversion] for more details +# AutoUpdate.UpdateToLatestVersion=y + AutoUpdate.GAFamily=Prod # Determine if the overprovisioning feature is enabled. If yes, hold extension diff --git a/config/suse/waagent.conf b/config/suse/waagent.conf index c617f9af8b..9e6369a87b 100644 --- a/config/suse/waagent.conf +++ b/config/suse/waagent.conf @@ -113,7 +113,10 @@ OS.SshDir=/etc/ssh # OS.CheckRdmaDriver=y # Enable or disable goal state processing auto-update, default is enabled -# AutoUpdate.Enabled=y +# When turned off, it remains on latest version installed on the vm +# Added this new option AutoUpdate.UpdateToLatestVersion in place of AutoUpdate.Enabled, and encourage users to transition to this new option +# See wiki[https://github.com/Azure/WALinuxAgent/wiki/FAQ#autoupdateenabled-vs-autoupdateupdatetolatestversion] for more details +# AutoUpdate.UpdateToLatestVersion=y # Determine the update family, this should not be changed # AutoUpdate.GAFamily=Prod diff --git a/config/ubuntu/waagent.conf b/config/ubuntu/waagent.conf index 19b56bae4a..286933ce5b 100644 --- a/config/ubuntu/waagent.conf +++ b/config/ubuntu/waagent.conf @@ -101,7 +101,10 @@ OS.SshDir=/etc/ssh # OS.CheckRdmaDriver=y # 
Enable or disable goal state processing auto-update, default is enabled -# AutoUpdate.Enabled=y +# When turned off, it remains on latest version installed on the vm +# Added this new option AutoUpdate.UpdateToLatestVersion in place of AutoUpdate.Enabled, and encourage users to transition to this new option +# See wiki[https://github.com/Azure/WALinuxAgent/wiki/FAQ#autoupdateenabled-vs-autoupdateupdatetolatestversion] for more details +# AutoUpdate.UpdateToLatestVersion=y # Determine the update family, this should not be changed # AutoUpdate.GAFamily=Prod diff --git a/config/waagent.conf b/config/waagent.conf index 7316dc2da4..3c9ad5d4c9 100644 --- a/config/waagent.conf +++ b/config/waagent.conf @@ -122,7 +122,10 @@ OS.SshDir=/etc/ssh # OS.CheckRdmaDriver=y # Enable or disable goal state processing auto-update, default is enabled -# AutoUpdate.Enabled=y +# When turned off, it remains on latest version installed on the vm +# Added this new option AutoUpdate.UpdateToLatestVersion in place of AutoUpdate.Enabled, and encourage users to transition to this new option +# See wiki[https://github.com/Azure/WALinuxAgent/wiki/FAQ#autoupdateenabled-vs-autoupdateupdatetolatestversion] for more details +# AutoUpdate.UpdateToLatestVersion=y # Determine the update family, this should not be changed # AutoUpdate.GAFamily=Prod diff --git a/tests/common/test_conf.py b/tests/common/test_conf.py index 1ae951bf9f..e6cc7de029 100644 --- a/tests/common/test_conf.py +++ b/tests/common/test_conf.py @@ -65,6 +65,7 @@ class TestConf(AgentTestCase): "OS.CheckRdmaDriver": False, "AutoUpdate.Enabled": True, "AutoUpdate.GAFamily": "Prod", + "AutoUpdate.UpdateToLatestVersion": True, "EnableOverProvisioning": True, "OS.AllowHTTP": False, "OS.EnableFirewall": False @@ -146,3 +147,56 @@ def test_write_agent_disabled(self): def test_get_extensions_enabled(self): self.assertTrue(conf.get_extensions_enabled(self.conf)) + + def test_get_get_auto_update_to_latest_version(self): + # update flags not set + 
self.assertTrue(conf.get_auto_update_to_latest_version(self.conf)) + + config = conf.ConfigurationProvider() + # AutoUpdate.Enabled is set to 'n' + conf.load_conf_from_file( + os.path.join(data_dir, "config/waagent_auto_update_disabled.conf"), + config) + self.assertFalse(conf.get_auto_update_to_latest_version(config), "AutoUpdate.UpdateToLatestVersion should be 'n'") + + # AutoUpdate.Enabled is set to 'y' + conf.load_conf_from_file( + os.path.join(data_dir, "config/waagent_auto_update_enabled.conf"), + config) + self.assertTrue(conf.get_auto_update_to_latest_version(config), "AutoUpdate.UpdateToLatestVersion should be 'y'") + + # AutoUpdate.UpdateToLatestVersion is set to 'n' + conf.load_conf_from_file( + os.path.join(data_dir, "config/waagent_update_to_latest_version_disabled.conf"), + config) + self.assertFalse(conf.get_auto_update_to_latest_version(config), "AutoUpdate.UpdateToLatestVersion should be 'n'") + + # AutoUpdate.UpdateToLatestVersion is set to 'y' + conf.load_conf_from_file( + os.path.join(data_dir, "config/waagent_update_to_latest_version_enabled.conf"), + config) + self.assertTrue(conf.get_auto_update_to_latest_version(config), "AutoUpdate.UpdateToLatestVersion should be 'y'") + + # AutoUpdate.Enabled is set to 'y' and AutoUpdate.UpdateToLatestVersion is set to 'n' + conf.load_conf_from_file( + os.path.join(data_dir, "config/waagent_auto_update_enabled_update_to_latest_version_disabled.conf"), + config) + self.assertFalse(conf.get_auto_update_to_latest_version(config), "AutoUpdate.UpdateToLatestVersion should be 'n'") + + # AutoUpdate.Enabled is set to 'n' and AutoUpdate.UpdateToLatestVersion is set to 'y' + conf.load_conf_from_file( + os.path.join(data_dir, "config/waagent_auto_update_disabled_update_to_latest_version_enabled.conf"), + config) + self.assertTrue(conf.get_auto_update_to_latest_version(config), "AutoUpdate.UpdateToLatestVersion should be 'y'") + + # AutoUpdate.Enabled is set to 'n' and AutoUpdate.UpdateToLatestVersion is set to 'n' + 
conf.load_conf_from_file( + os.path.join(data_dir, "config/waagent_auto_update_disabled_update_to_latest_version_disabled.conf"), + config) + self.assertFalse(conf.get_auto_update_to_latest_version(config), "AutoUpdate.UpdateToLatestVersion should be 'n'") + + # AutoUpdate.Enabled is set to 'y' and AutoUpdate.UpdateToLatestVersion is set to 'y' + conf.load_conf_from_file( + os.path.join(data_dir, "config/waagent_auto_update_enabled_update_to_latest_version_enabled.conf"), + config) + self.assertTrue(conf.get_auto_update_to_latest_version(config), "AutoUpdate.UpdateToLatestVersion should be 'y'") diff --git a/tests/data/config/waagent_auto_update_disabled.conf b/tests/data/config/waagent_auto_update_disabled.conf new file mode 100644 index 0000000000..933c6b2b49 --- /dev/null +++ b/tests/data/config/waagent_auto_update_disabled.conf @@ -0,0 +1,11 @@ +# +# Microsoft Azure Linux Agent Configuration +# + +# Enable or disable goal state processing auto-update, default is enabled. +# Deprecated now but keep it for backward compatibility +AutoUpdate.Enabled=n + +# Enable or disable goal state processing auto-update, default is enabled +# AutoUpdate.UpdateToLatestVersion=y + diff --git a/tests/data/config/waagent_auto_update_disabled_update_to_latest_version_disabled.conf b/tests/data/config/waagent_auto_update_disabled_update_to_latest_version_disabled.conf new file mode 100644 index 0000000000..484a3f222a --- /dev/null +++ b/tests/data/config/waagent_auto_update_disabled_update_to_latest_version_disabled.conf @@ -0,0 +1,11 @@ +# +# Microsoft Azure Linux Agent Configuration +# + +# Enable or disable goal state processing auto-update, default is enabled. 
+# Deprecated now but keep it for backward compatibility +AutoUpdate.Enabled=n + +# Enable or disable goal state processing auto-update, default is enabled +AutoUpdate.UpdateToLatestVersion=n + diff --git a/tests/data/config/waagent_auto_update_disabled_update_to_latest_version_enabled.conf b/tests/data/config/waagent_auto_update_disabled_update_to_latest_version_enabled.conf new file mode 100644 index 0000000000..2e6b51ce43 --- /dev/null +++ b/tests/data/config/waagent_auto_update_disabled_update_to_latest_version_enabled.conf @@ -0,0 +1,11 @@ +# +# Microsoft Azure Linux Agent Configuration +# + +# Enable or disable goal state processing auto-update, default is enabled. +# Deprecated now but keep it for backward compatibility +AutoUpdate.Enabled=n + +# Enable or disable goal state processing auto-update, default is enabled +AutoUpdate.UpdateToLatestVersion=y + diff --git a/tests/data/config/waagent_auto_update_enabled.conf b/tests/data/config/waagent_auto_update_enabled.conf new file mode 100644 index 0000000000..1f9070ba64 --- /dev/null +++ b/tests/data/config/waagent_auto_update_enabled.conf @@ -0,0 +1,11 @@ +# +# Microsoft Azure Linux Agent Configuration +# + +# Enable or disable goal state processing auto-update, default is enabled. +# Deprecated now but keep it for backward compatibility +AutoUpdate.Enabled=y + +# Enable or disable goal state processing auto-update, default is enabled +# AutoUpdate.UpdateToLatestVersion=y + diff --git a/tests/data/config/waagent_auto_update_enabled_update_to_latest_version_disabled.conf b/tests/data/config/waagent_auto_update_enabled_update_to_latest_version_disabled.conf new file mode 100644 index 0000000000..86a21ec3ac --- /dev/null +++ b/tests/data/config/waagent_auto_update_enabled_update_to_latest_version_disabled.conf @@ -0,0 +1,11 @@ +# +# Microsoft Azure Linux Agent Configuration +# + +# Enable or disable goal state processing auto-update, default is enabled. 
+# Deprecated now but keep it for backward compatibility +AutoUpdate.Enabled=y + +# Enable or disable goal state processing auto-update, default is enabled +AutoUpdate.UpdateToLatestVersion=n + diff --git a/tests/data/config/waagent_auto_update_enabled_update_to_latest_version_enabled.conf b/tests/data/config/waagent_auto_update_enabled_update_to_latest_version_enabled.conf new file mode 100644 index 0000000000..497f03897d --- /dev/null +++ b/tests/data/config/waagent_auto_update_enabled_update_to_latest_version_enabled.conf @@ -0,0 +1,11 @@ +# +# Microsoft Azure Linux Agent Configuration +# + +# Enable or disable goal state processing auto-update, default is enabled. +# Deprecated now but keep it for backward compatibility +AutoUpdate.Enabled=y + +# Enable or disable goal state processing auto-update, default is enabled +AutoUpdate.UpdateToLatestVersion=y + diff --git a/tests/data/config/waagent_update_to_latest_version_disabled.conf b/tests/data/config/waagent_update_to_latest_version_disabled.conf new file mode 100644 index 0000000000..a2c7f859f2 --- /dev/null +++ b/tests/data/config/waagent_update_to_latest_version_disabled.conf @@ -0,0 +1,11 @@ +# +# Microsoft Azure Linux Agent Configuration +# + +# Enable or disable goal state processing auto-update, default is enabled. +# Deprecated now but keep it for backward compatibility +# AutoUpdate.Enabled=n + +# Enable or disable goal state processing auto-update, default is enabled +AutoUpdate.UpdateToLatestVersion=n + diff --git a/tests/data/config/waagent_update_to_latest_version_enabled.conf b/tests/data/config/waagent_update_to_latest_version_enabled.conf new file mode 100644 index 0000000000..48ed2e2de4 --- /dev/null +++ b/tests/data/config/waagent_update_to_latest_version_enabled.conf @@ -0,0 +1,11 @@ +# +# Microsoft Azure Linux Agent Configuration +# + +# Enable or disable goal state processing auto-update, default is enabled. 
+# Deprecated now but keep it for backward compatibility +# AutoUpdate.Enabled=n + +# Enable or disable goal state processing auto-update, default is enabled +AutoUpdate.UpdateToLatestVersion=y + diff --git a/tests/data/test_waagent.conf b/tests/data/test_waagent.conf index cc60886e6e..8fb0515520 100644 --- a/tests/data/test_waagent.conf +++ b/tests/data/test_waagent.conf @@ -116,9 +116,13 @@ OS.SshDir=/notareal/path # OS.CheckRdmaDriver=n -# Enable or disable goal state processing auto-update, default is enabled +# Enable or disable goal state processing auto-update, default is enabled. +# Deprecated now but keep it for backward compatibility # AutoUpdate.Enabled=y +# Enable or disable goal state processing auto-update, default is enabled +# AutoUpdate.UpdateToLatestVersion=y + # Determine the update family, this should not be changed # AutoUpdate.GAFamily=Prod diff --git a/tests/ga/test_agent_update_handler.py b/tests/ga/test_agent_update_handler.py index 5ba7f3c70f..fc58a6ee25 100644 --- a/tests/ga/test_agent_update_handler.py +++ b/tests/ga/test_agent_update_handler.py @@ -459,3 +459,14 @@ def test_it_should_save_rsm_state_of_the_most_recent_goal_state(self): agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) self.assertFalse(os.path.exists(state_file), "The rsm file should be removed (file: {0})".format(state_file)) + + def test_it_should_not_update_to_latest_if_flag_is_disabled(self): + self.prepare_agents(count=1) + + data_file = DATA_FILE.copy() + data_file['ext_conf'] = "wire/ext_conf.xml" + with self._get_agent_update_handler(test_data=data_file) as (agent_update_handler, _): + with patch("azurelinuxagent.common.conf.get_auto_update_to_latest_version", return_value=False): + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) + + self._assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)]) diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 
aa39ccb55a..079f896bed 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -686,12 +686,13 @@ def _test_run_latest(self, mock_child=None, mock_time=None, child_args=None): def test_run_latest(self): self.prepare_agents() - agent = self.update_handler.get_latest_agent_greater_than_daemon() - args, kwargs = self._test_run_latest() - args = args[0] - cmds = textutil.safe_shlex_split(agent.get_agent_cmd()) - if cmds[0].lower() == "python": - cmds[0] = sys.executable + with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=True): + agent = self.update_handler.get_latest_agent_greater_than_daemon() + args, kwargs = self._test_run_latest() + args = args[0] + cmds = textutil.safe_shlex_split(agent.get_agent_cmd()) + if cmds[0].lower() == "python": + cmds[0] = sys.executable self.assertEqual(args, cmds) self.assertTrue(len(args) > 1) @@ -801,7 +802,8 @@ def test_run_latest_exception_blacklists(self): verify_string = "Force blacklisting: {0}".format(str(uuid.uuid4())) with patch('azurelinuxagent.ga.update.UpdateHandler.get_latest_agent_greater_than_daemon', return_value=latest_agent): - self._test_run_latest(mock_child=ChildMock(side_effect=Exception(verify_string))) + with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=True): + self._test_run_latest(mock_child=ChildMock(side_effect=Exception(verify_string))) self.assertFalse(latest_agent.is_available) self.assertTrue(latest_agent.error.is_blacklisted) diff --git a/tests/lib/tools.py b/tests/lib/tools.py index 008be8552a..11bd801917 100644 --- a/tests/lib/tools.py +++ b/tests/lib/tools.py @@ -181,7 +181,6 @@ def setUp(self): self.tmp_dir = tempfile.mkdtemp(prefix=prefix) self.test_file = 'test_file' - conf.get_autoupdate_enabled = Mock(return_value=True) conf.get_lib_dir = Mock(return_value=self.tmp_dir) ext_log_dir = os.path.join(self.tmp_dir, "azure") diff --git a/tests/test_agent.py b/tests/test_agent.py index 0da6a2a853..f892f090e2 100644 --- 
a/tests/test_agent.py +++ b/tests/test_agent.py @@ -28,6 +28,7 @@ EXPECTED_CONFIGURATION = \ """AutoUpdate.Enabled = True AutoUpdate.GAFamily = Prod +AutoUpdate.UpdateToLatestVersion = True Autoupdate.Frequency = 3600 DVD.MountPoint = /mnt/cdrom/secure Debug.AgentCpuQuota = 50 @@ -41,7 +42,6 @@ Debug.CgroupLogMetrics = False Debug.CgroupMonitorExpiryTime = 2022-03-31 Debug.CgroupMonitorExtensionName = Microsoft.Azure.Monitor.AzureMonitorLinuxAgent -Debug.DownloadNewAgents = True Debug.EnableAgentMemoryUsageCheck = False Debug.EnableFastTrack = True Debug.EnableGAVersioning = True diff --git a/tests_e2e/orchestrator/scripts/collect-logs b/tests_e2e/orchestrator/scripts/collect-logs index eadf0483ae..c221288a13 100755 --- a/tests_e2e/orchestrator/scripts/collect-logs +++ b/tests_e2e/orchestrator/scripts/collect-logs @@ -10,13 +10,16 @@ logs_file_name="/tmp/waagent-logs.tgz" echo "Collecting logs to $logs_file_name ..." +PYTHON=$(get-agent-python) +waagent_conf=$($PYTHON -c 'from azurelinuxagent.common.osutil import get_osutil; print(get_osutil().agent_conf_file_path)') + tar --exclude='journal/*' --exclude='omsbundle' --exclude='omsagent' --exclude='mdsd' --exclude='scx*' \ --exclude='*.so' --exclude='*__LinuxDiagnostic__*' --exclude='*.zip' --exclude='*.deb' --exclude='*.rpm' \ --warning=no-file-changed \ -czf "$logs_file_name" \ /var/log \ /var/lib/waagent/ \ - /etc/waagent.conf + $waagent_conf set -euxo pipefail diff --git a/tests_e2e/orchestrator/scripts/install-agent b/tests_e2e/orchestrator/scripts/install-agent index 0496784766..94f850484f 100755 --- a/tests_e2e/orchestrator/scripts/install-agent +++ b/tests_e2e/orchestrator/scripts/install-agent @@ -113,9 +113,9 @@ python=$(get-agent-python) waagent_conf_path=$($python -c 'from azurelinuxagent.common.osutil import get_osutil; osutil=get_osutil(); print(osutil.agent_conf_file_path)') echo "Agent's conf path: $waagent_conf_path" sed -i 's/AutoUpdate.Enabled=n/AutoUpdate.Enabled=y/g' "$waagent_conf_path" -# By 
default GAUpdates flag set to True, so that agent go through update logic to look for new agents. +# By default UpdateToLatestVersion flag set to True, so that agent go through update logic to look for new agents. # But in e2e tests this flag needs to be off in test version 9.9.9.9 to stop the agent updates, so that our scenarios run on 9.9.9.9. -# sed -i '$a Debug.DownloadNewAgents=n' "$waagent_conf_path" +sed -i '$a AutoUpdate.UpdateToLatestVersion=n' "$waagent_conf_path" # Logging and exiting tests if Extensions.Enabled flag is disabled for other distros than debian if grep -q "Extensions.Enabled=n" $waagent_conf_path; then pypy_get_distro=$(pypy3 -c 'from azurelinuxagent.common.version import get_distro; print(get_distro())') diff --git a/tests_e2e/tests/agent_update/self_update.py b/tests_e2e/tests/agent_update/self_update.py index 65fec9df5d..2aedb72f41 100644 --- a/tests_e2e/tests/agent_update/self_update.py +++ b/tests_e2e/tests/agent_update/self_update.py @@ -45,15 +45,23 @@ def __init__(self, context: AgentVmTestContext): _setup_lock = RLock() def run(self): + log.info("Verifying agent updated to latest version from custom test version") self._test_setup() self._verify_agent_updated_to_latest_version() + log.info("Verifying agent remains on custom test version when AutoUpdate.UpdateToLatestVersion=n") + self._test_setup_and_update_to_latest_version_false() + self._verify_agent_remains_on_custom_test_version() + def _test_setup(self) -> None: """ Builds the custom test agent pkg as some lower version and installs it on the vm """ self._build_custom_test_agent() - self._ssh_client.run_command(f"agent_update-self_update_test_setup --package ~/tmp/{self._test_pkg_name} --version {self._test_version}", use_sudo=True) + output: str = self._ssh_client.run_command( + f"agent_update-self_update_test_setup --package ~/tmp/{self._test_pkg_name} --version {self._test_version} --update_to_latest_version y", + use_sudo=True) + log.info("Successfully installed custom 
test agent pkg version \n%s", output) def _build_custom_test_agent(self) -> None: """ @@ -94,10 +102,13 @@ def _verify_agent_updated_to_latest_version(self) -> None: Verifies the agent updated to latest version from custom test version. We retrieve latest version from goal state and compare with current agent version running as that latest version """ - latest_version: str = self._ssh_client.run_command("agent_update-self_update_latest_version.py", use_sudo=True).rstrip() + latest_version: str = self._ssh_client.run_command("agent_update-self_update_latest_version.py", + use_sudo=True).rstrip() self._verify_guest_agent_update(latest_version) # Verify agent updated to latest version by custom test agent - self._ssh_client.run_command("agent_update-self_update_check.py --latest-version {0} --current-version {1}".format(latest_version, self._test_version)) + self._ssh_client.run_command( + "agent_update-self_update_check.py --latest-version {0} --current-version {1}".format(latest_version, + self._test_version)) def _verify_guest_agent_update(self, latest_version: str) -> None: """ @@ -121,3 +132,41 @@ def _check_agent_version(latest_version: str) -> bool: waagent_version: str = self._ssh_client.run_command("waagent-version", use_sudo=True) log.info( f"Successfully verified agent updated to latest version. 
Current agent version running:\n {waagent_version}") + + def _test_setup_and_update_to_latest_version_false(self) -> None: + """ + Builds the custom test agent pkg as some lower version and installs it on the vm + Also modify the configuration AutoUpdate.UpdateToLatestVersion=n + """ + self._build_custom_test_agent() + output: str = self._ssh_client.run_command( + f"agent_update-self_update_test_setup --package ~/tmp/{self._test_pkg_name} --version {self._test_version} --update_to_latest_version n", + use_sudo=True) + log.info("Successfully installed custom test agent pkg version \n%s", output) + + def _verify_agent_remains_on_custom_test_version(self) -> None: + """ + Verifies the agent remains on custom test version when UpdateToLatestVersion=n + """ + + def _check_agent_version(version: str) -> bool: + waagent_version: str = self._ssh_client.run_command("waagent-version", use_sudo=True) + expected_version = f"Goal state agent: {version}" + if expected_version in waagent_version: + return True + else: + return False + + waagent_version: str = "" + log.info("Verifying if current agent on version: {0}".format(self._test_version)) + success: bool = retry_if_false(lambda: _check_agent_version(self._test_version), delay=60) + if not success: + fail("Guest agent was on different version than expected version {0} and found \n {1}".format( + self._test_version, waagent_version)) + waagent_version: str = self._ssh_client.run_command("waagent-version", use_sudo=True) + log.info( + f"Successfully verified agent stayed on test version. 
Current agent version running:\n {waagent_version}") + + +if __name__ == "__main__": + SelfUpdateBvt.run_from_command_line() diff --git a/tests_e2e/tests/lib/agent_log.py b/tests_e2e/tests/lib/agent_log.py index 4ef884d70c..60d42ec75c 100644 --- a/tests_e2e/tests/lib/agent_log.py +++ b/tests_e2e/tests/lib/agent_log.py @@ -360,6 +360,14 @@ def get_errors(self) -> List[AgentLogRecord]: 'message': r"Fetch failed:.*GET.*vmSettings.*timed out", 'if': lambda r: r.prefix == 'LogCollector' and self.agent_log_contains("LogCollector Log collection successfully completed", after_timestamp=r.timestamp) }, + # + # In tests, we use both autoupdate flags to install test agent with different value and changing it depending on the scenario. So, we can ignore this warning. + # + # 2024-01-30T22:22:37.299911Z WARNING ExtHandler ExtHandler AutoUpdate.Enabled property is **Deprecated** now but it's set to different value from AutoUpdate.UpdateToLatestVersion. Please consider removing it if added by mistake + { + 'message': r"AutoUpdate.Enabled property is \*\*Deprecated\*\* now but it's set to different value from AutoUpdate.UpdateToLatestVersion", + 'if': lambda r: r.prefix == 'ExtHandler' and r.thread == 'ExtHandler' + } ] def is_error(r: AgentLogRecord) -> bool: diff --git a/tests_e2e/tests/scripts/agent_update-self_update_test_setup b/tests_e2e/tests/scripts/agent_update-self_update_test_setup index bc5a1d9155..512beb322b 100755 --- a/tests_e2e/tests/scripts/agent_update-self_update_test_setup +++ b/tests_e2e/tests/scripts/agent_update-self_update_test_setup @@ -22,7 +22,7 @@ set -euo pipefail usage() ( - echo "Usage: agent_update-self_update_test_setup -p|--package -v|--version " + echo "Usage: agent_update-self_update_test_setup -p|--package -v|--version -u|--update_to_latest_version " exit 1 ) @@ -44,6 +44,14 @@ while [[ $# -gt 0 ]]; do version=$1 shift ;; + -u|--update_to_latest_version) + shift + if [ "$#" -lt 1 ]; then + usage + fi + update_to_latest_version=$1 + shift + ;; 
*) usage esac @@ -53,7 +61,7 @@ if [ "$#" -ne 0 ] || [ -z ${package+x} ] || [ -z ${version+x} ]; then fi echo "updating the related to self-update flags" -update-waagent-conf Debug.EnableGAVersioning=n Debug.SelfUpdateHotfixFrequency=120 Debug.SelfUpdateRegularFrequency=120 Autoupdate.Frequency=120 +update-waagent-conf AutoUpdate.UpdateToLatestVersion=$update_to_latest_version Debug.EnableGAVersioning=n Debug.SelfUpdateHotfixFrequency=120 Debug.SelfUpdateRegularFrequency=120 Autoupdate.Frequency=120 agent-service stop mv /var/log/waagent.log /var/log/waagent.$(date --iso-8601=seconds).log From 2bd2dc90a4b474b1f35e5805dcb851e53ce70261 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Wed, 7 Feb 2024 10:40:36 -0800 Subject: [PATCH 144/240] Fix daily pipeline failures for recover_network_interface (#3039) * Fix daily pipeline failures for recover_network_interface * Clear any unused settings properties when enabling cse --------- Co-authored-by: Norberto Arrieta --- tests_e2e/test_suites/images.yml | 17 ++++++++++++++--- .../ext_telemetry_pipeline.py | 4 ++-- .../recover_network_interface.py | 6 +----- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/tests_e2e/test_suites/images.yml b/tests_e2e/test_suites/images.yml index 2892bd0fda..f75b9be891 100644 --- a/tests_e2e/test_suites/images.yml +++ b/tests_e2e/test_suites/images.yml @@ -122,9 +122,20 @@ images: AzureChinaCloud: [] AzureUSGovernment: [] oracle_610: "Oracle Oracle-Linux 6.10 latest" - oracle_75: "Oracle Oracle-Linux 7.5 latest" - oracle_79: "Oracle Oracle-Linux ol79-gen2 latest" - oracle_82: "Oracle Oracle-Linux ol82-gen2 latest" + oracle_75: + urn: "Oracle Oracle-Linux 7.5 latest" + locations: + AzureChinaCloud: [] + AzureUSGovernment: [] + oracle_79: + urn: "Oracle Oracle-Linux ol79-gen2 latest" + locations: + AzureChinaCloud: [] + oracle_82: + urn: "Oracle Oracle-Linux ol82-gen2 latest" + locations: + AzureChinaCloud: [] + AzureUSGovernment: [] 
rhel_610: "RedHat RHEL 6.10 latest" rhel_75: urn: "RedHat RHEL 7.5 latest" diff --git a/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py b/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py index 01a687f634..e13f0ce6a7 100755 --- a/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py +++ b/tests_e2e/tests/ext_telemetry_pipeline/ext_telemetry_pipeline.py @@ -56,12 +56,12 @@ def run(self): log.info("") log.info("Add CSE to the test VM...") cse = VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.CustomScript, resource_name="CustomScript") - cse.enable(settings={'commandToExecute': "echo 'enable'"}) + cse.enable(settings={'commandToExecute': "echo 'enable'"}, protected_settings={}) cse.assert_instance_view() log.info("") log.info("Add CSE to the test VM again...") - cse.enable(settings={'commandToExecute': "echo 'enable again'"}) + cse.enable(settings={'commandToExecute': "echo 'enable again'"}, protected_settings={}) cse.assert_instance_view() # Check agent log to verify ETP is enabled diff --git a/tests_e2e/tests/recover_network_interface/recover_network_interface.py b/tests_e2e/tests/recover_network_interface/recover_network_interface.py index bbc5e6c4b9..39799d3752 100644 --- a/tests_e2e/tests/recover_network_interface/recover_network_interface.py +++ b/tests_e2e/tests/recover_network_interface/recover_network_interface.py @@ -103,11 +103,7 @@ def run(self): log.info("") log.info("Using CSE to bring the primary network interface down and call the OSUtil to bring the interface back up. 
Command to execute: {0}".format(script)) custom_script = VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.CustomScript, resource_name="CustomScript") - custom_script.enable( - protected_settings={ - 'commandToExecute': script - } - ) + custom_script.enable(protected_settings={'commandToExecute': script}, settings={}) # Check that the interface was down and brought back up in instance view log.info("") From 475c7f3034f9b0cc9f62f140aefd8eb7eab51b25 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Wed, 7 Feb 2024 14:23:26 -0800 Subject: [PATCH 145/240] Keep failed VMs by default on pipeline runs (#3040) --- tests_e2e/pipeline/pipeline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_e2e/pipeline/pipeline.yml b/tests_e2e/pipeline/pipeline.yml index 4ea7565276..35d3fe4c14 100644 --- a/tests_e2e/pipeline/pipeline.yml +++ b/tests_e2e/pipeline/pipeline.yml @@ -51,7 +51,7 @@ parameters: - name: keep_environment displayName: Keep the test VMs (do not delete them) type: string - default: no + default: failed values: - always - failed From d8565ac88a93fa18496c1c1a09c42a1405fd276e Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Wed, 7 Feb 2024 15:29:40 -0800 Subject: [PATCH 146/240] enable RSM e2e tests (#3030) * enable RSM tests * merge conflicts --- tests_e2e/test_suites/agent_update.yml | 2 +- tests_e2e/tests/agent_update/rsm_update.py | 76 +++++++++++-------- .../scripts/agent_update-modify_agent_version | 4 +- ...ate-verify_agent_reported_update_status.py | 3 +- ...ate-verify_versioning_supported_feature.py | 3 +- .../scripts/agent_update-wait_for_rsm_gs.py | 2 +- 6 files changed, 50 insertions(+), 40 deletions(-) diff --git a/tests_e2e/test_suites/agent_update.yml b/tests_e2e/test_suites/agent_update.yml index b78f4109f5..3d3d4918f6 100644 --- a/tests_e2e/test_suites/agent_update.yml +++ b/tests_e2e/test_suites/agent_update.yml @@ 
-3,7 +3,7 @@ # Self-update: If vm not enrolled into RSM, it will validate agent uses self-update to update to latest version published name: "AgentUpdate" tests: -# - "agent_update/rsm_update.py" TODO: will enable this test once we have a new test version published + - "agent_update/rsm_update.py" - "agent_update/self_update.py" images: - "random(endorsed, 10)" diff --git a/tests_e2e/tests/agent_update/rsm_update.py b/tests_e2e/tests/agent_update/rsm_update.py index 8325599910..89c186a2f1 100644 --- a/tests_e2e/tests/agent_update/rsm_update.py +++ b/tests_e2e/tests/agent_update/rsm_update.py @@ -79,7 +79,7 @@ def run(self) -> None: log.info("*******Verifying the Agent Downgrade scenario*******") stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) log.info("Current agent version running on the vm before update is \n%s", stdout) - self._downgrade_version: str = "1.5.0.0" + self._downgrade_version: str = "2.3.15.0" log.info("Attempting downgrade version %s", self._downgrade_version) self._request_rsm_update(self._downgrade_version) self._check_rsm_gs(self._downgrade_version) @@ -88,33 +88,31 @@ def run(self) -> None: self._verify_guest_agent_update(self._downgrade_version) self._verify_agent_reported_update_status(self._downgrade_version) - # Verify upgrade scenario log.info("*******Verifying the Agent Upgrade scenario*******") stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) log.info("Current agent version running on the vm before update is \n%s", stdout) - upgrade_version: str = "1.5.1.0" + upgrade_version: str = "2.3.15.1" log.info("Attempting upgrade version %s", upgrade_version) self._request_rsm_update(upgrade_version) self._check_rsm_gs(upgrade_version) self._verify_guest_agent_update(upgrade_version) self._verify_agent_reported_update_status(upgrade_version) - # verify no version update. There is bug in CRP and will enable once it's fixed + # verify no version update. 
log.info("*******Verifying the no version update scenario*******") stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) log.info("Current agent version running on the vm before update is \n%s", stdout) - version: str = "1.5.1.0" - log.info("Attempting update version same as current version %s", upgrade_version) - self._request_rsm_update(version) - self._check_rsm_gs(version) - self._verify_guest_agent_update(version) - self._verify_agent_reported_update_status(version) + current_version: str = "2.3.15.1" + log.info("Attempting update version same as current version %s", current_version) + self._request_rsm_update(current_version) + self._check_rsm_gs(current_version) + self._verify_guest_agent_update(current_version) + self._verify_agent_reported_update_status(current_version) # verify requested version below daemon version + # All the daemons set to 2.2.53, so requesting version below daemon version log.info("*******Verifying requested version below daemon version scenario*******") - # changing daemon version to 1.5.0.1 from 1.0.0.0 as there is no pkg below than 1.0.0.0 available in PIR, Otherwise we will get pkg not found error - self._prepare_agent("1.5.0.1", update_config=False) stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) log.info("Current agent version running on the vm before update is \n%s", stdout) version: str = "1.5.0.0" @@ -126,23 +124,27 @@ def run(self) -> None: def _check_rsm_gs(self, requested_version: str) -> None: # This checks if RSM GS available to the agent after we send the rsm update request - log.info('Executing wait_for_rsm_gs.py remote script to verify latest GS contain requested version after rsm update requested') - self._run_remote_test(self._ssh_client, f"agent_update-wait_for_rsm_gs.py --version {requested_version}", use_sudo=True) + log.info( + 'Executing wait_for_rsm_gs.py remote script to verify latest GS contain requested version after rsm update requested') + 
self._run_remote_test(self._ssh_client, f"agent_update-wait_for_rsm_gs.py --version {requested_version}", + use_sudo=True) log.info('Verified latest GS contain requested version after rsm update requested') - def _prepare_agent(self, daemon_version="1.0.0.0", update_config=True) -> None: + def _prepare_agent(self) -> None: """ This method is to ensure agent is ready for accepting rsm updates. As part of that we update following flags 1) Changing daemon version since daemon has a hard check on agent version in order to update agent. It doesn't allow versions which are less than daemon version. 2) Updating GAFamily type "Test" and GAUpdates flag to process agent updates on test versions. """ - log.info('Executing modify_agent_version remote script to update agent installed version to lower than requested version') - self._run_remote_test(self._ssh_client, f"agent_update-modify_agent_version {daemon_version}", use_sudo=True) - log.info('Successfully updated agent installed version') - if update_config: - log.info('Executing update-waagent-conf remote script to update agent update config flags to allow and download test versions') - self._run_remote_test(self._ssh_client, "update-waagent-conf Debug.EnableGAVersioning=y AutoUpdate.GAFamily=Test", use_sudo=True) - log.info('Successfully updated agent update config') + log.info( + 'Executing modify_agent_version remote script to update agent installed version to lower than requested version') + output: str = self._ssh_client.run_command("agent_update-modify_agent_version 2.2.53", use_sudo=True) + log.info('Successfully updated agent installed version \n%s', output) + log.info( + 'Executing update-waagent-conf remote script to update agent update config flags to allow and download test versions') + output: str = self._ssh_client.run_command( + "update-waagent-conf AutoUpdate.UpdateToLatestVersion=y Debug.EnableGAVersioning=y AutoUpdate.GAFamily=Test", use_sudo=True) + log.info('Successfully updated agent update config \n 
%s', output) @staticmethod def _verify_agent_update_flag_enabled(vm: VirtualMachineClient) -> bool: @@ -186,7 +188,9 @@ def _request_rsm_update(self, requested_version: str) -> None: # Later this api call will be replaced by azure-python-sdk wrapper base_url = cloud.endpoints.resource_manager url = base_url + "/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.Compute/virtualMachines/{2}/" \ - "UpgradeVMAgent?api-version=2022-08-01".format(self._context.vm.subscription, self._context.vm.resource_group, self._context.vm.name) + "UpgradeVMAgent?api-version=2022-08-01".format(self._context.vm.subscription, + self._context.vm.resource_group, + self._context.vm.name) data = { "target": "Microsoft.OSTCLinuxAgent.Test", "targetVersion": requested_version @@ -197,12 +201,14 @@ def _request_rsm_update(self, requested_version: str) -> None: if response.status_code == 202: log.info("RSM upgrade request accepted") else: - raise Exception("Error occurred while making RSM upgrade request. Status code : {0} and msg: {1}".format(response.status_code, response.content)) + raise Exception("Error occurred while making RSM upgrade request. Status code : {0} and msg: {1}".format( + response.status_code, response.content)) def _verify_guest_agent_update(self, requested_version: str) -> None: """ Verify current agent version running on rsm requested version """ + def _check_agent_version(requested_version: str) -> bool: waagent_version: str = self._ssh_client.run_command("waagent-version", use_sudo=True) expected_version = f"Goal state agent: {requested_version}" @@ -216,9 +222,11 @@ def _check_agent_version(requested_version: str) -> bool: success: bool = retry_if_false(lambda: _check_agent_version(requested_version)) if not success: fail("Guest agent didn't update to requested version {0} but found \n {1}. 
\n " - "To debug verify if CRP has upgrade operation around that time and also check if agent log has any errors ".format(requested_version, waagent_version)) + "To debug verify if CRP has upgrade operation around that time and also check if agent log has any errors ".format( + requested_version, waagent_version)) waagent_version: str = self._ssh_client.run_command("waagent-version", use_sudo=True) - log.info(f"Successfully verified agent updated to requested version. Current agent version running:\n {waagent_version}") + log.info( + f"Successfully verified agent updated to requested version. Current agent version running:\n {waagent_version}") def _verify_no_guest_agent_update(self, version: str) -> None: """ @@ -226,7 +234,8 @@ def _verify_no_guest_agent_update(self, version: str) -> None: """ log.info("Verifying no update happened to agent") current_agent: str = self._ssh_client.run_command("waagent-version", use_sudo=True) - assert_that(current_agent).does_not_contain(version).described_as(f"Agent version changed.\n Current agent {current_agent}") + assert_that(current_agent).does_not_contain(version).described_as( + f"Agent version changed.\n Current agent {current_agent}") log.info("Verified agent was not updated to requested version") def _verify_agent_reported_supported_feature_flag(self): @@ -234,7 +243,8 @@ def _verify_agent_reported_supported_feature_flag(self): RSM update rely on supported flag that agent sends to CRP.So, checking if GA reports feature flag from the agent log """ - log.info("Executing verify_versioning_supported_feature.py remote script to verify agent reported supported feature flag, so that CRP can send RSM update request") + log.info( + "Executing verify_versioning_supported_feature.py remote script to verify agent reported supported feature flag, so that CRP can send RSM update request") self._run_remote_test(self._ssh_client, "agent_update-verify_versioning_supported_feature.py", use_sudo=True) log.info("Successfully verified that 
Agent reported VersioningGovernance supported feature flag") @@ -243,8 +253,11 @@ def _verify_agent_reported_update_status(self, version: str): Verify if the agent reported update status to CRP after update performed """ - log.info("Executing verify_agent_reported_update_status.py remote script to verify agent reported update status for version {0}".format(version)) - self._run_remote_test(self._ssh_client, f"agent_update-verify_agent_reported_update_status.py --version {version}", use_sudo=True) + log.info( + "Executing verify_agent_reported_update_status.py remote script to verify agent reported update status for version {0}".format( + version)) + self._run_remote_test(self._ssh_client, + f"agent_update-verify_agent_reported_update_status.py --version {version}", use_sudo=True) log.info("Successfully Agent reported update status for version {0}".format(version)) def _retrieve_installed_agent_version(self): @@ -258,7 +271,8 @@ def _retrieve_installed_agent_version(self): if match: self._installed_agent_version = match.groups()[0] else: - log.warning("Unable to retrieve installed agent version and set to default value {0}".format(self._installed_agent_version)) + log.warning("Unable to retrieve installed agent version and set to default value {0}".format( + self._installed_agent_version)) if __name__ == "__main__": diff --git a/tests_e2e/tests/scripts/agent_update-modify_agent_version b/tests_e2e/tests/scripts/agent_update-modify_agent_version index c8011e0094..68cb017d58 100755 --- a/tests_e2e/tests/scripts/agent_update-modify_agent_version +++ b/tests_e2e/tests/scripts/agent_update-modify_agent_version @@ -32,6 +32,4 @@ echo "Agent's Python: $PYTHON" # some distros return .pyc byte file instead source file .py. So, I retrieve parent directory first. 
version_file_dir=$($PYTHON -c 'import azurelinuxagent.common.version as v; import os; print(os.path.dirname(v.__file__))') version_file_full_path="$version_file_dir/version.py" -sed -E -i "s/AGENT_VERSION\s+=\s+'[0-9.]+'/AGENT_VERSION = '$version'/" $version_file_full_path -echo "Restarting service..." -agent-service restart \ No newline at end of file +sed -E -i "s/^AGENT_VERSION\s+=\s+'[0-9.]+'/AGENT_VERSION = '$version'/" $version_file_full_path \ No newline at end of file diff --git a/tests_e2e/tests/scripts/agent_update-verify_agent_reported_update_status.py b/tests_e2e/tests/scripts/agent_update-verify_agent_reported_update_status.py index 5da40cb423..6f12478612 100755 --- a/tests_e2e/tests/scripts/agent_update-verify_agent_reported_update_status.py +++ b/tests_e2e/tests/scripts/agent_update-verify_agent_reported_update_status.py @@ -36,8 +36,7 @@ def check_agent_reported_update_status(expected_version: str) -> bool: with open(file, 'r') as f: data = json.load(f) log.info("Agent status file is %s and it's content %s", file, data) - status = data["__status__"] - guest_agent_status = status["aggregateStatus"]["guestAgentStatus"] + guest_agent_status = data["aggregateStatus"]["guestAgentStatus"] if "updateStatus" in guest_agent_status.keys(): if guest_agent_status["updateStatus"]["expectedVersion"] == expected_version: log.info("we found the expected version %s in agent status file", expected_version) diff --git a/tests_e2e/tests/scripts/agent_update-verify_versioning_supported_feature.py b/tests_e2e/tests/scripts/agent_update-verify_versioning_supported_feature.py index 8627111a3a..d876033b67 100755 --- a/tests_e2e/tests/scripts/agent_update-verify_versioning_supported_feature.py +++ b/tests_e2e/tests/scripts/agent_update-verify_versioning_supported_feature.py @@ -33,8 +33,7 @@ def check_agent_supports_versioning() -> bool: with open(file, 'r') as f: data = json.load(f) log.info("Agent status file is %s and it's content %s", file, data) - status = 
data["__status__"] - supported_features = status["supportedFeatures"] + supported_features = data["supportedFeatures"] for supported_feature in supported_features: if supported_feature["Key"] == "VersioningGovernance": return True diff --git a/tests_e2e/tests/scripts/agent_update-wait_for_rsm_gs.py b/tests_e2e/tests/scripts/agent_update-wait_for_rsm_gs.py index c65047903a..832e0fd64e 100755 --- a/tests_e2e/tests/scripts/agent_update-wait_for_rsm_gs.py +++ b/tests_e2e/tests/scripts/agent_update-wait_for_rsm_gs.py @@ -35,7 +35,7 @@ def get_requested_version(gs: GoalState) -> str: raise Exception( u"No manifest links found for agent family Test, skipping agent update verification") manifest = agent_family_manifests[0] - if manifest.is_requested_version_specified and manifest.version is not None: + if manifest.version is not None: return str(manifest.version) return "" From e97d63b7232728ec0f82bb2571949d4dbb359979 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 7 Feb 2024 15:42:52 -0800 Subject: [PATCH 147/240] Check for 'Access denied' errors when testing SSH connectivity (#3042) Co-authored-by: narrieta --- tests_e2e/orchestrator/lib/agent_test_suite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index 2a7241d78b..f432c2d4c1 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -491,7 +491,7 @@ def _check_ssh_connectivity(ssh_client: SshClient) -> None: break except CommandError as error: # Check for "System is booting up. Unprivileged users are not permitted to log in yet. Please come back later. For technical details, see pam_nologin(8)." 
- if "Unprivileged users are not permitted to log in yet" not in error.stderr: + if not any(m in error.stderr for m in ["Unprivileged users are not permitted to log in yet", "Permission denied"]): raise if attempt >= max_attempts - 1: raise Exception(f"SSH connectivity check failed after {max_attempts} attempts, giving up [{error}]") From 8bc876d9d83ed33b0eb394e216f59e9424bc9f30 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 7 Feb 2024 15:56:38 -0800 Subject: [PATCH 148/240] Add Ubuntu 24 to end-to-end tests (#3041) * Add Ubuntu 24 to end-to-end tests * disable AzureMonitorLinuxAgent --------- Co-authored-by: narrieta --- tests_e2e/test_suites/images.yml | 7 +++++++ tests_e2e/tests/lib/vm_extension_identifier.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tests_e2e/test_suites/images.yml b/tests_e2e/test_suites/images.yml index f75b9be891..03c1bfd77c 100644 --- a/tests_e2e/test_suites/images.yml +++ b/tests_e2e/test_suites/images.yml @@ -28,6 +28,7 @@ image-sets: - "ubuntu_1804" - "ubuntu_2004" - "ubuntu_2204" + - "ubuntu_2404" # Endorsed distros (ARM64) that are tested on the daily runs endorsed-arm64: @@ -177,3 +178,9 @@ images: locations: AzureChinaCloud: [] AzureUSGovernment: [] + ubuntu_2404: + # TODO: Currently using the daily build, update to the release build once it is available + urn: "Canonical 0001-com-ubuntu-server-noble-daily 24_04-daily-lts-gen2 latest" + locations: + AzureChinaCloud: [] + AzureUSGovernment: [] diff --git a/tests_e2e/tests/lib/vm_extension_identifier.py b/tests_e2e/tests/lib/vm_extension_identifier.py index afbee1e245..9a11e43529 100644 --- a/tests_e2e/tests/lib/vm_extension_identifier.py +++ b/tests_e2e/tests/lib/vm_extension_identifier.py @@ -34,7 +34,7 @@ def __init__(self, publisher: str, ext_type: str, version: str): unsupported_distros: Dict[str, List[str]] = { "Microsoft.OSTCExtensions.VMAccessForLinux": ["flatcar"], - "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent": ["flatcar", 
"mariner_1"] + "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent": ["flatcar", "mariner_1", "ubuntu_2404"] } def supports_distro(self, system_info: str) -> bool: From 32e61e141634e4028dd07415a8a747c87859f5a0 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 7 Feb 2024 16:05:24 -0800 Subject: [PATCH 149/240] Skip capture of VM information on test runs (#3043) Co-authored-by: narrieta --- tests_e2e/orchestrator/docker/Dockerfile | 2 +- tests_e2e/orchestrator/lib/agent_junit.py | 1 + .../orchestrator/lib/agent_test_suite_combinator.py | 10 +++++++--- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/tests_e2e/orchestrator/docker/Dockerfile b/tests_e2e/orchestrator/docker/Dockerfile index 33ef0b44bd..597e57418b 100644 --- a/tests_e2e/orchestrator/docker/Dockerfile +++ b/tests_e2e/orchestrator/docker/Dockerfile @@ -67,7 +67,7 @@ RUN \ cd $HOME && \ git clone https://github.com/microsoft/lisa.git && \ cd lisa && \ - git checkout 7396cbd6d9b31a99b13c184f735ce9827334f21b && \ + git checkout 2c16e32001fdefb9572dff61241451b648259dbf && \ \ python3 -m pip install --upgrade pip && \ python3 -m pip install --editable .[azure,libvirt] --config-settings editable_mode=compat && \ diff --git a/tests_e2e/orchestrator/lib/agent_junit.py b/tests_e2e/orchestrator/lib/agent_junit.py index 47a5e7d697..2e09c73d75 100644 --- a/tests_e2e/orchestrator/lib/agent_junit.py +++ b/tests_e2e/orchestrator/lib/agent_junit.py @@ -37,6 +37,7 @@ @dataclass class AgentJUnitSchema(schema.Notifier): path: str = "agent.junit.xml" + include_subtest: bool = True class AgentJUnit(JUnit): diff --git a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py index 4b650e8641..ffecaf3630 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py @@ -276,7 +276,8 @@ def create_existing_vm_environment(self) -> Dict[str, Any]: "c_env_name": self.runbook.vm_name, 
"c_platform": [ { - "type": "ready" + "type": "ready", + "capture_vm_information": False } ], "c_environment": { @@ -331,7 +332,8 @@ def create_existing_vmss_environment(self) -> Dict[str, Any]: }, "c_platform": [ { - "type": "ready" + "type": "ready", + "capture_vm_information": False } ], "c_location": self.runbook.location, @@ -356,6 +358,7 @@ def create_vm_environment(self, env_name: str, marketplace_image: str, vhd: str, "admin_username": self.runbook.user, "admin_private_key_file": self.runbook.identity_file, "keep_environment": self.runbook.keep_environment, + "capture_vm_information": False, "azure": { "deploy": True, "cloud": self.runbook.cloud, @@ -406,7 +409,8 @@ def create_vmss_environment(self, env_name: str, marketplace_image: str, locatio return { "c_platform": [ { - "type": "ready" + "type": "ready", + "capture_vm_information": False } ], From 694e41d15c0554b4e5e7dd2799b6a8745ba28d9b Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 7 Feb 2024 19:43:00 -0800 Subject: [PATCH 150/240] Create symlink for waagent.com on Flatcar (#3045) Co-authored-by: narrieta --- tests_e2e/orchestrator/scripts/install-agent | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests_e2e/orchestrator/scripts/install-agent b/tests_e2e/orchestrator/scripts/install-agent index 94f850484f..61181b44d3 100755 --- a/tests_e2e/orchestrator/scripts/install-agent +++ b/tests_e2e/orchestrator/scripts/install-agent @@ -131,6 +131,17 @@ if grep -q "Extensions.Enabled=n" $waagent_conf_path; then fi fi +# +# TODO: Remove this block once the symlink is created in the Flatcar image +# +# Currently, the Agent looks for /usr/share/oem/waagent.conf, but new Flatcar images use /etc/waagent.conf. Flatcar will create +# this symlink in new images, but we need to create it for now. +if [[ $(uname -a) == *"flatcar"* ]]; then + if [[ ! 
-f /usr/share/oem/waagent.conf ]]; then + ln -s "$waagent_conf_path" /usr/share/oem/waagent.conf + fi +fi + # # Restart the service # From 7521421a568b1ce48080f0dc3557be805b13627b Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Fri, 9 Feb 2024 15:42:25 -0800 Subject: [PATCH 151/240] don't allow agent update if attempts reached max limit (#3033) * set max update attempts * download refactor * pylint --- azurelinuxagent/ga/agent_update_handler.py | 19 +- azurelinuxagent/ga/ga_version_updater.py | 47 ++++- azurelinuxagent/ga/guestagent.py | 114 ++++++----- azurelinuxagent/ga/rsm_version_updater.py | 8 - .../ga/self_update_version_updater.py | 7 - azurelinuxagent/ga/update.py | 14 -- ... => WALinuxAgent-9.9.9.10-no_manifest.zip} | Bin tests/data/wire/ga_manifest.xml | 2 +- tests/ga/test_agent_update_handler.py | 72 ++++++- tests/ga/test_guestagent.py | 179 +++++++++--------- tests/ga/test_update.py | 21 -- 11 files changed, 277 insertions(+), 206 deletions(-) rename tests/data/ga/{WALinuxAgent-9.9.9.9-no_manifest.zip => WALinuxAgent-9.9.9.10-no_manifest.zip} (100%) diff --git a/azurelinuxagent/ga/agent_update_handler.py b/azurelinuxagent/ga/agent_update_handler.py index 02549fa5a9..8caec10873 100644 --- a/azurelinuxagent/ga/agent_update_handler.py +++ b/azurelinuxagent/ga/agent_update_handler.py @@ -182,12 +182,23 @@ def run(self, goal_state, ext_gs_updated): if not self._updater.is_retrieved_version_allowed_to_update(agent_family): return self._updater.log_new_agent_update_message() - self._updater.purge_extra_agents_from_disk() agent = self._updater.download_and_get_new_agent(self._protocol, agent_family, goal_state) - if agent.is_blacklisted or not agent.is_downloaded: - msg = "Downloaded agent version is in bad state : {0} , skipping agent update".format( - str(agent.version)) + + # Below condition is to break the update loop if new agent is in bad state in previous attempts + # If the bad agent update already 
attempted 3 times, we don't want to continue with update anymore. + # Otherewise we allow the update by increment the update attempt count and clear the bad state to make good agent + # [Note: As a result, it is breaking contract between RSM and agent, we may NOT honor the RSM retries for that version] + if agent.get_update_attempt_count() >= 3: + msg = "Attempted enough update retries for version: {0} but still agent not recovered from bad state. So, we stop updating to this version".format(str(agent.version)) raise AgentUpdateError(msg) + else: + agent.clear_error() + agent.inc_update_attempt_count() + msg = "Agent update attempt count: {0} for version: {1}".format(agent.get_update_attempt_count(), str(agent.version)) + logger.info(msg) + add_event(op=WALAEventOperation.AgentUpgrade, message=msg, log_event=False) + + self._updater.purge_extra_agents_from_disk() self._updater.proceed_with_update() except Exception as err: diff --git a/azurelinuxagent/ga/ga_version_updater.py b/azurelinuxagent/ga/ga_version_updater.py index 46ae1f31f1..82a621eacf 100644 --- a/azurelinuxagent/ga/ga_version_updater.py +++ b/azurelinuxagent/ga/ga_version_updater.py @@ -26,8 +26,8 @@ from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateSource from azurelinuxagent.common.utils import fileutil from azurelinuxagent.common.utils.flexible_version import FlexibleVersion -from azurelinuxagent.common.version import AGENT_NAME, AGENT_DIR_PATTERN -from azurelinuxagent.ga.guestagent import GuestAgent +from azurelinuxagent.common.version import AGENT_NAME, AGENT_DIR_PATTERN, CURRENT_VERSION +from azurelinuxagent.ga.guestagent import GuestAgent, AGENT_MANIFEST_FILE class GAVersionUpdater(object): @@ -77,12 +77,6 @@ def log_new_agent_update_message(self): """ raise NotImplementedError - def purge_extra_agents_from_disk(self): - """ - Method remove the extra agents from disk. 
- """ - raise NotImplementedError - def proceed_with_update(self): """ performs upgrade/downgrade @@ -104,6 +98,33 @@ def sync_new_gs_id(self, gs_id): """ self._gs_id = gs_id + @staticmethod + def download_new_agent_pkg(package_to_download, protocol, is_fast_track_goal_state): + """ + Function downloads the new agent. + @param package_to_download: package to download + @param protocol: protocol object + @param is_fast_track_goal_state: True if goal state is fast track else False + """ + agent_name = "{0}-{1}".format(AGENT_NAME, package_to_download.version) + agent_dir = os.path.join(conf.get_lib_dir(), agent_name) + agent_pkg_path = ".".join((os.path.join(conf.get_lib_dir(), agent_name), "zip")) + agent_handler_manifest_file = os.path.join(agent_dir, AGENT_MANIFEST_FILE) + if not os.path.exists(agent_dir) or not os.path.isfile(agent_handler_manifest_file): + protocol.client.download_zip_package("agent package", package_to_download.uris, agent_pkg_path, agent_dir, use_verify_header=is_fast_track_goal_state) + else: + logger.info("Agent {0} was previously downloaded - skipping download", agent_name) + + if not os.path.isfile(agent_handler_manifest_file): + try: + # Clean up the agent directory if the manifest file is missing + logger.info("Agent handler manifest file is missing, cleaning up the agent directory: {0}".format(agent_dir)) + if os.path.isdir(agent_dir): + shutil.rmtree(agent_dir, ignore_errors=True) + except Exception as err: + logger.warn("Unable to delete Agent directory: {0}".format(err)) + raise AgentUpdateError("Downloaded agent package: {0} is missing agent handler manifest file: {1}".format(agent_name, agent_handler_manifest_file)) + def download_and_get_new_agent(self, protocol, agent_family, goal_state): """ Function downloads the new agent and returns the downloaded version. 
@@ -116,9 +137,17 @@ def download_and_get_new_agent(self, protocol, agent_family, goal_state): self._agent_manifest = goal_state.fetch_agent_manifest(agent_family.name, agent_family.uris) package_to_download = self._get_agent_package_to_download(self._agent_manifest, self._version) is_fast_track_goal_state = goal_state.extensions_goal_state.source == GoalStateSource.FastTrack - agent = GuestAgent.from_agent_package(package_to_download, protocol, is_fast_track_goal_state) + self.download_new_agent_pkg(package_to_download, protocol, is_fast_track_goal_state) + agent = GuestAgent.from_agent_package(package_to_download) return agent + def purge_extra_agents_from_disk(self): + """ + Remove the agents from disk except current version and new agent version + """ + known_agents = [CURRENT_VERSION, self._version] + self._purge_unknown_agents_from_disk(known_agents) + def _get_agent_package_to_download(self, agent_manifest, version): """ Returns the package of the given Version found in the manifest. 
If not found, returns exception diff --git a/azurelinuxagent/ga/guestagent.py b/azurelinuxagent/ga/guestagent.py index 35472c74a6..b4b2d05b32 100644 --- a/azurelinuxagent/ga/guestagent.py +++ b/azurelinuxagent/ga/guestagent.py @@ -10,29 +10,26 @@ from azurelinuxagent.common import logger, conf from azurelinuxagent.common.exception import UpdateError from azurelinuxagent.common.utils.flexible_version import FlexibleVersion -from azurelinuxagent.common.version import AGENT_DIR_PATTERN, AGENT_NAME, CURRENT_VERSION +from azurelinuxagent.common.version import AGENT_DIR_PATTERN, AGENT_NAME from azurelinuxagent.ga.exthandlers import HandlerManifest AGENT_ERROR_FILE = "error.json" # File name for agent error record AGENT_MANIFEST_FILE = "HandlerManifest.json" MAX_FAILURE = 3 # Max failure allowed for agent before declare bad agent +AGENT_UPDATE_COUNT_FILE = "update_attempt.json" # File for tracking agent update attempt count class GuestAgent(object): - def __init__(self, path, pkg, protocol, is_fast_track_goal_state): + def __init__(self, path, pkg): """ If 'path' is given, the object is initialized to the version installed under that path. If 'pkg' is given, the version specified in the package information is downloaded and the object is initialized to that version. - 'is_fast_track_goal_state' and 'protocol' are used only when a package is downloaded. 
- NOTE: Prefer using the from_installed_agent and from_agent_package methods instead of calling __init__ directly """ - self._is_fast_track_goal_state = is_fast_track_goal_state self.pkg = pkg - self._protocol = protocol version = None if path is not None: m = AGENT_DIR_PATTERN.match(path) @@ -52,11 +49,13 @@ def __init__(self, path, pkg, protocol, is_fast_track_goal_state): self.error = GuestAgentError(self.get_agent_error_file()) self.error.load() + self.update_attempt_data = GuestAgentUpdateAttempt(self.get_agent_update_count_file()) + self.update_attempt_data.load() + try: - self._ensure_downloaded() self._ensure_loaded() except Exception as e: - # If we're unable to download/unpack the agent, delete the Agent directory + # If we're unable to unpack the agent, delete the Agent directory try: if os.path.isdir(self.get_agent_dir()): shutil.rmtree(self.get_agent_dir(), ignore_errors=True) @@ -77,14 +76,14 @@ def from_installed_agent(path): """ Creates an instance of GuestAgent using the agent installed in the given 'path'. """ - return GuestAgent(path, None, None, False) + return GuestAgent(path, None) @staticmethod - def from_agent_package(package, protocol, is_fast_track_goal_state): + def from_agent_package(package): """ Creates an instance of GuestAgent using the information provided in the 'package'; if that version of the agent is not installed it, it installs it. 
""" - return GuestAgent(None, package, protocol, is_fast_track_goal_state) + return GuestAgent(None, package) @property def name(self): @@ -99,6 +98,9 @@ def get_agent_dir(self): def get_agent_error_file(self): return os.path.join(conf.get_lib_dir(), self.name, AGENT_ERROR_FILE) + def get_agent_update_count_file(self): + return os.path.join(conf.get_lib_dir(), self.name, AGENT_UPDATE_COUNT_FILE) + def get_agent_manifest_path(self): return os.path.join(self.get_agent_dir(), AGENT_MANIFEST_FILE) @@ -136,45 +138,20 @@ def mark_failure(self, is_fatal=False, reason=''): except Exception as e: logger.warn(u"Agent {0} failed recording error state: {1}", self.name, ustr(e)) - def _ensure_downloaded(self): - logger.verbose(u"Ensuring Agent {0} is downloaded", self.name) - - if self.is_downloaded: - logger.verbose(u"Agent {0} was previously downloaded - skipping download", self.name) - return - - if self.pkg is None: - raise UpdateError(u"Agent {0} is missing package and download URIs".format( - self.name)) - - self._download() + def inc_update_attempt_count(self): + try: + self.update_attempt_data.inc_count() + self.update_attempt_data.save() + except Exception as e: + logger.warn(u"Agent {0} failed recording update attempt: {1}", self.name, ustr(e)) - msg = u"Agent {0} downloaded successfully".format(self.name) - logger.verbose(msg) - add_event( - AGENT_NAME, - version=self.version, - op=WALAEventOperation.Install, - is_success=True, - message=msg) + def get_update_attempt_count(self): + return self.update_attempt_data.count def _ensure_loaded(self): self._load_manifest() self._load_error() - def _download(self): - try: - self._protocol.client.download_zip_package("agent package", self.pkg.uris, self.get_agent_pkg_path(), self.get_agent_dir(), use_verify_header=self._is_fast_track_goal_state) - except Exception as exception: - msg = "Unable to download Agent {0}: {1}".format(self.name, ustr(exception)) - add_event( - AGENT_NAME, - op=WALAEventOperation.Download, - 
version=CURRENT_VERSION, - is_success=False, - message=msg) - raise UpdateError(msg) - def _load_error(self): try: self.error = GuestAgentError(self.get_agent_error_file()) @@ -303,3 +280,52 @@ def __str__(self): self.failure_count, self.was_fatal, self.reason) + + +class GuestAgentUpdateAttempt(object): + def __init__(self, path): + self.count = 0 + if path is None: + raise UpdateError(u"GuestAgentUpdateAttempt requires a path") + self.path = path + + self.clear() + + def inc_count(self): + self.count += 1 + + def clear(self): + self.count = 0 + + def load(self): + if self.path is not None and os.path.isfile(self.path): + try: + with open(self.path, 'r') as f: + self.from_json(json.load(f)) + except Exception as error: + # The update_attempt.json file is only supposed to be written only by the agent. + # If for whatever reason the file is malformed, just delete it to reset state of the errors. + logger.warn( + "Ran into error when trying to load error file {0}, deleting it to clean state. 
Error: {1}".format( + self.path, textutil.format_exception(error))) + try: + os.remove(self.path) + except Exception: + # We try best case efforts to delete the file, ignore error if we're unable to do so + pass + + def save(self): + if os.path.isdir(os.path.dirname(self.path)): + with open(self.path, 'w') as f: + json.dump(self.to_json(), f) + + def from_json(self, data): + self.count = data.get(u"count", 0) + + def to_json(self): + data = { + u"count": self.count + } + return data + + diff --git a/azurelinuxagent/ga/rsm_version_updater.py b/azurelinuxagent/ga/rsm_version_updater.py index a7a8bd97d1..366f1d703e 100644 --- a/azurelinuxagent/ga/rsm_version_updater.py +++ b/azurelinuxagent/ga/rsm_version_updater.py @@ -104,14 +104,6 @@ def log_new_agent_update_message(self): logger.info(msg) add_event(op=WALAEventOperation.AgentUpgrade, message=msg, log_event=False) - def purge_extra_agents_from_disk(self): - """ - Remove the agents( including rsm version if exists) from disk except current version. There is a chance that rsm version could exist and/or blacklisted - on previous update attempts. So we should remove it from disk in order to honor current rsm version update. - """ - known_agents = [CURRENT_VERSION] - self._purge_unknown_agents_from_disk(known_agents) - def proceed_with_update(self): """ upgrade/downgrade to the new version. 
diff --git a/azurelinuxagent/ga/self_update_version_updater.py b/azurelinuxagent/ga/self_update_version_updater.py index 6605a28eb3..5a839851de 100644 --- a/azurelinuxagent/ga/self_update_version_updater.py +++ b/azurelinuxagent/ga/self_update_version_updater.py @@ -171,13 +171,6 @@ def log_new_agent_update_message(self): logger.info(msg) add_event(op=WALAEventOperation.AgentUpgrade, message=msg, log_event=False) - def purge_extra_agents_from_disk(self): - """ - Remove the agents from disk except current version and new agent version if exists - """ - known_agents = [CURRENT_VERSION, self._version] - self._purge_unknown_agents_from_disk(known_agents) - def proceed_with_update(self): """ upgrade to largest version. Downgrade is not supported. diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index f2c20b0f75..a8d34f7c40 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -372,7 +372,6 @@ def run(self, debug=False): self._ensure_extension_telemetry_state_configured_properly(protocol) self._ensure_firewall_rules_persisted(dst_ip=protocol.get_endpoint()) self._add_accept_tcp_firewall_rule_if_not_enabled(dst_ip=protocol.get_endpoint()) - self._reset_legacy_blacklisted_agents() self._cleanup_legacy_goal_state_history() # Get all thread handlers @@ -1199,16 +1198,3 @@ def _execute_run_command(command): except Exception as e: msg = "Error while checking ip table rules:{0}".format(ustr(e)) logger.error(msg) - - def _reset_legacy_blacklisted_agents(self): - # Reset the state of all blacklisted agents that were blacklisted by legacy agents (i.e. not during auto-update) - - # Filter legacy agents which are blacklisted but do not contain a `reason` in their error.json files - # (this flag signifies that this agent was blacklisted by the newer agents). 
- try: - legacy_blacklisted_agents = [agent for agent in self._load_agents() if - agent.is_blacklisted and agent.error.reason == ''] - for agent in legacy_blacklisted_agents: - agent.clear_error() - except Exception as err: - logger.warn("Unable to reset legacy blacklisted agents due to: {0}".format(err)) diff --git a/tests/data/ga/WALinuxAgent-9.9.9.9-no_manifest.zip b/tests/data/ga/WALinuxAgent-9.9.9.10-no_manifest.zip similarity index 100% rename from tests/data/ga/WALinuxAgent-9.9.9.9-no_manifest.zip rename to tests/data/ga/WALinuxAgent-9.9.9.10-no_manifest.zip diff --git a/tests/data/wire/ga_manifest.xml b/tests/data/wire/ga_manifest.xml index 799e1f111c..c51bdbbc48 100644 --- a/tests/data/wire/ga_manifest.xml +++ b/tests/data/wire/ga_manifest.xml @@ -31,7 +31,7 @@ 9.9.9.10 - http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__99999.0.0.0 + http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__9.9.9.10 diff --git a/tests/ga/test_agent_update_handler.py b/tests/ga/test_agent_update_handler.py index fc58a6ee25..82fcd362df 100644 --- a/tests/ga/test_agent_update_handler.py +++ b/tests/ga/test_agent_update_handler.py @@ -9,8 +9,9 @@ from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatuses from azurelinuxagent.common.protocol.util import ProtocolUtil -from azurelinuxagent.common.version import CURRENT_VERSION +from azurelinuxagent.common.version import CURRENT_VERSION, AGENT_NAME from azurelinuxagent.ga.agent_update_handler import get_agent_update_handler +from azurelinuxagent.ga.guestagent import GuestAgent from tests.ga.test_update import UpdateTestCase from tests.lib.http_request_predicates import HttpRequestPredicates from tests.lib.mock_wire_protocol import mock_wire_protocol, MockHttpResponse @@ -27,7 +28,7 @@ def setUp(self): clear_singleton_instances(ProtocolUtil) @contextlib.contextmanager - def _get_agent_update_handler(self, test_data=None, autoupdate_frequency=0.001, autoupdate_enabled=True, 
protocol_get_error=False): + def _get_agent_update_handler(self, test_data=None, autoupdate_frequency=0.001, autoupdate_enabled=True, protocol_get_error=False, mock_get_header=None, mock_put_header=None): # Default to DATA_FILE of test_data parameter raises the pylint warning # W0102: Dangerous default value DATA_FILE (builtins.dict) as argument (dangerous-default-value) test_data = DATA_FILE if test_data is None else test_data @@ -51,7 +52,10 @@ def put_handler(url, *args, **_): protocol.aggregate_status = json.loads(args[0]) return MockHttpResponse(status=201) - protocol.set_http_handlers(http_get_handler=get_handler, http_put_handler=put_handler) + http_get_handler = mock_get_header if mock_get_header else get_handler + http_put_handler = mock_put_header if mock_put_header else put_handler + + protocol.set_http_handlers(http_get_handler=http_get_handler, http_put_handler=http_put_handler) with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=autoupdate_enabled): with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=autoupdate_frequency): @@ -401,7 +405,7 @@ def test_it_should_report_update_status_with_error_on_download_fail(self): self.assertEqual(VMAgentUpdateStatuses.Error, vm_agent_update_status.status) self.assertEqual(1, vm_agent_update_status.code) self.assertEqual("9.9.9.10", vm_agent_update_status.expected_version) - self.assertIn("Downloaded agent version is in bad state", vm_agent_update_status.message) + self.assertIn("Failed to download agent package from all URIs", vm_agent_update_status.message) def test_it_should_report_update_status_with_missing_rsm_version_error(self): data_file = DATA_FILE.copy() @@ -470,3 +474,63 @@ def test_it_should_not_update_to_latest_if_flag_is_disabled(self): agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) self._assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)]) + + def 
test_it_should_continue_with_update_if_number_of_update_attempts_less_than_3(self): + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_rsm_version.xml" + + latest_version = self.prepare_agents(count=2) + self.expand_agents() + latest_path = os.path.join(self.tmp_dir, "{0}-{1}".format(AGENT_NAME, latest_version)) + agent = GuestAgent.from_installed_agent(latest_path) + # marking agent as bad agent on first attempt + agent.mark_failure(is_fatal=True) + agent.inc_update_attempt_count() + self.assertTrue(agent.is_blacklisted, "Agent should be blacklisted") + self.assertEqual(1, agent.get_update_attempt_count(), "Agent update attempts should be 1") + with self._get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + # Rest 2 attempts it should continue with update even agent is marked as bad agent in first attempt + for i in range(2): + with self.assertRaises(AgentUpgradeExitException): + agent_update_handler._protocol.mock_wire_data.set_version_in_agent_family( + str(latest_version)) + agent_update_handler._protocol.mock_wire_data.set_incarnation(i+2) + agent_update_handler._protocol.client.update_goal_state() + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) + self._assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), str(latest_version)]) + agent = GuestAgent.from_installed_agent(latest_path) + self.assertFalse(agent.is_blacklisted, "Agent should not be blacklisted") + self.assertEqual(i+2, agent.get_update_attempt_count(), "Agent update attempts should be {0}".format(i+2)) + + # check if next update is not attempted + agent.mark_failure(is_fatal=True) + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) + agent = GuestAgent.from_installed_agent(latest_path) + self.assertTrue(agent.is_blacklisted, "Agent should be blacklisted") + self.assertEqual(3, agent.get_update_attempt_count(), "Agent update attempts should be 3") + 
self.assertEqual(1, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + "Attempted enough update retries for version: {0} but still agent not recovered from bad state".format(latest_version) in kwarg[ + 'message'] and kwarg[ + 'op'] == WALAEventOperation.AgentUpgrade]), + "Update is not allowed after 3 attempts") + + def test_it_should_fail_the_update_if_agent_pkg_is_invalid(self): + agent_uri = 'https://foo.blob.core.windows.net/bar/OSTCExtensions.WALinuxAgent__9.9.9.10' + + def http_get_handler(uri, *_, **__): + if uri in (agent_uri, 'http://168.63.129.16:32526/extensionArtifact'): + response = load_bin_data("ga/WALinuxAgent-9.9.9.10-no_manifest.zip") + return MockHttpResponse(status=httpclient.OK, body=response) + return None + self.prepare_agents(count=1) + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_rsm_version.xml" + with self._get_agent_update_handler(test_data=data_file, mock_get_header=http_get_handler) as (agent_update_handler, mock_telemetry): + agent_update_handler._protocol.mock_wire_data.set_version_in_agent_family("9.9.9.10") + agent_update_handler._protocol.mock_wire_data.set_incarnation(2) + agent_update_handler._protocol.client.update_goal_state() + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) + self._assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)]) + self.assertEqual(1, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + "Downloaded agent package: WALinuxAgent-9.9.9.10 is missing agent handler manifest file" in kwarg['message'] and kwarg[ + 'op'] == WALAEventOperation.AgentUpgrade]), "Agent update should fail") diff --git a/tests/ga/test_guestagent.py b/tests/ga/test_guestagent.py index 3d25ff7921..972e603c2c 100644 --- a/tests/ga/test_guestagent.py +++ b/tests/ga/test_guestagent.py @@ -1,17 +1,14 @@ +import contextlib import json import os +import tempfile from azurelinuxagent.common import conf from 
azurelinuxagent.common.exception import UpdateError from azurelinuxagent.ga.guestagent import GuestAgent, AGENT_MANIFEST_FILE, AGENT_ERROR_FILE, GuestAgentError, \ - MAX_FAILURE -from azurelinuxagent.common.future import httpclient -from azurelinuxagent.common.protocol.restapi import ExtHandlerPackage + MAX_FAILURE, GuestAgentUpdateAttempt from azurelinuxagent.common.version import AGENT_NAME from tests.ga.test_update import UpdateTestCase, EMPTY_MANIFEST, WITH_ERROR, NO_ERROR -from tests.lib import wire_protocol_data -from tests.lib.mock_wire_protocol import MockHttpResponse, mock_wire_protocol -from tests.lib.tools import load_bin_data, patch class TestGuestAgent(UpdateTestCase): @@ -102,6 +99,22 @@ def test_mark_failure(self): self.assertEqual(2, agent.error.failure_count) self.assertTrue(agent.is_blacklisted) + def test_inc_update_attempt_count(self): + agent = GuestAgent.from_installed_agent(self.agent_path) + agent.inc_update_attempt_count() + self.assertEqual(1, agent.update_attempt_data.count) + + agent.inc_update_attempt_count() + self.assertEqual(2, agent.update_attempt_data.count) + + def test_get_update_count(self): + agent = GuestAgent.from_installed_agent(self.agent_path) + agent.inc_update_attempt_count() + self.assertEqual(1, agent.get_update_attempt_count()) + + agent.inc_update_attempt_count() + self.assertEqual(2, agent.get_update_attempt_count()) + def test_load_manifest(self): self.expand_agents() agent = GuestAgent.from_installed_agent(self.agent_path) @@ -140,94 +153,6 @@ def test_load_error(self): agent._load_error() self.assertTrue(agent.error is not None) - def test_download(self): - self.remove_agents() - self.assertFalse(os.path.isdir(self.agent_path)) - - agent_uri = 'https://foo.blob.core.windows.net/bar/OSTCExtensions.WALinuxAgent__1.0.0' - - def http_get_handler(uri, *_, **__): - if uri == agent_uri: - response = load_bin_data(self._get_agent_file_name(), self._agent_zip_dir) - return MockHttpResponse(status=httpclient.OK, 
body=response) - return None - - pkg = ExtHandlerPackage(version=str(self._get_agent_version())) - pkg.uris.append(agent_uri) - - with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: - protocol.set_http_handlers(http_get_handler=http_get_handler) - agent = GuestAgent.from_agent_package(pkg, protocol, False) - - self.assertTrue(os.path.isdir(agent.get_agent_dir())) - self.assertTrue(agent.is_downloaded) - - def test_download_fail(self): - self.remove_agents() - self.assertFalse(os.path.isdir(self.agent_path)) - - agent_uri = 'https://foo.blob.core.windows.net/bar/OSTCExtensions.WALinuxAgent__1.0.0' - - def http_get_handler(uri, *_, **__): - if uri in (agent_uri, 'http://168.63.129.16:32526/extensionArtifact'): - return MockHttpResponse(status=httpclient.SERVICE_UNAVAILABLE) - return None - - agent_version = self._get_agent_version() - pkg = ExtHandlerPackage(version=str(self._get_agent_version())) - pkg.uris.append(agent_uri) - - with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: - protocol.set_http_handlers(http_get_handler=http_get_handler) - with patch("azurelinuxagent.ga.guestagent.add_event") as add_event: - agent = GuestAgent.from_agent_package(pkg, protocol, False) - - self.assertFalse(os.path.isfile(self.agent_path)) - - messages = [kwargs['message'] for _, kwargs in add_event.call_args_list if kwargs['op'] == 'Install' and kwargs['is_success'] == False] - self.assertEqual(1, len(messages), "Expected exactly 1 install error/ Got: {0}".format(add_event.call_args_list)) - self.assertIn(str.format('[UpdateError] Unable to download Agent WALinuxAgent-{0}', agent_version), messages[0], "The install error does not include the expected message") - - self.assertFalse(agent.is_blacklisted, "Download failures should not blacklist the Agent") - - def test_invalid_agent_package_does_not_blacklist_the_agent(self): - agent_uri = 'https://foo.blob.core.windows.net/bar/OSTCExtensions.WALinuxAgent__9.9.9.9' - - def http_get_handler(uri, *_, 
**__): - if uri in (agent_uri, 'http://168.63.129.16:32526/extensionArtifact'): - response = load_bin_data("ga/WALinuxAgent-9.9.9.9-no_manifest.zip") - return MockHttpResponse(status=httpclient.OK, body=response) - return None - - pkg = ExtHandlerPackage(version="9.9.9.9") - pkg.uris.append(agent_uri) - - with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: - protocol.set_http_handlers(http_get_handler=http_get_handler) - agent = GuestAgent.from_agent_package(pkg, protocol, False) - - self.assertFalse(agent.is_blacklisted, "The agent should not be blacklisted if unable to unpack/download") - self.assertFalse(os.path.exists(agent.get_agent_dir()), "Agent directory should be cleaned up") - - @patch("azurelinuxagent.ga.update.GuestAgent._download") - def test_ensure_download_skips_blacklisted(self, mock_download): - agent = GuestAgent.from_installed_agent(self.agent_path) - self.assertEqual(0, mock_download.call_count) - - agent.clear_error() - agent.mark_failure(is_fatal=True) - self.assertTrue(agent.is_blacklisted) - - pkg = ExtHandlerPackage(version=str(self._get_agent_version())) - pkg.uris.append(None) - # _download is mocked so there will be no http request; passing a None protocol - agent = GuestAgent.from_agent_package(pkg, None, False) - - self.assertEqual(1, agent.error.failure_count) - self.assertTrue(agent.error.was_fatal) - self.assertTrue(agent.is_blacklisted) - self.assertEqual(0, mock_download.call_count) - class TestGuestAgentError(UpdateTestCase): def test_creation(self): @@ -308,3 +233,69 @@ def test_str(self): WITH_ERROR["reason"]) self.assertEqual(s, str(err)) return + + +UPDATE_ATTEMPT = { + "count": 2 +} + +NO_ATTEMPT = { + "count": 0 +} + + +class TestGuestAgentUpdateAttempt(UpdateTestCase): + @contextlib.contextmanager + def get_attempt_count_file(self, attempt_count=None): + if attempt_count is None: + attempt_count = NO_ATTEMPT + with tempfile.NamedTemporaryFile(mode="w") as fp: + json.dump(attempt_count, fp) + fp.seek(0) + 
yield fp + + def test_creation(self): + self.assertRaises(TypeError, GuestAgentUpdateAttempt) + self.assertRaises(UpdateError, GuestAgentUpdateAttempt, None) + + with self.get_attempt_count_file(UPDATE_ATTEMPT) as path: + update_data = GuestAgentUpdateAttempt(path.name) + update_data.load() + self.assertEqual(path.name, update_data.path) + self.assertNotEqual(None, update_data) + + self.assertEqual(UPDATE_ATTEMPT["count"], update_data.count) + + def test_clear(self): + with self.get_attempt_count_file(UPDATE_ATTEMPT) as path: + update_data = GuestAgentUpdateAttempt(path.name) + update_data.load() + self.assertEqual(path.name, update_data.path) + self.assertNotEqual(None, update_data) + + update_data.clear() + self.assertEqual(NO_ATTEMPT["count"], update_data.count) + + def test_save(self): + with self.get_attempt_count_file(UPDATE_ATTEMPT) as path: + update_data = GuestAgentUpdateAttempt(path.name) + update_data.load() + update_data.inc_count() + update_data.save() + + with self.get_attempt_count_file(update_data.to_json()) as path: + new_data = GuestAgentUpdateAttempt(path.name) + new_data.load() + + self.assertEqual(update_data.count, new_data.count) + + def test_inc_count(self): + with self.get_attempt_count_file() as path: + update_data = GuestAgentUpdateAttempt(path.name) + update_data.load() + + self.assertEqual(0, update_data.count) + update_data.inc_count() + self.assertEqual(1, update_data.count) + update_data.inc_count() + self.assertEqual(2, update_data.count) diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 079f896bed..58732de5c4 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -1340,27 +1340,6 @@ def get_handler(url, **kwargs): "Fetching the goal state recovered from previous errors." 
in args[0]] self.assertTrue(len(info_msgs) > 0, "Agent should've logged a message when recovered from GS errors") - def test_it_should_reset_legacy_blacklisted_agents_on_process_start(self): - # Add some good agents - self.prepare_agents(count=10) - good_agents = [agent.name for agent in self.agents()] - - # Add a set of blacklisted agents - self.prepare_agents(count=20, is_available=False) - for agent in self.agents(): - # Assert the test environment is correctly set - if agent.name not in good_agents: - self.assertTrue(agent.is_blacklisted, "Agent {0} should be blacklisted".format(agent.name)) - else: - self.assertFalse(agent.is_blacklisted, "Agent {0} should not be blacklisted".format(agent.name)) - - with _get_update_handler(autoupdate_enabled=False) as (update_handler, _): - update_handler.run(debug=True) - self.assertEqual(20, self.agent_count(), "All agents should be available on disk") - # Ensure none of the agents are blacklisted - for agent in self.agents(): - self.assertFalse(agent.is_blacklisted, "Legacy Agent should not be blacklisted") - class TestUpdateWaitForCloudInit(AgentTestCase): @staticmethod From 4f6d98422b22b509ef6430a276bf2c86c083ce9a Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Fri, 9 Feb 2024 15:47:20 -0800 Subject: [PATCH 152/240] disable RSM updates (#3044) --- azurelinuxagent/common/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/common/conf.py b/azurelinuxagent/common/conf.py index a94220ecf7..6662285317 100644 --- a/azurelinuxagent/common/conf.py +++ b/azurelinuxagent/common/conf.py @@ -670,7 +670,7 @@ def get_enable_ga_versioning(conf=__conf__): If True, the agent looks for rsm updates(checking requested version in GS) otherwise it will fall back to self-update and finds the highest version from PIR. NOTE: This option is experimental and may be removed in later versions of the Agent. 
""" - return conf.get_switch("Debug.EnableGAVersioning", True) + return conf.get_switch("Debug.EnableGAVersioning", False) def get_firewall_rules_log_period(conf=__conf__): From b498fd54c2ac49c384b816258c79914297f95f7e Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Fri, 9 Feb 2024 15:54:54 -0800 Subject: [PATCH 153/240] Skip test on alma and rocky until we investigate (#3047) --- tests_e2e/tests/publish_hostname/publish_hostname.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests_e2e/tests/publish_hostname/publish_hostname.py b/tests_e2e/tests/publish_hostname/publish_hostname.py index cc74a596b6..45a7be85f5 100644 --- a/tests_e2e/tests/publish_hostname/publish_hostname.py +++ b/tests_e2e/tests/publish_hostname/publish_hostname.py @@ -121,9 +121,11 @@ def retry_ssh_if_connection_reset(self, command: str, use_sudo=False): sleep(30) def run(self): - # TODO: Investigate why hostname is not being published on Ubuntu as expected - if "ubuntu" in self._ssh_client.run_command("get_distro.py").lower(): - raise TestSkipped("Known issue with hostname publishing on ubuntu. Will skip test until we continue " + # TODO: Investigate why hostname is not being published on Ubuntu, alma, and rocky as expected + distros_with_known_publishing_issues = ["ubuntu", "alma", "rocky"] + distro = self._ssh_client.run_command("get_distro.py").lower() + if any(d in distro for d in distros_with_known_publishing_issues): + raise TestSkipped("Known issue with hostname publishing on this distro. Will skip test until we continue " "investigation.") # Add password to VM and log. 
This allows us to debug with serial console if necessary From 49de9b3fc460a2e89d05cde07877feba253f85e3 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Tue, 13 Feb 2024 16:06:53 -0800 Subject: [PATCH 154/240] Ext_sequencing scenario should check agent log for extension enable order (#3049) * Ext_sequencing scenario should check agent log for extension enable order * Format timestamp for ignore errors before timestamp * If test is skipped, scenario start will be datetime min * Remove unnecessary log * Make none check explicit --- .../tests/ext_sequencing/ext_sequencing.py | 14 ++-- .../ext_sequencing-get_ext_enable_time.py | 76 ++++++------------- 2 files changed, 33 insertions(+), 57 deletions(-) diff --git a/tests_e2e/tests/ext_sequencing/ext_sequencing.py b/tests_e2e/tests/ext_sequencing/ext_sequencing.py index e50b0d6abc..528502c94a 100644 --- a/tests_e2e/tests/ext_sequencing/ext_sequencing.py +++ b/tests_e2e/tests/ext_sequencing/ext_sequencing.py @@ -22,6 +22,7 @@ # validates they are enabled in order of dependencies. 
# import copy +import random import re import uuid from datetime import datetime @@ -95,10 +96,8 @@ def _get_sorted_extension_names(extensions: List[VirtualMachineScaleSetVMExtensi for ext in extensions: # Only check extensions which succeeded provisioning if "succeeded" in ext.statuses_summary[0].code: - enabled_time = ssh_client.run_command(f"ext_sequencing-get_ext_enable_time.py --ext '{extension_full_names[ext.name]}'", use_sudo=True) - formatted_time = datetime.strptime(enabled_time.strip(), u'%Y-%m-%dT%H:%M:%SZ') - if formatted_time < test_case_start: - fail("Extension {0} was not enabled".format(extension_full_names[ext.name])) + enabled_time = ssh_client.run_command(f"ext_sequencing-get_ext_enable_time.py --ext '{extension_full_names[ext.name]}' --after_time '{test_case_start}'", use_sudo=True) + formatted_time = datetime.strptime(enabled_time.strip(), u'%Y-%m-%dT%H:%M:%S.%fZ') enabled_times.append( { "name": ext.name, @@ -184,7 +183,7 @@ def run(self): } for case in self._test_cases: - test_case_start = datetime.now() + test_case_start = random.choice(list(ssh_clients.values())).run_command("date '+%Y-%m-%d %T'").rstrip() if self._scenario_start == datetime.min: self._scenario_start = test_case_start @@ -201,6 +200,7 @@ def run(self): # test out log.info("") log.info("Test case: {0}".format(case.__name__.replace('_', ' '))) + log.info("Test case start time: {0}".format(test_case_start)) ext_template = copy.deepcopy(base_extension_template) ext_template['resources'][0]['properties']['virtualMachineProfile']['extensionProfile'][ 'extensions'] = extensions @@ -255,7 +255,9 @@ def run(self): def get_ignore_errors_before_timestamp(self) -> datetime: # Ignore errors in the agent log before the first test case starts - return self._scenario_start + if self._scenario_start == datetime.min: + return self._scenario_start + return datetime.strptime(self._scenario_start, u'%Y-%m-%d %H:%M:%S') def get_ignore_error_rules(self) -> List[Dict[str, Any]]: ignore_rules = [ 
diff --git a/tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py b/tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py index f65da676be..32bf7bd8bc 100755 --- a/tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py +++ b/tests_e2e/tests/scripts/ext_sequencing-get_ext_enable_time.py @@ -20,68 +20,42 @@ # import argparse -import json -import os +import re import sys +from datetime import datetime -from pathlib import Path +from tests_e2e.tests.lib.agent_log import AgentLog def main(): """ - Returns the timestamp of when the provided extension was enabled + Searches the agent log after the provided timestamp to determine when the agent enabled the provided extension. """ parser = argparse.ArgumentParser() parser.add_argument("--ext", dest='ext', required=True) + parser.add_argument("--after_time", dest='after_time', required=True) args, _ = parser.parse_known_args() - # Extension enabled time is in extension extension status file - ext_dirs = [item for item in os.listdir(Path('/var/lib/waagent')) if item.startswith(args.ext)] - if not ext_dirs: - print("Extension {0} directory does not exist".format(args.ext), file=sys.stderr) - sys.exit(1) - ext_status_path = Path('/var/lib/waagent/' + ext_dirs[0] + '/status') - ext_status_files = os.listdir(ext_status_path) - ext_status_files.sort() - if not ext_status_files: - # Extension did not report a status - print("Extension {0} did not report a status".format(args.ext), file=sys.stderr) - sys.exit(1) - latest_ext_status_path = os.path.join(ext_status_path, ext_status_files[-1]) - ext_status_file = open(latest_ext_status_path, 'r') - ext_status = json.loads(ext_status_file.read()) - - # Example status file - # [ - # { - # "status": { - # "status": "success", - # "formattedMessage": { - # "lang": "en-US", - # "message": "Enable succeeded" - # }, - # "operation": "Enable", - # "code": "0", - # "name": "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent" - # }, - # "version": "1.0", - # "timestampUTC": 
"2023-12-12T23:14:45Z" - # } - # ] - msg = "" - if len(ext_status) == 0 or not ext_status[0]['status']: - msg = "Extension {0} did not report a status".format(args.ext) - elif not ext_status[0]['status']['operation'] or ext_status[0]['status']['operation'] != 'Enable': - msg = "Extension {0} did not report a status for enable operation".format(args.ext) - elif ext_status[0]['status']['status'] != 'success': - msg = "Extension {0} did not report success for the enable operation".format(args.ext) - elif not ext_status[0]['timestampUTC']: - msg = "Extension {0} did not report the time the enable operation succeeded".format(args.ext) - else: - print(ext_status[0]['timestampUTC']) - sys.exit(0) - - print(msg, file=sys.stderr) + # Only search the agent log after the provided timestamp: args.after_time + after_time = datetime.strptime(args.after_time, u'%Y-%m-%d %H:%M:%S') + # Agent logs for extension enable: 2024-02-09T09:29:08.943529Z INFO ExtHandler [Microsoft.Azure.Extensions.CustomScript-2.1.10] Enable extension: [bin/custom-script-shim enable] + enable_log_regex = r"\[{0}-[.\d]+\] Enable extension: .*".format(args.ext) + + agent_log = AgentLog() + try: + for agent_record in agent_log.read(): + if agent_record.timestamp >= after_time: + # The agent_record prefix for enable logs is the extension name, for example: [Microsoft.Azure.Extensions.CustomScript-2.1.10] + if agent_record.prefix is not None: + ext_enabled = re.match(enable_log_regex, " ".join([agent_record.prefix, agent_record.message])) + + if ext_enabled is not None: + print(agent_record.when) + sys.exit(0) + except IOError as e: + print("Error when parsing agent log: {0}".format(str(e))) + + print("Extension {0} was not enabled after {1}".format(args.ext, args.after_time), file=sys.stderr) sys.exit(1) From 540e0670ff85b67007c19a3fd7e5c962f439f9c5 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Fri, 16 Feb 2024 09:37:55 -0800 Subject: [PATCH 155/240] 
update canary region (#3056) --- tests_e2e/test_suites/agent_publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_e2e/test_suites/agent_publish.yml b/tests_e2e/test_suites/agent_publish.yml index 3ab29c6a0b..8b11eb4e7e 100644 --- a/tests_e2e/test_suites/agent_publish.yml +++ b/tests_e2e/test_suites/agent_publish.yml @@ -7,6 +7,6 @@ tests: images: - "random(endorsed, 10)" - "random(endorsed-arm64, 2)" -locations: "AzureCloud:centraluseuap" +locations: "AzureCloud:eastus2euap" owns_vm: true install_test_agent: false \ No newline at end of file From 0163223620ede1a4e58b5957a0dcc6d7e48089e9 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Fri, 16 Feb 2024 10:10:22 -0800 Subject: [PATCH 156/240] Add Python 3.10 to the pylint matrix (#3057) Co-authored-by: narrieta --- .github/workflows/ci_pr.yml | 25 +++---- azurelinuxagent/agent.py | 4 +- azurelinuxagent/common/conf.py | 2 +- azurelinuxagent/common/event.py | 4 +- azurelinuxagent/common/logger.py | 2 +- azurelinuxagent/common/osutil/factory.py | 2 +- azurelinuxagent/common/osutil/freebsd.py | 2 +- azurelinuxagent/common/osutil/gaia.py | 2 +- azurelinuxagent/common/osutil/openwrt.py | 15 +++-- .../common/protocol/healthservice.py | 2 +- azurelinuxagent/common/protocol/wire.py | 8 +-- azurelinuxagent/common/singletonperthread.py | 3 +- azurelinuxagent/common/utils/cryptutil.py | 2 +- .../common/utils/flexible_version.py | 2 +- azurelinuxagent/ga/cgroupconfigurator.py | 65 +------------------ azurelinuxagent/ga/collect_logs.py | 8 +-- .../ga/collect_telemetry_events.py | 8 +-- azurelinuxagent/ga/env.py | 4 +- azurelinuxagent/ga/exthandlers.py | 2 +- azurelinuxagent/ga/monitor.py | 4 +- azurelinuxagent/ga/periodic_operation.py | 2 +- azurelinuxagent/ga/send_telemetry_events.py | 4 +- azurelinuxagent/ga/update.py | 2 +- azurelinuxagent/pa/deprovision/factory.py | 2 +- azurelinuxagent/pa/rdma/factory.py | 3 +- azurelinuxagent/pa/rdma/suse.py | 3 +- ci/3.10.pylintrc | 40 ++++++++++++ 
.../agent_ext_workflow/extension_workflow.py | 2 +- tests_e2e/tests/agent_update/rsm_update.py | 2 +- .../tests/ext_sequencing/ext_sequencing.py | 6 +- tests_e2e/tests/lib/retry.py | 6 +- .../publish_hostname/publish_hostname.py | 2 +- .../scripts/agent_cpu_quota-start_service.py | 8 +-- ...gent_firewall-verify_all_firewall_rules.py | 3 +- ...firewall-verify_firewalld_rules_readded.py | 3 +- 35 files changed, 120 insertions(+), 134 deletions(-) create mode 100644 ci/3.10.pylintrc diff --git a/.github/workflows/ci_pr.yml b/.github/workflows/ci_pr.yml index 84b3ab68e7..153d5392fd 100644 --- a/.github/workflows/ci_pr.yml +++ b/.github/workflows/ci_pr.yml @@ -14,8 +14,8 @@ jobs: fail-fast: false matrix: include: - - python-version: 2.6 - - python-version: 3.4 + - python-version: "2.6" + - python-version: "3.4" name: "Python ${{ matrix.python-version }} Unit Tests" runs-on: ubuntu-20.04 @@ -42,7 +42,7 @@ jobs: - name: Test with nosetests run: | - if [[ ${{ matrix.python-version }} == 2.6 ]]; then + if [[ ${{ matrix.python-version }} == "2.6" ]]; then source /home/waagent/virtualenv/python2.6.9/bin/activate else source /home/waagent/virtualenv/python3.4.8/bin/activate @@ -86,22 +86,25 @@ jobs: fail-fast: false matrix: include: - - python-version: 3.5 + - python-version: "3.5" PYLINTOPTS: "--rcfile=ci/3.6.pylintrc --ignore=tests_e2e,makepkg.py" - - python-version: 3.6 + - python-version: "3.6" PYLINTOPTS: "--rcfile=ci/3.6.pylintrc --ignore=tests_e2e" - - python-version: 3.7 + - python-version: "3.7" PYLINTOPTS: "--rcfile=ci/3.6.pylintrc --ignore=tests_e2e" - - python-version: 3.8 + - python-version: "3.8" PYLINTOPTS: "--rcfile=ci/3.6.pylintrc --ignore=tests_e2e" - - python-version: 3.9 + - python-version: "3.9" PYLINTOPTS: "--rcfile=ci/3.6.pylintrc" additional-nose-opts: "--with-coverage --cover-erase --cover-inclusive --cover-branches --cover-package=azurelinuxagent" + - python-version: "3.10" + PYLINTOPTS: "--rcfile=ci/3.10.pylintrc --ignore=tests" + name: "Python ${{ 
matrix.python-version }} Unit Tests" runs-on: ubuntu-20.04 @@ -133,13 +136,13 @@ jobs: pylint $PYLINTOPTS --jobs=0 $PYLINTFILES - name: Test with nosetests - if: success() || (failure() && steps.install-dependencies.outcome == 'success') + if: matrix.python-version != '3.10' && (success() || (failure() && steps.install-dependencies.outcome == 'success')) run: | ./ci/nosetests.sh exit $? - name: Compile Coverage - if: matrix.python-version == 3.9 + if: matrix.python-version == '3.9' run: | echo looking for coverage files : ls -alh | grep -i coverage @@ -148,7 +151,7 @@ jobs: sudo env "PATH=$PATH" coverage report - name: Upload Coverage - if: matrix.python-version == 3.9 + if: matrix.python-version == '3.9' uses: codecov/codecov-action@v3 with: file: ./coverage.xml diff --git a/azurelinuxagent/agent.py b/azurelinuxagent/agent.py index 2811e215ed..6d297976d4 100644 --- a/azurelinuxagent/agent.py +++ b/azurelinuxagent/agent.py @@ -131,7 +131,7 @@ def daemon(self): """ set_daemon_version(AGENT_VERSION) logger.set_prefix("Daemon") - threading.current_thread().setName("Daemon") + threading.current_thread().setName("Daemon") # pylint: disable=deprecated-method child_args = None \ if self.conf_file_path is None \ else "-configuration-path:{0}".format(self.conf_file_path) @@ -171,7 +171,7 @@ def run_exthandlers(self, debug=False): Run the update and extension handler """ logger.set_prefix("ExtHandler") - threading.current_thread().setName("ExtHandler") + threading.current_thread().setName("ExtHandler") # pylint: disable=deprecated-method # # Agents < 2.2.53 used to echo the log to the console. 
Since the extension handler could have been started by diff --git a/azurelinuxagent/common/conf.py b/azurelinuxagent/common/conf.py index 6662285317..1470bb99f2 100644 --- a/azurelinuxagent/common/conf.py +++ b/azurelinuxagent/common/conf.py @@ -35,7 +35,7 @@ class ConfigurationProvider(object): """ def __init__(self): - self.values = dict() + self.values = {} def load(self, content): if not content: diff --git a/azurelinuxagent/common/event.py b/azurelinuxagent/common/event.py index 435a95e270..d1d53a7237 100644 --- a/azurelinuxagent/common/event.py +++ b/azurelinuxagent/common/event.py @@ -282,7 +282,7 @@ def _encode_message(op, message): def _log_event(name, op, message, duration, is_success=True): - global _EVENT_MSG # pylint: disable=W0603 + global _EVENT_MSG # pylint: disable=W0602, W0603 if not is_success: logger.error(_EVENT_MSG, name, op, message, duration) @@ -604,7 +604,7 @@ def add_common_event_parameters(self, event, event_timestamp): TelemetryEventParam(CommonTelemetryEventSchema.OpcodeName, event_timestamp.strftime(logger.Logger.LogTimeFormatInUTC)), TelemetryEventParam(CommonTelemetryEventSchema.EventTid, threading.current_thread().ident), TelemetryEventParam(CommonTelemetryEventSchema.EventPid, os.getpid()), - TelemetryEventParam(CommonTelemetryEventSchema.TaskName, threading.current_thread().getName())] + TelemetryEventParam(CommonTelemetryEventSchema.TaskName, threading.current_thread().getName())] # pylint: disable=deprecated-method if event.eventId == TELEMETRY_EVENT_EVENT_ID and event.providerId == TELEMETRY_EVENT_PROVIDER_ID: # Currently only the GuestAgentExtensionEvents has these columns, the other tables dont have them so skipping diff --git a/azurelinuxagent/common/logger.py b/azurelinuxagent/common/logger.py index 3d0dc617d3..4246d9f6ec 100644 --- a/azurelinuxagent/common/logger.py +++ b/azurelinuxagent/common/logger.py @@ -137,7 +137,7 @@ def write_log(log_appender): # pylint: disable=W0612 msg = msg_format time = 
datetime.utcnow().strftime(Logger.LogTimeFormatInUTC) level_str = LogLevel.STRINGS[level] - thread_name = currentThread().getName() + thread_name = currentThread().getName() # pylint: disable=deprecated-method if self.prefix is not None: log_item = u"{0} {1} {2} {3} {4}\n".format(time, level_str, thread_name, self.prefix, msg) else: diff --git a/azurelinuxagent/common/osutil/factory.py b/azurelinuxagent/common/osutil/factory.py index e2f15afb56..c9bbd2cc5f 100644 --- a/azurelinuxagent/common/osutil/factory.py +++ b/azurelinuxagent/common/osutil/factory.py @@ -16,7 +16,7 @@ # -from distutils.version import LooseVersion as Version # pylint: disable=no-name-in-module, import-error +from distutils.version import LooseVersion as Version # pylint: disable=no-name-in-module, import-error, disable=deprecated-module import azurelinuxagent.common.logger as logger from azurelinuxagent.common.version import DISTRO_NAME, DISTRO_CODE_NAME, DISTRO_VERSION, DISTRO_FULL_NAME diff --git a/azurelinuxagent/common/osutil/freebsd.py b/azurelinuxagent/common/osutil/freebsd.py index f8ee6db81b..ea3a83e1a6 100644 --- a/azurelinuxagent/common/osutil/freebsd.py +++ b/azurelinuxagent/common/osutil/freebsd.py @@ -150,7 +150,7 @@ def _get_netstat_rn_ipv4_routes(): route_header_line = output_lines.index("Internet:") + 1 # Parse the file structure and left justify the routes route_start_line = route_header_line + 1 - route_line_length = max([len(line) for line in output_lines[route_header_line:]]) + route_line_length = max(len(line) for line in output_lines[route_header_line:]) netstat_route_list = [line.ljust(route_line_length) for line in output_lines[route_start_line:]] # Parse the headers _route_headers = output_lines[route_header_line].split() diff --git a/azurelinuxagent/common/osutil/gaia.py b/azurelinuxagent/common/osutil/gaia.py index 849d5d1fa1..8a0f04b0d0 100644 --- a/azurelinuxagent/common/osutil/gaia.py +++ b/azurelinuxagent/common/osutil/gaia.py @@ -179,7 +179,7 @@ def 
_address_to_string(self, addr): return socket.inet_ntoa(struct.pack("!I", addr)) def _get_prefix(self, mask): - return str(sum([bin(int(x)).count('1') for x in mask.split('.')])) + return str(sum(bin(int(x)).count('1') for x in mask.split('.'))) def route_add(self, net, mask, gateway): logger.info('route_add {0} {1} {2}', net, mask, gateway) diff --git a/azurelinuxagent/common/osutil/openwrt.py b/azurelinuxagent/common/osutil/openwrt.py index d99f0321c5..81e352eb2e 100644 --- a/azurelinuxagent/common/osutil/openwrt.py +++ b/azurelinuxagent/common/osutil/openwrt.py @@ -25,15 +25,16 @@ from azurelinuxagent.common.osutil.default import DefaultOSUtil from azurelinuxagent.common.utils.networkutil import NetworkInterfaceCard -class OpenWRTOSUtil(DefaultOSUtil): +class OpenWRTOSUtil(DefaultOSUtil): def __init__(self): super(OpenWRTOSUtil, self).__init__() self.agent_conf_file_path = '/etc/waagent.conf' self.dhclient_name = 'udhcpc' - self.ip_command_output = re.compile('^\d+:\s+(\w+):\s+(.*)$') # pylint: disable=W1401 self.jit_enabled = True - + + _ip_command_output = re.compile(r'^\d+:\s+(\w+):\s+(.*)$') + def eject_dvd(self, chk_err=True): logger.warn('eject is not supported on OpenWRT') @@ -79,18 +80,18 @@ def get_nic_state(self, as_string=False): return {} for entry in output.splitlines(): - result = self.ip_command_output.match(entry) + result = OpenWRTOSUtil._ip_command_output.match(entry) if result: name = result.group(1) state[name] = NetworkInterfaceCard(name, result.group(2)) - self._update_nic_state(state, "ip -o -f inet address", NetworkInterfaceCard.add_ipv4, "an IPv4 address") self._update_nic_state(state, "ip -o -f inet6 address", NetworkInterfaceCard.add_ipv6, "an IPv6 address") return state - def _update_nic_state(self, state, ip_command, handler, description): + @staticmethod + def _update_nic_state(state, ip_command, handler, description): """ Update the state of NICs based on the output of a specified ip subcommand. 
@@ -104,7 +105,7 @@ def _update_nic_state(self, state, ip_command, handler, description): return for entry in output.splitlines(): - result = self.ip_command_output.match(entry) + result = OpenWRTOSUtil._ip_command_output.match(entry) if result: interface_name = result.group(1) if interface_name in state: diff --git a/azurelinuxagent/common/protocol/healthservice.py b/azurelinuxagent/common/protocol/healthservice.py index 3abe7299b9..e227cb9279 100644 --- a/azurelinuxagent/common/protocol/healthservice.py +++ b/azurelinuxagent/common/protocol/healthservice.py @@ -73,7 +73,7 @@ def __init__(self, endpoint): self.api = HealthService.API self.version = HealthService.VERSION self.source = HealthService.OBSERVER_NAME - self.observations = list() + self.observations = [] @property def as_json(self): diff --git a/azurelinuxagent/common/protocol/wire.py b/azurelinuxagent/common/protocol/wire.py index c93624cb1b..6ec5b1e23c 100644 --- a/azurelinuxagent/common/protocol/wire.py +++ b/azurelinuxagent/common/protocol/wire.py @@ -682,8 +682,8 @@ def _try_expand_zip_package(package_type, target_file, target_directory): if os.path.exists(target_directory): try: shutil.rmtree(target_directory) - except Exception as exception: - logger.warn("Cannot delete {0}: {1}", target_directory, ustr(exception)) + except Exception as rmtree_exception: + logger.warn("Cannot delete {0}: {1}", target_directory, ustr(rmtree_exception)) raise finally: try: @@ -886,11 +886,11 @@ def _call_hostplugin_with_container_check(self, host_func): message=msg, log_event=True) return ret - except (ResourceGoneError, InvalidContainerError) as error: + except (ResourceGoneError, InvalidContainerError) as host_error: msg = "[PERIODIC] Request failed using the host plugin channel after goal state refresh. " \ "ContainerId changed from {0} to {1}, role config file changed from {2} to {3}. 
" \ "Exception type: {4}.".format(old_container_id, new_container_id, old_role_config_name, - new_role_config_name, type(error).__name__) + new_role_config_name, type(host_error).__name__) add_periodic(delta=logger.EVERY_SIX_HOURS, name=AGENT_NAME, version=CURRENT_VERSION, diff --git a/azurelinuxagent/common/singletonperthread.py b/azurelinuxagent/common/singletonperthread.py index 0d9139012e..ee85dfd15e 100644 --- a/azurelinuxagent/common/singletonperthread.py +++ b/azurelinuxagent/common/singletonperthread.py @@ -8,7 +8,8 @@ class _SingletonPerThreadMetaClass(type): def __call__(cls, *args, **kwargs): with cls._lock: - obj_name = "%s__%s" % (cls.__name__, currentThread().getName()) # Object Name = className__threadName + # Object Name = className__threadName + obj_name = "%s__%s" % (cls.__name__, currentThread().getName()) # pylint: disable=deprecated-method if obj_name not in cls._instances: cls._instances[obj_name] = super(_SingletonPerThreadMetaClass, cls).__call__(*args, **kwargs) return cls._instances[obj_name] diff --git a/azurelinuxagent/common/utils/cryptutil.py b/azurelinuxagent/common/utils/cryptutil.py index b7c9422747..bed829ae67 100644 --- a/azurelinuxagent/common/utils/cryptutil.py +++ b/azurelinuxagent/common/utils/cryptutil.py @@ -132,7 +132,7 @@ def asn1_to_ssh(self, pubkey): keydata_base64 = base64.b64encode(bytebuffer(keydata)) return ustr(b"ssh-rsa " + keydata_base64 + b"\n", encoding='utf-8') - except ImportError as e: + except ImportError: raise CryptError("Failed to load pyasn1.codec.der") def num_to_bytes(self, num): diff --git a/azurelinuxagent/common/utils/flexible_version.py b/azurelinuxagent/common/utils/flexible_version.py index c616da1ca3..ac3ac9347b 100644 --- a/azurelinuxagent/common/utils/flexible_version.py +++ b/azurelinuxagent/common/utils/flexible_version.py @@ -17,7 +17,7 @@ # Requires Python 2.6+ and Openssl 1.0+ # -from distutils import version # pylint: disable=no-name-in-module +from distutils import version # pylint: 
disable=no-name-in-module, disable=deprecated-module import re diff --git a/azurelinuxagent/ga/cgroupconfigurator.py b/azurelinuxagent/ga/cgroupconfigurator.py index e52fc15d0d..fa4dbe2027 100644 --- a/azurelinuxagent/ga/cgroupconfigurator.py +++ b/azurelinuxagent/ga/cgroupconfigurator.py @@ -28,7 +28,7 @@ from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.common.exception import ExtensionErrorCodes, CGroupsException, AgentMemoryExceededException from azurelinuxagent.common.future import ustr -from azurelinuxagent.common.osutil import get_osutil, systemd +from azurelinuxagent.common.osutil import systemd from azurelinuxagent.common.version import get_distro from azurelinuxagent.common.utils import shellutil, fileutil from azurelinuxagent.ga.extensionprocessutil import handle_process_completion @@ -184,10 +184,6 @@ def initialize(self): _log_cgroup_info("systemd version: {0}", systemd.get_version()) - # This is temporarily disabled while we analyze telemetry. Likely it will be removed. 
- # self.__collect_azure_unit_telemetry() - # self.__collect_agent_unit_files_telemetry() - if not self.__check_no_legacy_cgroups(): return @@ -224,65 +220,6 @@ def initialize(self): finally: self._initialized = True - @staticmethod - def __collect_azure_unit_telemetry(): - azure_units = [] - - try: - units = shellutil.run_command(['systemctl', 'list-units', 'azure*', '-all']) - for line in units.split('\n'): - match = re.match(r'\s?(azure[^\s]*)\s?', line, re.IGNORECASE) - if match is not None: - azure_units.append((match.group(1), line)) - except shellutil.CommandError as command_error: - _log_cgroup_warning("Failed to list systemd units: {0}", ustr(command_error)) - - for unit_name, unit_description in azure_units: - unit_slice = "Unknown" - try: - unit_slice = systemd.get_unit_property(unit_name, "Slice") - except Exception as exception: - _log_cgroup_warning("Failed to query Slice for {0}: {1}", unit_name, ustr(exception)) - - _log_cgroup_info("Found an Azure unit under slice {0}: {1}", unit_slice, unit_description) - - if len(azure_units) == 0: - try: - cgroups = shellutil.run_command('systemd-cgls') - for line in cgroups.split('\n'): - if re.match(r'[^\x00-\xff]+azure\.slice\s*', line, re.UNICODE): - logger.info(ustr("Found a cgroup for azure.slice\n{0}").format(cgroups)) - # Don't add the output of systemd-cgls to the telemetry, since currently it does not support Unicode - add_event(op=WALAEventOperation.CGroupsInfo, message="Found a cgroup for azure.slice") - except shellutil.CommandError as command_error: - _log_cgroup_warning("Failed to list systemd units: {0}", ustr(command_error)) - - @staticmethod - def __collect_agent_unit_files_telemetry(): - agent_unit_files = [] - agent_service_name = get_osutil().get_service_name() - try: - fragment_path = systemd.get_unit_property(agent_service_name, "FragmentPath") - if fragment_path != systemd.get_agent_unit_file(): - agent_unit_files.append(fragment_path) - except Exception as exception: - 
_log_cgroup_warning("Failed to query the agent's FragmentPath: {0}", ustr(exception)) - - try: - drop_in_paths = systemd.get_unit_property(agent_service_name, "DropInPaths") - for path in drop_in_paths.split(): - agent_unit_files.append(path) - except Exception as exception: - _log_cgroup_warning("Failed to query the agent's DropInPaths: {0}", ustr(exception)) - - for unit_file in agent_unit_files: - try: - with open(unit_file, "r") as file_object: - _log_cgroup_info("Found a custom unit file for the agent: {0}\n{1}", unit_file, - file_object.read()) - except Exception as exception: - _log_cgroup_warning("Can't read {0}: {1}", unit_file, ustr(exception)) - def __check_no_legacy_cgroups(self): """ Older versions of the daemon (2.2.31-2.2.40) wrote their PID to /sys/fs/cgroup/{cpu,memory}/WALinuxAgent/WALinuxAgent. When running diff --git a/azurelinuxagent/ga/collect_logs.py b/azurelinuxagent/ga/collect_logs.py index 4987d865e9..b4b9cb4f63 100644 --- a/azurelinuxagent/ga/collect_logs.py +++ b/azurelinuxagent/ga/collect_logs.py @@ -116,8 +116,8 @@ def is_alive(self): def start(self): self.event_thread = threading.Thread(target=self.daemon) - self.event_thread.setDaemon(True) - self.event_thread.setName(self.get_thread_name()) + self.event_thread.setDaemon(True) # pylint: disable=deprecated-method + self.event_thread.setName(self.get_thread_name()) # pylint: disable=deprecated-method self.event_thread.start() def join(self): @@ -303,8 +303,8 @@ def is_alive(self): def start(self): self.event_thread = threading.Thread(target=self.daemon) - self.event_thread.setDaemon(True) - self.event_thread.setName(self.get_thread_name()) + self.event_thread.setDaemon(True) # pylint: disable=deprecated-method + self.event_thread.setName(self.get_thread_name()) # pylint: disable=deprecated-method self.event_thread.start() def daemon(self): diff --git a/azurelinuxagent/ga/collect_telemetry_events.py b/azurelinuxagent/ga/collect_telemetry_events.py index e0144a6399..9b0abcf0fb 100644 --- 
a/azurelinuxagent/ga/collect_telemetry_events.py +++ b/azurelinuxagent/ga/collect_telemetry_events.py @@ -499,7 +499,7 @@ def _trim_legacy_extension_event_parameters(event): :param event: Extension event to trim. :return: Trimmed extension event; containing only extension-specific parameters. """ - params_to_keep = dict().fromkeys([ + params_to_keep = dict.fromkeys([ GuestAgentExtensionEventsSchema.Name, GuestAgentExtensionEventsSchema.Version, GuestAgentExtensionEventsSchema.Operation, @@ -542,8 +542,8 @@ def is_alive(self): def start(self): self.thread = threading.Thread(target=self.daemon) - self.thread.setDaemon(True) - self.thread.setName(CollectTelemetryEventsHandler.get_thread_name()) + self.thread.setDaemon(True) # pylint: disable=deprecated-method + self.thread.setName(CollectTelemetryEventsHandler.get_thread_name()) # pylint: disable=deprecated-method self.thread.start() def stop(self): @@ -583,4 +583,4 @@ def daemon(self): @staticmethod def add_common_params_to_telemetry_event(event, event_time): reporter = get_event_logger() - reporter.add_common_event_parameters(event, event_time) \ No newline at end of file + reporter.add_common_event_parameters(event, event_time) diff --git a/azurelinuxagent/ga/env.py b/azurelinuxagent/ga/env.py index 0e73e7d3ec..6480683e25 100644 --- a/azurelinuxagent/ga/env.py +++ b/azurelinuxagent/ga/env.py @@ -213,8 +213,8 @@ def is_alive(self): def start(self): self.env_thread = threading.Thread(target=self.daemon) - self.env_thread.setDaemon(True) - self.env_thread.setName(self.get_thread_name()) + self.env_thread.setDaemon(True) # pylint: disable=deprecated-method + self.env_thread.setName(self.get_thread_name()) # pylint: disable=deprecated-method self.env_thread.start() def daemon(self): diff --git a/azurelinuxagent/ga/exthandlers.py b/azurelinuxagent/ga/exthandlers.py index fcb14d22b9..403579dfe1 100644 --- a/azurelinuxagent/ga/exthandlers.py +++ b/azurelinuxagent/ga/exthandlers.py @@ -27,7 +27,7 @@ import tempfile import 
time import zipfile -from distutils.version import LooseVersion +from distutils.version import LooseVersion # pylint: disable=deprecated-module from collections import defaultdict from functools import partial diff --git a/azurelinuxagent/ga/monitor.py b/azurelinuxagent/ga/monitor.py index 1c123d70e3..240c502353 100644 --- a/azurelinuxagent/ga/monitor.py +++ b/azurelinuxagent/ga/monitor.py @@ -281,8 +281,8 @@ def is_alive(self): def start(self): self.monitor_thread = threading.Thread(target=self.daemon) - self.monitor_thread.setDaemon(True) - self.monitor_thread.setName(self.get_thread_name()) + self.monitor_thread.setDaemon(True) # pylint: disable=deprecated-method + self.monitor_thread.setName(self.get_thread_name()) # pylint: disable=deprecated-method self.monitor_thread.start() def daemon(self): diff --git a/azurelinuxagent/ga/periodic_operation.py b/azurelinuxagent/ga/periodic_operation.py index 35bc6e6810..1414239c86 100644 --- a/azurelinuxagent/ga/periodic_operation.py +++ b/azurelinuxagent/ga/periodic_operation.py @@ -70,7 +70,7 @@ def sleep_until_next_operation(operations): Takes a list of operations, finds the operation that should be executed next (that with the closest next_run_time) and sleeps until it is time to execute that operation. 
""" - next_operation_time = min([op.next_run_time() for op in operations]) + next_operation_time = min(op.next_run_time() for op in operations) sleep_timedelta = next_operation_time - datetime.datetime.utcnow() # timedelta.total_seconds() is not available on Python 2.6, do the computation manually diff --git a/azurelinuxagent/ga/send_telemetry_events.py b/azurelinuxagent/ga/send_telemetry_events.py index 2923a43b13..a87308117c 100644 --- a/azurelinuxagent/ga/send_telemetry_events.py +++ b/azurelinuxagent/ga/send_telemetry_events.py @@ -70,8 +70,8 @@ def is_alive(self): def start(self): self._thread = threading.Thread(target=self._process_telemetry_thread) - self._thread.setDaemon(True) - self._thread.setName(self.get_thread_name()) + self._thread.setDaemon(True) # pylint: disable=deprecated-method + self._thread.setName(self.get_thread_name()) # pylint: disable=deprecated-method self._thread.start() def stop(self): diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index a8d34f7c40..fa554b0d85 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -411,7 +411,7 @@ def run(self, debug=False): logger.warn(textutil.format_exception(error)) sys.exit(1) # additional return here because sys.exit is mocked in unit tests - return + return # pylint: disable=unreachable self._shutdown() sys.exit(0) diff --git a/azurelinuxagent/pa/deprovision/factory.py b/azurelinuxagent/pa/deprovision/factory.py index 2caedc8daa..24b2e5b897 100644 --- a/azurelinuxagent/pa/deprovision/factory.py +++ b/azurelinuxagent/pa/deprovision/factory.py @@ -15,7 +15,7 @@ # Requires Python 2.6+ and Openssl 1.0+ # -from distutils.version import LooseVersion as Version # pylint: disable=no-name-in-module, import-error +from distutils.version import LooseVersion as Version # pylint: disable=no-name-in-module, import-error, deprecated-module from azurelinuxagent.common.version import DISTRO_NAME, DISTRO_VERSION, DISTRO_FULL_NAME from .arch import 
ArchDeprovisionHandler diff --git a/azurelinuxagent/pa/rdma/factory.py b/azurelinuxagent/pa/rdma/factory.py index ec4a8bc48b..3e8d152c45 100644 --- a/azurelinuxagent/pa/rdma/factory.py +++ b/azurelinuxagent/pa/rdma/factory.py @@ -15,8 +15,7 @@ # Requires Python 2.6+ and Openssl 1.0+ # -from distutils.version import LooseVersion as Version # pylint: disable=no-name-in-module, import-error - +from distutils.version import LooseVersion as Version # pylint: disable=no-name-in-module, import-error, deprecated-module import azurelinuxagent.common.logger as logger from azurelinuxagent.pa.rdma.rdma import RDMAHandler from azurelinuxagent.common.version import DISTRO_FULL_NAME, DISTRO_VERSION diff --git a/azurelinuxagent/pa/rdma/suse.py b/azurelinuxagent/pa/rdma/suse.py index bcf971482e..e4541b600b 100644 --- a/azurelinuxagent/pa/rdma/suse.py +++ b/azurelinuxagent/pa/rdma/suse.py @@ -24,7 +24,8 @@ from azurelinuxagent.pa.rdma.rdma import RDMAHandler from azurelinuxagent.common.version import DISTRO_VERSION -from distutils.version import LooseVersion as Version +from distutils.version import LooseVersion as Version # pylint: disable=deprecated-module + class SUSERDMAHandler(RDMAHandler): diff --git a/ci/3.10.pylintrc b/ci/3.10.pylintrc new file mode 100644 index 0000000000..43b8172c28 --- /dev/null +++ b/ci/3.10.pylintrc @@ -0,0 +1,40 @@ +[MESSAGES CONTROL] + +disable=C, # (C) convention, for programming standard violation + broad-except, # W0703: *Catching too general exception %s* + broad-exception-raised, # W0719: Raising too general exception: Exception + consider-using-dict-comprehension, # R1717: *Consider using a dictionary comprehension* + consider-using-from-import, # R0402: Use 'from foo import bar' instead + consider-using-in, # R1714: *Consider merging these comparisons with "in" to %r* + consider-using-set-comprehension, # R1718: *Consider using a set comprehension* + consider-using-with, # R1732: *Emitted if a resource-allocating assignment or call may be 
replaced by a 'with' block* + duplicate-code, # R0801: *Similar lines in %s files* + fixme, # Used when a warning note as FIXME or TODO is detected + logging-format-interpolation, # W1202: Use lazy % formatting in logging functions + logging-fstring-interpolation, # W1203: Use lazy % or .format() formatting in logging functions + no-else-break, # R1723: *Unnecessary "%s" after "break"* + no-else-continue, # R1724: *Unnecessary "%s" after "continue"* + no-else-raise, # R1720: *Unnecessary "%s" after "raise"* + no-else-return, # R1705: *Unnecessary "%s" after "return"* + no-self-use, # R0201: Method could be a function + protected-access, # W0212: Access to a protected member of a client class + raise-missing-from, # W0707: *Consider explicitly re-raising using the 'from' keyword* + redundant-u-string-prefix, # The u prefix for strings is no longer necessary in Python >=3.0 + simplifiable-if-expression, # R1719: *The if expression can be replaced with %s* + simplifiable-if-statement, # R1703: *The if statement can be replaced with %s* + super-with-arguments, # R1725: *Consider using Python 3 style super) without arguments* + too-few-public-methods, # R0903: *Too few public methods %s/%s)* + too-many-ancestors, # R0901: *Too many ancestors %s/%s)* + too-many-arguments, # R0913: *Too many arguments %s/%s)* + too-many-boolean-expressions, # R0916: *Too many boolean expressions in if statement %s/%s)* + too-many-branches, # R0912: *Too many branches %s/%s)* + too-many-instance-attributes, # R0902: *Too many instance attributes %s/%s)* + too-many-locals, # R0914: *Too many local variables %s/%s)* + too-many-nested-blocks, # R1702: *Too many nested blocks %s/%s)* + too-many-public-methods, # R0904: *Too many public methods %s/%s)* + too-many-return-statements, # R0911: *Too many return statements %s/%s)* + too-many-statements, # R0915: *Too many statements %s/%s)* + unspecified-encoding, # W1514: Using open without explicitly specifying an encoding + use-a-generator, # 
R1729: *Use a generator instead '%s%s)'* + useless-object-inheritance, # R0205: *Class %r inherits from object, can be safely removed from bases in python3* + useless-return, # R1711: *Useless return at end of function or method* diff --git a/tests_e2e/tests/agent_ext_workflow/extension_workflow.py b/tests_e2e/tests/agent_ext_workflow/extension_workflow.py index b5a377e726..3f25c6a6bc 100644 --- a/tests_e2e/tests/agent_ext_workflow/extension_workflow.py +++ b/tests_e2e/tests/agent_ext_workflow/extension_workflow.py @@ -114,7 +114,7 @@ def assert_instance_view(self, data=None): def assert_data_in_instance_view(self, instance_view: VirtualMachineExtensionInstanceView): log.info("Asserting extension status ...") status_message = instance_view.statuses[0].message - log.info("Status message: %s" % status_message) + log.info("Status message: %s", status_message) with soft_assertions(): expected_ext_version = "%s-%s" % (self.name, self.version) diff --git a/tests_e2e/tests/agent_update/rsm_update.py b/tests_e2e/tests/agent_update/rsm_update.py index 89c186a2f1..86ff7b5e96 100644 --- a/tests_e2e/tests/agent_update/rsm_update.py +++ b/tests_e2e/tests/agent_update/rsm_update.py @@ -197,7 +197,7 @@ def _request_rsm_update(self, requested_version: str) -> None: } log.info("Attempting rsm upgrade post request to endpoint: {0} with data: {1}".format(url, data)) - response = requests.post(url, data=json.dumps(data), headers=headers) + response = requests.post(url, data=json.dumps(data), headers=headers, timeout=300) if response.status_code == 202: log.info("RSM upgrade request accepted") else: diff --git a/tests_e2e/tests/ext_sequencing/ext_sequencing.py b/tests_e2e/tests/ext_sequencing/ext_sequencing.py index 528502c94a..b2b3b9a705 100644 --- a/tests_e2e/tests/ext_sequencing/ext_sequencing.py +++ b/tests_e2e/tests/ext_sequencing/ext_sequencing.py @@ -70,7 +70,7 @@ def __init__(self, context: AgentVmTestContext): @staticmethod def _get_dependency_map(extensions: List[Dict[str, 
Any]]) -> Dict[str, Dict[str, Any]]: - dependency_map: Dict[str, Dict[str, Any]] = dict() + dependency_map: Dict[str, Dict[str, Any]] = {} for ext in extensions: ext_name = ext['name'] @@ -115,7 +115,7 @@ def _get_sorted_extension_names(extensions: List[VirtualMachineScaleSetVMExtensi @staticmethod def _validate_extension_sequencing(dependency_map: Dict[str, Dict[str, Any]], sorted_extension_names: List[str], relax_check: bool): - installed_ext = dict() + installed_ext = {} # Iterate through the extensions in the enabled order and validate if their depending extensions are already # enabled prior to that. @@ -154,7 +154,7 @@ def _validate_extension_sequencing(dependency_map: Dict[str, Dict[str, Any]], so def run(self): instances_ip_address: List[VmssInstanceIpAddress] = self._context.vmss.get_instances_ip_address() - ssh_clients: Dict[str, SshClient] = dict() + ssh_clients: Dict[str, SshClient] = {} for instance in instances_ip_address: ssh_clients[instance.instance_name] = SshClient(ip_address=instance.ip_address, username=self._context.username, identity_file=self._context.identity_file) diff --git a/tests_e2e/tests/lib/retry.py b/tests_e2e/tests/lib/retry.py index db0a52fcf2..9c045ae74d 100644 --- a/tests_e2e/tests/lib/retry.py +++ b/tests_e2e/tests/lib/retry.py @@ -22,7 +22,8 @@ from tests_e2e.tests.lib.shell import CommandError -def execute_with_retry(operation: Callable[[], Any]) -> Any: +# R1710: Either all return statements in a function should return an expression, or none of them should. (inconsistent-return-statements) +def execute_with_retry(operation: Callable[[], Any]) -> Any: # pylint: disable=inconsistent-return-statements """ Some Azure errors (e.g. 
throttling) are retryable; this method attempts the given operation retrying a few times (after a short delay) if the error includes the string "RetryableError" @@ -79,7 +80,8 @@ def retry_if_false(operation: Callable[[], bool], attempts: int = 5, delay: int return success -def retry(operation: Callable[[], Any], attempts: int = 5, delay: int = 30) -> Any: +# R1710: Either all return statements in a function should return an expression, or none of them should. (inconsistent-return-statements) +def retry(operation: Callable[[], Any], attempts: int = 5, delay: int = 30) -> Any: # pylint: disable=inconsistent-return-statements """ This method attempts the given operation retrying a few times on exceptions. Returns the value returned by the operation. """ diff --git a/tests_e2e/tests/publish_hostname/publish_hostname.py b/tests_e2e/tests/publish_hostname/publish_hostname.py index 45a7be85f5..de31708965 100644 --- a/tests_e2e/tests/publish_hostname/publish_hostname.py +++ b/tests_e2e/tests/publish_hostname/publish_hostname.py @@ -105,7 +105,7 @@ def check_agent_reports_status(self): self._context.username, self._vm_password)) - def retry_ssh_if_connection_reset(self, command: str, use_sudo=False): + def retry_ssh_if_connection_reset(self, command: str, use_sudo=False): # pylint: disable=inconsistent-return-statements # The agent may bring the network down and back up to publish the hostname, which can reset the ssh connection. # Adding retry here for connection reset. 
retries = 3 diff --git a/tests_e2e/tests/scripts/agent_cpu_quota-start_service.py b/tests_e2e/tests/scripts/agent_cpu_quota-start_service.py index ba0f5abb23..c263f4ec34 100755 --- a/tests_e2e/tests/scripts/agent_cpu_quota-start_service.py +++ b/tests_e2e/tests/scripts/agent_cpu_quota-start_service.py @@ -34,7 +34,7 @@ def __init__(self): self._stopped = False def run(self): - threading.current_thread().setName("*Stress*") + threading.current_thread().setName("*Stress*") # pylint: disable=deprecated-method while not self._stopped: try: @@ -47,15 +47,15 @@ def run(self): while i < 30 and not self._stopped: time.sleep(1) i += 1 - except Exception as exception: - logger.error("{0}:\n{1}", exception, traceback.format_exc()) + except Exception as run_exception: + logger.error("{0}:\n{1}", run_exception, traceback.format_exc()) def stop(self): self._stopped = True try: - threading.current_thread().setName("*StartService*") + threading.current_thread().setName("*StartService*") # pylint: disable=deprecated-method logger.set_prefix("E2ETest") logger.add_logger_appender(logger.AppenderType.FILE, logger.LogLevel.INFO, "/var/log/waagent.log") diff --git a/tests_e2e/tests/scripts/agent_firewall-verify_all_firewall_rules.py b/tests_e2e/tests/scripts/agent_firewall-verify_all_firewall_rules.py index 2d165bc175..b19e5b6c1e 100755 --- a/tests_e2e/tests/scripts/agent_firewall-verify_all_firewall_rules.py +++ b/tests_e2e/tests/scripts/agent_firewall-verify_all_firewall_rules.py @@ -91,7 +91,8 @@ def delete_iptable_rules(commands: List[List[str]] = None) -> None: cmd = None for command in commands: cmd = command - retry(lambda: execute_cmd(cmd=cmd), attempts=3) + # W0640: Cell variable cmd defined in loop (cell-var-from-loop) + retry(lambda: execute_cmd(cmd=cmd), attempts=3) # pylint: disable=W0640 except Exception as e: raise Exception("Error -- Failed to Delete the ip table rule set {0}".format(e)) diff --git 
a/tests_e2e/tests/scripts/agent_persist_firewall-verify_firewalld_rules_readded.py b/tests_e2e/tests/scripts/agent_persist_firewall-verify_firewalld_rules_readded.py index 5cec654a16..f0c639b2ef 100755 --- a/tests_e2e/tests/scripts/agent_persist_firewall-verify_firewalld_rules_readded.py +++ b/tests_e2e/tests/scripts/agent_persist_firewall-verify_firewalld_rules_readded.py @@ -43,7 +43,8 @@ def delete_firewalld_rules(commands=None): cmd = None for command in commands: cmd = command - retry(lambda: execute_cmd(cmd=cmd), attempts=3) + # W0640: Cell variable cmd defined in loop (cell-var-from-loop) + retry(lambda: execute_cmd(cmd=cmd), attempts=3) # pylint: disable=W0640 except Exception as e: raise Exception("Error -- Failed to Delete the firewalld rule set {0}".format(e)) From 43edc9666653cc492b0d029fecc977dc8fc84aaa Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Fri, 16 Feb 2024 12:04:13 -0800 Subject: [PATCH 157/240] reset network service unit file if python version changes (#3058) --- azurelinuxagent/ga/persist_firewall_rules.py | 38 +++++++++++++++----- tests/ga/test_persist_firewall_rules.py | 15 ++++++++ 2 files changed, 44 insertions(+), 9 deletions(-) diff --git a/azurelinuxagent/ga/persist_firewall_rules.py b/azurelinuxagent/ga/persist_firewall_rules.py index 74b878ce57..a20e2874aa 100644 --- a/azurelinuxagent/ga/persist_firewall_rules.py +++ b/azurelinuxagent/ga/persist_firewall_rules.py @@ -34,7 +34,8 @@ class PersistFirewallRulesHandler(object): # This unit file (Version={version}) was created by the Azure VM Agent. # Do not edit. 
[Unit] -Description=Setup network rules for WALinuxAgent +Description=Setup network rules for WALinuxAgent +After=local-fs.target Before=network-pre.target Wants=network-pre.target DefaultDependencies=no @@ -69,7 +70,7 @@ class PersistFirewallRulesHandler(object): # The current version of the unit file; Update it whenever the unit file is modified to ensure Agent can dynamically # modify the unit file on VM too - _UNIT_VERSION = "1.3" + _UNIT_VERSION = "1.4" @staticmethod def get_service_file_path(): @@ -184,7 +185,7 @@ def _setup_network_setup_service(self): self.__setup_binary_file() network_service_enabled = self.__verify_network_setup_service_enabled() - if network_service_enabled and not self.__unit_file_version_modified(): + if network_service_enabled and not self.__should_update_unit_file(): logger.info("Service: {0} already enabled. No change needed.".format(self._network_setup_service_name)) self.__log_network_setup_service_logs() @@ -314,17 +315,29 @@ def __get_unit_file_version(self): return match.group(1).strip() - def __unit_file_version_modified(self): + def __get_unit_exec_start(self): + if not os.path.exists(self.get_service_file_path()): + raise OSError("{0} not found".format(self.get_service_file_path())) + + match = fileutil.findre_in_file(self.get_service_file_path(), + line_re="ExecStart=(.*)") + if match is None: + raise ValueError("ExecStart tag not found in the unit file") + + return match.group(1).strip() + + def __should_update_unit_file(self): """ - Check if the unit file version changed from the expected version - :return: True if unit file version changed else False + Check if the unit file version changed from the expected version or if the exec-start changed from the expected exec-start + :return: True if unit file need update else False """ try: unit_file_version = self.__get_unit_file_version() + unit_exec_start = self.__get_unit_exec_start() except Exception as error: - logger.info("Unable to determine version of unit file: {0}, 
overwriting unit file".format(ustr(error))) - # Since we can't determine the version, marking the file as modified to overwrite the unit file + logger.info("Unable to read content of unit file: {0}, overwriting unit file".format(ustr(error))) + # Since we can't determine the version or exec start, marking the file as modified to overwrite the unit file return True if unit_file_version != self._UNIT_VERSION: @@ -332,7 +345,14 @@ def __unit_file_version_modified(self): "Unit file version: {0} does not match with expected version: {1}, overwriting unit file".format( unit_file_version, self._UNIT_VERSION)) return True + binary_path = os.path.join(conf.get_lib_dir(), self.BINARY_FILE_NAME) + expected_exec_start = "{0} {1}".format(sys.executable, binary_path) + if unit_exec_start != expected_exec_start: + logger.info( + "Unit file exec-start: {0} does not match with expected exec-start: {1}, overwriting unit file".format( + unit_exec_start, expected_exec_start)) + return True logger.info( - "Unit file version matches with expected version: {0}, not overwriting unit file".format(unit_file_version)) + "Unit file matches with expected version: {0} and exec start: {1}, not overwriting unit file".format(unit_file_version, unit_exec_start)) return False diff --git a/tests/ga/test_persist_firewall_rules.py b/tests/ga/test_persist_firewall_rules.py index 5ee397baf3..adcf43b752 100644 --- a/tests/ga/test_persist_firewall_rules.py +++ b/tests/ga/test_persist_firewall_rules.py @@ -414,3 +414,18 @@ def test_it_should_reset_service_unit_files_if_version_changed(self): mock_popen=self.__mock_network_setup_service_enabled) self.assertNotIn(test_ver, fileutil.read_file(handler.get_service_file_path()), "Test version found incorrectly") + + def test_it_should_reset_service_unit_file_if_python_version_changes(self): + with self._get_persist_firewall_rules_handler() as handler: + # 1st step - Setup the service with some python Version + python_ver = "test_python" + with 
patch("sys.executable", python_ver): + self.__setup_and_assert_network_service_setup_scenario(handler) + self.assertIn(python_ver, fileutil.read_file(handler.get_service_file_path()), "Python version not found") + + # 2nd step - Re-run the setup and ensure the service file set up again even if service enabled + self.__executed_commands = [] + self.__setup_and_assert_network_service_setup_scenario(handler, + mock_popen=self.__mock_network_setup_service_enabled) + self.assertNotIn(python_ver, fileutil.read_file(handler.get_service_file_path()), + "Python version found incorrectly") From 0aabddd0bfbe2362114cfb48bbba3fef9ad87bd6 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Tue, 20 Feb 2024 10:53:55 -0800 Subject: [PATCH 158/240] Ignore network unreachable errors in publish hostname (#3060) --- .../tests/publish_hostname/publish_hostname.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests_e2e/tests/publish_hostname/publish_hostname.py b/tests_e2e/tests/publish_hostname/publish_hostname.py index de31708965..6ec97ae2e7 100644 --- a/tests_e2e/tests/publish_hostname/publish_hostname.py +++ b/tests_e2e/tests/publish_hostname/publish_hostname.py @@ -26,6 +26,7 @@ import datetime import re +from typing import List, Dict, Any from assertpy import fail from time import sleep @@ -204,6 +205,19 @@ def run(self): self.check_agent_reports_status() raise + def get_ignore_error_rules(self) -> List[Dict[str, Any]]: + ignore_rules = [ + # + # We may see temporary network unreachable warnings since we are bringing the network interface down + # 2024-02-16T09:27:14.114569Z WARNING MonitorHandler ExtHandler Error in SendHostPluginHeartbeat: [HttpError] [HTTP Failed] GET http://168.63.129.16:32526/health -- IOError [Errno 101] Network is unreachable -- 1 attempts made --- [NOTE: Will not log the same error for the next hour] + # + { + 'message': r"SendHostPluginHeartbeat:.*GET http://168.63.129.16:32526/health -- 
IOError [Errno 101] Network is unreachable", + 'if': lambda r: r.level == "WARNING" + } + ] + return ignore_rules + if __name__ == "__main__": PublishHostname.run_from_command_line() From b1b0545a1a907aa379dbda414579294a4ad36104 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Tue, 20 Feb 2024 11:03:09 -0800 Subject: [PATCH 159/240] Address pylint warning deprecated-method (#3059) Co-authored-by: narrieta --- azurelinuxagent/agent.py | 4 ++-- azurelinuxagent/common/event.py | 2 +- azurelinuxagent/common/logger.py | 4 ++-- azurelinuxagent/common/singletonperthread.py | 4 ++-- azurelinuxagent/ga/collect_logs.py | 8 ++++---- azurelinuxagent/ga/collect_telemetry_events.py | 4 ++-- azurelinuxagent/ga/env.py | 4 ++-- azurelinuxagent/ga/monitor.py | 4 ++-- azurelinuxagent/ga/send_telemetry_events.py | 4 ++-- tests/ga/test_send_telemetry_events.py | 2 +- tests_e2e/tests/scripts/agent_cpu_quota-start_service.py | 4 ++-- 11 files changed, 22 insertions(+), 22 deletions(-) diff --git a/azurelinuxagent/agent.py b/azurelinuxagent/agent.py index 6d297976d4..0dae70f195 100644 --- a/azurelinuxagent/agent.py +++ b/azurelinuxagent/agent.py @@ -131,7 +131,7 @@ def daemon(self): """ set_daemon_version(AGENT_VERSION) logger.set_prefix("Daemon") - threading.current_thread().setName("Daemon") # pylint: disable=deprecated-method + threading.current_thread().name = "Daemon" child_args = None \ if self.conf_file_path is None \ else "-configuration-path:{0}".format(self.conf_file_path) @@ -171,7 +171,7 @@ def run_exthandlers(self, debug=False): Run the update and extension handler """ logger.set_prefix("ExtHandler") - threading.current_thread().setName("ExtHandler") # pylint: disable=deprecated-method + threading.current_thread().name = "ExtHandler" # # Agents < 2.2.53 used to echo the log to the console. 
Since the extension handler could have been started by diff --git a/azurelinuxagent/common/event.py b/azurelinuxagent/common/event.py index d1d53a7237..dba95dd9de 100644 --- a/azurelinuxagent/common/event.py +++ b/azurelinuxagent/common/event.py @@ -604,7 +604,7 @@ def add_common_event_parameters(self, event, event_timestamp): TelemetryEventParam(CommonTelemetryEventSchema.OpcodeName, event_timestamp.strftime(logger.Logger.LogTimeFormatInUTC)), TelemetryEventParam(CommonTelemetryEventSchema.EventTid, threading.current_thread().ident), TelemetryEventParam(CommonTelemetryEventSchema.EventPid, os.getpid()), - TelemetryEventParam(CommonTelemetryEventSchema.TaskName, threading.current_thread().getName())] # pylint: disable=deprecated-method + TelemetryEventParam(CommonTelemetryEventSchema.TaskName, threading.current_thread().name)] if event.eventId == TELEMETRY_EVENT_EVENT_ID and event.providerId == TELEMETRY_EVENT_PROVIDER_ID: # Currently only the GuestAgentExtensionEvents has these columns, the other tables dont have them so skipping diff --git a/azurelinuxagent/common/logger.py b/azurelinuxagent/common/logger.py index 4246d9f6ec..3506a649ad 100644 --- a/azurelinuxagent/common/logger.py +++ b/azurelinuxagent/common/logger.py @@ -19,7 +19,7 @@ """ import sys from datetime import datetime, timedelta -from threading import currentThread +from threading import current_thread from azurelinuxagent.common.future import ustr @@ -137,7 +137,7 @@ def write_log(log_appender): # pylint: disable=W0612 msg = msg_format time = datetime.utcnow().strftime(Logger.LogTimeFormatInUTC) level_str = LogLevel.STRINGS[level] - thread_name = currentThread().getName() # pylint: disable=deprecated-method + thread_name = current_thread().name if self.prefix is not None: log_item = u"{0} {1} {2} {3} {4}\n".format(time, level_str, thread_name, self.prefix, msg) else: diff --git a/azurelinuxagent/common/singletonperthread.py b/azurelinuxagent/common/singletonperthread.py index ee85dfd15e..c7bcda803d 
100644 --- a/azurelinuxagent/common/singletonperthread.py +++ b/azurelinuxagent/common/singletonperthread.py @@ -1,4 +1,4 @@ -from threading import Lock, currentThread +from threading import Lock, current_thread class _SingletonPerThreadMetaClass(type): @@ -9,7 +9,7 @@ class _SingletonPerThreadMetaClass(type): def __call__(cls, *args, **kwargs): with cls._lock: # Object Name = className__threadName - obj_name = "%s__%s" % (cls.__name__, currentThread().getName()) # pylint: disable=deprecated-method + obj_name = "%s__%s" % (cls.__name__, current_thread().name) if obj_name not in cls._instances: cls._instances[obj_name] = super(_SingletonPerThreadMetaClass, cls).__call__(*args, **kwargs) return cls._instances[obj_name] diff --git a/azurelinuxagent/ga/collect_logs.py b/azurelinuxagent/ga/collect_logs.py index b4b9cb4f63..d82933e963 100644 --- a/azurelinuxagent/ga/collect_logs.py +++ b/azurelinuxagent/ga/collect_logs.py @@ -116,8 +116,8 @@ def is_alive(self): def start(self): self.event_thread = threading.Thread(target=self.daemon) - self.event_thread.setDaemon(True) # pylint: disable=deprecated-method - self.event_thread.setName(self.get_thread_name()) # pylint: disable=deprecated-method + self.event_thread.daemon = True + self.event_thread.name = self.get_thread_name() self.event_thread.start() def join(self): @@ -303,8 +303,8 @@ def is_alive(self): def start(self): self.event_thread = threading.Thread(target=self.daemon) - self.event_thread.setDaemon(True) # pylint: disable=deprecated-method - self.event_thread.setName(self.get_thread_name()) # pylint: disable=deprecated-method + self.event_thread.daemon = True + self.event_thread.name = self.get_thread_name() self.event_thread.start() def daemon(self): diff --git a/azurelinuxagent/ga/collect_telemetry_events.py b/azurelinuxagent/ga/collect_telemetry_events.py index 9b0abcf0fb..05f18c60d4 100644 --- a/azurelinuxagent/ga/collect_telemetry_events.py +++ b/azurelinuxagent/ga/collect_telemetry_events.py @@ -542,8 +542,8 
@@ def is_alive(self): def start(self): self.thread = threading.Thread(target=self.daemon) - self.thread.setDaemon(True) # pylint: disable=deprecated-method - self.thread.setName(CollectTelemetryEventsHandler.get_thread_name()) # pylint: disable=deprecated-method + self.thread.daemon = True + self.thread.name = CollectTelemetryEventsHandler.get_thread_name() self.thread.start() def stop(self): diff --git a/azurelinuxagent/ga/env.py b/azurelinuxagent/ga/env.py index 6480683e25..6b1dd451b8 100644 --- a/azurelinuxagent/ga/env.py +++ b/azurelinuxagent/ga/env.py @@ -213,8 +213,8 @@ def is_alive(self): def start(self): self.env_thread = threading.Thread(target=self.daemon) - self.env_thread.setDaemon(True) # pylint: disable=deprecated-method - self.env_thread.setName(self.get_thread_name()) # pylint: disable=deprecated-method + self.env_thread.daemon = True + self.env_thread.name = self.get_thread_name() self.env_thread.start() def daemon(self): diff --git a/azurelinuxagent/ga/monitor.py b/azurelinuxagent/ga/monitor.py index 240c502353..a5ff29aa01 100644 --- a/azurelinuxagent/ga/monitor.py +++ b/azurelinuxagent/ga/monitor.py @@ -281,8 +281,8 @@ def is_alive(self): def start(self): self.monitor_thread = threading.Thread(target=self.daemon) - self.monitor_thread.setDaemon(True) # pylint: disable=deprecated-method - self.monitor_thread.setName(self.get_thread_name()) # pylint: disable=deprecated-method + self.monitor_thread.daemon = True + self.monitor_thread.name = self.get_thread_name() self.monitor_thread.start() def daemon(self): diff --git a/azurelinuxagent/ga/send_telemetry_events.py b/azurelinuxagent/ga/send_telemetry_events.py index a87308117c..08d26eef31 100644 --- a/azurelinuxagent/ga/send_telemetry_events.py +++ b/azurelinuxagent/ga/send_telemetry_events.py @@ -70,8 +70,8 @@ def is_alive(self): def start(self): self._thread = threading.Thread(target=self._process_telemetry_thread) - self._thread.setDaemon(True) # pylint: disable=deprecated-method - 
self._thread.setName(self.get_thread_name()) # pylint: disable=deprecated-method + self._thread.daemon = True + self._thread.name = self.get_thread_name() self._thread.start() def stop(self): diff --git a/tests/ga/test_send_telemetry_events.py b/tests/ga/test_send_telemetry_events.py index a9c87dde9a..1d15b4ff7f 100644 --- a/tests/ga/test_send_telemetry_events.py +++ b/tests/ga/test_send_telemetry_events.py @@ -340,7 +340,7 @@ def test_it_should_enqueue_and_send_events_properly(self, mock_lib_dir, *_): with patch("os.path.getmtime", return_value=test_mtime): with patch('os.getpid', return_value=test_eventpid): with patch("threading.Thread.ident", new_callable=PropertyMock(return_value=test_eventtid)): - with patch("threading.Thread.getName", return_value=test_taskname): + with patch("threading.Thread.name", new_callable=PropertyMock(return_value=test_taskname)): monitor_handler.run() TestSendTelemetryEventsHandler._stop_handler(telemetry_handler) diff --git a/tests_e2e/tests/scripts/agent_cpu_quota-start_service.py b/tests_e2e/tests/scripts/agent_cpu_quota-start_service.py index c263f4ec34..d595a66133 100755 --- a/tests_e2e/tests/scripts/agent_cpu_quota-start_service.py +++ b/tests_e2e/tests/scripts/agent_cpu_quota-start_service.py @@ -34,7 +34,7 @@ def __init__(self): self._stopped = False def run(self): - threading.current_thread().setName("*Stress*") # pylint: disable=deprecated-method + threading.current_thread().name = "*Stress*" while not self._stopped: try: @@ -55,7 +55,7 @@ def stop(self): try: - threading.current_thread().setName("*StartService*") # pylint: disable=deprecated-method + threading.current_thread().name = "*StartService*" logger.set_prefix("E2ETest") logger.add_logger_appender(logger.AppenderType.FILE, logger.LogLevel.INFO, "/var/log/waagent.log") From d8ebb487972d05def86c0474cc52cdcee43f22d1 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 20 Feb 2024 16:01:37 -0800 Subject: [PATCH 
160/240] fix agent update UT (#3051) (#3054) (cherry picked from commit d9f7ed49a6e45c57e0722579125272dc77871ba3) --- tests/ga/test_agent_update_handler.py | 1 + tests/ga/test_update.py | 2 +- tests/lib/wire_protocol_data.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/ga/test_agent_update_handler.py b/tests/ga/test_agent_update_handler.py index 82fcd362df..c6e41469f3 100644 --- a/tests/ga/test_agent_update_handler.py +++ b/tests/ga/test_agent_update_handler.py @@ -494,6 +494,7 @@ def test_it_should_continue_with_update_if_number_of_update_attempts_less_than_3 with self.assertRaises(AgentUpgradeExitException): agent_update_handler._protocol.mock_wire_data.set_version_in_agent_family( str(latest_version)) + agent_update_handler._protocol.mock_wire_data.set_version_in_ga_manifest(str(latest_version)) agent_update_handler._protocol.mock_wire_data.set_incarnation(i+2) agent_update_handler._protocol.client.update_goal_state() agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 58732de5c4..6caa21f3c8 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -1849,7 +1849,7 @@ def reload_conf(url, protocol): data_file['ga_manifest'] = "wire/ga_manifest_no_upgrade.xml" # Setting the prod frequency to mimic a real scenario with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf, autoupdate_frequency=6000) as (update_handler, mock_telemetry): - update_handler._protocol.mock_wire_data.set_ga_manifest_version_version(str(CURRENT_VERSION)) + update_handler._protocol.mock_wire_data.set_version_in_ga_manifest(str(CURRENT_VERSION)) update_handler._protocol.mock_wire_data.set_incarnation(20) update_handler.run(debug=True) diff --git a/tests/lib/wire_protocol_data.py b/tests/lib/wire_protocol_data.py index 9502a64133..6854bdcc59 100644 --- a/tests/lib/wire_protocol_data.py +++ 
b/tests/lib/wire_protocol_data.py @@ -469,5 +469,5 @@ def set_version_in_agent_family(self, version): def set_extension_config_is_vm_enabled_for_rsm_upgrades(self, is_vm_enabled_for_rsm_upgrades): self.ext_conf = WireProtocolData.replace_xml_element_value(self.ext_conf, "IsVMEnabledForRSMUpgrades", is_vm_enabled_for_rsm_upgrades) - def set_ga_manifest_version_version(self, version): + def set_version_in_ga_manifest(self, version): self.ga_manifest = WireProtocolData.replace_xml_element_value(self.ga_manifest, "Version", version) From 99aca99867b36e86c69c9ba81e49534c256f5b74 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 20 Feb 2024 16:44:03 -0800 Subject: [PATCH 161/240] modify agent update flag (#3053) (#3055) (cherry picked from commit 049de5c7b16e15679f0bcc34767b1ee96795cfa8) --- tests_e2e/tests/agent_publish/agent_publish.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_e2e/tests/agent_publish/agent_publish.py b/tests_e2e/tests/agent_publish/agent_publish.py index 617d25271c..0cf51c3311 100644 --- a/tests_e2e/tests/agent_publish/agent_publish.py +++ b/tests_e2e/tests/agent_publish/agent_publish.py @@ -62,7 +62,7 @@ def _get_agent_info(self) -> None: def _prepare_agent(self) -> None: log.info("Modifying agent update related config flags and renaming the log file") - self._run_remote_test(self._ssh_client, "sh -c 'agent-service stop && mv /var/log/waagent.log /var/log/waagent.$(date --iso-8601=seconds).log && update-waagent-conf Debug.DownloadNewAgents=y AutoUpdate.GAFamily=Test AutoUpdate.Enabled=y Extensions.Enabled=y'", use_sudo=True) + self._run_remote_test(self._ssh_client, "sh -c 'agent-service stop && mv /var/log/waagent.log /var/log/waagent.$(date --iso-8601=seconds).log && update-waagent-conf AutoUpdate.UpdateToLatestVersion=y AutoUpdate.GAFamily=Test AutoUpdate.Enabled=y Extensions.Enabled=y'", use_sudo=True) log.info('Renamed log file and updated agent-update 
DownloadNewAgents GAFamily config flags') def _check_update(self) -> None: From 2e83971a95e2390f66b943da1f321af6532c806c Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 20 Feb 2024 17:36:55 -0800 Subject: [PATCH 162/240] skip run on flatcar (#3061) --- tests_e2e/test_suites/agent_firewall.yml | 7 ++++++- tests_e2e/test_suites/agent_update.yml | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/tests_e2e/test_suites/agent_firewall.yml b/tests_e2e/test_suites/agent_firewall.yml index 0e095ba39e..4697265709 100644 --- a/tests_e2e/test_suites/agent_firewall.yml +++ b/tests_e2e/test_suites/agent_firewall.yml @@ -12,4 +12,9 @@ tests: images: - "endorsed" - "endorsed-arm64" -owns_vm: true # This vm cannot be shared with other tests because it modifies the firewall rules and agent status. \ No newline at end of file +owns_vm: true # This vm cannot be shared with other tests because it modifies the firewall rules and agent status. +# Systemctl stop functionality changed and service doing restart on stop cmd in flatcar images. Thus breaking our tests. +# TODO: Enable once it is fixed +skip_on_images: + - "flatcar" + - "flatcar_arm64" \ No newline at end of file diff --git a/tests_e2e/test_suites/agent_update.yml b/tests_e2e/test_suites/agent_update.yml index 3d3d4918f6..5c8736a4ea 100644 --- a/tests_e2e/test_suites/agent_update.yml +++ b/tests_e2e/test_suites/agent_update.yml @@ -12,4 +12,9 @@ locations: "AzureCloud:eastus2euap" owns_vm: true skip_on_clouds: - "AzureChinaCloud" - - "AzureUSGovernment" \ No newline at end of file + - "AzureUSGovernment" +# Systemctl stop functionality changed and service doing restart on stop cmd in flatcar images. Thus breaking our tests. 
+# TODO: Enable once it is fixed +skip_on_images: + - "flatcar" + - "flatcar_arm64" \ No newline at end of file From f27b7b23b96b785d59aba4d3b23e26c30efb0519 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Thu, 22 Feb 2024 14:23:55 -0800 Subject: [PATCH 163/240] retry on agent cgroups tracking check (#3062) * retry on agentcgroups check * address comments --- .../agent_cgroups-check_cgroups_agent.py | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/tests_e2e/tests/scripts/agent_cgroups-check_cgroups_agent.py b/tests_e2e/tests/scripts/agent_cgroups-check_cgroups_agent.py index 2f3b877a0b..064f304007 100755 --- a/tests_e2e/tests/scripts/agent_cgroups-check_cgroups_agent.py +++ b/tests_e2e/tests/scripts/agent_cgroups-check_cgroups_agent.py @@ -27,6 +27,7 @@ verify_agent_cgroup_assigned_correctly from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.remote_test import run_remote_test +from tests_e2e.tests.lib.retry import retry_if_false def verify_if_cgroup_controllers_are_mounted(): @@ -90,14 +91,21 @@ def verify_agent_cgroups_tracked(): tracking_agent_cgroup_message_re = r'Started tracking cgroup [^\s]+\s+\[(?P[^\s]+)\]' tracked_cgroups = [] - for record in AgentLog().read(): - match = re.search(tracking_agent_cgroup_message_re, record.message) - if match is not None: - tracked_cgroups.append(match.group('path')) - - for controller in AGENT_CONTROLLERS: - if not any(AGENT_SERVICE_NAME in cgroup_path and controller in cgroup_path for cgroup_path in tracked_cgroups): - fail('Agent {0} is not being tracked. 
Tracked cgroups:{1}'.format(controller, tracked_cgroups)) + def is_agent_tracking_cgroup(): + tracked_cgroups.clear() + for record in AgentLog().read(): + match = re.search(tracking_agent_cgroup_message_re, record.message) + if match is not None: + tracked_cgroups.append(match.group('path')) + + for controller in AGENT_CONTROLLERS: + if not any(AGENT_SERVICE_NAME in cgroup_path and controller in cgroup_path for cgroup_path in tracked_cgroups): + return False + return True + # Test check can happen before agent starts tracking cgroups. So, retrying the check for few times + found = retry_if_false(is_agent_tracking_cgroup) + if not found: + fail('Agent {0} is not being tracked. Tracked cgroups:{1}'.format(AGENT_CONTROLLERS, tracked_cgroups)) log.info("Agent is tracking cgroups correctly.\n%s", tracked_cgroups) From df7be2d22192de5f00fcc4595fac36d954ac861b Mon Sep 17 00:00:00 2001 From: Robert Schweikert Date: Thu, 22 Feb 2024 17:39:41 -0500 Subject: [PATCH 164/240] Recognize SLE-Micro as a SLE based distribution (#3048) Using the agent with SLE-Micro forces the agent to fallback to the common default implementation for nominally distribution specific behavior. This misses the SUSE specific implementations. 
Co-authored-by: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> --- azurelinuxagent/common/osutil/factory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/common/osutil/factory.py b/azurelinuxagent/common/osutil/factory.py index c9bbd2cc5f..0a29029c78 100644 --- a/azurelinuxagent/common/osutil/factory.py +++ b/azurelinuxagent/common/osutil/factory.py @@ -89,7 +89,7 @@ def _get_osutil(distro_name, distro_code_name, distro_version, distro_full_name) if distro_name in ("flatcar", "coreos") or distro_code_name in ("flatcar", "coreos"): return CoreOSUtil() - if distro_name in ("suse", "sle_hpc", "sles", "opensuse"): + if distro_name in ("suse", "sle-micro", "sle_hpc", "sles", "opensuse"): if distro_full_name == 'SUSE Linux Enterprise Server' \ and Version(distro_version) < Version('12') \ or distro_full_name == 'openSUSE' and Version(distro_version) < Version('13.2'): From 591b5492ba180482a7df5e35bb75d7bd42687af1 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Thu, 22 Feb 2024 15:12:14 -0800 Subject: [PATCH 165/240] Retry ssh check if connection reset (#3065) --- tests_e2e/orchestrator/lib/agent_test_suite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index f432c2d4c1..022bfe8304 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -491,7 +491,7 @@ def _check_ssh_connectivity(ssh_client: SshClient) -> None: break except CommandError as error: # Check for "System is booting up. Unprivileged users are not permitted to log in yet. Please come back later. For technical details, see pam_nologin(8)." 
- if not any(m in error.stderr for m in ["Unprivileged users are not permitted to log in yet", "Permission denied"]): + if not any(m in error.stderr for m in ["Unprivileged users are not permitted to log in yet", "Permission denied", "Connection reset by peer"]): raise if attempt >= max_attempts - 1: raise Exception(f"SSH connectivity check failed after {max_attempts} attempts, giving up [{error}]") From dd6be325d2a31ed9450f8e90d5753e2d73346b2c Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Thu, 22 Feb 2024 15:41:16 -0800 Subject: [PATCH 166/240] Add distutils/version.py to azurelinuxagent (#3063) * Add distutils/version.py to azurelinuxagent --------- Co-authored-by: narrieta --- azurelinuxagent/common/future.py | 11 +- azurelinuxagent/common/osutil/factory.py | 2 +- .../common/utils/flexible_version.py | 6 +- azurelinuxagent/distutils/__init__.py | 0 azurelinuxagent/distutils/version.py | 236 ++++++++++++++++++ azurelinuxagent/ga/exthandlers.py | 4 +- azurelinuxagent/pa/deprovision/factory.py | 2 +- azurelinuxagent/pa/rdma/factory.py | 2 +- azurelinuxagent/pa/rdma/suse.py | 2 +- tests/common/utils/test_flexible_version.py | 2 +- tests/common/utils/test_text_util.py | 2 +- 11 files changed, 257 insertions(+), 12 deletions(-) create mode 100644 azurelinuxagent/distutils/__init__.py create mode 100644 azurelinuxagent/distutils/version.py diff --git a/azurelinuxagent/common/future.py b/azurelinuxagent/common/future.py index be28ba9d88..e9bb7e9339 100644 --- a/azurelinuxagent/common/future.py +++ b/azurelinuxagent/common/future.py @@ -61,7 +61,6 @@ range = xrange int = long - if sys.version_info[1] >= 7: from collections import OrderedDict # For Py 2.7+ else: @@ -69,6 +68,16 @@ else: raise ImportError("Unknown python version: {0}".format(sys.version_info)) +# +# distutils has been removed from Python >= 3.12; use the copy from azurelinuxagent instead +# +if sys.version_info[0] == 3 and sys.version_info[1] >= 12: + from azurelinuxagent.distutils import version 
+else: + from distutils import version # pylint: disable=deprecated-module +Version = version.Version +LooseVersion = version.LooseVersion + def get_linux_distribution(get_full_name, supported_dists): """Abstract platform.linux_distribution() call which is deprecated as of diff --git a/azurelinuxagent/common/osutil/factory.py b/azurelinuxagent/common/osutil/factory.py index 0a29029c78..25a6060ada 100644 --- a/azurelinuxagent/common/osutil/factory.py +++ b/azurelinuxagent/common/osutil/factory.py @@ -16,7 +16,7 @@ # -from distutils.version import LooseVersion as Version # pylint: disable=no-name-in-module, import-error, disable=deprecated-module +from azurelinuxagent.common.future import LooseVersion as Version import azurelinuxagent.common.logger as logger from azurelinuxagent.common.version import DISTRO_NAME, DISTRO_CODE_NAME, DISTRO_VERSION, DISTRO_FULL_NAME diff --git a/azurelinuxagent/common/utils/flexible_version.py b/azurelinuxagent/common/utils/flexible_version.py index ac3ac9347b..633fe771a1 100644 --- a/azurelinuxagent/common/utils/flexible_version.py +++ b/azurelinuxagent/common/utils/flexible_version.py @@ -17,11 +17,11 @@ # Requires Python 2.6+ and Openssl 1.0+ # -from distutils import version # pylint: disable=no-name-in-module, disable=deprecated-module +from azurelinuxagent.common.future import Version import re -class FlexibleVersion(version.Version): +class FlexibleVersion(Version): """ A more flexible implementation of distutils.version.StrictVersion @@ -41,7 +41,7 @@ class FlexibleVersion(version.Version): """ def __init__(self, vstring=None, sep='.', prerel_tags=('alpha', 'beta', 'rc')): - version.Version.__init__(self) + Version.__init__(self) if sep is None: sep = '.' 
diff --git a/azurelinuxagent/distutils/__init__.py b/azurelinuxagent/distutils/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/azurelinuxagent/distutils/version.py b/azurelinuxagent/distutils/version.py new file mode 100644 index 0000000000..acb8b0a5df --- /dev/null +++ b/azurelinuxagent/distutils/version.py @@ -0,0 +1,236 @@ +# +# A copy of distutils/version.py as Python 3.8 (minus the StrictVersion class) +# +# Implements multiple version numbering conventions for the +# Python Module Distribution Utilities. +# +# $Id$ +# + +"""Provides classes to represent module version numbers (one class for +each style of version numbering). There are currently two such classes +implemented: StrictVersion and LooseVersion. + +Every version number class implements the following interface: + * the 'parse' method takes a string and parses it to some internal + representation; if the string is an invalid version number, + 'parse' raises a ValueError exception + * the class constructor takes an optional string argument which, + if supplied, is passed to 'parse' + * __str__ reconstructs the string that was passed to 'parse' (or + an equivalent string -- ie. one that will generate an equivalent + version number instance) + * __repr__ generates Python code to recreate the version number instance + * _cmp compares the current instance with either another instance + of the same class or a string (which will be parsed to an instance + of the same class, thus must follow the same rules) +""" + +import re + +# E1101: Instance of 'Version' has no '_cmp' member (no-member) +# pylint: disable=no-member + + +class Version: + """Abstract base class for version numbering classes. Just provides + constructor (__init__) and reproducer (__repr__), because those + seem to be the same for all version numbering classes; and route + rich comparisons to _cmp. 
+ """ + + def __init__(self, vstring=None): + if vstring: + self.parse(vstring) + + def __repr__(self): + return "%s ('%s')" % (self.__class__.__name__, str(self)) + + def __eq__(self, other): + c = self._cmp(other) + if c is NotImplemented: + return c + return c == 0 + + def __lt__(self, other): + c = self._cmp(other) + if c is NotImplemented: + return c + return c < 0 + + def __le__(self, other): + c = self._cmp(other) + if c is NotImplemented: + return c + return c <= 0 + + def __gt__(self, other): + c = self._cmp(other) + if c is NotImplemented: + return c + return c > 0 + + def __ge__(self, other): + c = self._cmp(other) + if c is NotImplemented: + return c + return c >= 0 + + +# Interface for version-number classes -- must be implemented +# by the following classes (the concrete ones -- Version should +# be treated as an abstract class). +# __init__ (string) - create and take same action as 'parse' +# (string parameter is optional) +# parse (string) - convert a string representation to whatever +# internal representation is appropriate for +# this style of version numbering +# __str__ (self) - convert back to a string; should be very similar +# (if not identical to) the string supplied to parse +# __repr__ (self) - generate Python code to recreate +# the instance +# _cmp (self, other) - compare two version numbers ('other' may +# be an unparsed version string, or another +# instance of your version class) + + +# The rules according to Greg Stein: +# 1) a version number has 1 or more numbers separated by a period or by +# sequences of letters. If only periods, then these are compared +# left-to-right to determine an ordering. 
+# 2) sequences of letters are part of the tuple for comparison and are +# compared lexicographically +# 3) recognize the numeric components may have leading zeroes +# +# The LooseVersion class below implements these rules: a version number +# string is split up into a tuple of integer and string components, and +# comparison is a simple tuple comparison. This means that version +# numbers behave in a predictable and obvious way, but a way that might +# not necessarily be how people *want* version numbers to behave. There +# wouldn't be a problem if people could stick to purely numeric version +# numbers: just split on period and compare the numbers as tuples. +# However, people insist on putting letters into their version numbers; +# the most common purpose seems to be: +# - indicating a "pre-release" version +# ('alpha', 'beta', 'a', 'b', 'pre', 'p') +# - indicating a post-release patch ('p', 'pl', 'patch') +# but of course this can't cover all version number schemes, and there's +# no way to know what a programmer means without asking him. +# +# The problem is what to do with letters (and other non-numeric +# characters) in a version number. The current implementation does the +# obvious and predictable thing: keep them as strings and compare +# lexically within a tuple comparison. This has the desired effect if +# an appended letter sequence implies something "post-release": +# eg. "0.99" < "0.99pl14" < "1.0", and "5.001" < "5.001m" < "5.002". +# +# However, if letters in a version number imply a pre-release version, +# the "obvious" thing isn't correct. Eg. you would expect that +# "1.5.1" < "1.5.2a2" < "1.5.2", but under the tuple/lexical comparison +# implemented here, this just isn't so. +# +# Two possible solutions come to mind. The first is to tie the +# comparison algorithm to a particular set of semantic rules, as has +# been done in the StrictVersion class above. This works great as long +# as everyone can go along with bondage and discipline. 
Hopefully a +# (large) subset of Python module programmers will agree that the +# particular flavour of bondage and discipline provided by StrictVersion +# provides enough benefit to be worth using, and will submit their +# version numbering scheme to its domination. The free-thinking +# anarchists in the lot will never give in, though, and something needs +# to be done to accommodate them. +# +# Perhaps a "moderately strict" version class could be implemented that +# lets almost anything slide (syntactically), and makes some heuristic +# assumptions about non-digits in version number strings. This could +# sink into special-case-hell, though; if I was as talented and +# idiosyncratic as Larry Wall, I'd go ahead and implement a class that +# somehow knows that "1.2.1" < "1.2.2a2" < "1.2.2" < "1.2.2pl3", and is +# just as happy dealing with things like "2g6" and "1.13++". I don't +# think I'm smart enough to do it right though. +# +# In any case, I've coded the test suite for this module (see +# ../test/test_version.py) specifically to fail on things like comparing +# "1.2a2" and "1.2". That's not because the *code* is doing anything +# wrong, it's because the simple, obvious design doesn't match my +# complicated, hairy expectations for real-world version numbers. It +# would be a snap to fix the test suite to say, "Yep, LooseVersion does +# the Right Thing" (ie. the code matches the conception). But I'd rather +# have a conception that matches common notions about version numbers. + +# W0231: __init__ method from base class 'Version' is not called (super-init-not-called) +# R1710: Either all return statements in a function should return an expression, or none of them should. (inconsistent-return-statements) +# pylint: disable=super-init-not-called, inconsistent-return-statements + +class LooseVersion (Version): + + """Version numbering for anarchists and software realists. + Implements the standard interface for version number classes as + described above. 
A version number consists of a series of numbers, + separated by either periods or strings of letters. When comparing + version numbers, the numeric components will be compared + numerically, and the alphabetic components lexically. The following + are all valid version numbers, in no particular order: + + 1.5.1 + 1.5.2b2 + 161 + 3.10a + 8.02 + 3.4j + 1996.07.12 + 3.2.pl0 + 3.1.1.6 + 2g6 + 11g + 0.960923 + 2.2beta29 + 1.13++ + 5.5.kw + 2.0b1pl0 + + In fact, there is no such thing as an invalid version number under + this scheme; the rules for comparison are simple and predictable, + but may not always give the results you want (for some definition + of "want"). + """ + + component_re = re.compile(r'(\d+|[a-z]+|\.)', re.VERBOSE) + + def __init__(self, vstring=None): + if vstring: + self.parse(vstring) + + def parse(self, vstring): + # I've given up on thinking I can reconstruct the version string + # from the parsed tuple -- so I just store the string here for + # use by __str__ + self.vstring = vstring + components = [x for x in self.component_re.split(vstring) if x and x != '.'] + for i, obj in enumerate(components): + try: + components[i] = int(obj) + except ValueError: + pass + + self.version = components + + def __str__(self): + return self.vstring + + def __repr__(self): + return "LooseVersion ('%s')" % str(self) + + def _cmp(self, other): + if isinstance(other, str): + other = LooseVersion(other) + + if self.version == other.version: + return 0 + if self.version < other.version: + return -1 + if self.version > other.version: + return 1 + + +# end class LooseVersion diff --git a/azurelinuxagent/ga/exthandlers.py b/azurelinuxagent/ga/exthandlers.py index 403579dfe1..564128fa9a 100644 --- a/azurelinuxagent/ga/exthandlers.py +++ b/azurelinuxagent/ga/exthandlers.py @@ -27,7 +27,7 @@ import tempfile import time import zipfile -from distutils.version import LooseVersion # pylint: disable=deprecated-module +from azurelinuxagent.common.future import LooseVersion as 
Version from collections import defaultdict from functools import partial @@ -2250,7 +2250,7 @@ def get_resource_limits(self, extension_name, str_version): This is not effective after nov 30th. """ if ExtHandlerInstance.is_azuremonitorlinuxagent(extension_name): - if LooseVersion(str_version) < LooseVersion("1.12"): + if Version(str_version) < Version("1.12"): test_man = { "resourceLimits": { "services": [ diff --git a/azurelinuxagent/pa/deprovision/factory.py b/azurelinuxagent/pa/deprovision/factory.py index 24b2e5b897..321c982ca7 100644 --- a/azurelinuxagent/pa/deprovision/factory.py +++ b/azurelinuxagent/pa/deprovision/factory.py @@ -15,7 +15,7 @@ # Requires Python 2.6+ and Openssl 1.0+ # -from distutils.version import LooseVersion as Version # pylint: disable=no-name-in-module, import-error, deprecated-module +from azurelinuxagent.common.future import LooseVersion as Version from azurelinuxagent.common.version import DISTRO_NAME, DISTRO_VERSION, DISTRO_FULL_NAME from .arch import ArchDeprovisionHandler diff --git a/azurelinuxagent/pa/rdma/factory.py b/azurelinuxagent/pa/rdma/factory.py index 3e8d152c45..ac0dd7638a 100644 --- a/azurelinuxagent/pa/rdma/factory.py +++ b/azurelinuxagent/pa/rdma/factory.py @@ -15,7 +15,7 @@ # Requires Python 2.6+ and Openssl 1.0+ # -from distutils.version import LooseVersion as Version # pylint: disable=no-name-in-module, import-error, deprecated-module +from azurelinuxagent.common.future import LooseVersion as Version import azurelinuxagent.common.logger as logger from azurelinuxagent.pa.rdma.rdma import RDMAHandler from azurelinuxagent.common.version import DISTRO_FULL_NAME, DISTRO_VERSION diff --git a/azurelinuxagent/pa/rdma/suse.py b/azurelinuxagent/pa/rdma/suse.py index e4541b600b..4de6cf9538 100644 --- a/azurelinuxagent/pa/rdma/suse.py +++ b/azurelinuxagent/pa/rdma/suse.py @@ -24,7 +24,7 @@ from azurelinuxagent.pa.rdma.rdma import RDMAHandler from azurelinuxagent.common.version import DISTRO_VERSION -from distutils.version 
import LooseVersion as Version # pylint: disable=deprecated-module +from azurelinuxagent.common.future import LooseVersion as Version class SUSERDMAHandler(RDMAHandler): diff --git a/tests/common/utils/test_flexible_version.py b/tests/common/utils/test_flexible_version.py index 7463f4f2c3..89b827bb0f 100644 --- a/tests/common/utils/test_flexible_version.py +++ b/tests/common/utils/test_flexible_version.py @@ -1,9 +1,9 @@ -import random # pylint: disable=unused-import import re import unittest from azurelinuxagent.common.utils.flexible_version import FlexibleVersion + class TestFlexibleVersion(unittest.TestCase): def setUp(self): diff --git a/tests/common/utils/test_text_util.py b/tests/common/utils/test_text_util.py index 5029cfb921..8378ddfab6 100644 --- a/tests/common/utils/test_text_util.py +++ b/tests/common/utils/test_text_util.py @@ -18,7 +18,7 @@ import hashlib import os import unittest -from distutils.version import LooseVersion as Version # pylint: disable=no-name-in-module,import-error +from azurelinuxagent.common.future import LooseVersion as Version import azurelinuxagent.common.utils.textutil as textutil from azurelinuxagent.common.future import ustr From dfd912ec757a9a3a5810ede623f53013b2207c47 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Mon, 26 Feb 2024 15:29:13 -0800 Subject: [PATCH 167/240] Run pylint on Python 3.11 (#3067) * Run pylint on Python 3.11 --------- Co-authored-by: narrieta --- .github/workflows/ci_pr.yml | 54 ++++++++++++++++-------- azurelinuxagent/common/utils/textutil.py | 3 +- azurelinuxagent/ga/cgroupconfigurator.py | 9 ++-- ci/2.7.pylintrc | 42 ------------------ ci/3.6.pylintrc | 40 ------------------ ci/{3.10.pylintrc => pylintrc} | 1 - tests/common/test_event.py | 4 +- tests/common/test_singletonperthread.py | 8 ++-- tests/ga/test_multi_config_extension.py | 24 +++++------ tests/ga/test_remoteaccess_handler.py | 5 +-- tests/lib/tools.py | 4 +- 11 files changed, 66 insertions(+), 128 deletions(-) delete mode 100644 
ci/2.7.pylintrc delete mode 100644 ci/3.6.pylintrc rename ci/{3.10.pylintrc => pylintrc} (98%) diff --git a/.github/workflows/ci_pr.yml b/.github/workflows/ci_pr.yml index 153d5392fd..bf308624ab 100644 --- a/.github/workflows/ci_pr.yml +++ b/.github/workflows/ci_pr.yml @@ -9,7 +9,7 @@ on: jobs: test-python-2_6-and-3_4-versions: - + strategy: fail-fast: false matrix: @@ -87,32 +87,19 @@ jobs: matrix: include: - python-version: "3.5" - PYLINTOPTS: "--rcfile=ci/3.6.pylintrc --ignore=tests_e2e,makepkg.py" - - python-version: "3.6" - PYLINTOPTS: "--rcfile=ci/3.6.pylintrc --ignore=tests_e2e" - - python-version: "3.7" - PYLINTOPTS: "--rcfile=ci/3.6.pylintrc --ignore=tests_e2e" - - python-version: "3.8" - PYLINTOPTS: "--rcfile=ci/3.6.pylintrc --ignore=tests_e2e" - - python-version: "3.9" - PYLINTOPTS: "--rcfile=ci/3.6.pylintrc" additional-nose-opts: "--with-coverage --cover-erase --cover-inclusive --cover-branches --cover-package=azurelinuxagent" - - python-version: "3.10" - PYLINTOPTS: "--rcfile=ci/3.10.pylintrc --ignore=tests" + - python-version: "3.11" name: "Python ${{ matrix.python-version }} Unit Tests" runs-on: ubuntu-20.04 env: - PYLINTOPTS: ${{ matrix.PYLINTOPTS }} - PYLINTFILES: "azurelinuxagent setup.py makepkg.py tests tests_e2e" NOSEOPTS: "--with-timer ${{ matrix.additional-nose-opts }}" - PYTHON_VERSION: ${{ matrix.python-version }} steps: @@ -130,13 +117,46 @@ jobs: sudo env "PATH=$PATH" python -m pip install --upgrade pip sudo env "PATH=$PATH" pip install -r requirements.txt sudo env "PATH=$PATH" pip install -r test-requirements.txt + sudo env "PATH=$PATH" pip install --upgrade pylint - name: Run pylint run: | - pylint $PYLINTOPTS --jobs=0 $PYLINTFILES + # + # List of files/directories to be checked by pylint. + # The end-to-end tests run only on Python 3.9 and we lint them only on that version. 
+ # + PYLINT_FILES="azurelinuxagent setup.py makepkg.py tests" + if [[ "${{ matrix.python-version }}" == "3.9" ]]; then + PYLINT_FILES="$PYLINT_FILES tests_e2e" + fi + + # + # Command-line options for pylint. + # * "unused-private-member" is not implemented on 3.5 and will produce "E0012: Bad option value 'unused-private-member' (bad-option-value)" + # so we suppress "bad-option-value". + # * 3.9 will produce "no-member" for several properties/methods that are added to the mocks used by the unit tests (e.g + # "E1101: Instance of 'WireProtocol' has no 'aggregate_status' member") so we suppress that warning. + # * 'no-self-use' ("R0201: Method could be a function") was moved to an optional extension on 3.9 and is no longer used by default. It needs + # to be suppressed for previous versions (3.0-3.8), though. + # + PYLINT_OPTIONS="--rcfile=ci/pylintrc --jobs=0" + if [[ "${{ matrix.python-version }}" == "3.5" ]]; then + PYLINT_OPTIONS="$PYLINT_OPTIONS --disable=bad-option-value" + fi + if [[ "${{ matrix.python-version }}" == "3.9" ]]; then + PYLINT_OPTIONS="$PYLINT_OPTIONS --disable=no-member" + fi + if [[ "${{ matrix.python-version }}" =~ ^3\.[0-8]$ ]]; then + PYLINT_OPTIONS="$PYLINT_OPTIONS --disable=no-self-use" + fi + + echo "PYLINT_OPTIONS: $PYLINT_OPTIONS" + echo "PYLINT_FILES: $PYLINT_FILES" + + pylint $PYLINT_OPTIONS $PYLINT_FILES - name: Test with nosetests - if: matrix.python-version != '3.10' && (success() || (failure() && steps.install-dependencies.outcome == 'success')) + if: contains(fromJSON('["3.10", "3.11"]'), matrix.python-version) == false && (success() || (failure() && steps.install-dependencies.outcome == 'success')) run: | ./ci/nosetests.sh exit $? 
diff --git a/azurelinuxagent/common/utils/textutil.py b/azurelinuxagent/common/utils/textutil.py index 1ff7a7e912..9e857274db 100644 --- a/azurelinuxagent/common/utils/textutil.py +++ b/azurelinuxagent/common/utils/textutil.py @@ -17,7 +17,8 @@ # Requires Python 2.6+ and Openssl 1.0+ import base64 -import crypt +# W4901: Deprecated module 'crypt' (deprecated-module) +import crypt # pylint: disable=deprecated-module import hashlib import random import re diff --git a/azurelinuxagent/ga/cgroupconfigurator.py b/azurelinuxagent/ga/cgroupconfigurator.py index fa4dbe2027..09eb8b55ad 100644 --- a/azurelinuxagent/ga/cgroupconfigurator.py +++ b/azurelinuxagent/ga/cgroupconfigurator.py @@ -350,8 +350,9 @@ def __reload_systemd_config(): except Exception as exception: _log_cgroup_warning("daemon-reload failed (create azure slice): {0}", ustr(exception)) + # W0238: Unused private member `_Impl.__create_unit_file(path, contents)` (unused-private-member) @staticmethod - def __create_unit_file(path, contents): + def __create_unit_file(path, contents): # pylint: disable=unused-private-member parent, _ = os.path.split(path) if not os.path.exists(parent): fileutil.mkdir(parent, mode=0o755) @@ -359,8 +360,9 @@ def __create_unit_file(path, contents): fileutil.write_file(path, contents) _log_cgroup_info("{0} {1}", "Updated" if exists else "Created", path) + # W0238: Unused private member `_Impl.__cleanup_unit_file(path)` (unused-private-member) @staticmethod - def __cleanup_unit_file(path): + def __cleanup_unit_file(path): # pylint: disable=unused-private-member if os.path.exists(path): try: os.remove(path) @@ -522,8 +524,9 @@ def __reset_agent_cpu_quota(): _log_cgroup_info('CPUQuota: {0}', systemd.get_unit_property(systemd.get_agent_unit_name(), "CPUQuotaPerSecUSec")) + # W0238: Unused private member `_Impl.__try_set_cpu_quota(quota)` (unused-private-member) @staticmethod - def __try_set_cpu_quota(quota): + def __try_set_cpu_quota(quota): # pylint: disable=unused-private-member try: 
drop_in_file = os.path.join(systemd.get_agent_drop_in_path(), _DROP_IN_FILE_CPU_QUOTA) contents = _DROP_IN_FILE_CPU_QUOTA_CONTENTS_FORMAT.format(quota) diff --git a/ci/2.7.pylintrc b/ci/2.7.pylintrc deleted file mode 100644 index 0cba65ee9d..0000000000 --- a/ci/2.7.pylintrc +++ /dev/null @@ -1,42 +0,0 @@ -# python2.7 uses pylint 1.9.5, whose docs can be found here: http://pylint.pycqa.org/en/1.9/technical_reference/features.html#messages -# python3.4 uses pylint 2.3.1, whose docs can be found here: http://pylint.pycqa.org/en/pylint-2.3.1/technical_reference/features.html - -[MESSAGES CONTROL] - -disable=C, # (C) convention, for programming standard violation - consider-using-dict-comprehension, # (R1717): *Consider using a dictionary comprehension* - consider-using-in, # (R1714): *Consider merging these comparisons with "in" to %r* - consider-using-set-comprehension, # (R1718): *Consider using a set comprehension* - consider-using-with, # (R1732): *Emitted if a resource-allocating assignment or call may be replaced by a 'with' block* - duplicate-code, # (R0801): *Similar lines in %s files* - no-init, # (W0232): Class has no __init__ method - no-else-break, # (R1723): *Unnecessary "%s" after "break"* - no-else-continue, # (R1724): *Unnecessary "%s" after "continue"* - no-else-raise, # (R1720): *Unnecessary "%s" after "raise"* - no-else-return, # (R1705): *Unnecessary "%s" after "return"* - no-self-use, # (R0201): *Method could be a function* - protected-access, # (W0212): Access to a protected member of a client class - simplifiable-if-expression, # (R1719): *The if expression can be replaced with %s* - simplifiable-if-statement, # (R1703): *The if statement can be replaced with %s* - super-with-arguments, # (R1725): *Consider using Python 3 style super() without arguments* - too-few-public-methods, # (R0903): *Too few public methods (%s/%s)* - too-many-ancestors, # (R0901): *Too many ancestors (%s/%s)* - too-many-arguments, # (R0913): *Too many arguments (%s/%s)* - 
too-many-boolean-expressions, # (R0916): *Too many boolean expressions in if statement (%s/%s)* - too-many-branches, # (R0912): *Too many branches (%s/%s)* - too-many-instance-attributes, # (R0902): *Too many instance attributes (%s/%s)* - too-many-locals, # (R0914): *Too many local variables (%s/%s)* - too-many-nested-blocks, # (R1702): *Too many nested blocks (%s/%s)* - too-many-public-methods, # (R0904): *Too many public methods (%s/%s)* - too-many-return-statements, # (R0911): *Too many return statements (%s/%s)* - too-many-statements, # (R0915): *Too many statements (%s/%s)* - useless-object-inheritance, # (R0205): *Class %r inherits from object, can be safely removed from bases in python3* - useless-return, # (R1711): *Useless return at end of function or method* - bad-continuation, # Buggy, **REMOVED in pylint-2.6.0** - bad-option-value, # pylint does not recognize the error code/symbol (needed to supress breaking changes across pylint versions) - bad-whitespace, # Used when a wrong number of spaces is used around an operator, bracket or block opener. - broad-except, # Used when an except catches a too general exception, possibly burying unrelated errors. - deprecated-lambda, # Used when a lambda is the first argument to “map” or “filter”. It could be clearer as a list comprehension or generator expression. (2.7 only) - missing-docstring, # Used when a module, function, class or method has no docstring - old-style-class, # Used when a class is defined that does not inherit from another class and does not inherit explicitly from “object”. 
(2.7 only) - fixme, # Used when a warning note as FIXME or TODO is detected diff --git a/ci/3.6.pylintrc b/ci/3.6.pylintrc deleted file mode 100644 index fcbae93831..0000000000 --- a/ci/3.6.pylintrc +++ /dev/null @@ -1,40 +0,0 @@ -# python 3.6+ uses the latest pylint version, whose docs can be found here: http://pylint.pycqa.org/en/stable/technical_reference/features.html - -[MESSAGES CONTROL] - -disable=C, # (C) convention, for programming standard violation - broad-except, # (W0703): *Catching too general exception %s* - consider-using-dict-comprehension, # (R1717): *Consider using a dictionary comprehension* - consider-using-in, # (R1714): *Consider merging these comparisons with "in" to %r* - consider-using-set-comprehension, # (R1718): *Consider using a set comprehension* - consider-using-with, # (R1732): *Emitted if a resource-allocating assignment or call may be replaced by a 'with' block* - duplicate-code, # (R0801): *Similar lines in %s files* - fixme, # Used when a warning note as FIXME or TODO is detected - no-else-break, # (R1723): *Unnecessary "%s" after "break"* - no-else-continue, # (R1724): *Unnecessary "%s" after "continue"* - no-else-raise, # (R1720): *Unnecessary "%s" after "raise"* - no-else-return, # (R1705): *Unnecessary "%s" after "return"* - no-init, # (W0232): Class has no __init__ method - no-self-use, # (R0201): *Method could be a function* - protected-access, # (W0212): Access to a protected member of a client class - raise-missing-from, # (W0707): *Consider explicitly re-raising using the 'from' keyword* - redundant-u-string-prefix, # The u prefix for strings is no longer necessary in Python >=3.0 - simplifiable-if-expression, # (R1719): *The if expression can be replaced with %s* - simplifiable-if-statement, # (R1703): *The if statement can be replaced with %s* - super-with-arguments, # (R1725): *Consider using Python 3 style super() without arguments* - too-few-public-methods, # (R0903): *Too few public methods (%s/%s)* - 
too-many-ancestors, # (R0901): *Too many ancestors (%s/%s)* - too-many-arguments, # (R0913): *Too many arguments (%s/%s)* - too-many-boolean-expressions, # (R0916): *Too many boolean expressions in if statement (%s/%s)* - too-many-branches, # (R0912): *Too many branches (%s/%s)* - too-many-instance-attributes, # (R0902): *Too many instance attributes (%s/%s)* - too-many-locals, # (R0914): *Too many local variables (%s/%s)* - too-many-nested-blocks, # (R1702): *Too many nested blocks (%s/%s)* - too-many-public-methods, # (R0904): *Too many public methods (%s/%s)* - too-many-return-statements, # (R0911): *Too many return statements (%s/%s)* - too-many-statements, # (R0915): *Too many statements (%s/%s)* - unspecified-encoding, # (W1514): Using open without explicitly specifying an encoding - use-a-generator, # (R1729): *Use a generator instead '%s(%s)'* - useless-object-inheritance, # (R0205): *Class %r inherits from object, can be safely removed from bases in python3* - useless-return, # (R1711): *Useless return at end of function or method* - diff --git a/ci/3.10.pylintrc b/ci/pylintrc similarity index 98% rename from ci/3.10.pylintrc rename to ci/pylintrc index 43b8172c28..7625abad43 100644 --- a/ci/3.10.pylintrc +++ b/ci/pylintrc @@ -16,7 +16,6 @@ disable=C, # (C) convention, for programming standard violation no-else-continue, # R1724: *Unnecessary "%s" after "continue"* no-else-raise, # R1720: *Unnecessary "%s" after "raise"* no-else-return, # R1705: *Unnecessary "%s" after "return"* - no-self-use, # R0201: Method could be a function protected-access, # W0212: Access to a protected member of a client class raise-missing-from, # W0707: *Consider explicitly re-raising using the 'from' keyword* redundant-u-string-prefix, # The u prefix for strings is no longer necessary in Python >=3.0 diff --git a/tests/common/test_event.py b/tests/common/test_event.py index 435ac2e80d..a760c7f9f9 100644 --- a/tests/common/test_event.py +++ b/tests/common/test_event.py @@ -60,7 
+60,7 @@ def setUp(self): self.event_dir = os.path.join(self.tmp_dir, EVENTS_DIRECTORY) EventLoggerTools.initialize_event_logger(self.event_dir) - threading.current_thread().setName("TestEventThread") + threading.current_thread().name = "TestEventThread" osutil = get_osutil() self.expected_common_parameters = { @@ -70,7 +70,7 @@ def setUp(self): CommonTelemetryEventSchema.ContainerId: AgentGlobals.get_container_id(), CommonTelemetryEventSchema.EventTid: threading.current_thread().ident, CommonTelemetryEventSchema.EventPid: os.getpid(), - CommonTelemetryEventSchema.TaskName: threading.current_thread().getName(), + CommonTelemetryEventSchema.TaskName: threading.current_thread().name, CommonTelemetryEventSchema.KeywordName: json.dumps({"CpuArchitecture": platform.machine()}), # common parameters computed from the OS platform CommonTelemetryEventSchema.OSVersion: EventLoggerTools.get_expected_os_version(), diff --git a/tests/common/test_singletonperthread.py b/tests/common/test_singletonperthread.py index 7b1972635e..80dedcb7a3 100644 --- a/tests/common/test_singletonperthread.py +++ b/tests/common/test_singletonperthread.py @@ -1,6 +1,6 @@ import uuid from multiprocessing import Queue -from threading import Thread, currentThread +from threading import Thread, current_thread from azurelinuxagent.common.singletonperthread import SingletonPerThread from tests.lib.tools import AgentTestCase, clear_singleton_instances @@ -32,7 +32,7 @@ class TestClassToTestSingletonPerThread(SingletonPerThread): def __init__(self): # Set the name of the object to the current thread name - self.name = currentThread().getName() + self.name = current_thread().name # Unique identifier for a class object self.uuid = str(uuid.uuid4()) @@ -53,8 +53,8 @@ def _setup_multithread_and_execute(self, func1, args1, func2, args2, t1_name=Non t1 = Thread(target=func1, args=args1) t2 = Thread(target=func2, args=args2) - t1.setName(t1_name if t1_name else self.THREAD_NAME_1) - t2.setName(t2_name if t2_name 
else self.THREAD_NAME_2) + t1.name = t1_name if t1_name else self.THREAD_NAME_1 + t2.name = t2_name if t2_name else self.THREAD_NAME_2 t1.start() t2.start() t1.join() diff --git a/tests/ga/test_multi_config_extension.py b/tests/ga/test_multi_config_extension.py index 0fe8dea5a3..127535a54a 100644 --- a/tests/ga/test_multi_config_extension.py +++ b/tests/ga/test_multi_config_extension.py @@ -41,7 +41,7 @@ def __init__(self, name, version, state="enabled"): self.version = version self.state = state self.is_invalid_setting = False - self.settings = dict() + self.settings = {} class _TestExtensionObject: def __init__(self, name, seq_no, dependency_level="0", state="enabled"): @@ -94,12 +94,11 @@ def _get_mock_expected_handler_data(self, rc_extensions, vmaccess_extensions, ge def test_it_should_parse_multi_config_settings_properly(self): self.test_data['ext_conf'] = os.path.join(self._MULTI_CONFIG_TEST_DATA, "ext_conf_with_multi_config.xml") - rc_extensions = dict() - rc_extensions["firstRunCommand"] = self._TestExtensionObject(name="firstRunCommand", seq_no=2) - rc_extensions["secondRunCommand"] = self._TestExtensionObject(name="secondRunCommand", seq_no=2, - dependency_level="3") - rc_extensions["thirdRunCommand"] = self._TestExtensionObject(name="thirdRunCommand", seq_no=1, - dependency_level="4") + rc_extensions = { + "firstRunCommand": self._TestExtensionObject(name="firstRunCommand", seq_no=2), + "secondRunCommand": self._TestExtensionObject(name="secondRunCommand", seq_no=2, dependency_level="3"), + "thirdRunCommand": self._TestExtensionObject(name="thirdRunCommand", seq_no=1, dependency_level="4") + } vmaccess_extensions = { "Microsoft.Compute.VMAccessAgent": self._TestExtensionObject(name="Microsoft.Compute.VMAccessAgent", @@ -115,12 +114,11 @@ def test_it_should_parse_multi_config_with_disable_state_properly(self): self.test_data['ext_conf'] = os.path.join(self._MULTI_CONFIG_TEST_DATA, "ext_conf_with_disabled_multi_config.xml") - rc_extensions = dict() - 
rc_extensions["firstRunCommand"] = self._TestExtensionObject(name="firstRunCommand", seq_no=3) - rc_extensions["secondRunCommand"] = self._TestExtensionObject(name="secondRunCommand", seq_no=3, - dependency_level="1") - rc_extensions["thirdRunCommand"] = self._TestExtensionObject(name="thirdRunCommand", seq_no=1, - dependency_level="4", state="disabled") + rc_extensions = { + "firstRunCommand": self._TestExtensionObject(name="firstRunCommand", seq_no=3), + "secondRunCommand": self._TestExtensionObject(name="secondRunCommand", seq_no=3, dependency_level="1"), + "thirdRunCommand": self._TestExtensionObject(name="thirdRunCommand", seq_no=1, dependency_level="4", state="disabled") + } vmaccess_extensions = { "Microsoft.Compute.VMAccessAgent": self._TestExtensionObject(name="Microsoft.Compute.VMAccessAgent", diff --git a/tests/ga/test_remoteaccess_handler.py b/tests/ga/test_remoteaccess_handler.py index d4f1579260..d555c55a88 100644 --- a/tests/ga/test_remoteaccess_handler.py +++ b/tests/ga/test_remoteaccess_handler.py @@ -75,15 +75,14 @@ def mock_add_event(name, op, is_success, version, message): class TestRemoteAccessHandler(AgentTestCase): - eventing_data = [()] + eventing_data = () def setUp(self): super(TestRemoteAccessHandler, self).setUp() # Since ProtocolUtil is a singleton per thread, we need to clear it to ensure that the test cases do not # reuse a previous state clear_singleton_instances(ProtocolUtil) - for data in TestRemoteAccessHandler.eventing_data: - del data + TestRemoteAccessHandler.eventing_data = () # add_user tests @patch('azurelinuxagent.common.utils.cryptutil.CryptUtil.decrypt_secret', return_value="]aPPEv}uNg1FPnl?") diff --git a/tests/lib/tools.py b/tests/lib/tools.py index 11bd801917..194850ee22 100644 --- a/tests/lib/tools.py +++ b/tests/lib/tools.py @@ -29,7 +29,7 @@ import time import unittest from functools import wraps -from threading import currentThread +from threading import current_thread import azurelinuxagent.common.conf as conf 
import azurelinuxagent.common.event as event @@ -543,6 +543,6 @@ def wrapper(self, *args, **kwargs): def clear_singleton_instances(cls): # Adding this lock to avoid any race conditions with cls._lock: - obj_name = "%s__%s" % (cls.__name__, currentThread().getName()) # Object Name = className__threadName + obj_name = "%s__%s" % (cls.__name__, current_thread().name) # Object Name = className__threadName if obj_name in cls._instances: del cls._instances[obj_name] From edd827102086e3a7096bdb8527c16cfbed2baad7 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Tue, 27 Feb 2024 09:59:19 -0800 Subject: [PATCH 168/240] Fix pylint warnings (#3069) * Fix pylint warnings * Update .github/workflows/ci_pr.yml Co-authored-by: maddieford <93676569+maddieford@users.noreply.github.com> --------- Co-authored-by: narrieta Co-authored-by: maddieford <93676569+maddieford@users.noreply.github.com> --- .github/workflows/ci_pr.yml | 6 +++--- ci/pylintrc | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci_pr.yml b/.github/workflows/ci_pr.yml index bf308624ab..0c6b816846 100644 --- a/.github/workflows/ci_pr.yml +++ b/.github/workflows/ci_pr.yml @@ -136,8 +136,8 @@ jobs: # so we suppress "bad-option-value". # * 3.9 will produce "no-member" for several properties/methods that are added to the mocks used by the unit tests (e.g # "E1101: Instance of 'WireProtocol' has no 'aggregate_status' member") so we suppress that warning. - # * 'no-self-use' ("R0201: Method could be a function") was moved to an optional extension on 3.9 and is no longer used by default. It needs - # to be suppressed for previous versions (3.0-3.8), though. + # * 'no-self-use' ("R0201: Method could be a function") was moved to an optional extension on 3.8 and is no longer used by default. It needs + # to be suppressed for previous versions (3.0-3.7), though. 
# PYLINT_OPTIONS="--rcfile=ci/pylintrc --jobs=0" if [[ "${{ matrix.python-version }}" == "3.5" ]]; then @@ -146,7 +146,7 @@ jobs: if [[ "${{ matrix.python-version }}" == "3.9" ]]; then PYLINT_OPTIONS="$PYLINT_OPTIONS --disable=no-member" fi - if [[ "${{ matrix.python-version }}" =~ ^3\.[0-8]$ ]]; then + if [[ "${{ matrix.python-version }}" =~ ^3\.[0-7]$ ]]; then PYLINT_OPTIONS="$PYLINT_OPTIONS --disable=no-self-use" fi diff --git a/ci/pylintrc b/ci/pylintrc index 7625abad43..f57949ab72 100644 --- a/ci/pylintrc +++ b/ci/pylintrc @@ -6,6 +6,8 @@ disable=C, # (C) convention, for programming standard violation consider-using-dict-comprehension, # R1717: *Consider using a dictionary comprehension* consider-using-from-import, # R0402: Use 'from foo import bar' instead consider-using-in, # R1714: *Consider merging these comparisons with "in" to %r* + consider-using-max-builtin, # R1731: Consider using 'a = max(a, b)' instead of unnecessary if block + consider-using-min-builtin, # R1730: Consider using 'a = min(a, b)' instead of unnecessary if block consider-using-set-comprehension, # R1718: *Consider using a set comprehension* consider-using-with, # R1732: *Emitted if a resource-allocating assignment or call may be replaced by a 'with' block* duplicate-code, # R0801: *Similar lines in %s files* @@ -35,5 +37,6 @@ disable=C, # (C) convention, for programming standard violation too-many-statements, # R0915: *Too many statements %s/%s)* unspecified-encoding, # W1514: Using open without explicitly specifying an encoding use-a-generator, # R1729: *Use a generator instead '%s%s)'* + use-yield-from, # R1737: Use 'yield from' directly instead of yielding each element one by one useless-object-inheritance, # R0205: *Class %r inherits from object, can be safely removed from bases in python3* useless-return, # R1711: *Useless return at end of function or method* From 5759501cb20b7c0b032a881fe2579d54f055f109 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam 
<84482346+nagworld9@users.noreply.github.com> Date: Tue, 27 Feb 2024 10:05:20 -0800 Subject: [PATCH 169/240] reset uphold setting for agent service in flatcar distro (#3066) * reset uphold settings for flatcar images * updated comment * stop the rebbot service * address comments --- tests_e2e/orchestrator/scripts/install-agent | 19 +++++++++++++++++++ tests_e2e/test_suites/agent_firewall.yml | 7 +------ tests_e2e/test_suites/agent_update.yml | 4 ++-- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/tests_e2e/orchestrator/scripts/install-agent b/tests_e2e/orchestrator/scripts/install-agent index 61181b44d3..d28164f6d3 100755 --- a/tests_e2e/orchestrator/scripts/install-agent +++ b/tests_e2e/orchestrator/scripts/install-agent @@ -140,6 +140,25 @@ if [[ $(uname -a) == *"flatcar"* ]]; then if [[ ! -f /usr/share/oem/waagent.conf ]]; then ln -s "$waagent_conf_path" /usr/share/oem/waagent.conf fi + + # New flatcar images set the uphold property for agent service that is causing automatic restart on stop cmd + # [Upholds= dependency on it has a continuous effect, constantly restarting the unit if necessary] + # Resetting the uphold property as workaround for now + uphold_target=$(systemctl show waagent --property=UpheldBy) + # example output: UpheldBy=multi-user.target + if [[ $uphold_target == *".target"* ]]; then + target_name="${uphold_target#*=}" + if [[ ! -d /etc/systemd/system/$target_name.d ]]; then + mkdir -p /etc/systemd/system/$target_name.d + fi + echo -e "[Unit]\nUpholds=" > /etc/systemd/system/$target_name.d/10-waagent-sysext.conf + systemctl daemon-reload + fi + # Flatcar images does automatic reboot without user input, so turning it off + # Broadcast message from locksmithd at 2024-02-23 19:48:55.478412272 +0000 UTC m= + # System reboot in 5 minutes! 
+ echo "REBOOT_STRATEGY=off" > /etc/flatcar/update.conf + systemctl restart locksmithd fi # diff --git a/tests_e2e/test_suites/agent_firewall.yml b/tests_e2e/test_suites/agent_firewall.yml index 4697265709..0e095ba39e 100644 --- a/tests_e2e/test_suites/agent_firewall.yml +++ b/tests_e2e/test_suites/agent_firewall.yml @@ -12,9 +12,4 @@ tests: images: - "endorsed" - "endorsed-arm64" -owns_vm: true # This vm cannot be shared with other tests because it modifies the firewall rules and agent status. -# Systemctl stop functionality changed and service doing restart on stop cmd in flatcar images. Thus breaking our tests. -# TODO: Enable once it is fixed -skip_on_images: - - "flatcar" - - "flatcar_arm64" \ No newline at end of file +owns_vm: true # This vm cannot be shared with other tests because it modifies the firewall rules and agent status. \ No newline at end of file diff --git a/tests_e2e/test_suites/agent_update.yml b/tests_e2e/test_suites/agent_update.yml index 5c8736a4ea..e53f2f9211 100644 --- a/tests_e2e/test_suites/agent_update.yml +++ b/tests_e2e/test_suites/agent_update.yml @@ -13,8 +13,8 @@ owns_vm: true skip_on_clouds: - "AzureChinaCloud" - "AzureUSGovernment" -# Systemctl stop functionality changed and service doing restart on stop cmd in flatcar images. Thus breaking our tests. -# TODO: Enable once it is fixed +# Since Flatcar read-only filesystem, we can't edit the version file. This test relies on the version to be updated in version file. 
+# TODO: Enable once we find workaround for this skip_on_images: - "flatcar" - "flatcar_arm64" \ No newline at end of file From 5af66211331c34c7ade04e8bad5e921409769b29 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 27 Feb 2024 11:21:30 -0800 Subject: [PATCH 170/240] retry on quota reset check (#3068) --- .../tests/scripts/ext_cgroups-check_cgroups_extensions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests_e2e/tests/scripts/ext_cgroups-check_cgroups_extensions.py b/tests_e2e/tests/scripts/ext_cgroups-check_cgroups_extensions.py index 48bd3f902e..8d97da3f79 100755 --- a/tests_e2e/tests/scripts/ext_cgroups-check_cgroups_extensions.py +++ b/tests_e2e/tests/scripts/ext_cgroups-check_cgroups_extensions.py @@ -29,6 +29,7 @@ print_cgroups from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.remote_test import run_remote_test +from tests_e2e.tests.lib.retry import retry_if_false def verify_custom_script_cgroup_assigned_correctly(): @@ -218,7 +219,7 @@ def main(): run_remote_test(main) except Exception as e: # It is possible that agent cgroup can be disabled due to UNKNOWN process or throttled before we run this check, in that case, we should ignore the validation - if check_agent_quota_disabled() and check_cgroup_disabled_with_unknown_process(): + if check_cgroup_disabled_with_unknown_process() and retry_if_false(check_agent_quota_disabled()): log.info("Cgroup is disabled due to UNKNOWN process, ignoring ext cgroups validations") else: raise From 6de4652b8d2bb0f261e36f78af44f31b524ce3e5 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Tue, 27 Feb 2024 14:58:44 -0800 Subject: [PATCH 171/240] Use legacycrypt instead of crypt on Python >= 3.13 (#3070) * Use legacycrypt instead of crypt on Python >= 3.13 * remove ModuleNotFound --------- Co-authored-by: narrieta --- azurelinuxagent/common/osutil/default.py | 27 ++++++++++++++++--- 
azurelinuxagent/common/osutil/freebsd.py | 2 +- azurelinuxagent/common/osutil/gaia.py | 3 +-- azurelinuxagent/common/utils/textutil.py | 14 ---------- requirements.txt | 3 ++- setup.py | 17 +++++++----- tests/common/osutil/test_default.py | 9 +++++++ .../{utils => osutil}/test_passwords.txt | 0 tests/common/utils/test_text_util.py | 10 ------- 9 files changed, 47 insertions(+), 38 deletions(-) rename tests/common/{utils => osutil}/test_passwords.txt (100%) diff --git a/azurelinuxagent/common/osutil/default.py b/azurelinuxagent/common/osutil/default.py index c52146ca7c..26114a2827 100644 --- a/azurelinuxagent/common/osutil/default.py +++ b/azurelinuxagent/common/osutil/default.py @@ -16,6 +16,7 @@ # Requires Python 2.6+ and Openssl 1.0+ # +import array import base64 import datetime import errno @@ -26,15 +27,26 @@ import os import platform import pwd +import random import re import shutil import socket +import string import struct import sys import time from pwd import getpwall -import array +from azurelinuxagent.common.exception import OSUtilError +# 'crypt' was removed in Python 3.13; use legacycrypt instead +if sys.version_info[0] == 3 and sys.version_info[1] >= 13 or sys.version_info[0] > 3: + try: + from legacycrypt import crypt + except ImportError: + def crypt(password, salt): + raise OSUtilError("Please install the legacycrypt Python module to use this feature.") +else: + from crypt import crypt # pylint: disable=deprecated-module from azurelinuxagent.common import conf from azurelinuxagent.common import logger @@ -42,7 +54,6 @@ from azurelinuxagent.common.utils import shellutil from azurelinuxagent.common.utils import textutil -from azurelinuxagent.common.exception import OSUtilError from azurelinuxagent.common.future import ustr, array_to_bytes from azurelinuxagent.common.utils.cryptutil import CryptUtil from azurelinuxagent.common.utils.flexible_version import FlexibleVersion @@ -433,11 +444,21 @@ def chpasswd(self, username, password, crypt_id=6, 
salt_len=10): if self.is_sys_user(username): raise OSUtilError(("User {0} is a system user, " "will not set password.").format(username)) - passwd_hash = textutil.gen_password_hash(password, crypt_id, salt_len) + passwd_hash = DefaultOSUtil.gen_password_hash(password, crypt_id, salt_len) self._run_command_raising_OSUtilError(["usermod", "-p", passwd_hash, username], err_msg="Failed to set password for {0}".format(username)) + @staticmethod + def gen_password_hash(password, crypt_id, salt_len): + collection = string.ascii_letters + string.digits + salt = ''.join(random.choice(collection) for _ in range(salt_len)) + salt = "${0}${1}".format(crypt_id, salt) + if sys.version_info[0] == 2: + # if python 2.*, encode to type 'str' to prevent Unicode Encode Error from crypt.crypt + password = password.encode('utf-8') + return crypt(password, salt) + def get_users(self): return getpwall() diff --git a/azurelinuxagent/common/osutil/freebsd.py b/azurelinuxagent/common/osutil/freebsd.py index ea3a83e1a6..d2adc00275 100644 --- a/azurelinuxagent/common/osutil/freebsd.py +++ b/azurelinuxagent/common/osutil/freebsd.py @@ -77,7 +77,7 @@ def chpasswd(self, username, password, crypt_id=6, salt_len=10): if self.is_sys_user(username): raise OSUtilError(("User {0} is a system user, " "will not set password.").format(username)) - passwd_hash = textutil.gen_password_hash(password, crypt_id, salt_len) + passwd_hash = DefaultOSUtil.gen_password_hash(password, crypt_id, salt_len) self._run_command_raising_OSUtilError(['pw', 'usermod', username, '-H', '0'], cmd_input=passwd_hash, err_msg="Failed to set password for {0}".format(username)) diff --git a/azurelinuxagent/common/osutil/gaia.py b/azurelinuxagent/common/osutil/gaia.py index 8a0f04b0d0..e11f482db5 100644 --- a/azurelinuxagent/common/osutil/gaia.py +++ b/azurelinuxagent/common/osutil/gaia.py @@ -29,7 +29,6 @@ from azurelinuxagent.common.utils.cryptutil import CryptUtil import azurelinuxagent.common.utils.fileutil as fileutil import 
azurelinuxagent.common.utils.shellutil as shellutil -import azurelinuxagent.common.utils.textutil as textutil class GaiaOSUtil(DefaultOSUtil): @@ -64,7 +63,7 @@ def useradd(self, username, expiration=None, comment=None): def chpasswd(self, username, password, crypt_id=6, salt_len=10): logger.info('chpasswd') - passwd_hash = textutil.gen_password_hash(password, crypt_id, salt_len) + passwd_hash = DefaultOSUtil.gen_password_hash(password, crypt_id, salt_len) ret, out = self._run_clish( 'set user admin password-hash ' + passwd_hash) if ret != 0: diff --git a/azurelinuxagent/common/utils/textutil.py b/azurelinuxagent/common/utils/textutil.py index 9e857274db..4a0f9a7541 100644 --- a/azurelinuxagent/common/utils/textutil.py +++ b/azurelinuxagent/common/utils/textutil.py @@ -17,12 +17,8 @@ # Requires Python 2.6+ and Openssl 1.0+ import base64 -# W4901: Deprecated module 'crypt' (deprecated-module) -import crypt # pylint: disable=deprecated-module import hashlib -import random import re -import string import struct import sys import traceback @@ -288,16 +284,6 @@ def remove_bom(c): return c -def gen_password_hash(password, crypt_id, salt_len): - collection = string.ascii_letters + string.digits - salt = ''.join(random.choice(collection) for _ in range(salt_len)) - salt = "${0}${1}".format(crypt_id, salt) - if sys.version_info[0] == 2: - # if python 2.*, encode to type 'str' to prevent Unicode Encode Error from crypt.crypt - password = password.encode('utf-8') - return crypt.crypt(password, salt) - - def get_bytes_from_pem(pem_str): base64_bytes = "" for line in pem_str.split('\n'): diff --git a/requirements.txt b/requirements.txt index b0b7c87457..ab6958a732 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ distro; python_version >= '3.8' -pyasn1 \ No newline at end of file +pyasn1 +legacycrypt; python_version >= '3.13' diff --git a/setup.py b/setup.py index 6b54d09e76..2d51fae8c2 100755 --- a/setup.py +++ b/setup.py @@ -314,13 +314,16 @@ def run(self): 
# Note to packagers and users from source. -# In version 3.5 of Python distribution information handling in the platform -# module was deprecated. Depending on the Linux distribution the -# implementation may be broken prior to Python 3.7 wher the functionality -# will be removed from Python 3 -requires = [] # pylint: disable=invalid-name -if sys.version_info[0] >= 3 and sys.version_info[1] >= 7: - requires = ['distro'] # pylint: disable=invalid-name +# * In version 3.5 of Python distribution information handling in the platform +# module was deprecated. Depending on the Linux distribution the +# implementation may be broken prior to Python 3.8 where the functionality +# will be removed from Python 3. +# * In version 3.13 of Python, the crypt module was removed and legacycrypt is +# required instead. +requires = [ + "distro;python_version>='3.8'", + "legacycrypt;python_version>='3.13'", +] modules = [] # pylint: disable=invalid-name diff --git a/tests/common/osutil/test_default.py b/tests/common/osutil/test_default.py index 68bd282d70..79a802c7b1 100644 --- a/tests/common/osutil/test_default.py +++ b/tests/common/osutil/test_default.py @@ -1111,6 +1111,15 @@ def test_get_hostname_record_should_initialize_the_host_name_using_cloud_init_in self.assertEqual(expected, actual, "get_hostname_record returned an incorrect hostname") self.assertEqual(expected, self.__get_published_hostname_contents(), "get_hostname_record returned an incorrect hostname") + def test_get_password_hash(self): + with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'test_passwords.txt'), 'rb') as in_file: + for data in in_file: + # Remove bom on bytes data before it is converted into string. 
+ data = textutil.remove_bom(data) + data = ustr(data, encoding='utf-8') + password_hash = osutil.DefaultOSUtil.gen_password_hash(data, 6, 10) + self.assertNotEqual(None, password_hash) + if __name__ == '__main__': unittest.main() diff --git a/tests/common/utils/test_passwords.txt b/tests/common/osutil/test_passwords.txt similarity index 100% rename from tests/common/utils/test_passwords.txt rename to tests/common/osutil/test_passwords.txt diff --git a/tests/common/utils/test_text_util.py b/tests/common/utils/test_text_util.py index 8378ddfab6..a9f8a9df9f 100644 --- a/tests/common/utils/test_text_util.py +++ b/tests/common/utils/test_text_util.py @@ -16,7 +16,6 @@ # import hashlib -import os import unittest from azurelinuxagent.common.future import LooseVersion as Version @@ -26,15 +25,6 @@ class TestTextUtil(AgentTestCase): - def test_get_password_hash(self): - with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'test_passwords.txt'), 'rb') as in_file: - for data in in_file: - # Remove bom on bytes data before it is converted into string. 
- data = textutil.remove_bom(data) - data = ustr(data, encoding='utf-8') - password_hash = textutil.gen_password_hash(data, 6, 10) - self.assertNotEqual(None, password_hash) - def test_replace_non_ascii(self): data = ustr(b'\xef\xbb\xbfhehe', encoding='utf-8') self.assertEqual('hehe', textutil.replace_non_ascii(data)) From 8e02671547095fc37f988adbbb4cca3d1cdfef68 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 28 Feb 2024 12:31:04 -0800 Subject: [PATCH 172/240] Skip network unreachable error in publish hostname test (#3071) Co-authored-by: narrieta --- tests_e2e/tests/publish_hostname/publish_hostname.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests_e2e/tests/publish_hostname/publish_hostname.py b/tests_e2e/tests/publish_hostname/publish_hostname.py index 6ec97ae2e7..c25c36bf42 100644 --- a/tests_e2e/tests/publish_hostname/publish_hostname.py +++ b/tests_e2e/tests/publish_hostname/publish_hostname.py @@ -210,10 +210,10 @@ def get_ignore_error_rules(self) -> List[Dict[str, Any]]: # # We may see temporary network unreachable warnings since we are bringing the network interface down # 2024-02-16T09:27:14.114569Z WARNING MonitorHandler ExtHandler Error in SendHostPluginHeartbeat: [HttpError] [HTTP Failed] GET http://168.63.129.16:32526/health -- IOError [Errno 101] Network is unreachable -- 1 attempts made --- [NOTE: Will not log the same error for the next hour] + # 2024-02-28T05:37:55.562065Z ERROR ExtHandler ExtHandler Error fetching the goal state: [ProtocolError] GET vmSettings [correlation ID: 28de1093-ecb5-4515-ba8e-2ed0c7778e34 eTag: 4648629460326038775]: Request failed: [Errno 101] Network is unreachable # { - 'message': r"SendHostPluginHeartbeat:.*GET http://168.63.129.16:32526/health -- IOError [Errno 101] Network is unreachable", - 'if': lambda r: r.level == "WARNING" + 'message': r"GET (http://168.63.129.16:32526/health|vmSettings).*\[Errno 101\] Network is unreachable", } ] return ignore_rules From 
cc6c5efecd48cee2494ade55ac2d034a7732b1f6 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 28 Feb 2024 12:36:25 -0800 Subject: [PATCH 173/240] Fix osutil/default route_add to pass string array. (#3072) Co-authored-by: narrieta --- azurelinuxagent/common/osutil/default.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/common/osutil/default.py b/azurelinuxagent/common/osutil/default.py index 26114a2827..dac85d8c18 100644 --- a/azurelinuxagent/common/osutil/default.py +++ b/azurelinuxagent/common/osutil/default.py @@ -1159,7 +1159,7 @@ def route_add(self, net, mask, gateway): # pylint: disable=W0613 Add specified route """ try: - cmd = ["ip", "route", "add", net, "via", gateway] + cmd = ["ip", "route", "add", str(net), "via", gateway] return shellutil.run_command(cmd) except CommandError: return "" From 8ac7e66b2773ba16a33abee41262ad0144c80cd3 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 28 Feb 2024 18:55:49 -0800 Subject: [PATCH 174/240] Fix argument to GoalState.__init__ (#3073) Co-authored-by: narrieta --- azurelinuxagent/daemon/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/daemon/main.py b/azurelinuxagent/daemon/main.py index 342daf4ac9..3a3923a8ff 100644 --- a/azurelinuxagent/daemon/main.py +++ b/azurelinuxagent/daemon/main.py @@ -160,7 +160,7 @@ def daemon(self, child_args=None): # current values. 
protocol = self.protocol_util.get_protocol() - goal_state = GoalState(protocol, goal_state_properties=GoalStateProperties.SharedConfig) + goal_state = GoalState(protocol.client, goal_state_properties=GoalStateProperties.SharedConfig) setup_rdma_device(nd_version, goal_state.shared_conf) except Exception as e: From 6697ea8669723b19b3ad1bd35b0ada9ef280b0a3 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Thu, 29 Feb 2024 15:20:55 -0800 Subject: [PATCH 175/240] Ignore network unreachable error in hostname test (#3074) * Ignore network unreachable error in hostname test --------- Co-authored-by: narrieta --- .github/workflows/ci_pr.yml | 3 ++- tests/ga/test_cgroupconfigurator.py | 6 +++--- tests_e2e/tests/publish_hostname/publish_hostname.py | 4 +++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci_pr.yml b/.github/workflows/ci_pr.yml index 0c6b816846..2e1ac5ab47 100644 --- a/.github/workflows/ci_pr.yml +++ b/.github/workflows/ci_pr.yml @@ -136,6 +136,7 @@ jobs: # so we suppress "bad-option-value". # * 3.9 will produce "no-member" for several properties/methods that are added to the mocks used by the unit tests (e.g # "E1101: Instance of 'WireProtocol' has no 'aggregate_status' member") so we suppress that warning. + # * On 3.9 pylint crashes when parsing azurelinuxagent/daemon/main.py (see https://github.com/pylint-dev/pylint/issues/9473), so we ignore it. # * 'no-self-use' ("R0201: Method could be a function") was moved to an optional extension on 3.8 and is no longer used by default. It needs # to be suppressed for previous versions (3.0-3.7), though. 
# @@ -144,7 +145,7 @@ jobs: PYLINT_OPTIONS="$PYLINT_OPTIONS --disable=bad-option-value" fi if [[ "${{ matrix.python-version }}" == "3.9" ]]; then - PYLINT_OPTIONS="$PYLINT_OPTIONS --disable=no-member" + PYLINT_OPTIONS="$PYLINT_OPTIONS --disable=no-member --ignore=main.py" fi if [[ "${{ matrix.python-version }}" =~ ^3\.[0-7]$ ]]; then PYLINT_OPTIONS="$PYLINT_OPTIONS --disable=no-self-use" diff --git a/tests/ga/test_cgroupconfigurator.py b/tests/ga/test_cgroupconfigurator.py index b5a9e09941..0d46bae8a6 100644 --- a/tests/ga/test_cgroupconfigurator.py +++ b/tests/ga/test_cgroupconfigurator.py @@ -644,11 +644,11 @@ def mock_popen(command, *args, **kwargs): # Popen can accept both strings and lists, handle both here. if isinstance(command, str): - systemd_command = command.replace('systemd-run', 'systemd-run syntax_error') + command = command.replace('systemd-run', 'systemd-run syntax_error') elif isinstance(command, list) and command[0] == 'systemd-run': - systemd_command = ['systemd-run', 'syntax_error'] + command[1:] + command = ['systemd-run', 'syntax_error'] + command[1:] - return original_popen(systemd_command, *args, **kwargs) + return original_popen(command, *args, **kwargs) expected_output = "[stdout]\n{0}\n\n\n[stderr]\n" diff --git a/tests_e2e/tests/publish_hostname/publish_hostname.py b/tests_e2e/tests/publish_hostname/publish_hostname.py index c25c36bf42..19f7b10b46 100644 --- a/tests_e2e/tests/publish_hostname/publish_hostname.py +++ b/tests_e2e/tests/publish_hostname/publish_hostname.py @@ -209,11 +209,13 @@ def get_ignore_error_rules(self) -> List[Dict[str, Any]]: ignore_rules = [ # # We may see temporary network unreachable warnings since we are bringing the network interface down + # # 2024-02-16T09:27:14.114569Z WARNING MonitorHandler ExtHandler Error in SendHostPluginHeartbeat: [HttpError] [HTTP Failed] GET http://168.63.129.16:32526/health -- IOError [Errno 101] Network is unreachable -- 1 attempts made --- [NOTE: Will not log the same error for 
the next hour] # 2024-02-28T05:37:55.562065Z ERROR ExtHandler ExtHandler Error fetching the goal state: [ProtocolError] GET vmSettings [correlation ID: 28de1093-ecb5-4515-ba8e-2ed0c7778e34 eTag: 4648629460326038775]: Request failed: [Errno 101] Network is unreachable + # 2024-02-29T09:30:40.702293Z ERROR ExtHandler ExtHandler Error fetching the goal state: [ProtocolError] [Wireserver Exception] [HttpError] [HTTP Failed] GET http://168.63.129.16/machine/ -- IOError [Errno 101] Network is unreachable -- 6 attempts made # { - 'message': r"GET (http://168.63.129.16:32526/health|vmSettings).*\[Errno 101\] Network is unreachable", + 'message': r"GET (http://168.63.129.16:32526/health|vmSettings|http://168.63.129.16/machine).*\[Errno 101\] Network is unreachable", } ] return ignore_rules From dd6c465aded36435a3095fb2a2e004888a70f5bc Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Fri, 1 Mar 2024 11:24:30 -0800 Subject: [PATCH 176/240] Add lock around access to fast_track.json (#3076) Co-authored-by: narrieta --- azurelinuxagent/common/protocol/hostplugin.py | 33 +++++++++++-------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/azurelinuxagent/common/protocol/hostplugin.py b/azurelinuxagent/common/protocol/hostplugin.py index 0aaff2184d..cdc0219ae2 100644 --- a/azurelinuxagent/common/protocol/hostplugin.py +++ b/azurelinuxagent/common/protocol/hostplugin.py @@ -20,6 +20,7 @@ import datetime import json import os.path +import threading import uuid from azurelinuxagent.common import logger, conf @@ -423,19 +424,24 @@ def _get_fast_track_state_file(): # This file keeps the timestamp of the most recent goal state if it was retrieved via Fast Track return os.path.join(conf.get_lib_dir(), "fast_track.json") + # Multiple threads create instances of HostPluginProtocol; we use this lock to protect access to the state file for Fast Track + _fast_track_state_lock = threading.RLock() + @staticmethod def _save_fast_track_state(timestamp): try: - with 
open(HostPluginProtocol._get_fast_track_state_file(), "w") as file_: - json.dump({"timestamp": timestamp}, file_) + with HostPluginProtocol._fast_track_state_lock: + with open(HostPluginProtocol._get_fast_track_state_file(), "w") as file_: + json.dump({"timestamp": timestamp}, file_) except Exception as e: logger.warn("Error updating the Fast Track state ({0}): {1}", HostPluginProtocol._get_fast_track_state_file(), ustr(e)) @staticmethod def clear_fast_track_state(): try: - if os.path.exists(HostPluginProtocol._get_fast_track_state_file()): - os.remove(HostPluginProtocol._get_fast_track_state_file()) + with HostPluginProtocol._fast_track_state_lock: + if os.path.exists(HostPluginProtocol._get_fast_track_state_file()): + os.remove(HostPluginProtocol._get_fast_track_state_file()) except Exception as e: logger.warn("Error clearing the current state for Fast Track ({0}): {1}", HostPluginProtocol._get_fast_track_state_file(), ustr(e)) @@ -446,16 +452,17 @@ def get_fast_track_timestamp(): Returns the timestamp of the most recent FastTrack goal state retrieved by fetch_vm_settings(), or None if the most recent goal state was Fabric or fetch_vm_settings() has not been invoked. """ - if not os.path.exists(HostPluginProtocol._get_fast_track_state_file()): - return timeutil.create_timestamp(datetime.datetime.min) + with HostPluginProtocol._fast_track_state_lock: + if not os.path.exists(HostPluginProtocol._get_fast_track_state_file()): + return timeutil.create_timestamp(datetime.datetime.min) - try: - with open(HostPluginProtocol._get_fast_track_state_file(), "r") as file_: - return json.load(file_)["timestamp"] - except Exception as e: - logger.warn("Can't retrieve the timestamp for the most recent Fast Track goal state ({0}), will assume the current time. 
Error: {1}", - HostPluginProtocol._get_fast_track_state_file(), ustr(e)) - return timeutil.create_timestamp(datetime.datetime.utcnow()) + try: + with open(HostPluginProtocol._get_fast_track_state_file(), "r") as file_: + return json.load(file_)["timestamp"] + except Exception as e: + logger.warn("Can't retrieve the timestamp for the most recent Fast Track goal state ({0}), will assume the current time. Error: {1}", + HostPluginProtocol._get_fast_track_state_file(), ustr(e)) + return timeutil.create_timestamp(datetime.datetime.utcnow()) def fetch_vm_settings(self, force_update=False): """ From cc94d46faaddf434837aa1bf5b9936f32067618c Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Fri, 1 Mar 2024 12:47:14 -0800 Subject: [PATCH 177/240] added retries for agent cgroups test (#3075) * retries for agent cgroups test * pylint warn * addressed comment --- tests_e2e/tests/lib/cgroup_helpers.py | 37 +++++++++++-------- .../agent_cgroups-check_cgroups_agent.py | 24 +++++++----- 2 files changed, 35 insertions(+), 26 deletions(-) diff --git a/tests_e2e/tests/lib/cgroup_helpers.py b/tests_e2e/tests/lib/cgroup_helpers.py index 6da2865c21..5c552ef19e 100644 --- a/tests_e2e/tests/lib/cgroup_helpers.py +++ b/tests_e2e/tests/lib/cgroup_helpers.py @@ -8,6 +8,7 @@ from azurelinuxagent.common.version import DISTRO_NAME, DISTRO_VERSION from tests_e2e.tests.lib.agent_log import AgentLog from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.retry import retry_if_false BASE_CGROUP = '/sys/fs/cgroup' AGENT_CGROUP_NAME = 'WALinuxAgent' @@ -93,23 +94,27 @@ def verify_agent_cgroup_assigned_correctly(): This method checks agent is running and assigned to the correct cgroup using service status output """ log.info("===== Verifying the daemon and the agent are assigned to the same correct cgroup using systemd") - service_status = shellutil.run_command(["systemctl", "status", systemd.get_agent_unit_name()]) - log.info("Agent 
service status output:\n%s", service_status) - is_active = False - is_cgroup_assigned = False cgroup_mount_path = get_agent_cgroup_mount_path() - is_active_pattern = re.compile(r".*Active:\s+active.*") - - for line in service_status.splitlines(): - if re.match(is_active_pattern, line): - is_active = True - elif cgroup_mount_path in line: - is_cgroup_assigned = True - - if not is_active: - fail('walinuxagent service was not active/running. Service status:{0}'.format(service_status)) - if not is_cgroup_assigned: - fail('walinuxagent service was not assigned to the expected cgroup:{0}'.format(cgroup_mount_path)) + service_status = "" + + def check_agent_service_cgroup(): + is_active = False + is_cgroup_assigned = False + service_status = shellutil.run_command(["systemctl", "status", systemd.get_agent_unit_name()]) + log.info("Agent service status output:\n%s", service_status) + is_active_pattern = re.compile(r".*Active:\s+active.*") + + for line in service_status.splitlines(): + if re.match(is_active_pattern, line): + is_active = True + elif cgroup_mount_path in line: + is_cgroup_assigned = True + + return is_active and is_cgroup_assigned + + # Test check can happen before correct cgroup assigned and relfected in service status. So, retrying the check for few times + if not retry_if_false(check_agent_service_cgroup): + fail('walinuxagent service was not assigned to the expected cgroup:{0}. 
Current agent status:{1}'.format(cgroup_mount_path, service_status)) log.info("Successfully verified the agent cgroup assigned correctly by systemd\n") diff --git a/tests_e2e/tests/scripts/agent_cgroups-check_cgroups_agent.py b/tests_e2e/tests/scripts/agent_cgroups-check_cgroups_agent.py index 064f304007..4f6444462c 100755 --- a/tests_e2e/tests/scripts/agent_cgroups-check_cgroups_agent.py +++ b/tests_e2e/tests/scripts/agent_cgroups-check_cgroups_agent.py @@ -61,22 +61,26 @@ def verify_agent_cgroup_created_on_file_system(): """ log.info("===== Verifying the agent cgroup paths exist on file system") agent_cgroup_mount_path = get_agent_cgroup_mount_path() - all_agent_cgroup_controllers_path_exist = True + log.info("expected agent cgroup mount path: %s", agent_cgroup_mount_path) + missing_agent_cgroup_controllers_path = [] verified_agent_cgroup_controllers_path = [] - log.info("expected agent cgroup mount path: %s", agent_cgroup_mount_path) + def is_agent_cgroup_controllers_path_exist(): + all_controllers_path_exist = True - for controller in AGENT_CONTROLLERS: - agent_controller_path = os.path.join(BASE_CGROUP, controller, agent_cgroup_mount_path[1:]) + for controller in AGENT_CONTROLLERS: + agent_controller_path = os.path.join(BASE_CGROUP, controller, agent_cgroup_mount_path[1:]) - if not os.path.exists(agent_controller_path): - all_agent_cgroup_controllers_path_exist = False - missing_agent_cgroup_controllers_path.append(agent_controller_path) - else: - verified_agent_cgroup_controllers_path.append(agent_controller_path) + if not os.path.exists(agent_controller_path): + all_controllers_path_exist = False + missing_agent_cgroup_controllers_path.append(agent_controller_path) + else: + verified_agent_cgroup_controllers_path.append(agent_controller_path) + return all_controllers_path_exist - if not all_agent_cgroup_controllers_path_exist: + # Test check can happen before agent setup cgroup configuration. 
So, retrying the check for few times + if not retry_if_false(is_agent_cgroup_controllers_path_exist): fail("Agent's cgroup paths couldn't be found on file system. Missing agent cgroups path :{0}.\n Verified agent cgroups path:{1}".format(missing_agent_cgroup_controllers_path, verified_agent_cgroup_controllers_path)) log.info('Verified all agent cgroup paths are present.\n {0}'.format(verified_agent_cgroup_controllers_path)) From f947ddeaadd5081f9f3fcd0ba6c33a5da638a4c2 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 5 Mar 2024 17:30:01 -0800 Subject: [PATCH 178/240] cron job script (#3077) --- .../agent_persist_firewall-access_wireserver | 33 ++++++++++++++++++- ...ent_persist_firewall-check_connectivity.py | 30 +++++++++++++++++ .../scripts/agent_persist_firewall-test_setup | 4 +-- 3 files changed, 64 insertions(+), 3 deletions(-) create mode 100755 tests_e2e/tests/scripts/agent_persist_firewall-check_connectivity.py diff --git a/tests_e2e/tests/scripts/agent_persist_firewall-access_wireserver b/tests_e2e/tests/scripts/agent_persist_firewall-access_wireserver index c38e0a5706..e4afc406a5 100755 --- a/tests_e2e/tests/scripts/agent_persist_firewall-access_wireserver +++ b/tests_e2e/tests/scripts/agent_persist_firewall-access_wireserver @@ -19,6 +19,11 @@ # Helper script which tries to access Wireserver on system reboot. 
Also prints out iptable rules if non-root and still # able to access Wireserver +if [[ $# -ne 1 ]]; then + echo "Usage: agent_persist_firewall-access_wireserver " + exit 1 +fi +TEST_USER=$1 USER=$(whoami) echo "$(date --utc +%FT%T.%3NZ): Running as user: $USER" @@ -27,12 +32,25 @@ function check_online ping 8.8.8.8 -c 1 -i .2 -t 30 > /dev/null 2>&1 && echo 0 || echo 1 } +function ping_localhost +{ + ping 127.0.0.1 -c 1 -i .2 -t 30 > /dev/null 2>&1 && echo 0 || echo 1 +} + +function socket_connection +{ + output=$(python3 /home/"$TEST_USER"/bin/agent_persist_firewall-check_connectivity.py 2>&1) + echo $output +} + # Check more, sleep less MAX_CHECKS=10 # Initial starting value for checks CHECKS=0 IS_ONLINE=$(check_online) +echo "Checking network connectivity..." +echo "Running ping to 8.8.8.8 option" # Loop while we're not online. while [ "$IS_ONLINE" -eq 1 ]; do @@ -48,6 +66,19 @@ while [ "$IS_ONLINE" -eq 1 ]; do done +# logging other options output to compare and evaluate which option is more stable when ping to 8.8.8.8 failed +if [ "$IS_ONLINE" -eq 1 ]; then + echo "Checking other options to see if network is accessible" + echo "Running ping to localhost option" + PING_LOCAL=$(ping_localhost) + if [ "$PING_LOCAL" -eq 1 ]; then + echo "Ping to localhost failed" + else + echo "Ping to localhost succeeded" + fi + echo "Running socket connection to wireserver:53 option" + socket_connection +fi if [ "$IS_ONLINE" -eq 1 ]; then # We will never be able to get online. Kill script. 
echo "Unable to connect to network, exiting now" @@ -60,7 +91,7 @@ echo "Trying to contact Wireserver as $USER to see if accessible" echo "" echo "IPTables before accessing Wireserver" -sudo iptables -t security -L -nxv +sudo iptables -t security -L -nxv -w echo "" WIRE_IP=$(cat /var/lib/waagent/WireServerEndpoint 2>/dev/null || echo '168.63.129.16' | tr -d '[:space:]') diff --git a/tests_e2e/tests/scripts/agent_persist_firewall-check_connectivity.py b/tests_e2e/tests/scripts/agent_persist_firewall-check_connectivity.py new file mode 100755 index 0000000000..523109dc49 --- /dev/null +++ b/tests_e2e/tests/scripts/agent_persist_firewall-check_connectivity.py @@ -0,0 +1,30 @@ +import socket +import sys + +WIRESERVER_ENDPOINT_FILE = '/var/lib/waagent/WireServerEndpoint' +WIRESERVER_IP = '168.63.129.16' + + +def get_wireserver_ip() -> str: + try: + with open(WIRESERVER_ENDPOINT_FILE, 'r') as f: + wireserver_ip = f.read() + except Exception: + wireserver_ip = WIRESERVER_IP + return wireserver_ip + + +def main(): + try: + wireserver_ip = get_wireserver_ip() + socket.setdefaulttimeout(3) + socket.socket(socket.AF_INET, socket.SOCK_STREAM).connect((wireserver_ip, 53)) + + print('Socket connection to wire server:53 success') + except: # pylint: disable=W0702 + print('Socket connection to wire server:53 failed') + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests_e2e/tests/scripts/agent_persist_firewall-test_setup b/tests_e2e/tests/scripts/agent_persist_firewall-test_setup index a157e58cbe..2784158a4b 100755 --- a/tests_e2e/tests/scripts/agent_persist_firewall-test_setup +++ b/tests_e2e/tests/scripts/agent_persist_firewall-test_setup @@ -25,6 +25,6 @@ if [[ $# -ne 1 ]]; then exit 1 fi -echo "@reboot /home/$1/bin/agent_persist_firewall-access_wireserver > /tmp/reboot-cron-root.log 2>&1" | crontab -u root - -echo "@reboot /home/$1/bin/agent_persist_firewall-access_wireserver > /tmp/reboot-cron-$1.log 2>&1" | crontab -u $1 - +echo "@reboot 
/home/$1/bin/agent_persist_firewall-access_wireserver $1 > /tmp/reboot-cron-root.log 2>&1" | crontab -u root - +echo "@reboot /home/$1/bin/agent_persist_firewall-access_wireserver $1 > /tmp/reboot-cron-$1.log 2>&1" | crontab -u $1 - update-waagent-conf OS.EnableFirewall=y \ No newline at end of file From 68b77af280837cfff82ef5282687a2b60bd7da83 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 6 Mar 2024 10:22:42 -0800 Subject: [PATCH 179/240] Fix mock for cgroup unit test (#3079) * Fix mock for cgroup unit test --------- Co-authored-by: narrieta --- tests/common/osutil/test_default.py | 2 +- tests/common/test_event.py | 2 +- tests/ga/test_cgroupconfigurator.py | 7 +++++-- tests/ga/test_update.py | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/common/osutil/test_default.py b/tests/common/osutil/test_default.py index 79a802c7b1..0814d112f1 100644 --- a/tests/common/osutil/test_default.py +++ b/tests/common/osutil/test_default.py @@ -964,7 +964,7 @@ def test_remove_firewall_should_not_retry_invalid_rule(self): self.assertFalse(osutil._enable_firewall) - @skip_if_predicate_true(is_python_version_26_or_34, "Disabled on Python 2.6 and 3.4 for now. 
Need to revisit to fix it") + @skip_if_predicate_true(is_python_version_26_or_34, "Disabled on Python 2.6 and 3.4, they run on containers where the OS commands needed by the test are not present.") def test_get_nic_state(self): state = osutil.DefaultOSUtil().get_nic_state() self.assertNotEqual(state, {}) diff --git a/tests/common/test_event.py b/tests/common/test_event.py index a760c7f9f9..51eb15726d 100644 --- a/tests/common/test_event.py +++ b/tests/common/test_event.py @@ -415,7 +415,7 @@ def test_collect_events_should_be_able_to_process_events_with_non_ascii_characte self.assertEqual(len(event_list), 1) self.assertEqual(TestEvent._get_event_message(event_list[0]), u'World\u05e2\u05d9\u05d5\u05ea \u05d0\u05d7\u05e8\u05d5\u05ea\u0906\u091c') - @skip_if_predicate_true(is_python_version_26_or_34, "Disabled on Python 2.6 and 3.4 for now. Need to revisit to fix it") + @skip_if_predicate_true(is_python_version_26_or_34, "Disabled on Python 2.6 and 3.4, they run on containers where the OS commands needed by the test are not present.") def test_collect_events_should_ignore_invalid_event_files(self): self._create_test_event_file("custom_script_1.tld") # a valid event self._create_test_event_file("custom_script_utf-16.tld") diff --git a/tests/ga/test_cgroupconfigurator.py b/tests/ga/test_cgroupconfigurator.py index 0d46bae8a6..63c6ee90bd 100644 --- a/tests/ga/test_cgroupconfigurator.py +++ b/tests/ga/test_cgroupconfigurator.py @@ -526,7 +526,7 @@ def test_start_extension_command_should_disable_cgroups_and_invoke_the_command_d self.assertEqual(len(CGroupsTelemetry._tracked), 0, "No cgroups should have been created") - @skip_if_predicate_true(is_python_version_26_or_34, "Disabled on Python 2.6 and 3.4 for now. 
Need to revisit to fix it") + @skip_if_predicate_true(is_python_version_26_or_34, "Disabled on Python 2.6 and 3.4, they run on containers where the OS commands needed by the test are not present.") @attr('requires_sudo') @patch('time.sleep', side_effect=lambda _: mock_sleep()) def test_start_extension_command_should_not_use_fallback_option_if_extension_fails(self, *args): @@ -564,7 +564,7 @@ def test_start_extension_command_should_not_use_fallback_option_if_extension_fai # wasn't truncated. self.assertIn("Running scope as unit", ustr(context_manager.exception)) - @skip_if_predicate_true(is_python_version_26_or_34, "Disabled on Python 2.6 and 3.4 for now. Need to revisit to fix it") + @skip_if_predicate_true(is_python_version_26_or_34, "Disabled on Python 2.6 and 3.4, they run on containers where the OS commands needed by the test are not present.") @attr('requires_sudo') @patch('time.sleep', side_effect=lambda _: mock_sleep()) @patch("azurelinuxagent.ga.extensionprocessutil.TELEMETRY_MESSAGE_MAX_LEN", 5) @@ -632,6 +632,7 @@ def test_start_extension_command_should_not_use_fallback_option_if_extension_tim self.assertEqual(context_manager.exception.code, ExtensionErrorCodes.PluginHandlerScriptTimedout) self.assertIn("Timeout", ustr(context_manager.exception)) + @skip_if_predicate_true(is_python_version_26_or_34, "Disabled on Python 2.6 and 3.4, they run on containers where the OS commands needed by the test are not present.") @patch('time.sleep', side_effect=lambda _: mock_sleep()) def test_start_extension_command_should_capture_only_the_last_subprocess_output(self, _): with self._get_cgroup_configurator() as configurator: @@ -647,6 +648,8 @@ def mock_popen(command, *args, **kwargs): command = command.replace('systemd-run', 'systemd-run syntax_error') elif isinstance(command, list) and command[0] == 'systemd-run': command = ['systemd-run', 'syntax_error'] + command[1:] + elif command == ['systemctl', 'daemon-reload']: + command = ['echo', 'systemctl', 'daemon-reload'] 
return original_popen(command, *args, **kwargs) diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 6caa21f3c8..c257cefed0 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -1037,7 +1037,7 @@ def _mock_popen(cmd, *args, **kwargs): "Not setting up persistent firewall rules as OS.EnableFirewall=False" == args[0] for (args, _) in patch_info.call_args_list), "Info not logged properly, got: {0}".format(patch_info.call_args_list)) - @skip_if_predicate_true(is_python_version_26_or_34, "Disabled on Python 2.6 and 3.4 for now. Need to revisit to fix it") + @skip_if_predicate_true(is_python_version_26_or_34, "Disabled on Python 2.6 and 3.4, they run on containers where the OS commands needed by the test are not present.") def test_it_should_setup_persistent_firewall_rules_on_startup(self): iterations = 1 executed_commands = [] From 3e4fb9a8eebf960189218bf1dbbebd6d9865875c Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 6 Mar 2024 10:41:09 -0800 Subject: [PATCH 180/240] Add DistroVersion class to compare distro versions (#3078) * Add DistroVersion class to compare distro versions * comment * python 2 --------- Co-authored-by: narrieta --- azurelinuxagent/common/future.py | 10 - azurelinuxagent/common/osutil/factory.py | 26 +- .../common/utils/distro_version.py | 115 ++ .../common/utils/flexible_version.py | 11 +- azurelinuxagent/distutils/__init__.py | 0 azurelinuxagent/distutils/version.py | 236 --- azurelinuxagent/ga/exthandlers.py | 3 +- azurelinuxagent/pa/deprovision/factory.py | 4 +- azurelinuxagent/pa/rdma/factory.py | 4 +- azurelinuxagent/pa/rdma/suse.py | 5 +- tests/common/utils/test_distro_version.py | 128 ++ tests/common/utils/test_text_util.py | 18 - tests/data/distro_versions.txt | 1501 +++++++++++++++++ 13 files changed, 1769 insertions(+), 292 deletions(-) create mode 100644 azurelinuxagent/common/utils/distro_version.py delete mode 100644 azurelinuxagent/distutils/__init__.py delete mode 100644 
azurelinuxagent/distutils/version.py create mode 100644 tests/common/utils/test_distro_version.py create mode 100644 tests/data/distro_versions.txt diff --git a/azurelinuxagent/common/future.py b/azurelinuxagent/common/future.py index e9bb7e9339..bb914775ab 100644 --- a/azurelinuxagent/common/future.py +++ b/azurelinuxagent/common/future.py @@ -68,16 +68,6 @@ else: raise ImportError("Unknown python version: {0}".format(sys.version_info)) -# -# distutils has been removed from Python >= 3.12; use the copy from azurelinuxagent instead -# -if sys.version_info[0] == 3 and sys.version_info[1] >= 12: - from azurelinuxagent.distutils import version -else: - from distutils import version # pylint: disable=deprecated-module -Version = version.Version -LooseVersion = version.LooseVersion - def get_linux_distribution(get_full_name, supported_dists): """Abstract platform.linux_distribution() call which is deprecated as of diff --git a/azurelinuxagent/common/osutil/factory.py b/azurelinuxagent/common/osutil/factory.py index 25a6060ada..58afd0af11 100644 --- a/azurelinuxagent/common/osutil/factory.py +++ b/azurelinuxagent/common/osutil/factory.py @@ -16,10 +16,9 @@ # -from azurelinuxagent.common.future import LooseVersion as Version - import azurelinuxagent.common.logger as logger from azurelinuxagent.common.version import DISTRO_NAME, DISTRO_CODE_NAME, DISTRO_VERSION, DISTRO_FULL_NAME +from azurelinuxagent.common.utils.distro_version import DistroVersion from .alpine import AlpineOSUtil from .arch import ArchUtil from .bigip import BigIpOSUtil @@ -66,14 +65,14 @@ def _get_osutil(distro_name, distro_code_name, distro_version, distro_full_name) return ClearLinuxUtil() if distro_name == "ubuntu": - ubuntu_version = Version(distro_version) - if ubuntu_version in [Version("12.04"), Version("12.10")]: + ubuntu_version = DistroVersion(distro_version) + if ubuntu_version in [DistroVersion("12.04"), DistroVersion("12.10")]: return Ubuntu12OSUtil() - if ubuntu_version in 
[Version("14.04"), Version("14.10")]: + if ubuntu_version in [DistroVersion("14.04"), DistroVersion("14.10")]: return Ubuntu14OSUtil() - if ubuntu_version in [Version('16.04'), Version('16.10'), Version('17.04')]: + if ubuntu_version in [DistroVersion('16.04'), DistroVersion('16.10'), DistroVersion('17.04')]: return Ubuntu16OSUtil() - if Version('18.04') <= ubuntu_version <= Version('24.04'): + if DistroVersion('18.04') <= ubuntu_version <= DistroVersion('24.04'): return Ubuntu18OSUtil() if distro_full_name == "Snappy Ubuntu Core": return UbuntuSnappyOSUtil() @@ -91,14 +90,14 @@ def _get_osutil(distro_name, distro_code_name, distro_version, distro_full_name) if distro_name in ("suse", "sle-micro", "sle_hpc", "sles", "opensuse"): if distro_full_name == 'SUSE Linux Enterprise Server' \ - and Version(distro_version) < Version('12') \ - or distro_full_name == 'openSUSE' and Version(distro_version) < Version('13.2'): + and DistroVersion(distro_version) < DistroVersion('12') \ + or distro_full_name == 'openSUSE' and DistroVersion(distro_version) < DistroVersion('13.2'): return SUSE11OSUtil() return SUSEOSUtil() if distro_name == "debian": - if "sid" in distro_version or Version(distro_version) > Version("7"): + if "sid" in distro_version or DistroVersion(distro_version) > DistroVersion("7"): return DebianOSModernUtil() return DebianOSBaseUtil() @@ -109,16 +108,15 @@ def _get_osutil(distro_name, distro_code_name, distro_version, distro_full_name) # to distinguish between debian and devuan. The new distro.linux_distribution module # is able to distinguish between the two. 
- if distro_name == "devuan" and Version(distro_version) >= Version("4"): + if distro_name == "devuan" and DistroVersion(distro_version) >= DistroVersion("4"): return DevuanOSUtil() - if distro_name in ("redhat", "rhel", "centos", "oracle", "almalinux", "cloudlinux", "rocky"): - if Version(distro_version) < Version("7"): + if DistroVersion(distro_version) < DistroVersion("7"): return Redhat6xOSUtil() - if Version(distro_version) >= Version("8.6"): + if DistroVersion(distro_version) >= DistroVersion("8.6"): return RedhatOSModernUtil() return RedhatOSUtil() diff --git a/azurelinuxagent/common/utils/distro_version.py b/azurelinuxagent/common/utils/distro_version.py new file mode 100644 index 0000000000..8a447f6b21 --- /dev/null +++ b/azurelinuxagent/common/utils/distro_version.py @@ -0,0 +1,115 @@ +# Microsoft Azure Linux Agent +# +# Copyright 2020 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Requires Python 2.6+ and Openssl 1.0+ +# + +""" +""" + +import re + + +class DistroVersion(object): +    """ +    Distro versions (as exposed by azurelinuxagent.common.version.DISTRO_VERSION) can be very arbitrary: + +        9.2.0 +        0.0.0.0_99496 +        10.0_RC2 +        1.4-rolling-202402090309 +        2015.11-git +        2023 +        2023.02.1 +        2.1-systemd-rc1 +        2308a +        3.11.2-dev20240212t1512utc-autotag +        3.11.2-rc.1 +        3.1.22-1.8 +        8.1.3-p1-24838 +        8.1.3-p8-khilan.unadkat-08415223c9a99546b566df0dbc683ffa378cfd77 +        9.13.1P8X1 +        9.13.1RC1 +        9.2.0-beta1-25971 +        a +        ArrayOS +        bookworm/sid +        Clawhammer__9.14.0 +        FFFF +        h +        JNPR-11.0-20200922.4042921_build +        lighthouse-23.10.0 +        Lighthouse__9.13.1 +        linux-os-31700 +        Mightysquirrel__9.15.0 +        n/a +        NAME="SLES" +        ngfw-6.10.13.26655.fips.2 +        r11427-9ce6aa9d8d +        SonicOSX 7.1.1-7047-R3003-HF24239 +        unstable +        vsbc-x86_pi3-6.10.3 +        vsbc-x86_pi3-6.12.2pre02 + +    The DistroVersion class allows comparing these versions following a strategy similar to the now deprecated distutils.LooseVersion: +    versions consist of a series of sequences of numbers, alphabetic characters, or any other characters, optionally separated by dots +    (the dots themselves are stripped out). When comparing versions the numeric components are compared numerically, while the +    other components are compared lexicographically. + +    NOTE: For entities with simpler version schemes (e.g. extensions and the Agent), use FlexibleVersion. + +    """ +    def __init__(self, version): +        self._version = version +        self._fragments = [ +            int(x) if DistroVersion._number_re.match(x) else x +            for x in DistroVersion._fragment_re.split(self._version) if x != '' and x != '.' 
+ ] + + _fragment_re = re.compile(r'(\d+|[a-z]+|\.)', re.IGNORECASE) + + _number_re = re.compile(r'\d+') + + def __str__(self): + return self._version + + def __repr__(self): + return str(self) + + def __eq__(self, other): + return self._compare(other) == 0 + + def __lt__(self, other): + return self._compare(other) < 0 + + def __le__(self, other): + return self._compare(other) <= 0 + + def __gt__(self, other): + return self._compare(other) > 0 + + def __ge__(self, other): + return self._compare(other) >= 0 + + def _compare(self, other): + if isinstance(other, str): + other = DistroVersion(other) + + if self._fragments < other._fragments: + return -1 + if self._fragments > other._fragments: + return 1 + return 0 diff --git a/azurelinuxagent/common/utils/flexible_version.py b/azurelinuxagent/common/utils/flexible_version.py index 633fe771a1..83762eb850 100644 --- a/azurelinuxagent/common/utils/flexible_version.py +++ b/azurelinuxagent/common/utils/flexible_version.py @@ -17,13 +17,16 @@ # Requires Python 2.6+ and Openssl 1.0+ # -from azurelinuxagent.common.future import Version import re -class FlexibleVersion(Version): +class FlexibleVersion(object): """ - A more flexible implementation of distutils.version.StrictVersion + A more flexible implementation of distutils.version.StrictVersion. + + NOTE: Use this class for generic version comparisons, e.g. extension and Agent + versions. Distro versions can be very arbitrary and should be handled + using the DistroVersion class. The implementation allows to specify: - an arbitrary number of version numbers: @@ -41,8 +44,6 @@ class FlexibleVersion(Version): """ def __init__(self, vstring=None, sep='.', prerel_tags=('alpha', 'beta', 'rc')): - Version.__init__(self) - if sep is None: sep = '.' 
if prerel_tags is None: diff --git a/azurelinuxagent/distutils/__init__.py b/azurelinuxagent/distutils/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/azurelinuxagent/distutils/version.py b/azurelinuxagent/distutils/version.py deleted file mode 100644 index acb8b0a5df..0000000000 --- a/azurelinuxagent/distutils/version.py +++ /dev/null @@ -1,236 +0,0 @@ -# -# A copy of distutils/version.py as Python 3.8 (minus the StrictVersion class) -# -# Implements multiple version numbering conventions for the -# Python Module Distribution Utilities. -# -# $Id$ -# - -"""Provides classes to represent module version numbers (one class for -each style of version numbering). There are currently two such classes -implemented: StrictVersion and LooseVersion. - -Every version number class implements the following interface: - * the 'parse' method takes a string and parses it to some internal - representation; if the string is an invalid version number, - 'parse' raises a ValueError exception - * the class constructor takes an optional string argument which, - if supplied, is passed to 'parse' - * __str__ reconstructs the string that was passed to 'parse' (or - an equivalent string -- ie. one that will generate an equivalent - version number instance) - * __repr__ generates Python code to recreate the version number instance - * _cmp compares the current instance with either another instance - of the same class or a string (which will be parsed to an instance - of the same class, thus must follow the same rules) -""" - -import re - -# E1101: Instance of 'Version' has no '_cmp' member (no-member) -# pylint: disable=no-member - - -class Version: - """Abstract base class for version numbering classes. Just provides - constructor (__init__) and reproducer (__repr__), because those - seem to be the same for all version numbering classes; and route - rich comparisons to _cmp. 
- """ - - def __init__(self, vstring=None): - if vstring: - self.parse(vstring) - - def __repr__(self): - return "%s ('%s')" % (self.__class__.__name__, str(self)) - - def __eq__(self, other): - c = self._cmp(other) - if c is NotImplemented: - return c - return c == 0 - - def __lt__(self, other): - c = self._cmp(other) - if c is NotImplemented: - return c - return c < 0 - - def __le__(self, other): - c = self._cmp(other) - if c is NotImplemented: - return c - return c <= 0 - - def __gt__(self, other): - c = self._cmp(other) - if c is NotImplemented: - return c - return c > 0 - - def __ge__(self, other): - c = self._cmp(other) - if c is NotImplemented: - return c - return c >= 0 - - -# Interface for version-number classes -- must be implemented -# by the following classes (the concrete ones -- Version should -# be treated as an abstract class). -# __init__ (string) - create and take same action as 'parse' -# (string parameter is optional) -# parse (string) - convert a string representation to whatever -# internal representation is appropriate for -# this style of version numbering -# __str__ (self) - convert back to a string; should be very similar -# (if not identical to) the string supplied to parse -# __repr__ (self) - generate Python code to recreate -# the instance -# _cmp (self, other) - compare two version numbers ('other' may -# be an unparsed version string, or another -# instance of your version class) - - -# The rules according to Greg Stein: -# 1) a version number has 1 or more numbers separated by a period or by -# sequences of letters. If only periods, then these are compared -# left-to-right to determine an ordering. 
-# 2) sequences of letters are part of the tuple for comparison and are -# compared lexicographically -# 3) recognize the numeric components may have leading zeroes -# -# The LooseVersion class below implements these rules: a version number -# string is split up into a tuple of integer and string components, and -# comparison is a simple tuple comparison. This means that version -# numbers behave in a predictable and obvious way, but a way that might -# not necessarily be how people *want* version numbers to behave. There -# wouldn't be a problem if people could stick to purely numeric version -# numbers: just split on period and compare the numbers as tuples. -# However, people insist on putting letters into their version numbers; -# the most common purpose seems to be: -# - indicating a "pre-release" version -# ('alpha', 'beta', 'a', 'b', 'pre', 'p') -# - indicating a post-release patch ('p', 'pl', 'patch') -# but of course this can't cover all version number schemes, and there's -# no way to know what a programmer means without asking him. -# -# The problem is what to do with letters (and other non-numeric -# characters) in a version number. The current implementation does the -# obvious and predictable thing: keep them as strings and compare -# lexically within a tuple comparison. This has the desired effect if -# an appended letter sequence implies something "post-release": -# eg. "0.99" < "0.99pl14" < "1.0", and "5.001" < "5.001m" < "5.002". -# -# However, if letters in a version number imply a pre-release version, -# the "obvious" thing isn't correct. Eg. you would expect that -# "1.5.1" < "1.5.2a2" < "1.5.2", but under the tuple/lexical comparison -# implemented here, this just isn't so. -# -# Two possible solutions come to mind. The first is to tie the -# comparison algorithm to a particular set of semantic rules, as has -# been done in the StrictVersion class above. This works great as long -# as everyone can go along with bondage and discipline. 
Hopefully a -# (large) subset of Python module programmers will agree that the -# particular flavour of bondage and discipline provided by StrictVersion -# provides enough benefit to be worth using, and will submit their -# version numbering scheme to its domination. The free-thinking -# anarchists in the lot will never give in, though, and something needs -# to be done to accommodate them. -# -# Perhaps a "moderately strict" version class could be implemented that -# lets almost anything slide (syntactically), and makes some heuristic -# assumptions about non-digits in version number strings. This could -# sink into special-case-hell, though; if I was as talented and -# idiosyncratic as Larry Wall, I'd go ahead and implement a class that -# somehow knows that "1.2.1" < "1.2.2a2" < "1.2.2" < "1.2.2pl3", and is -# just as happy dealing with things like "2g6" and "1.13++". I don't -# think I'm smart enough to do it right though. -# -# In any case, I've coded the test suite for this module (see -# ../test/test_version.py) specifically to fail on things like comparing -# "1.2a2" and "1.2". That's not because the *code* is doing anything -# wrong, it's because the simple, obvious design doesn't match my -# complicated, hairy expectations for real-world version numbers. It -# would be a snap to fix the test suite to say, "Yep, LooseVersion does -# the Right Thing" (ie. the code matches the conception). But I'd rather -# have a conception that matches common notions about version numbers. - -# W0231: __init__ method from base class 'Version' is not called (super-init-not-called) -# R1710: Either all return statements in a function should return an expression, or none of them should. (inconsistent-return-statements) -# pylint: disable=super-init-not-called, inconsistent-return-statements - -class LooseVersion (Version): - - """Version numbering for anarchists and software realists. - Implements the standard interface for version number classes as - described above. 
A version number consists of a series of numbers, - separated by either periods or strings of letters. When comparing - version numbers, the numeric components will be compared - numerically, and the alphabetic components lexically. The following - are all valid version numbers, in no particular order: - - 1.5.1 - 1.5.2b2 - 161 - 3.10a - 8.02 - 3.4j - 1996.07.12 - 3.2.pl0 - 3.1.1.6 - 2g6 - 11g - 0.960923 - 2.2beta29 - 1.13++ - 5.5.kw - 2.0b1pl0 - - In fact, there is no such thing as an invalid version number under - this scheme; the rules for comparison are simple and predictable, - but may not always give the results you want (for some definition - of "want"). - """ - - component_re = re.compile(r'(\d+|[a-z]+|\.)', re.VERBOSE) - - def __init__(self, vstring=None): - if vstring: - self.parse(vstring) - - def parse(self, vstring): - # I've given up on thinking I can reconstruct the version string - # from the parsed tuple -- so I just store the string here for - # use by __str__ - self.vstring = vstring - components = [x for x in self.component_re.split(vstring) if x and x != '.'] - for i, obj in enumerate(components): - try: - components[i] = int(obj) - except ValueError: - pass - - self.version = components - - def __str__(self): - return self.vstring - - def __repr__(self): - return "LooseVersion ('%s')" % str(self) - - def _cmp(self, other): - if isinstance(other, str): - other = LooseVersion(other) - - if self.version == other.version: - return 0 - if self.version < other.version: - return -1 - if self.version > other.version: - return 1 - - -# end class LooseVersion diff --git a/azurelinuxagent/ga/exthandlers.py b/azurelinuxagent/ga/exthandlers.py index 564128fa9a..3499b706c4 100644 --- a/azurelinuxagent/ga/exthandlers.py +++ b/azurelinuxagent/ga/exthandlers.py @@ -27,7 +27,6 @@ import tempfile import time import zipfile -from azurelinuxagent.common.future import LooseVersion as Version from collections import defaultdict from functools import partial @@ 
-2250,7 +2249,7 @@ def get_resource_limits(self, extension_name, str_version): This is not effective after nov 30th. """ if ExtHandlerInstance.is_azuremonitorlinuxagent(extension_name): - if Version(str_version) < Version("1.12"): + if FlexibleVersion(str_version) < FlexibleVersion("1.12"): test_man = { "resourceLimits": { "services": [ diff --git a/azurelinuxagent/pa/deprovision/factory.py b/azurelinuxagent/pa/deprovision/factory.py index 321c982ca7..6da78a2d28 100644 --- a/azurelinuxagent/pa/deprovision/factory.py +++ b/azurelinuxagent/pa/deprovision/factory.py @@ -15,9 +15,9 @@ # Requires Python 2.6+ and Openssl 1.0+ # -from azurelinuxagent.common.future import LooseVersion as Version from azurelinuxagent.common.version import DISTRO_NAME, DISTRO_VERSION, DISTRO_FULL_NAME +from azurelinuxagent.common.utils.distro_version import DistroVersion from .arch import ArchDeprovisionHandler from .clearlinux import ClearLinuxDeprovisionHandler from .coreos import CoreOSDeprovisionHandler @@ -31,7 +31,7 @@ def get_deprovision_handler(distro_name=DISTRO_NAME, if distro_name == "arch": return ArchDeprovisionHandler() if distro_name == "ubuntu": - if Version(distro_version) >= Version('18.04'): + if DistroVersion(distro_version) >= DistroVersion('18.04'): return Ubuntu1804DeprovisionHandler() else: return UbuntuDeprovisionHandler() diff --git a/azurelinuxagent/pa/rdma/factory.py b/azurelinuxagent/pa/rdma/factory.py index ac0dd7638a..34034818d8 100644 --- a/azurelinuxagent/pa/rdma/factory.py +++ b/azurelinuxagent/pa/rdma/factory.py @@ -15,10 +15,10 @@ # Requires Python 2.6+ and Openssl 1.0+ # -from azurelinuxagent.common.future import LooseVersion as Version import azurelinuxagent.common.logger as logger from azurelinuxagent.pa.rdma.rdma import RDMAHandler from azurelinuxagent.common.version import DISTRO_FULL_NAME, DISTRO_VERSION +from azurelinuxagent.common.utils.distro_version import DistroVersion from .centos import CentOSRDMAHandler from .suse import SUSERDMAHandler from 
.ubuntu import UbuntuRDMAHandler @@ -33,7 +33,7 @@ def get_rdma_handler( (distro_full_name == 'SUSE Linux Enterprise Server' or distro_full_name == 'SLES' or distro_full_name == 'SLE_HPC') and - Version(distro_version) > Version('11') + DistroVersion(distro_version) > DistroVersion('11') ): return SUSERDMAHandler() diff --git a/azurelinuxagent/pa/rdma/suse.py b/azurelinuxagent/pa/rdma/suse.py index 4de6cf9538..347f3eeecb 100644 --- a/azurelinuxagent/pa/rdma/suse.py +++ b/azurelinuxagent/pa/rdma/suse.py @@ -23,8 +23,7 @@ from azurelinuxagent.pa.rdma.rdma import RDMAHandler from azurelinuxagent.common.version import DISTRO_VERSION - -from azurelinuxagent.common.future import LooseVersion as Version +from azurelinuxagent.common.utils.distro_version import DistroVersion class SUSERDMAHandler(RDMAHandler): @@ -32,7 +31,7 @@ class SUSERDMAHandler(RDMAHandler): def install_driver(self): # pylint: disable=R1710 """Install the appropriate driver package for the RDMA firmware""" - if Version(DISTRO_VERSION) >= Version('15'): + if DistroVersion(DISTRO_VERSION) >= DistroVersion('15'): msg = 'SLE 15 and later only supports PCI pass through, no ' msg += 'special driver needed for IB interface' logger.info(msg) diff --git a/tests/common/utils/test_distro_version.py b/tests/common/utils/test_distro_version.py new file mode 100644 index 0000000000..bc279377f8 --- /dev/null +++ b/tests/common/utils/test_distro_version.py @@ -0,0 +1,128 @@ +import os +import sys +import unittest + +from tests.lib.tools import AgentTestCase, data_dir + +from azurelinuxagent.common.utils.distro_version import DistroVersion +from azurelinuxagent.common.utils.flexible_version import FlexibleVersion + + +class TestDistroVersion(AgentTestCase): + + def test_it_should_implement_all_comparison_operators(self): + self.assertTrue(DistroVersion("1.0.0") < DistroVersion("1.1.0")) + self.assertTrue(DistroVersion("1.0.0") <= DistroVersion("1.0.0")) + self.assertTrue(DistroVersion("1.0.0") <= 
DistroVersion("1.1.0")) + + self.assertTrue(DistroVersion("1.1.0") > DistroVersion("1.0.0")) + self.assertTrue(DistroVersion("1.1.0") >= DistroVersion("1.1.0")) + self.assertTrue(DistroVersion("1.1.0") >= DistroVersion("1.0.0")) + + self.assertTrue(DistroVersion("1.1.0") != DistroVersion("1.0.0")) + self.assertTrue(DistroVersion("1.1.0") == DistroVersion("1.1.0")) + + def test_it_should_compare_digit_sequences_numerically(self): + self.assertTrue(DistroVersion("2.0.0") < DistroVersion("10.0.0")) + self.assertTrue(DistroVersion("1.2.0") < DistroVersion("1.10.0")) + self.assertTrue(DistroVersion("1.0.2") < DistroVersion("1.0.10")) + self.assertTrue(DistroVersion("2.0.rc.2") < DistroVersion("2.0.rc.10")) + self.assertTrue(DistroVersion("2.0.rc2") < DistroVersion("2.0.rc10")) + + def test_it_should_compare_non_digit_sequences_lexicographically(self): + self.assertTrue(DistroVersion("2.0.alpha") < DistroVersion("2.0.beta")) + self.assertTrue(DistroVersion("2.0.alpha.2") < DistroVersion("2.0.beta.1")) + self.assertTrue(DistroVersion("alpha") < DistroVersion("beta")) + self.assertTrue(DistroVersion("<1.0.0>") < DistroVersion(">1.0.0>")) + + def test_it_should_parse_common_distro_versions(self): + """ + Test that DistroVersion can parse the versions given by azurelinuxagent.common.version.DISTRO_VERSION + (the values in distro_versions.txt are current values from telemetry.) + """ + data_file = os.path.join(data_dir, "distro_versions.txt") + + with open(data_file, "r") as f: + for line in f: + line = line.rstrip() + version = DistroVersion(line) + self.assertNotEqual([], version._fragments) + + self.assertEqual([], DistroVersion("")._fragments) + + def test_it_should_compare_commonly_used_versions(self): + """ + Test that DistroVersion does some common comparisons correctly. 
+        """ +        self.assertTrue(DistroVersion("1.0.0") < DistroVersion("2.0.0.")) +        self.assertTrue(DistroVersion("1.0.0") < DistroVersion("1.1.0")) +        self.assertTrue(DistroVersion("1.0.0") < DistroVersion("1.0.1")) + +        self.assertTrue(DistroVersion("1.0.0") == DistroVersion("1.0.0")) +        self.assertTrue(DistroVersion("1.0.0") != DistroVersion("2.0.0")) + +        self.assertTrue(DistroVersion("13") != DistroVersion("13.0")) +        self.assertTrue(DistroVersion("13") < DistroVersion("13.0")) +        self.assertTrue(DistroVersion("13") < DistroVersion("13.1")) + +        ubuntu_version = DistroVersion("16.10") +        self.assertTrue(ubuntu_version in [DistroVersion('16.04'), DistroVersion('16.10'), DistroVersion('17.04')]) + +        ubuntu_version = DistroVersion("20.10") +        self.assertTrue(DistroVersion('18.04') <= ubuntu_version <= DistroVersion('24.04')) + +        redhat_version = DistroVersion("7.9") +        self.assertTrue(DistroVersion('7') <= redhat_version <= DistroVersion('9')) + +        self.assertTrue(DistroVersion("1.0") < DistroVersion("1.1")) +        self.assertTrue(DistroVersion("1.9") < DistroVersion("1.10")) +        self.assertTrue(DistroVersion("1.9.9") < DistroVersion("1.10.0")) +        self.assertTrue(DistroVersion("1.0.0.0") < DistroVersion("1.2.0.0")) + +        self.assertTrue(DistroVersion("1.0") <= DistroVersion("1.1")) +        self.assertTrue(DistroVersion("1.1") > DistroVersion("1.0")) +        self.assertTrue(DistroVersion("1.1") >= DistroVersion("1.0")) + +        self.assertTrue(DistroVersion("1.0") == DistroVersion("1.0")) +        self.assertTrue(DistroVersion("1.0") >= DistroVersion("1.0")) +        self.assertTrue(DistroVersion("1.0") <= DistroVersion("1.0")) + +    def test_uncommon_versions(self): +        """ +        The comparisons in these tests may occur in prod, and they do not always produce a result that makes sense. +        More than expressing the desired behavior, these tests are meant to document the current behavior. 
+ """ + self.assertTrue(DistroVersion("2") != DistroVersion("2.0")) + self.assertTrue(DistroVersion("2") < DistroVersion("2.0")) + + self.assertTrue(DistroVersion("10.0_RC2") != DistroVersion("10.0RC2")) + self.assertTrue(DistroVersion("10.0_RC2")._fragments == [10, 0, '_', 'RC', 2]) + self.assertTrue(DistroVersion("10.0RC2")._fragments == [10, 0, 'RC', 2]) + + self.assertTrue(DistroVersion("1.4-rolling") < DistroVersion("1.4-rolling-202402090309")) + + self.assertTrue(DistroVersion("2023") < DistroVersion("2023.02.1")) + + self.assertTrue(DistroVersion("2.1-systemd-alpha") < DistroVersion("2.1-systemd-rc")) + self.assertTrue(DistroVersion("2308a") < DistroVersion("2308beta")) + self.assertTrue(DistroVersion("6.0.0.beta4") < DistroVersion("6.0.0.beta5")) + self.assertTrue(DistroVersion("9.13.1P8X1") < DistroVersion("9.13.1RC1")) + self.assertTrue(DistroVersion("a") < DistroVersion("rc")) + self.assertTrue(DistroVersion("Clawhammer__9.14.0"), DistroVersion("Clawhammer__9.14.1")) + self.assertTrue(DistroVersion("FFFF") < DistroVersion("h")) + self.assertTrue(DistroVersion("None") < DistroVersion("n/a")) + + if sys.version_info[0] == 2: + self.assertTrue(DistroVersion("3.11.2-rc.1") < DistroVersion("3.11.2-rc.a")) + else: + # TypeError: '<' not supported between instances of 'int' and 'str' + with self.assertRaises(TypeError): + _ = DistroVersion("3.11.2-rc.1") < DistroVersion("3.11.2-rc.a") + + # AttributeError: 'FlexibleVersion' object has no attribute '_fragments' + with self.assertRaises(AttributeError): + _ = DistroVersion("1.0.0.0") == FlexibleVersion("1.0.0.0") + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/common/utils/test_text_util.py b/tests/common/utils/test_text_util.py index a9f8a9df9f..531f03752f 100644 --- a/tests/common/utils/test_text_util.py +++ b/tests/common/utils/test_text_util.py @@ -17,7 +17,6 @@ import hashlib import unittest -from azurelinuxagent.common.future import LooseVersion as Version import 
azurelinuxagent.common.utils.textutil as textutil from azurelinuxagent.common.future import ustr @@ -68,23 +67,6 @@ def test_remove_bom(self): data = textutil.remove_bom(data) self.assertEqual(u" ", data) - def test_version_compare(self): - self.assertTrue(Version("1.0") < Version("1.1")) - self.assertTrue(Version("1.9") < Version("1.10")) - self.assertTrue(Version("1.9.9") < Version("1.10.0")) - self.assertTrue(Version("1.0.0.0") < Version("1.2.0.0")) - - self.assertTrue(Version("1.0") <= Version("1.1")) - self.assertTrue(Version("1.1") > Version("1.0")) - self.assertTrue(Version("1.1") >= Version("1.0")) - - self.assertTrue(Version("1.0") == Version("1.0")) - self.assertTrue(Version("1.0") >= Version("1.0")) - self.assertTrue(Version("1.0") <= Version("1.0")) - - self.assertTrue(Version("1.9") < "1.10") - self.assertTrue("1.9" < Version("1.10")) - def test_get_bytes_from_pem(self): content = ("-----BEGIN CERTIFICATE-----\n" "certificate\n" diff --git a/tests/data/distro_versions.txt b/tests/data/distro_versions.txt new file mode 100644 index 0000000000..aa0bb3cbd6 --- /dev/null +++ b/tests/data/distro_versions.txt @@ -0,0 +1,1501 @@ +# +0.0.0.0_99466 +0.0.0.0_99492 +0.0.0.0_99494 +0.0.0.0_99496 +0.0.0.0_99500 +0.0.0.0_99504 +0.0.0.0_99506 +0.0.0.0_99530 +0.0.0.0_99533 +0.0.0.0_99539 +0.0.0.0_99541 +0.0.0.0_99543 +0.0.0.0_99560 +0.0.0.0_99562 +0.0.0.0_99570 +0.0.0.0_99572 +0.0.0.0_99580 +0.0.0.0_99587 +0.0.0.0_99589 +0.0.0.0_99591 +0.0.0.0_99595 +0.0.0.0_99597 +0.0.0.0_99634 +0.0.0.0_99637 +0.0.0.0_99639 +0.0.0.0_99646 +0.0.0.0_99660 +0.0.0.0_99664 +0.0.0.0_99665 +0.0.0.0_99669 +0.0.0.0_99681 +0.0.0.0_99696 +0.0.0.0_99702 +0.0.0.0_99704 +0.0.0.0_99710 +0.0.0.0_99815 +0.0.0.0_99824 +0.0.0.0_99826 +0.0.0.0_99828 +0.0.0.0_99835 +0.0.0.0_99839 +0.0.0.0_99841 +0.10.1 +0.11.1 +0.12.1 +0.13.1 +0.14.1 +0.6.1 +0.6.2 +0.6.3 +0.8.1 +0.9.1 +0.999.0.0-1093544 +1.0 +10 +10.0.1.0 +10.0.2.0 +10.0.3.0 +10.0.3.1 +10.0.4.0 +10.0.5.0 +10.0.6.0 +10.0.7.0 +10.0_RC2 +10.1 +10.10 +10.11 
+10.12 +10.13 +10.2 +1.0.20210807 +1.0.20210928 +1.0.20211027 +1.0.20211230 +1.0.20220122 +1.0.20220127 +1.0.20220307 +1.0.20220331 +1.0.20220504 +1.0.20220521 +1.0.20220608 +1.0.20220709 +1.0.20220805 +1.0.20220817 +1.0.20220909 +1.0.20220926 +1.0.20221007 +1.0.20221028 +1.0.20221119 +1.0.20221202 +1.0.20221220 +1.0.20230106 +1.0.20230123 +1.0.20230208 +1.0.20230225 +1.0.20230308 +1.0.20230330 +1.0.20230414 +1.0.20230427 +1.0.20230518 +1.0.20230607 +1.0.20230615 +1.0.20230713 +1.0.20230811 +10.3 +10.4 +10.5 +10.6 +1063 +1069 +10.7 +10.8 +1084 +1086 +10.9 +11 +11.0.108.0 +11.0.93.0 +11.0.96.0 +11.1 +11.2 +11.3 +11.33 +11.4 +11.7 +11.8 +11.9 +11-updates +12 +12.0 +12.04 +12.1 +12.10 +12.10.1 +12.10.2 +12.2 +12.3 +12.4 +12.5 +12.7.2 +1.27.5 +12.8 +12.8.2 +12.9 +12.9.2 +12.9.3 +12.9.4 +12-updates +13 +13.0 +13.1 +13.10 +13.2 +13.3 +1353.7.0 +14.0 +14.04 +14.1 +14.1.0.10 +14.10.1.10 +14.10.1.11 +14.11.1.10 +14.12.1.10 +14.12.1.11 +14.13.1.10 +14.14.1.10 +14.15.1.10 +14.16.1.10 +14.2 +14.2.0.0 +14.2.0.20 +14.3.0.10 +14.3.0.20 +14.3.0.21 +14.4.0.10 +14.4.0.16 +14.4.1.10 +14.5.0.11 +14.5.0.20 +14.6.0.10 +14.6.0.20 +14.6.0.30 +14.6.1.10 +14.6.1.11 +14.7.0.20 +14.7.0.30 +14.7.0.40 +14.7.0.41 +14.7.0.50 +14.7.0.60 +14.7.1.100 +14.7.1.20 +14.7.1.31 +14.7.1.40 +14.7.1.426 +14.7.1.50 +14.7.1.60 +14.7.1.61 +14.7.1.62 +14.7.1.70 +14.7.1.71 +14.7.1.80 +14.7.1.90 +14.8.1.10 +14.9.1.10 +14.9.1.11 +1.4-rolling-202402090309 +1.4-rolling-202402241557 +15 +15.0 +15.1 +15.2 +15.3 +153.1 +15.4 +15.5 +15.6 +1576.5.0 +16.04 +16.10 +16.1-11023 +16.1-11047 +16.1-11052 +16.1-11057 +16.1-11065 +16.1-11066 +16.1-11067 +16.1-11079 +1688.5.3 +17.04 +17.10 +17.3 +18 +18.04 +18.06.4 +18.10 +1855.4.0 +1883.1.0 +19 +19.04 +19.10 +1911.1.1 +1911.3.0 +2 +2.0 +20 +20.04 +20.10 +20.10.10 +20.10.12 +20.10.13 +20.10.9 +2015.11-git +2019.2 +2.0.20220124 +2.0.20220226 +2.0.20220325 +2.0.20220403 +2.0.20220409 +2.0.20220426 +2.0.20220527 +2.0.20220617 +2.0.20220625 +2.0.20220713 +2.0.20220731 +2.0.20220804 
+2.0.20220824 +2.0.20220909 +2.0.20220916 +2.0.20220921 +2.0.20221004 +2.0.20221010 +2.0.20221026 +2.0.20221029 +2.0.20221110 +2.0.20221122 +2.0.20221203 +2.0.20221215 +2.0.20221218 +2.0.20221222 +2.0.20230107 +2.0.20230126 +2.0.20230208 +2.0.20230212 +2.0.20230218 +2.0.20230303 +2.0.20230321 +2.0.20230407 +2.0.20230410 +2.0.20230426 +2.0.20230518 +2.0.20230526 +2.0.20230609 +2.0.20230611 +2.0.20230621 +2.0.20230630 +2.0.20230721 +2.0.20230805 +2.0.20230811 +2.0.20230823 +2.0.20230904 +2.0.20230924 +2.0.20231004 +2.0.20231101 +2.0.20231106 +2.0.20231115 +2.0.20231130 +2.0.20240111 +2.0.20240112 +2.0.20240117 +2.0.20240123 +2.0.20240202 +2.0.20240208 +2.0.20240209 +2.0.20240211 +2.0.20240212 +2.0.20240213 +2.0.20240214 +2.0.20240215 +2.0.20240216 +2.0.20240217 +2.0.20240218 +2.0.20240219 +2.0.20240220 +2.0.20240221 +2.0.20240222 +2.0.20240223 +2.0.20240224 +2.0.20240225 +2.0.20240226 +2.0.20240227 +2.0.20240228 +2.0.20240229 +2021.1 +2021.4 +2022.2 +2022.3 +2022.4 +2023 +2023.02.1 +2023.1 +2023.2 +2023.3 +2023.4 +2023.5.0 +2024.1 +2.1 +2.10 +21.04 +2.1.1 +2.11 +21.10 +2.1.2 +2.12 +2.1.3 +2.13 +21.3 +2135.4.0 +2.14 +2.15 +2.16 +2.17 +2.18 +2.19 +2191.5.0 +2.1-systemd-rc1 +2.2 +22 +2.2.0 +22.03 +22.04 +2.2.1 +2.21 +22.10 +22.11 +22.1.10_4 +22.1.4_1 +2.22 +2.26 +22.7.11_1 +22.7_4 +22.7.9_3 +2.3 +23 +2.30 +2303.3.0 +23.04 +23.05 +2308a +2308b +2.31 +23.10 +23.10.2 +23.11 +23.1.11 +23.1.11_2 +23.1.1_2 +23.1.2 +23.1_6 +23.1.7_3 +23.1.8 +23.4.2_4 +2345.3.0 +2345.3.1 +23.7.10_1 +23.7.11 +23.7.12 +23.7.12_5 +23.7.1_3 +23.7.4 +23.7.5 +23.7.6 +23.7.9 +2.3.91 +2.4 +24 +24.04 +24.05 +24.1.1 +24.1_1 +2411.1.0 +2411.1.1 +24.1.2 +24.1.2_1 +2430.0.0 +2466.0.0 +2492.0.0 +2.5 +2512.1.0 +2512.2.0 +2512.3.0 +2512.4.0 +2512.5.0 +2513.0.0 +2513.0.1 +2513.1.0 +2513.2.0 +2513.3.0 +2.5.4 +2.5-5155 +2.5-5193 +2.5-5201 +2.5-5202 +2.5-5204 +2592.0.0 +2.6 +2605.0.0 +2605.1.0 +2605.10.0 +2605.11.0 +2605.12.0 +2605.2.0 +2605.3.0 +2605.4.0 +2605.5.0 +2605.6.0 +2605.7.0 +2605.8.0 +2605.9.0 +2632.0.0 
+2632.1.0 +2643.0.0 +2643.1.0 +2643.1.1 +2661.0.0 +2671.0.0 +2697.0.0 +2705.0.0 +2705.1.0 +2705.1.1 +2705.1.2 +2723.0.0 +2748.0.0 +2765.0.0 +2765.1.0 +2765.2.0 +2765.2.1 +2765.2.2 +2765.2.3 +2765.2.4 +2765.2.5 +2765.2.6 +2783.0.0 +2.8 +2801.0.0 +2801.0.1 +2801.1.0 +2823.0.0 +2823.1.0 +2823.1.1 +2823.1.2 +2823.1.3 +2857.0.0 +2879.0.0 +2879.0.1 +2.9 +29 +2905.0.0 +2905.1.0 +2905.2.0 +2905.2.1 +2905.2.2 +2905.2.3 +2905.2.4 +2905.2.5 +2905.2.6 +2920.0.0 +2920.1.0 +2942.0.0 +2942.1.0 +2942.1.1 +2942.1.2 +2955.0.0 +2969.0.0 +2983.0.0 +2983.1.0 +2983.1.1 +2983.1.2 +2983.2.0 +2983.2.1 +3 +3.0 +3.0.0.448 +3.0.0.480 +3005.0.0 +3005.0.1 +3.0.310-6230 +3.0.310-6235 +3.0.310-6240 +3.0.310-6242 +3.0.310-6250 +3.0.310-6252 +3.0.310-6264 +3033.0.0 +3033.1.0 +3033.1.1 +3033.2.0 +3033.2.1 +3033.2.2 +3033.2.3 +3033.2.4 +3033.3.0 +3033.3.1 +3033.3.10 +3033.3.11 +3033.3.12 +3033.3.13 +3033.3.14 +3033.3.15 +3033.3.16 +3033.3.17 +3033.3.18 +3033.3.2 +3033.3.3 +3033.3.4 +3033.3.5 +3033.3.6 +3033.3.7 +3033.3.8 +3033.3.9 +3046.0.0 +3066.0.0 +3066.1.0 +3066.1.1 +3066.1.2 +3.10.3 +3.11.0 +3.11.0-20240102t2200edt-tagged +3.11.2-dev20240209t1755utc-autotag +3.11.2-dev20240212t1512utc-autotag +3.11.2-dev20240212t2004utc-autotag +3.11.2-dev20240212t2307utc-autotag +3.11.2-dev20240213t0602utc-autotag +3.11.2-dev20240214t1413utc-autotag +3.11.2-rc.1 +3.11.2-rc.2 +3.11.2-rc.3 +3.11.2-rc.4 +3115.0.0 +3.12.0 +3.1.22-1.8 +3127.0.0 +3139.0.0 +3139.1.0 +3139.1.1 +3139.2.0 +3139.2.1 +3139.2.2 +3139.2.3 +3.14.2 +3.15.0 +3.15.10 +3.15.11 +3.15.4 +3.15.7 +3.15.8 +3.15.9 +3.16.2 +3.16.4 +3165.0.0 +3.17.1 +3.17.7 +3.18.0 +3.18.5 +3185.0.0 +3185.1.0 +3185.1.1 +32 +3200.0.0 +3227.0.0 +3227.1.0 +3227.1.1 +3227.2.1 +3227.2.2 +3227.2.3 +3227.2.4 +3255.0.0 +3277.0.0 +3277.1.0 +3277.1.1 +3277.1.2 +33 +3305.0.0 +3305.0.1 +3.3.2009 +3.3.4 +3346.0.0 +3346.1.0 +3374.0.0 +3374.1.0 +3374.1.1 +3374.2.0 +3374.2.1 +3374.2.2 +3374.2.3 +3374.2.4 +3374.2.5 +34 +3402.0.0 +3402.0.1 +3402.1.0 +3417.0.0 +3417.1.0 +3432.0.0 +3432.1.0 
+3446.0.0 +3446.1.0 +3446.1.1 +3480.0.0 +3493.0.0 +3493.1.0 +3.5 +35 +3.5.0 +3510.0.0 +3510.1.0 +3510.2.0 +3510.2.1 +3510.2.2 +3510.2.3 +3510.2.4 +3510.2.5 +3510.2.6 +3510.2.7 +3510.2.8 +3510.3.1 +3510.3.2 +3.5.2-dev20230505t0041edt-manual +3535.0.0 +3549.0.0 +3549.1.0 +3549.1.1 +3.5.5 +3.5.6 +3572.0.0 +3572.0.1 +3572.1.0 +36 +3602.0.0 +3602.1.0 +3602.1.1 +3602.1.2 +3602.1.3 +3602.1.4 +3602.1.5 +3602.1.6 +3602.2.0 +3602.2.1 +3602.2.2 +3602.2.3 +3619.0.0 +3637.0.0 +3654.0.0 +3665.0.0 +3689.0.0 +37 +3717.0.0 +3732.0.0 +3745.1.0 +3760.0.0 +3760.1.0 +3760.1.1 +3760.2.0 +3794.0.0 +38 +3815.0.0 +3815.1.0 +3815.2.0 +3850.0.0 +3850.1.0 +3874.0.0 +3878.0.0 +3885.0.0 +3886.0.0 +3888.0.0 +3892.0.0 +39 +4 +4.0 +40 +41 +42.3 +4.24.3.1 +4.24.3.2 +4.26.1.1 +4.27.0 +4.27.3 +4.32 +4.33 +4.3.3-117 +4.6 +4.7 +5.0 +5.1 +5.10.0-18-cloud-amd64 +5.11 +5.2 +5.3 +5.4 +5.4.0.00198 +5.4.1.00026 +5.4.1.00056 +5.6 +6 +6.0 +6.0.0.beta4 +6.1 +6.10 +6.10.0 +6.11.0 +6.11.1 +6.11.2 +6.11.3 +6.11.4 +6.11.5 +6.11.6 +6.11.7 +6.12.0 +6.1.22 +6.13.0 +6.14.0 +6.2 +6.3 +6.4 +6.5 +6.5.0 +6.5.4 +6.5.5 +6.5.6 +6.5.7 +6.6 +6.7 +6.7.2 +6.8 +6.8.2 +6.9 +6.9.1 +6.9.2 +7 +7.0 +7.0.1 +7.0.1406 +7.1 +7.10.0.0-1017741 +7.10.0.20-1023227 +7.10.1.0-1042928 +7.10.1.10-1068159 +7.10.1.1-1049892 +7.10.1.15-1078832 +7.10.1.20-1090468 +7.11 +7.11.0.0-1035502 +7.1.1503 +7.12.0.0-1053185 +7.13.0.10-1078781 +7.13.0.20-1082704 +7.13.1.0-1085623 +7.13.1.0-1093040 +7.13.1.0-1093865 +7.2 +7.2.0 +7.2.1511 +7.3 +7.3.1611 +7.4 +7.4.1708 +7.5 +7.5.0.10-680584 +7.5.1804 +7.6 +7.6.1810 +7.7 +7.7.0.7-1007134 +7.7.1.0-1007743 +7.7.1908 +7.7.5.11-1046187 +7.7.5.20-1063368 +7.7.5.25-1078970 +7.7.5.30-1089690 +7.7.5.30-1091295 +7.8 +7.8.0.0-1008134 +7.8.0.10-1009761 +7.8.0.20-1011246 +7.8.0.8.0 +7.8.1.7.0 +7.8.2003 +7.8.2.1 +7.9 +7.9.0.0-1011258 +7.9.2009 +8 +8. 
+8.0 +8.0.0.0-1091527 +8.0.0.0-1091581 +8.0.0.0-1091682 +8.0.0.0-1091972 +8.0.0.0-1092170 +8.0.0.0-1092707 +8.0.0.0-1092873 +8.0.0.0-1093024 +8.0.0.0-1093042 +8.0.0.0-1093255 +8.0.0.0-1094303 +8.0.1905 +8.1 +8.1.0 +8.10 +8.1.0.0-1092701 +8.1.0.0-1093328 +8.11 +8.1.1911 +8.1.3-p1-24838 +8.1.3-p2-24912 +8.1.3-p3-24955 +8.1.3-p4-25026 +8.1.3-p5-25104 +8.1.3-p6-25199 +8.1.3-p7-25298 +8.1.3-p8-25333 +8.1.3-p8-25334 +8.1.3-p8-25335 +8.1.3-p8-25336 +8.1.3-p8-25339 +8.1.3-p8-25341 +8.1.3-p8-25342 +8.1.3-p8-25343 +8.1.3-p8-25345 +8.1.3-p8-25349 +8.1.3-p8-25350 +8.1.3-p8-25351 +8.1.3-p8-25352 +8.1.3-p8-25353 +8.1.3-p8-25354 +8.1.3-p8-25355 +8.1.3-p8-25356 +8.1.3-p8-25357 +8.1.3-p8-25360 +8.1.3-p8-25361 +8.1.3-p8-25362 +8.1.3-p8-25363 +8.1.3-p8-25364 +8.1.3-p8-25365 +8.1.3-p8-25366 +8.1.3-p8-25367 +8.1.3-p8-25370 +8.1.3-p8-25371 +8.1.3-p8-25372 +8.1.3-p8-25373 +8.1.3-p8-25375 +8.1.3-p8-25376 +8.1.3-p8-khil.un-08415223c9a99546b566df0dbc683ffa378cfd77 +8.1.3-p8-khil.un-29562fd3e583d0b1529db6f92fedf409aec35c53 +8.1.3-p8-khil.un-7802727eceff485a5339f081ba97c8eccc697c62 +8.1.4-p1-25119 +8.2 +8.2.2004 +8.3 +8.3.0.6_87213 +8.3.2011 +8.3.2.1_85580 +8.3.2.2_85607 +8.3.3 +8.3.8.0_86519 +8.3.8.0_86525 +8.4 +8.4.1 +8.4.2 +8.4.2105 +8.4.3 +8.5 +8.5.0 +8.5.1 +8.5.2 +8.5.2111 +8.5.8 +8.6 +8.6.2 +8.6.3 +8.6.7 +8.7 +8.8 +8.8.1 +8.9 +9 +9.0 +9.0.0-p1-24746 +9.0.0-p2-24858 +9.0.1-24945 +9.0.1-p1-25067 +9.0.2-25173 +9.0.2-p1-25268 +9.0.3-25350 +9.0.3-p1-25395 +9.0.3-p1-25397 +9.0.3-p1-25398 +9.0.3-p1-25399 +9.0.3-p1-25400 +9.0.3-p1-25402 +9.0.3-p1-25405 +9.0.3-p1-25406 +9.0.3-p1-abhinav.agarwal-18771999cdf52e2eb4cac4515764035f673da0b4 +9.0.3-p1-khil.un-33723dc9b6a306de91bc2a9fcc7768810f1457bf +9.0.3-p2-25407 +9.0.3-p2-25408 +9.0.3-p2-25410 +9.0.3-p2-25411 +9.0.3-p2-25413 +9.0.3-p2-25414 +9.0.3-p2-25415 +9.0.3-p2-25416 +9.0.3-p2-25417 +9.0.3-p2-25418 +9.0.3-p2-25421 +9.0.3-p2-25422 +9.0.3-p2-25423 +9.0.3-p2-25424 +9.0.3-p2-25425 +9.0.3-p2-25426 +9.0.3-p2-25427 +9.0.3-p2-25428 +9.0.3-p2-25429 
+9.0.3-p2-25430 +9.0.3-p2-25431 +9.0.3-p2-25432 +9.0.3-p2-25433 +9.0.3-p2-25434 +9.0.3-p2-25436 +9.0.3-p2-25437 +9.0.3-p2-25439 +9.0.3-p2-25440 +9.0.3-p2-25441 +9.0.3-p2-25442 +9.0.3-p2-25444 +9.0.3-p2-25445 +9.0.3-p2-khil.un-2bf873fb17f994904dcf673399774dc8b9c79c12 +9.0.3-p2-khil.un-ac0b199a717c00707168ad80f8e9611d3f821deb +9.0.3-p3-25446 +9.0.3-p3-25447 +9.0.3-p3-25448 +9.0.3-p3-25449 +9.0.3-p3-25450 +9.0.3-p3-25451 +9.0.3-p3-25452 +9.0.4-25401 +9.0.4-25403 +9.0.4-25435 +9.0.4-25443 +9.1 +9.1.0-27191 +9.1.0-beta5-25477 +9.1.0-beta5-25490 +9.1.0-p1-27296 +9.1.0-p1-27298 +9.1.0-p1-27302 +9.1.0-p1-27309 +9.1.0-p1-27330 +9.1.0-p1-khil.un-c49044ca59c0bc1edf7921109c15878ad8d6b9ff +9.1.0-p2-27361 +9.1.0-p2-27365 +9.1.0-p2-27367 +9.1.0-p2-27369 +9.1.0-p2-27372 +9.1.0-p2-27377 +9.1.0-p2-27379 +9.1.0-p2-27382 +9.1.0-p2-27395 +9.1.0-p2-27400 +9.1.0-p2-27401 +9.1.0-p2-27402 +9.1.0-p2-27403 +9.1.0-p2-27404 +9.1.0-p2-27405 +9.1.0-p2-27406 +9.1.0-p2-27407 +9.1.0-p2-27409 +9.1.0-p2-27418 +9.1.0-p2-khil.un-50de36250e4d05c520fadf4c780da5af8f82f52c +9.1.0-p2-khil.un-713fe3c6fb797ad684383ebda90a00cbca5e2531 +9.11 +9.1.10.0_92772 +9.1.11.0_92806 +9.1.1-27295 +9.1.1-27297 +9.1.1-27299 +9.1.1-27300 +9.1.1-27301 +9.1.1-27303 +9.1.1-27305 +9.1.1-27307 +9.1.1-27308 +9.1.1-27310 +9.1.1-27311 +9.1.1-27312 +9.1.1-27313 +9.1.1-27315 +9.1.1-27318 +9.1.1-27319 +9.1.1-27320 +9.1.1-27321 +9.1.1-27322 +9.1.1-27323 +9.1.1-27324 +9.1.1-27325 +9.1.1-27326 +9.1.1-27327 +9.1.1-27331 +9.1.1-27332 +9.1.1-27334 +9.1.1-27335 +9.1.1-27336 +9.1.1-27337 +9.1.1-27339 +9.1.1-27340 +9.1.1-27341 +9.1.1-27343 +9.1.1-27344 +9.1.1-27345 +9.1.1-27346 +9.1.1-27347 +9.1.1-27348 +9.1.1-27349 +9.1.1-27350 +9.1.1-27351 +9.1.1-27352 +9.1.1-27354 +9.1.1-27355 +9.1.1-27356 +9.1.1-27357 +9.1.1-27358 +9.1.1-27359 +9.1.1-27360 +9.1.1-27362 +9.1.1-27363 +9.1.1-27364 +9.1.1-27366 +9.1.1-27368 +9.1.1-27374 +9.1.1-27376 +9.1.1-27378 +9.1.1-27380 +9.1.1-27381 +9.1.1-27383 +9.1.1-27385 +9.1.1-27387 +9.1.1-27388 +9.1.1-27393 
+9.1.1-27394 +9.1.1-27396 +9.1.1-27397 +9.1.1-27398 +9.1.1-27399 +9.1.1-27408 +9.1.1-27410 +9.1.1-27411 +9.1.1-27412 +9.1.1-27413 +9.1.1-27414 +9.1.1-27415 +9.1.1-27416 +9.1.1-27417 +9.1.1-27419 +9.1.1-beta1-27328 +9.1.1-beta1-27329 +9.1.1-beta1-27338 +9.1.1-khil.un-bce7cbcae9cc06a03b1f888f0ed88ed6818c2d66 +9.1.1-khil.un-dcc75475f02643571e902b5c2c82c25fce65dc63 +9.1.1-nagadeesh.nagaraja-a9b923254f67e1ed0a2f9100900f73985854cf55 +9.12 +9.13 +9.1.3.0_92242 +9.13.1 +9.13.1P1 +9.13.1P2 +9.13.1P3 +9.13.1P4 +9.13.1P6 +9.13.1P7 +9.13.1P8X1 +9.13.1RC1 +9.14.0 +9.14.0P1 +9.14.0P2 +9.14.0P3 +9.14.1 +9.1.4.1_92329 +9.14.1P1 +9.14.1P1X3 +9.14.1P1X4 +9.14.1RC1 +9.1.4.2_92345 +9.1.4.2_92359 +9.1.4.3_92414 +9.1.4.4_92466 +9.1.4.4_92470 +9.15.0 +9.1.5.0_92545 +9.15.1X12 +9.15.1X15 +9.1.6.0_92628 +9.1.6.2_92634 +9.1.6.2_92636 +9.1.7.0_92666 +9.1.8.0_92706 +9.1-dev-25121 +9.1-dev-25368 +9.2 +9.2.0-beta1-25971 +9.2.0-beta1-26005 +9.2.0-beta1-26033 +9.2.0-beta1-26066 +9.2.0-beta2-26101 +9.2.1 +9.2.2.0_94322 +9.2.3.0_94541 +9.2.4.0_94650 +9.2.4.0_94654 +9.2.5.0_94689 +9.2.5.1_94697 +9.2.6.0_94722 +9.2.7.0_94752 +9.2.8.0_94809 +9.2.8.0_94811 +9.2.9.0_94890 +9.2-dev-25813 +9.2-dev-25878 +9.2-dev-25879 +9.2-dev-25920 +9.2-dev-25946 +9.2-dev-25947 +9.2-dev-25948 +9.2-dev-25949 +9.2-dev-25950 +9.2-dev-25951 +9.2-dev-25952 +9.2-dev-25953 +9.2-dev-25954 +9.2-dev-25955 +9.2-dev-25956 +9.2-dev-25958 +9.2-dev-25959 +9.2-dev-25960 +9.2-dev-25961 +9.2-dev-25962 +9.2-dev-25963 +9.2-dev-25965 +9.2-dev-25966 +9.2-dev-25968 +9.2-dev-25969 +9.2-dev-25970 +9.2-dev-25972 +9.2-dev-25974 +9.2-dev-25975 +9.2-dev-25976 +9.2-dev-25977 +9.2-dev-25978 +9.2-dev-25979 +9.2-dev-25980 +9.2-dev-25982 +9.2-dev-25983 +9.2-dev-25984 +9.2-dev-25985 +9.2-dev-25986 +9.2-dev-25987 +9.2-dev-25988 +9.2-dev-25989 +9.2-dev-25990 +9.2-dev-25991 +9.2-dev-25992 +9.2-dev-25993 +9.2-dev-25994 +9.2-dev-25995 +9.2-dev-25996 +9.2-dev-25999 +9.2-dev-26000 +9.2-dev-26001 +9.2-dev-26002 +9.2-dev-26003 +9.2-dev-26009 +9.2-dev-26013 
+9.2-dev-26014 +9.2-dev-26016 +9.2-dev-26017 +9.2-dev-26018 +9.2-dev-26019 +9.2-dev-26020 +9.2-dev-26021 +9.2-dev-26022 +9.2-dev-26023 +9.2-dev-26024 +9.2-dev-26025 +9.2-dev-26027 +9.2-dev-26028 +9.2-dev-26029 +9.2-dev-26030 +9.2-dev-26031 +9.2-dev-26032 +9.2-dev-26034 +9.2-dev-26036 +9.2-dev-26037 +9.2-dev-26038 +9.2-dev-26039 +9.2-dev-26040 +9.2-dev-26041 +9.2-dev-26042 +9.2-dev-26044 +9.2-dev-26046 +9.2-dev-26047 +9.2-dev-26048 +9.2-dev-26050 +9.2-dev-26052 +9.2-dev-26058 +9.2-dev-26060 +9.2-dev-26061 +9.2-dev-26062 +9.2-dev-26063 +9.2-dev-26064 +9.2-dev-26065 +9.2-dev-26067 +9.2-dev-26070 +9.2-dev-26071 +9.2-dev-26075 +9.2-dev-26077 +9.2-dev-26078 +9.2-dev-26079 +9.2-dev-26080 +9.2-dev-26081 +9.2-dev-26082 +9.2-dev-26083 +9.2-dev-26085 +9.2-dev-26086 +9.2-dev-26087 +9.2-dev-26088 +9.2-dev-26089 +9.2-dev-26090 +9.2-dev-26091 +9.2-dev-26093 +9.2-dev-26094 +9.2-dev-26095 +9.2-dev-26096 +9.2-dev-26097 +9.2-dev-26098 +9.2-dev-26104 +9.2-dev-26105 +9.2-dev-26107 +9.2-dev-26108 +9.2-dev-26109 +9.2-dev-26110 +9.2-dev-26111 +9.2-dev-adi.kris-33a772ca61f67a24283d4e71a63650282d6bd073 +9.2-dev-khil.un-6ec1bfcc230e848a0e8f1d776d0f05a35a9545e6 +9.2-dev-khil.un-c35b47d1656fd20c0ec0d6cab8583ffbf6041937 +9.2-dev-khil.un-c54da2af2e5732bee11b720c199e16fd70438968 +9.2-dev-michael.sun-ec36214183ee10fbe28d86a55b3aa46b54eb4a04 +9.3 +9.3.0.0_95721 +9.3.1.0_95994 +9.3.2.0_96093 +9.3.2.1_96098 +9.3.2.2_96105 +9.3.2.3_96127 +9.4 +9.4.1.0_98030 +9.4.1.0_98069 +9.4.2.0_98303 +9.4.2.0_98396 +9.5 +9.6 +9.7 +9.8 +9.9 +9999.0.1 +9999.9.1 +a +Accops +ArrayOS +Aruba +bookworm/sid +bullseye/sid +buster/sid +Clawhammer__9.14.0 +Clawhammer__9.14.1 +Cloudstream__9.16.0 +Epicor +FFFF +h +ip-12.1.6 +ip-13.1.4 +ip-13.1.4.1 +ip-13.1.5 +ip-13.1.5.1 +ip-14.1.4 +ip-14.1.4.1 +ip-14.1.4.2 +ip-14.1.4.4 +ip-14.1.4.5 +ip-14.1.4.6 +ip-14.1.5.1 +ip-14.1.5.2 +ip-14.1.5.3 +ip-14.1.5.4 +ip-14.1.5.5 +ip-14.1.5.6 +ip-15.1.10 +ip-15.1.10.2 +ip-15.1.10.3 +ip-15.1.2.1 +ip-15.1.3 +ip-15.1.3.1 +ip-15.1.4 +ip-15.1.5 
+ip-15.1.5.1 +ip-15.1.6.1 +ip-15.1.7 +ip-15.1.8 +ip-15.1.8.1 +ip-15.1.8.2 +ip-15.1.9.1 +ip-16.0.1.1 +ip-16.0.1.2 +ip-16.1.0 +ip-16.1.1 +ip-16.1.2.1 +ip-16.1.2.2 +ip-16.1.3 +ip-16.1.3.1 +ip-16.1.3.2 +ip-16.1.3.3 +ip-16.1.3.4 +ip-16.1.3.5 +ip-16.1.4 +ip-16.1.4.1 +ip-16.1.4.2 +ip-16.1.5 +ip-17.0.0 +ip-17.1.0 +ip-17.1.0.1 +ip-17.1.0.2 +ip-17.1.0.3 +ip-17.1.1 +ip-17.1.1.1 +ip-17.1.1.2 +ip-17.5.0 +jessie/sid +JNPR-11.0-20200922.4042921_buil +JNPR-11.0-20201028.e1cef1d_buil +JNPR-11.0-20201221.5316c2e_buil +JNPR-11.0-20210220.a5d6a89_buil +JNPR-11.0-20210429.58e41ab_buil +JNPR-11.0-20210618.f43645e_buil +JNPR-12.1-20211216.232802__ci_f +JNPR-12.1-20220202.9885091_buil +JNPR-12.1-20220221.2b3c81a_buil +JNPR-12.1-20220228.82e60e3_buil +JNPR-12.1-20220817.0361d5f_buil +JNPR-12.1-20220817.43c4e23_buil +JNPR-12.1-20221021.a9737e1_buil +JNPR-12.1-20230120.6bab16a_buil +JNPR-12.1-20230321.be5f9c0_buil +JNPR-12.1-20230821.5fbe894_buil +JNPR-12.1-20231013.108e0b3_buil +JNPR-12.1-20231013.32ed862a0f7_ +JNPR-12.1-20231122.ee0e992_buil +JNPR-12.1-20231220.32ed862a0f7_ +JNPR-12.1-20240103.68b4802_buil +JNPR-12.1-20240112.32ed862a0f7_ +JNPR-12.1-20240119.32ed862a0f7_ +JNPR-12.1-20240228.033525_kahon +JNPR-15.0-20240118.32ed862a0f7_ +JNPR-15.0-20240207.32ed862a0f7_ +JNPR-15.0-20240209.212337_yhli_ +JNPR-15.0-20240221.32ed862a0f7_ +JNPR-15.0-20240224.002811_kahon +kali-rolling +leap-15.0 +leap-15.1 +leap-15.2 +leap-15.3 +leap-15.4 +leap-15.5 +Libraesva +lighthouse-23.10.0 +lighthouse-23.10.1 +lighthouse-23.10.2 +lighthouse-24.02.0 +lighthouse-24.02.0p0 +lighthouse-24.05.0p0 +Lighthouse__9.13.1 +Linux +linux-os-31700 +linux-os-31810 +linux-os-31980 +linux-os-36200 +linux-os-38790 +micro-5.5 +Mightysquirrel__9.15.0 +Mightysquirrel__9.15.1 +n/a +NAME="SLES" +ngfw-6.10.11.26551.azure.1 +ngfw-6.10.12.26603 +ngfw-6.10.13.26655.fips.2 +ngfw-6.10.14.26703 +ngfw-6.10.15.26752 +ngfw-7.0.3.28152.sip.2 +ngfw-7.1.1.29059 +ngfw-7.1.2.29102 +ngfw-7.2.0.30046.pppoe.1 
+ngfw-7.2.0.30046.rnext-g02c2c7f.2402121309 +ngfw-7.2.0.30046.rnext-gf1bf778.2402120824 +ngfw-7.2.0.30047.rnext-g030ce90.2402141429 +ngfw-7.2.0.30047.rnext-g2e7c78f.2402150842 +ngfw-7.2.0.30047.rnext-g3f3db02.2402211419 +ngfw-7.2.0.30047.rnext-g58dccd6.2402151047 +ngfw-7.2.0.30047.rnext-g5d6e00a.2402212007 +ngfw-7.2.0.30047.rnext-gbd58266.2402140855 +ngfw-7.2.0.30047.rnext-gc7730bf.2402151240 +ngfw-7.2.0.30047.rnext-ge9c5065.2402192008 +ngfw-7.2.0.30048.rnext-g237a2a5.2402222007 +ngfw-7.2.0.30048.rnext-g9219487.2402260818 +ngfw-7.2.0.30048.rnext-gbfc76a4.2402261313 +ngfw-7.2.0.30048.rnext-gef6caea.2402260525 +ngfw-7.2.0.30049 +ngfw-7.2.0.30050 +ngfw-7.2.0.30050.rnext-g4152526.2402281323 +ngfw-7.2.0.30050.rnext-gb6d2048.2402291318 +ngfw-7.2.0.30050.rnext-ge84f515.2402291054 +None +PanOS +r11427-9ce6aa9d8d +rolling +Schipperke-4857 +SonicOSX 7.1.1-7038-R5354 +SonicOSX 7.1.1-7040-R2998-HF24239 +SonicOSX 7.1.1-7040-R5387 +SonicOSX 7.1.1-7040-R5389 +SonicOSX 7.1.1-7040-R5391 +SonicOSX 7.1.1-7041-R5415 +SonicOSX 7.1.1-7047-R3003-HF24239 +SonicOSX 7.1.1-7047-R5557 +SonicOSX 7.1.1-7047-R5573 +SonicOSX 7.1.1-7047-R5582 +SonicOSX 7.1.1-7047-R5587 +SonicOSX 7.1.1-7048-D14445 +SonicOSX 7.1.1-7049-D14628 +SonicOSX 7.1.1-7049-R5589 +stretch/sid +testing/unstable +trixie/sid +tumbleweed-20230902 +tumbleweed-20240106 +unstable +v3.3 +v3.4.1 +v3.5 +v3.8.1 +vsbc-x86_pi3-6.10.3 +vsbc-x86_pi3-6.10.x6 +vsbc-x86_pi3-6.12.2pre02 From 2c5f139c708f77be680af1046ef6a2bc56670a06 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Wed, 6 Mar 2024 11:12:34 -0800 Subject: [PATCH 181/240] enable GA versioning (#3082) --- azurelinuxagent/common/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/common/conf.py b/azurelinuxagent/common/conf.py index 1470bb99f2..85a7bc2f73 100644 --- a/azurelinuxagent/common/conf.py +++ b/azurelinuxagent/common/conf.py @@ -670,7 +670,7 @@ def 
get_enable_ga_versioning(conf=__conf__): If True, the agent looks for rsm updates(checking requested version in GS) otherwise it will fall back to self-update and finds the highest version from PIR. NOTE: This option is experimental and may be removed in later versions of the Agent. """ - return conf.get_switch("Debug.EnableGAVersioning", False) + return conf.get_switch("Debug.EnableGAVersioning", True) def get_firewall_rules_log_period(conf=__conf__): From d79ab7f9ea0cd4fa15e200737c95eb10e3b359bc Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 6 Mar 2024 16:08:51 -0800 Subject: [PATCH 182/240] Run unit tests with pytest on Python >= 3.10 (#3081) * Run unit tests with pytest on Python >= 3.10 --------- Co-authored-by: narrieta --- .github/workflows/ci_pr.yml | 11 +- ci/nosetests.sh | 8 +- ci/pytest.sh | 19 +++ test-requirements.txt | 5 +- tests/common/test_logger.py | 2 +- tests/common/test_telemetryevent.py | 30 ++--- tests/ga/test_cgroupconfigurator.py | 115 +---------------- tests/ga/test_cgroupconfigurator_sudo.py | 157 +++++++++++++++++++++++ tests/ga/test_extension.py | 4 - tests/test_agent.py | 7 + 10 files changed, 216 insertions(+), 142 deletions(-) create mode 100755 ci/pytest.sh create mode 100644 tests/ga/test_cgroupconfigurator_sudo.py diff --git a/.github/workflows/ci_pr.yml b/.github/workflows/ci_pr.yml index 2e1ac5ab47..f470b4428c 100644 --- a/.github/workflows/ci_pr.yml +++ b/.github/workflows/ci_pr.yml @@ -156,11 +156,14 @@ jobs: pylint $PYLINT_OPTIONS $PYLINT_FILES - - name: Test with nosetests - if: contains(fromJSON('["3.10", "3.11"]'), matrix.python-version) == false && (success() || (failure() && steps.install-dependencies.outcome == 'success')) + - name: Execute Unit Tests + if: success() || (failure() && steps.install-dependencies.outcome == 'success') run: | - ./ci/nosetests.sh - exit $? 
+ if [[ "${{ matrix.python-version }}" =~ ^3\.[1-9][0-9]+$ ]]; then + ./ci/pytest.sh + else + ./ci/nosetests.sh + fi - name: Compile Coverage if: matrix.python-version == '3.9' diff --git a/ci/nosetests.sh b/ci/nosetests.sh index faefd902a9..15b83860ec 100755 --- a/ci/nosetests.sh +++ b/ci/nosetests.sh @@ -5,18 +5,18 @@ set -u EXIT_CODE=0 echo "=========================================" -echo "nosetests -a '!requires_sudo' output" +echo "**** nosetests non-sudo tests ****" echo "=========================================" -nosetests -a '!requires_sudo' tests $NOSEOPTS || EXIT_CODE=$(($EXIT_CODE || $?)) +nosetests --ignore-files test_cgroupconfigurator_sudo.py tests $NOSEOPTS || EXIT_CODE=$(($EXIT_CODE || $?)) echo EXIT_CODE no_sudo nosetests = $EXIT_CODE [[ -f .coverage ]] && \ sudo mv .coverage coverage.$(uuidgen).no_sudo.data echo "=========================================" -echo "nosetests -a 'requires_sudo' output" +echo "**** nosetests sudo tests ****" echo "=========================================" -sudo env "PATH=$PATH" nosetests -a 'requires_sudo' tests $NOSEOPTS || EXIT_CODE=$(($EXIT_CODE || $?)) +sudo env "PATH=$PATH" nosetests tests/ga/test_cgroupconfigurator_sudo.py $NOSEOPTS || EXIT_CODE=$(($EXIT_CODE || $?)) echo EXIT_CODE with_sudo nosetests = $EXIT_CODE [[ -f .coverage ]] && \ diff --git a/ci/pytest.sh b/ci/pytest.sh new file mode 100755 index 0000000000..c166e5853e --- /dev/null +++ b/ci/pytest.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +set -u + +EXIT_CODE=0 + +echo "=========================================" +echo "**** pytest *** non-sudo tests ****" +echo "=========================================" +pytest --ignore-glob '*/test_cgroupconfigurator_sudo.py' --verbose tests || EXIT_CODE=$(($EXIT_CODE || $?)) +echo EXIT_CODE pytests non-sudo = $EXIT_CODE + +echo "=========================================" +echo "**** pytest *** sudo tests ****" +echo "=========================================" +sudo env "PATH=$PATH" pytest --verbose 
tests/ga/test_cgroupconfigurator_sudo.py || EXIT_CODE=$(($EXIT_CODE || $?)) +echo EXIT_CODE pytests sudo = $EXIT_CODE + +exit "$EXIT_CODE" diff --git a/test-requirements.txt b/test-requirements.txt index 2b9467870e..0d653912ea 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -3,8 +3,9 @@ mock==2.0.0; python_version == '2.6' mock==3.0.5; python_version >= '2.7' and python_version <= '3.5' mock==4.0.2; python_version >= '3.6' distro; python_version >= '3.8' -nose -nose-timer; python_version >= '2.7' +nose; python_version <= '3.9' +nose-timer; python_version >= '2.7' and python_version <= '3.9' +pytest; python_version >= '3.10' # Pinning the wrapt requirement to 1.12.0 due to the bug - https://github.com/GrahamDumpleton/wrapt/issues/188 wrapt==1.12.0; python_version > '2.6' and python_version < '3.6' diff --git a/tests/common/test_logger.py b/tests/common/test_logger.py index d792eb8577..4e72e00109 100644 --- a/tests/common/test_logger.py +++ b/tests/common/test_logger.py @@ -15,7 +15,6 @@ # Requires Python 2.6+ and Openssl 1.0+ # -import json # pylint: disable=unused-import import os import tempfile from datetime import datetime, timedelta @@ -49,6 +48,7 @@ def tearDown(self): AgentTestCase.tearDown(self) logger.reset_periodic() logger.DEFAULT_LOGGER.appenders *= 0 + logger.set_prefix(None) fileutil.rm_dirs(self.event_dir) @patch('azurelinuxagent.common.logger.Logger.verbose') diff --git a/tests/common/test_telemetryevent.py b/tests/common/test_telemetryevent.py index ce232dab0b..27a808ddc7 100644 --- a/tests/common/test_telemetryevent.py +++ b/tests/common/test_telemetryevent.py @@ -19,23 +19,23 @@ from tests.lib.tools import AgentTestCase -def get_test_event(name="DummyExtension", op="Unknown", is_success=True, duration=0, version="foo", evt_type="", is_internal=False, - message="DummyMessage", eventId=1): - event = TelemetryEvent(eventId, "XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX") - 
event.parameters.append(TelemetryEventParam(GuestAgentExtensionEventsSchema.Name, name)) - event.parameters.append(TelemetryEventParam(GuestAgentExtensionEventsSchema.Version, str(version))) - event.parameters.append(TelemetryEventParam(GuestAgentExtensionEventsSchema.IsInternal, is_internal)) - event.parameters.append(TelemetryEventParam(GuestAgentExtensionEventsSchema.Operation, op)) - event.parameters.append(TelemetryEventParam(GuestAgentExtensionEventsSchema.OperationSuccess, is_success)) - event.parameters.append(TelemetryEventParam(GuestAgentExtensionEventsSchema.Message, message)) - event.parameters.append(TelemetryEventParam(GuestAgentExtensionEventsSchema.Duration, duration)) - event.parameters.append(TelemetryEventParam(GuestAgentExtensionEventsSchema.ExtensionType, evt_type)) - return event - - class TestTelemetryEvent(AgentTestCase): + @staticmethod + def _get_test_event(name="DummyExtension", op="Unknown", is_success=True, duration=0, version="foo", evt_type="", is_internal=False, + message="DummyMessage", eventId=1): + event = TelemetryEvent(eventId, "XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX") + event.parameters.append(TelemetryEventParam(GuestAgentExtensionEventsSchema.Name, name)) + event.parameters.append(TelemetryEventParam(GuestAgentExtensionEventsSchema.Version, str(version))) + event.parameters.append(TelemetryEventParam(GuestAgentExtensionEventsSchema.IsInternal, is_internal)) + event.parameters.append(TelemetryEventParam(GuestAgentExtensionEventsSchema.Operation, op)) + event.parameters.append(TelemetryEventParam(GuestAgentExtensionEventsSchema.OperationSuccess, is_success)) + event.parameters.append(TelemetryEventParam(GuestAgentExtensionEventsSchema.Message, message)) + event.parameters.append(TelemetryEventParam(GuestAgentExtensionEventsSchema.Duration, duration)) + event.parameters.append(TelemetryEventParam(GuestAgentExtensionEventsSchema.ExtensionType, evt_type)) + return event + def test_contains_works_for_TelemetryEvent(self): - 
test_event = get_test_event(message="Dummy Event") + test_event = TestTelemetryEvent._get_test_event(message="Dummy Event") self.assertTrue(GuestAgentExtensionEventsSchema.Name in test_event) self.assertTrue(GuestAgentExtensionEventsSchema.Version in test_event) diff --git a/tests/ga/test_cgroupconfigurator.py b/tests/ga/test_cgroupconfigurator.py index 63c6ee90bd..82c86c956f 100644 --- a/tests/ga/test_cgroupconfigurator.py +++ b/tests/ga/test_cgroupconfigurator.py @@ -26,20 +26,17 @@ import time import threading -from nose.plugins.attrib import attr - from azurelinuxagent.common import conf from azurelinuxagent.ga.cgroup import AGENT_NAME_TELEMETRY, MetricsCounter, MetricValue, MetricsCategory, CpuCgroup from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator, DisableCgroups from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.common.event import WALAEventOperation -from azurelinuxagent.common.exception import CGroupsException, ExtensionError, ExtensionErrorCodes, \ - AgentMemoryExceededException +from azurelinuxagent.common.exception import CGroupsException, AgentMemoryExceededException from azurelinuxagent.common.future import ustr from azurelinuxagent.common.utils import shellutil, fileutil from tests.lib.mock_environment import MockCommand from tests.lib.mock_cgroup_environment import mock_cgroup_environment, UnitFilePaths -from tests.lib.tools import AgentTestCase, patch, mock_sleep, i_am_root, data_dir, is_python_version_26_or_34, skip_if_predicate_true +from tests.lib.tools import AgentTestCase, patch, mock_sleep, data_dir, is_python_version_26_or_34, skip_if_predicate_true from tests.lib.miscellaneous_tools import format_processes, wait_for @@ -526,112 +523,6 @@ def test_start_extension_command_should_disable_cgroups_and_invoke_the_command_d self.assertEqual(len(CGroupsTelemetry._tracked), 0, "No cgroups should have been created") - @skip_if_predicate_true(is_python_version_26_or_34, "Disabled on Python 2.6 and 
3.4, they run on containers where the OS commands needed by the test are not present.") - @attr('requires_sudo') - @patch('time.sleep', side_effect=lambda _: mock_sleep()) - def test_start_extension_command_should_not_use_fallback_option_if_extension_fails(self, *args): - self.assertTrue(i_am_root(), "Test does not run when non-root") - - with self._get_cgroup_configurator() as configurator: - pass # release the mocks used to create the test CGroupConfigurator so that they do not conflict the mock Popen below - - command = "ls folder_does_not_exist" - - with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stdout: - with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stderr: - with patch("azurelinuxagent.ga.cgroupapi.subprocess.Popen", wraps=subprocess.Popen) as popen_patch: - with self.assertRaises(ExtensionError) as context_manager: - configurator.start_extension_command( - extension_name="Microsoft.Compute.TestExtension-1.2.3", - command=command, - cmd_name="test", - timeout=300, - shell=True, - cwd=self.tmp_dir, - env={}, - stdout=stdout, - stderr=stderr) - - extension_calls = [args[0] for (args, _) in popen_patch.call_args_list if command in args[0]] - - self.assertEqual(1, len(extension_calls), "The extension should have been invoked exactly once") - self.assertIn("systemd-run", extension_calls[0], - "The first call to the extension should have used systemd") - - self.assertEqual(context_manager.exception.code, ExtensionErrorCodes.PluginUnknownFailure) - self.assertIn("Non-zero exit code", ustr(context_manager.exception)) - # The scope name should appear in the process output since systemd-run was invoked and stderr - # wasn't truncated. 
- self.assertIn("Running scope as unit", ustr(context_manager.exception)) - - @skip_if_predicate_true(is_python_version_26_or_34, "Disabled on Python 2.6 and 3.4, they run on containers where the OS commands needed by the test are not present.") - @attr('requires_sudo') - @patch('time.sleep', side_effect=lambda _: mock_sleep()) - @patch("azurelinuxagent.ga.extensionprocessutil.TELEMETRY_MESSAGE_MAX_LEN", 5) - def test_start_extension_command_should_not_use_fallback_option_if_extension_fails_with_long_output(self, *args): - self.assertTrue(i_am_root(), "Test does not run when non-root") - - with self._get_cgroup_configurator() as configurator: - pass # release the mocks used to create the test CGroupConfigurator so that they do not conflict the mock Popen below - - long_output = "a"*20 # large enough to ensure both stdout and stderr are truncated - long_stdout_stderr_command = "echo {0} && echo {0} >&2 && ls folder_does_not_exist".format(long_output) - - with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stdout: - with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stderr: - with patch("azurelinuxagent.ga.cgroupapi.subprocess.Popen", wraps=subprocess.Popen) as popen_patch: - with self.assertRaises(ExtensionError) as context_manager: - configurator.start_extension_command( - extension_name="Microsoft.Compute.TestExtension-1.2.3", - command=long_stdout_stderr_command, - cmd_name="test", - timeout=300, - shell=True, - cwd=self.tmp_dir, - env={}, - stdout=stdout, - stderr=stderr) - - extension_calls = [args[0] for (args, _) in popen_patch.call_args_list if long_stdout_stderr_command in args[0]] - - self.assertEqual(1, len(extension_calls), "The extension should have been invoked exactly once") - self.assertIn("systemd-run", extension_calls[0], - "The first call to the extension should have used systemd") - - self.assertEqual(context_manager.exception.code, ExtensionErrorCodes.PluginUnknownFailure) - self.assertIn("Non-zero exit code", 
ustr(context_manager.exception)) - # stdout and stderr should have been truncated, so the scope name doesn't appear in stderr - # even though systemd-run ran - self.assertNotIn("Running scope as unit", ustr(context_manager.exception)) - - @attr('requires_sudo') - def test_start_extension_command_should_not_use_fallback_option_if_extension_times_out(self, *args): # pylint: disable=unused-argument - self.assertTrue(i_am_root(), "Test does not run when non-root") - - with self._get_cgroup_configurator() as configurator: - pass # release the mocks used to create the test CGroupConfigurator so that they do not conflict the mock Popen below - - with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stdout: - with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stderr: - with patch("azurelinuxagent.ga.extensionprocessutil.wait_for_process_completion_or_timeout", - return_value=[True, None, 0]): - with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupsApi._is_systemd_failure", - return_value=False): - with self.assertRaises(ExtensionError) as context_manager: - configurator.start_extension_command( - extension_name="Microsoft.Compute.TestExtension-1.2.3", - command="date", - cmd_name="test", - timeout=300, - shell=True, - cwd=self.tmp_dir, - env={}, - stdout=stdout, - stderr=stderr) - - self.assertEqual(context_manager.exception.code, ExtensionErrorCodes.PluginHandlerScriptTimedout) - self.assertIn("Timeout", ustr(context_manager.exception)) - @skip_if_predicate_true(is_python_version_26_or_34, "Disabled on Python 2.6 and 3.4, they run on containers where the OS commands needed by the test are not present.") @patch('time.sleep', side_effect=lambda _: mock_sleep()) def test_start_extension_command_should_capture_only_the_last_subprocess_output(self, _): @@ -1024,4 +915,4 @@ def test_check_agent_memory_usage_should_raise_a_cgroups_exception_when_the_limi tracked_metrics.return_value = metrics configurator.check_agent_memory_usage() - self.assertIn("The agent 
memory limit {0} bytes exceeded".format(conf.get_agent_memory_quota()), ustr(context_manager.exception), "An incorrect exception was raised") \ No newline at end of file + self.assertIn("The agent memory limit {0} bytes exceeded".format(conf.get_agent_memory_quota()), ustr(context_manager.exception), "An incorrect exception was raised") diff --git a/tests/ga/test_cgroupconfigurator_sudo.py b/tests/ga/test_cgroupconfigurator_sudo.py new file mode 100644 index 0000000000..30db194086 --- /dev/null +++ b/tests/ga/test_cgroupconfigurator_sudo.py @@ -0,0 +1,157 @@ +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Requires Python 2.4+ and Openssl 1.0+ +# + +from __future__ import print_function + +import contextlib +import subprocess +import tempfile + +from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator +from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry +from azurelinuxagent.common.exception import ExtensionError, ExtensionErrorCodes +from azurelinuxagent.common.future import ustr +from tests.lib.mock_cgroup_environment import mock_cgroup_environment +from tests.lib.tools import AgentTestCase, patch, mock_sleep, i_am_root, is_python_version_26_or_34, skip_if_predicate_true + + +class CGroupConfiguratorSystemdTestCaseSudo(AgentTestCase): + @classmethod + def tearDownClass(cls): + CGroupConfigurator._instance = None + AgentTestCase.tearDownClass() + + @contextlib.contextmanager + def _get_cgroup_configurator(self, initialize=True, enable=True, mock_commands=None): + CGroupConfigurator._instance = None + configurator = CGroupConfigurator.get_instance() + CGroupsTelemetry.reset() + with mock_cgroup_environment(self.tmp_dir) as mock_environment: + if mock_commands is not None: + for command in mock_commands: + mock_environment.add_command(command) + configurator.mocks = mock_environment + if initialize: + if not enable: + with patch.object(configurator, "enable"): + configurator.initialize() + else: + configurator.initialize() + yield configurator + + @skip_if_predicate_true(is_python_version_26_or_34, "Disabled on Python 2.6 and 3.4 for now. 
Need to revisit to fix it") + @patch('time.sleep', side_effect=lambda _: mock_sleep()) + def test_start_extension_command_should_not_use_fallback_option_if_extension_fails(self, *args): + self.assertTrue(i_am_root(), "Test does not run when non-root") + + with self._get_cgroup_configurator() as configurator: + pass # release the mocks used to create the test CGroupConfigurator so that they do not conflict the mock Popen below + + command = "ls folder_does_not_exist" + + with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stdout: + with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stderr: + with patch("azurelinuxagent.ga.cgroupapi.subprocess.Popen", wraps=subprocess.Popen) as popen_patch: + with self.assertRaises(ExtensionError) as context_manager: + configurator.start_extension_command( + extension_name="Microsoft.Compute.TestExtension-1.2.3", + command=command, + cmd_name="test", + timeout=300, + shell=True, + cwd=self.tmp_dir, + env={}, + stdout=stdout, + stderr=stderr) + + extension_calls = [args[0] for (args, _) in popen_patch.call_args_list if command in args[0]] + + self.assertEqual(1, len(extension_calls), "The extension should have been invoked exactly once") + self.assertIn("systemd-run", extension_calls[0], + "The first call to the extension should have used systemd") + + self.assertEqual(context_manager.exception.code, ExtensionErrorCodes.PluginUnknownFailure) + self.assertIn("Non-zero exit code", ustr(context_manager.exception)) + # The scope name should appear in the process output since systemd-run was invoked and stderr + # wasn't truncated. + self.assertIn("Running scope as unit", ustr(context_manager.exception)) + + @skip_if_predicate_true(is_python_version_26_or_34, "Disabled on Python 2.6 and 3.4 for now. 
Need to revisit to fix it") + @patch('time.sleep', side_effect=lambda _: mock_sleep()) + @patch("azurelinuxagent.ga.extensionprocessutil.TELEMETRY_MESSAGE_MAX_LEN", 5) + def test_start_extension_command_should_not_use_fallback_option_if_extension_fails_with_long_output(self, *args): + self.assertTrue(i_am_root(), "Test does not run when non-root") + + with self._get_cgroup_configurator() as configurator: + pass # release the mocks used to create the test CGroupConfigurator so that they do not conflict the mock Popen below + + long_output = "a"*20 # large enough to ensure both stdout and stderr are truncated + long_stdout_stderr_command = "echo {0} && echo {0} >&2 && ls folder_does_not_exist".format(long_output) + + with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stdout: + with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stderr: + with patch("azurelinuxagent.ga.cgroupapi.subprocess.Popen", wraps=subprocess.Popen) as popen_patch: + with self.assertRaises(ExtensionError) as context_manager: + configurator.start_extension_command( + extension_name="Microsoft.Compute.TestExtension-1.2.3", + command=long_stdout_stderr_command, + cmd_name="test", + timeout=300, + shell=True, + cwd=self.tmp_dir, + env={}, + stdout=stdout, + stderr=stderr) + + extension_calls = [args[0] for (args, _) in popen_patch.call_args_list if long_stdout_stderr_command in args[0]] + + self.assertEqual(1, len(extension_calls), "The extension should have been invoked exactly once") + self.assertIn("systemd-run", extension_calls[0], + "The first call to the extension should have used systemd") + + self.assertEqual(context_manager.exception.code, ExtensionErrorCodes.PluginUnknownFailure) + self.assertIn("Non-zero exit code", ustr(context_manager.exception)) + # stdout and stderr should have been truncated, so the scope name doesn't appear in stderr + # even though systemd-run ran + self.assertNotIn("Running scope as unit", ustr(context_manager.exception)) + + def 
test_start_extension_command_should_not_use_fallback_option_if_extension_times_out(self, *args): # pylint: disable=unused-argument + self.assertTrue(i_am_root(), "Test does not run when non-root") + + with self._get_cgroup_configurator() as configurator: + pass # release the mocks used to create the test CGroupConfigurator so that they do not conflict the mock Popen below + + with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stdout: + with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stderr: + with patch("azurelinuxagent.ga.extensionprocessutil.wait_for_process_completion_or_timeout", + return_value=[True, None, 0]): + with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupsApi._is_systemd_failure", + return_value=False): + with self.assertRaises(ExtensionError) as context_manager: + configurator.start_extension_command( + extension_name="Microsoft.Compute.TestExtension-1.2.3", + command="date", + cmd_name="test", + timeout=300, + shell=True, + cwd=self.tmp_dir, + env={}, + stdout=stdout, + stderr=stderr) + + self.assertEqual(context_manager.exception.code, ExtensionErrorCodes.PluginHandlerScriptTimedout) + self.assertIn("Timeout", ustr(context_manager.exception)) diff --git a/tests/ga/test_extension.py b/tests/ga/test_extension.py index 62bd11099d..95b2427bce 100644 --- a/tests/ga/test_extension.py +++ b/tests/ga/test_extension.py @@ -63,10 +63,6 @@ SUCCESS_CODE_FROM_STATUS_FILE = 1 -def do_not_run_test(): - return True - - def raise_system_exception(): raise Exception diff --git a/tests/test_agent.py b/tests/test_agent.py index f892f090e2..cbf223aa52 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -17,6 +17,8 @@ import os.path +import azurelinuxagent.common.logger as logger + from azurelinuxagent.agent import parse_args, Agent, usage, AgentCommands from azurelinuxagent.common import conf from azurelinuxagent.ga import logcollector, cgroupconfigurator @@ -101,6 +103,11 @@ class TestAgent(AgentTestCase): + def tearDown(self): + # 
These tests instantiate the Agent class, which has the side effect + # of initializing the global logger and conf objects; reset them. + logger.DEFAULT_LOGGER = logger.Logger() + conf.__conf__.values = {} def test_accepts_configuration_path(self): conf_path = os.path.join(data_dir, "test_waagent.conf") From c96a7f4d35e24d1a71b3b499512e091d2af77490 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Thu, 7 Mar 2024 14:44:40 -0800 Subject: [PATCH 183/240] Fix pytest warnings (#3084) Co-authored-by: narrieta --- azurelinuxagent/agent.py | 2 +- azurelinuxagent/common/osutil/default.py | 6 +++--- azurelinuxagent/common/osutil/freebsd.py | 4 ++-- azurelinuxagent/common/utils/flexible_version.py | 2 +- azurelinuxagent/common/version.py | 14 +++++++------- azurelinuxagent/ga/env.py | 6 +++--- azurelinuxagent/ga/update.py | 2 +- azurelinuxagent/pa/rdma/centos.py | 6 +++--- azurelinuxagent/pa/rdma/rdma.py | 4 ++-- azurelinuxagent/pa/rdma/ubuntu.py | 10 +++++----- ci/pytest.ini | 3 +++ ci/pytest.sh | 4 ++-- tests/common/test_singletonperthread.py | 14 +++++++------- tests/lib/tools.py | 2 -- 14 files changed, 40 insertions(+), 39 deletions(-) create mode 100644 ci/pytest.ini diff --git a/azurelinuxagent/agent.py b/azurelinuxagent/agent.py index 0dae70f195..ee68bd678a 100644 --- a/azurelinuxagent/agent.py +++ b/azurelinuxagent/agent.py @@ -328,7 +328,7 @@ def parse_args(sys_args): if arg == "": # Don't parse an empty parameter continue - m = re.match("^(?:[-/]*)configuration-path:([\w/\.\-_]+)", arg) # pylint: disable=W1401 + m = re.match(r"^(?:[-/]*)configuration-path:([\w/\.\-_]+)", arg) if not m is None: conf_file_path = m.group(1) if not os.path.exists(conf_file_path): diff --git a/azurelinuxagent/common/osutil/default.py b/azurelinuxagent/common/osutil/default.py index dac85d8c18..3b9c504e76 100644 --- a/azurelinuxagent/common/osutil/default.py +++ b/azurelinuxagent/common/osutil/default.py @@ -69,7 +69,7 @@ def crypt(password, salt): if needed. 
""" -_IPTABLES_VERSION_PATTERN = re.compile("^[^\d\.]*([\d\.]+).*$") # pylint: disable=W1401 +_IPTABLES_VERSION_PATTERN = re.compile(r"^[^\d\.]*([\d\.]+).*$") _IPTABLES_LOCKING_VERSION = FlexibleVersion('1.4.21') @@ -117,7 +117,7 @@ def get_firewall_delete_conntrack_drop_command(wait, destination): "--ctstate", "INVALID,NEW", "-j", "DROP"]) -PACKET_PATTERN = "^\s*(\d+)\s+(\d+)\s+DROP\s+.*{0}[^\d]*$" # pylint: disable=W1401 +PACKET_PATTERN = r"^\s*(\d+)\s+(\d+)\s+DROP\s+.*{0}[^\d]*$" ALL_CPUS_REGEX = re.compile('^cpu .*') ALL_MEMS_REGEX = re.compile('^Mem.*') @@ -134,7 +134,7 @@ def get_firewall_delete_conntrack_drop_command(wait, destination): IOCTL_SIOCGIFHWADDR = 0x8927 IFNAMSIZ = 16 -IP_COMMAND_OUTPUT = re.compile('^\d+:\s+(\w+):\s+(.*)$') # pylint: disable=W1401 +IP_COMMAND_OUTPUT = re.compile(r'^\d+:\s+(\w+):\s+(.*)$') STORAGE_DEVICE_PATH = '/sys/bus/vmbus/devices/' GEN2_DEVICE_ID = 'f8b3781a-1e82-4818-a1c3-63d806ec15bb' diff --git a/azurelinuxagent/common/osutil/freebsd.py b/azurelinuxagent/common/osutil/freebsd.py index d2adc00275..1fcfa91677 100644 --- a/azurelinuxagent/common/osutil/freebsd.py +++ b/azurelinuxagent/common/osutil/freebsd.py @@ -551,7 +551,7 @@ def device_for_ide_port(self, port_id): err, output = shellutil.run_get_output(cmd_search_blkvsc) if err == 0: output = output.rstrip() - cmd_search_dev = "camcontrol devlist | grep {0} | awk -F \( '{{print $2}}'|sed -e 's/.*(//'| sed -e 's/).*//'".format(output) # pylint: disable=W1401 + cmd_search_dev = "camcontrol devlist | grep {0} | awk -F \\( '{{print $2}}'|sed -e 's/.*(//'| sed -e 's/).*//'".format(output) err, output = shellutil.run_get_output(cmd_search_dev) if err == 0: for possible in output.rstrip().split(','): @@ -562,7 +562,7 @@ def device_for_ide_port(self, port_id): err, output = shellutil.run_get_output(cmd_search_storvsc) if err == 0: output = output.rstrip() - cmd_search_dev = "camcontrol devlist | grep {0} | awk -F \( '{{print $2}}'|sed -e 's/.*(//'| sed -e 
's/).*//'".format(output) # pylint: disable=W1401 + cmd_search_dev = "camcontrol devlist | grep {0} | awk -F \\( '{{print $2}}'|sed -e 's/.*(//'| sed -e 's/).*//'".format(output) err, output = shellutil.run_get_output(cmd_search_dev) if err == 0: for possible in output.rstrip().split(','): diff --git a/azurelinuxagent/common/utils/flexible_version.py b/azurelinuxagent/common/utils/flexible_version.py index 83762eb850..40fd5306c4 100644 --- a/azurelinuxagent/common/utils/flexible_version.py +++ b/azurelinuxagent/common/utils/flexible_version.py @@ -196,7 +196,7 @@ def _compile_pattern(self): if self.prerel_tags: tags = '|'.join(re.escape(tag) for tag in self.prerel_tags) self.prerel_tags_set = dict(zip(self.prerel_tags, range(len(self.prerel_tags)))) - release_re = '(?:{prerel_sep}(?P<{tn}>{tags})(?P<{nn}>\d*))?'.format( # pylint: disable=W1401 + release_re = r'(?:{prerel_sep}(?P<{tn}>{tags})(?P<{nn}>\d*))?'.format( prerel_sep=self._re_prerel_sep, tags=tags, tn=self._nn_prerel_tag, diff --git a/azurelinuxagent/common/version.py b/azurelinuxagent/common/version.py index ff9c903b93..98065489c3 100644 --- a/azurelinuxagent/common/version.py +++ b/azurelinuxagent/common/version.py @@ -62,8 +62,8 @@ def get_f5_platform(): the version and product information is contained in the /VERSION file. 
""" result = [None, None, None, None] - f5_version = re.compile("^Version: (\d+\.\d+\.\d+)") # pylint: disable=W1401 - f5_product = re.compile("^Product: ([\w-]+)") # pylint: disable=W1401 + f5_version = re.compile(r"^Version: (\d+\.\d+\.\d+)") + f5_product = re.compile(r"^Product: ([\w-]+)") with open('/VERSION', 'r') as fh: content = fh.readlines() @@ -105,15 +105,15 @@ def get_checkpoint_platform(): def get_distro(): if 'FreeBSD' in platform.system(): - release = re.sub('\-.*\Z', '', ustr(platform.release())) # pylint: disable=W1401 + release = re.sub(r'\-.*\Z', '', ustr(platform.release())) osinfo = ['freebsd', release, '', 'freebsd'] elif 'OpenBSD' in platform.system(): - release = re.sub('\-.*\Z', '', ustr(platform.release())) # pylint: disable=W1401 + release = re.sub(r'\-.*\Z', '', ustr(platform.release())) osinfo = ['openbsd', release, '', 'openbsd'] elif 'Linux' in platform.system(): osinfo = get_linux_distribution(0, 'alpine') elif 'NS-BSD' in platform.system(): - release = re.sub('\-.*\Z', '', ustr(platform.release())) # pylint: disable=W1401 + release = re.sub(r'\-.*\Z', '', ustr(platform.release())) osinfo = ['nsbsd', release, '', 'nsbsd'] else: try: @@ -222,13 +222,13 @@ def has_logrotate(): AGENT_PATTERN = "{0}-(.*)".format(AGENT_NAME) AGENT_NAME_PATTERN = re.compile(AGENT_PATTERN) -AGENT_PKG_PATTERN = re.compile(AGENT_PATTERN+"\.zip") # pylint: disable=W1401 +AGENT_PKG_PATTERN = re.compile(AGENT_PATTERN+r"\.zip") AGENT_DIR_PATTERN = re.compile(".*/{0}".format(AGENT_PATTERN)) # The execution mode of the VM - IAAS or PAAS. Linux VMs are only executed in IAAS mode. 
AGENT_EXECUTION_MODE = "IAAS" -EXT_HANDLER_PATTERN = b".*/WALinuxAgent-(\d+.\d+.\d+[.\d+]*).*-run-exthandlers" # pylint: disable=W1401 +EXT_HANDLER_PATTERN = br".*/WALinuxAgent-(\d+.\d+.\d+[.\d+]*).*-run-exthandlers" EXT_HANDLER_REGEX = re.compile(EXT_HANDLER_PATTERN) __distro__ = get_distro() diff --git a/azurelinuxagent/ga/env.py b/azurelinuxagent/ga/env.py index 6b1dd451b8..fa02b64ae1 100644 --- a/azurelinuxagent/ga/env.py +++ b/azurelinuxagent/ga/env.py @@ -35,9 +35,9 @@ from azurelinuxagent.ga.periodic_operation import PeriodicOperation CACHE_PATTERNS = [ - re.compile("^(.*)\.(\d+)\.(agentsManifest)$", re.IGNORECASE), # pylint: disable=W1401 - re.compile("^(.*)\.(\d+)\.(manifest\.xml)$", re.IGNORECASE), # pylint: disable=W1401 - re.compile("^(.*)\.(\d+)\.(xml)$", re.IGNORECASE) # pylint: disable=W1401 + re.compile(r"^(.*)\.(\d+)\.(agentsManifest)$", re.IGNORECASE), + re.compile(r"^(.*)\.(\d+)\.(manifest\.xml)$", re.IGNORECASE), + re.compile(r"^(.*)\.(\d+)\.(xml)$", re.IGNORECASE) ] MAXIMUM_CACHED_FILES = 50 diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index fa554b0d85..845f096866 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -896,7 +896,7 @@ def _get_pid_parts(self): pid_file = conf.get_agent_pid_file_path() pid_dir = os.path.dirname(pid_file) pid_name = os.path.basename(pid_file) - pid_re = re.compile("(\d+)_{0}".format(re.escape(pid_name))) # pylint: disable=W1401 + pid_re = re.compile(r"(\d+)_{0}".format(re.escape(pid_name))) return pid_dir, pid_name, pid_re def _get_pid_files(self): diff --git a/azurelinuxagent/pa/rdma/centos.py b/azurelinuxagent/pa/rdma/centos.py index 5e82acf531..b02b785283 100644 --- a/azurelinuxagent/pa/rdma/centos.py +++ b/azurelinuxagent/pa/rdma/centos.py @@ -82,7 +82,7 @@ def is_rdma_package_up_to_date(self, pkg, fw_version): # Example match (pkg name, -, followed by 3 segments, fw_version and -): # - pkg=microsoft-hyper-v-rdma-4.1.0.142-20160323.x86_64 # - fw_version=142 
- pattern = '{0}-(\d+\.){{3,}}({1})-'.format(self.rdma_user_mode_package_name, fw_version) # pylint: disable=W1401 + pattern = r'{0}-(\d+\.){{3,}}({1})-'.format(self.rdma_user_mode_package_name, fw_version) return re.match(pattern, pkg) @staticmethod @@ -155,7 +155,7 @@ def install_rdma_drivers(self, fw_version): # Install kernel mode driver (kmod-microsoft-hyper-v-rdma-*) kmod_pkg = self.get_file_by_pattern( - pkgs, "%s-(\d+\.){3,}(%s)-\d{8}\.x86_64.rpm" % (self.rdma_kernel_mode_package_name, fw_version)) # pylint: disable=W1401 + pkgs, r"%s-(\d+\.){3,}(%s)-\d{8}\.x86_64.rpm" % (self.rdma_kernel_mode_package_name, fw_version)) if not kmod_pkg: raise Exception("RDMA kernel mode package not found") kmod_pkg_path = os.path.join(pkg_dir, kmod_pkg) @@ -164,7 +164,7 @@ def install_rdma_drivers(self, fw_version): # Install user mode driver (microsoft-hyper-v-rdma-*) umod_pkg = self.get_file_by_pattern( - pkgs, "%s-(\d+\.){3,}(%s)-\d{8}\.x86_64.rpm" % (self.rdma_user_mode_package_name, fw_version)) # pylint: disable=W1401 + pkgs, r"%s-(\d+\.){3,}(%s)-\d{8}\.x86_64.rpm" % (self.rdma_user_mode_package_name, fw_version)) if not umod_pkg: raise Exception("RDMA user mode package not found") umod_pkg_path = os.path.join(pkg_dir, umod_pkg) diff --git a/azurelinuxagent/pa/rdma/rdma.py b/azurelinuxagent/pa/rdma/rdma.py index aabd05541e..edd6f2b555 100644 --- a/azurelinuxagent/pa/rdma/rdma.py +++ b/azurelinuxagent/pa/rdma/rdma.py @@ -246,7 +246,7 @@ def provision_network_direct_rdma(self): return retcode, out = shellutil.run_get_output("modinfo %s" % module_name) if retcode == 0: - version = re.search("version:\s+(\d+)\.(\d+)\.(\d+)\D", out, re.IGNORECASE) # pylint: disable=W1401 + version = re.search(r"version:\s+(\d+)\.(\d+)\.(\d+)\D", out, re.IGNORECASE) if version: v1 = int(version.groups(0)[0]) v2 = int(version.groups(0)[1]) @@ -473,7 +473,7 @@ def update_dat_conf(paths, ipv4_addr): @staticmethod def replace_dat_conf_contents(cfg, ipv4_addr): - old = "ofa-v2-ib0 u2.0 
nonthreadsafe default libdaplofa.so.2 dapl.2.0 \"\S+ 0\"" # pylint: disable=W1401 + old = r"ofa-v2-ib0 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 \"\S+ 0\"" new = "ofa-v2-ib0 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 \"{0} 0\"".format( ipv4_addr) return re.sub(old, new, cfg) diff --git a/azurelinuxagent/pa/rdma/ubuntu.py b/azurelinuxagent/pa/rdma/ubuntu.py index bef152f2e4..dd8652197d 100644 --- a/azurelinuxagent/pa/rdma/ubuntu.py +++ b/azurelinuxagent/pa/rdma/ubuntu.py @@ -37,7 +37,7 @@ def install_driver(self): logger.error("RDMA: Could not determine firmware version. No driver will be installed") return #replace . with _, we are looking for number like 144_0 - nd_version = re.sub('\.', '_', nd_version) # pylint: disable=W1401 + nd_version = re.sub(r'\.', '_', nd_version) #Check to see if we need to reconfigure driver status,module_name = shellutil.run_get_output('modprobe -R hv_network_direct', chk_err=False) @@ -79,13 +79,13 @@ def install_driver(self): status,output = shellutil.run_get_output('apt-cache show --no-all-versions linux-azure') if status != 0: return - r = re.search('Version: (\S+)', output) # pylint: disable=W1401 + r = re.search(r'Version: (\S+)', output) if not r: logger.error("RDMA: version not found in package linux-azure.") return package_version = r.groups()[0] #Remove the ending . 
after - package_version = re.sub("\.\d+$", "", package_version) # pylint: disable=W1401 + package_version = re.sub(r"\.\d+$", "", package_version) logger.info('RDMA: kernel_version=%s package_version=%s' % (kernel_version, package_version)) kernel_version_array = [ int(x) for x in kernel_version.split('.') ] @@ -111,9 +111,9 @@ def update_modprobed_conf(self, nd_version): with open(modprobed_file, 'r') as f: lines = f.read() - r = re.search('alias hv_network_direct hv_network_direct_\S+', lines) # pylint: disable=W1401 + r = re.search(r'alias hv_network_direct hv_network_direct_\S+', lines) if r: - lines = re.sub('alias hv_network_direct hv_network_direct_\S+', 'alias hv_network_direct hv_network_direct_%s' % nd_version, lines) # pylint: disable=W1401 + lines = re.sub(r'alias hv_network_direct hv_network_direct_\S+', 'alias hv_network_direct hv_network_direct_%s' % nd_version, lines) else: lines += '\nalias hv_network_direct hv_network_direct_%s\n' % nd_version with open('/etc/modprobe.d/vmbus-rdma.conf', 'w') as f: diff --git a/ci/pytest.ini b/ci/pytest.ini new file mode 100644 index 0000000000..7e0cb25396 --- /dev/null +++ b/ci/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +filterwarnings = + ignore:distro.linux_distribution\(\) is deprecated diff --git a/ci/pytest.sh b/ci/pytest.sh index c166e5853e..de240a3b89 100755 --- a/ci/pytest.sh +++ b/ci/pytest.sh @@ -7,13 +7,13 @@ EXIT_CODE=0 echo "=========================================" echo "**** pytest *** non-sudo tests ****" echo "=========================================" -pytest --ignore-glob '*/test_cgroupconfigurator_sudo.py' --verbose tests || EXIT_CODE=$(($EXIT_CODE || $?)) +pytest --verbose --config-file ci/pytest.ini --ignore-glob '*/test_cgroupconfigurator_sudo.py' tests || EXIT_CODE=$(($EXIT_CODE || $?)) echo EXIT_CODE pytests non-sudo = $EXIT_CODE echo "=========================================" echo "**** pytest *** sudo tests ****" echo "=========================================" -sudo env "PATH=$PATH" pytest 
--verbose tests/ga/test_cgroupconfigurator_sudo.py || EXIT_CODE=$(($EXIT_CODE || $?)) +sudo env "PATH=$PATH" pytest --verbose --config-file ci/pytest.ini tests/ga/test_cgroupconfigurator_sudo.py || EXIT_CODE=$(($EXIT_CODE || $?)) echo EXIT_CODE pytests sudo = $EXIT_CODE exit "$EXIT_CODE" diff --git a/tests/common/test_singletonperthread.py b/tests/common/test_singletonperthread.py index 80dedcb7a3..91681f2bd7 100644 --- a/tests/common/test_singletonperthread.py +++ b/tests/common/test_singletonperthread.py @@ -6,7 +6,7 @@ from tests.lib.tools import AgentTestCase, clear_singleton_instances -class TestClassToTestSingletonPerThread(SingletonPerThread): +class Singleton(SingletonPerThread): """ Since these tests deal with testing in a multithreaded environment, we employ the use of multiprocessing.Queue() to ensure that the data is consistent. @@ -47,7 +47,7 @@ def setUp(self): # In a multi-threaded environment, exceptions thrown in the child thread will not be propagated to the parent # thread. In order to achieve that, adding all exceptions to a Queue and then checking that in parent thread. 
self.errors = Queue() - clear_singleton_instances(TestClassToTestSingletonPerThread) + clear_singleton_instances(Singleton) def _setup_multithread_and_execute(self, func1, args1, func2, args2, t1_name=None, t2_name=None): @@ -69,7 +69,7 @@ def _setup_multithread_and_execute(self, func1, args1, func2, args2, t1_name=Non @staticmethod def _get_test_class_instance(q, err): try: - obj = TestClassToTestSingletonPerThread() + obj = Singleton() q.put(obj) except Exception as e: err.put(str(e)) @@ -91,8 +91,8 @@ def check_obj(name): return t1_object, t2_object def test_it_should_have_only_one_instance_for_same_thread(self): - obj1 = TestClassToTestSingletonPerThread() - obj2 = TestClassToTestSingletonPerThread() + obj1 = Singleton() + obj2 = Singleton() self.assertEqual(obj1.uuid, obj2.uuid) @@ -137,7 +137,7 @@ def test_singleton_object_should_match_thread_name(self): t1_name = str(uuid.uuid4()) t2_name = str(uuid.uuid4()) - test_class_obj_name = lambda t_name: "%s__%s" % (TestClassToTestSingletonPerThread.__name__, t_name) + test_class_obj_name = lambda t_name: "%s__%s" % (Singleton.__name__, t_name) self._setup_multithread_and_execute(func1=self._get_test_class_instance, args1=(instances, self.errors), @@ -146,7 +146,7 @@ def test_singleton_object_should_match_thread_name(self): t1_name=t1_name, t2_name=t2_name) - singleton_instances = TestClassToTestSingletonPerThread._instances # pylint: disable=no-member + singleton_instances = Singleton._instances # pylint: disable=no-member # Assert instance names are consistent with the thread names self.assertIn(test_class_obj_name(t1_name), singleton_instances) diff --git a/tests/lib/tools.py b/tests/lib/tools.py index 194850ee22..dd0d961724 100644 --- a/tests/lib/tools.py +++ b/tests/lib/tools.py @@ -156,8 +156,6 @@ def setUpClass(cls): cls.assertIsNone = cls.emulate_assertIsNone if not hasattr(cls, "assertIsNotNone"): cls.assertIsNotNone = cls.emulate_assertIsNotNone - if hasattr(cls, "assertRaisesRegexp"): - 
cls.assertRaisesRegex = cls.assertRaisesRegexp if not hasattr(cls, "assertRaisesRegex"): cls.assertRaisesRegex = cls.emulate_raises_regex if not hasattr(cls, "assertListEqual"): From 3b2c905a545e09d6403a7117733352da6bb32eac Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Fri, 8 Mar 2024 11:45:31 -0800 Subject: [PATCH 184/240] update setup (#3088) --- tests_e2e/tests/agent_update/self_update.py | 3 +-- tests_e2e/tests/scripts/agent_update-self_update_test_setup | 5 ++++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tests_e2e/tests/agent_update/self_update.py b/tests_e2e/tests/agent_update/self_update.py index 2aedb72f41..947c26ecc8 100644 --- a/tests_e2e/tests/agent_update/self_update.py +++ b/tests_e2e/tests/agent_update/self_update.py @@ -123,13 +123,12 @@ def _check_agent_version(latest_version: str) -> bool: else: return False - waagent_version: str = "" log.info("Verifying agent updated to latest version: {0}".format(latest_version)) success: bool = retry_if_false(lambda: _check_agent_version(latest_version), delay=60) + waagent_version: str = self._ssh_client.run_command("waagent-version", use_sudo=True) if not success: fail("Guest agent didn't update to latest version {0} but found \n {1}".format( latest_version, waagent_version)) - waagent_version: str = self._ssh_client.run_command("waagent-version", use_sudo=True) log.info( f"Successfully verified agent updated to latest version. 
Current agent version running:\n {waagent_version}") diff --git a/tests_e2e/tests/scripts/agent_update-self_update_test_setup b/tests_e2e/tests/scripts/agent_update-self_update_test_setup index 512beb322b..22a0f4becb 100755 --- a/tests_e2e/tests/scripts/agent_update-self_update_test_setup +++ b/tests_e2e/tests/scripts/agent_update-self_update_test_setup @@ -61,10 +61,13 @@ if [ "$#" -ne 0 ] || [ -z ${package+x} ] || [ -z ${version+x} ]; then fi echo "updating the related to self-update flags" -update-waagent-conf AutoUpdate.UpdateToLatestVersion=$update_to_latest_version Debug.EnableGAVersioning=n Debug.SelfUpdateHotfixFrequency=120 Debug.SelfUpdateRegularFrequency=120 Autoupdate.Frequency=120 +update-waagent-conf AutoUpdate.UpdateToLatestVersion=$update_to_latest_version AutoUpdate.GAFamily=Test Debug.EnableGAVersioning=n Debug.SelfUpdateHotfixFrequency=120 Debug.SelfUpdateRegularFrequency=120 Autoupdate.Frequency=120 agent-service stop mv /var/log/waagent.log /var/log/waagent.$(date --iso-8601=seconds).log +# Some distros may pre-install higher version than custom version that test installs, so we need to lower the version to install custom version +agent_update-modify_agent_version 2.2.53 + echo "Cleaning up the existing agents" rm -rf /var/lib/waagent/WALinuxAgent-* From e72f9e8a3fc44e1738dd70273e35215cd971f51c Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Mon, 11 Mar 2024 13:04:08 -0700 Subject: [PATCH 185/240] Add keyvault test to daily run + Specify tests suite as a list (#3089) Co-authored-by: narrieta --- .../orchestrator/lib/agent_test_loader.py | 22 ++------------ .../lib/agent_test_suite_combinator.py | 26 +++++++++------- tests_e2e/orchestrator/runbook.yml | 30 +++++++++++++++++-- 3 files changed, 46 insertions(+), 32 deletions(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_loader.py b/tests_e2e/orchestrator/lib/agent_test_loader.py index 11e665c13f..f952f1160b 100644 --- a/tests_e2e/orchestrator/lib/agent_test_loader.py +++ 
b/tests_e2e/orchestrator/lib/agent_test_loader.py @@ -88,7 +88,7 @@ class AgentTestLoader(object): """ Loads a given set of test suites from the YAML configuration files. """ - def __init__(self, test_suites: str, cloud: str): + def __init__(self, test_suites: List[str], cloud: str): """ Loads the specified 'test_suites', which are given as a string of comma-separated suite names or a YAML description of a single test_suite. @@ -175,25 +175,9 @@ def _parse_image(image: str) -> str: if suite_skip_image not in self.images: raise Exception(f"Invalid image reference in test suite {suite.name}: Can't find {suite_skip_image} in images.yml") - @staticmethod - def _load_test_suites(test_suites: str) -> List[TestSuiteInfo]: - # - # Attempt to parse 'test_suites' as the YML description of a single suite - # - parsed = yaml.safe_load(test_suites) - - # - # A comma-separated list (e.g. "foo", "foo, bar", etc.) is valid YAML, but it is parsed as a string. An actual test suite would - # be parsed as a dictionary. 
If it is a dict, take is as the YML description of a single test suite - # - if isinstance(parsed, dict): - return [AgentTestLoader._load_test_suite(parsed)] - - # - # If test_suites is not YML, then it should be a comma-separated list of description files - # - description_files: List[Path] = [AgentTestLoader._SOURCE_CODE_ROOT/"test_suites"/f"{t.strip()}.yml" for t in test_suites.split(',')] + def _load_test_suites(test_suites: List[str]) -> List[TestSuiteInfo]: + description_files: List[Path] = [AgentTestLoader._SOURCE_CODE_ROOT/"test_suites"/f"{t}.yml" for t in test_suites] return [AgentTestLoader._load_test_suite(f) for f in description_files] @staticmethod diff --git a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py index ffecaf3630..1450398c8d 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py @@ -46,6 +46,7 @@ class AgentTestSuitesCombinatorSchema(schema.Combinator): resource_group_name: str = field(default_factory=str, metadata=field_metadata(required=True)) subscription_id: str = field(default_factory=str, metadata=field_metadata(required=True)) test_suites: str = field(default_factory=str, metadata=field_metadata(required=True)) + default_test_suites: List[str] = field(default_factory=list, metadata=field_metadata(required=True)) user: str = field(default_factory=str, metadata=field_metadata(required=True)) vm_name: str = field(default_factory=str, metadata=field_metadata(required=True)) vm_size: str = field(default_factory=str, metadata=field_metadata(required=True)) @@ -81,20 +82,25 @@ def __init__(self, runbook: AgentTestSuitesCombinatorSchema) -> None: if self.runbook.resource_group_name == '': raise Exception("Invalid runbook parameters: The 'vmss_name' parameter indicates an existing VMSS, a 'resource_group_name' must be specified.") + if self.runbook.test_suites != "": + test_suites = [t.strip() 
for t in self.runbook.test_suites.split(',')] + else: + test_suites = self.runbook.default_test_suites + self._log: logging.Logger = logging.getLogger("lisa") with set_thread_name("AgentTestSuitesCombinator"): if self.runbook.vm_name != '': - self._environments = [self.create_existing_vm_environment()] + self._environments = [self.create_existing_vm_environment(test_suites)] elif self.runbook.vmss_name != '': - self._environments = [self.create_existing_vmss_environment()] + self._environments = [self.create_existing_vmss_environment(test_suites)] else: - self._environments = self.create_environment_list() + self._environments = self.create_environment_list(test_suites) self._index = 0 @classmethod def type_name(cls) -> str: - return "agent_test_suites" + return "agent_test_suite_combinator" @classmethod def type_schema(cls) -> Type[schema.TypedSchema]: @@ -125,7 +131,7 @@ def _next(self) -> Optional[Dict[str, Any]]: "AzureUSGovernment": "usgovarizona", } - def create_environment_list(self) -> List[Dict[str, Any]]: + def create_environment_list(self, test_suites: List[str]) -> List[Dict[str, Any]]: """ Examines the test_suites specified in the runbook and returns a list of the environments (i.e. test VMs or scale sets) that need to be created in order to execute these suites. 
@@ -136,7 +142,7 @@ def create_environment_list(self) -> List[Dict[str, Any]]:
         environments: List[Dict[str, Any]] = []
         shared_environments: Dict[str, Dict[str, Any]] = {}  # environments shared by multiple test suites
 
-        loader = AgentTestLoader(self.runbook.test_suites, self.runbook.cloud)
+        loader = AgentTestLoader(test_suites, self.runbook.cloud)
 
         runbook_images = self._get_runbook_images(loader)
 
@@ -260,8 +266,8 @@ def create_environment_list(self) -> List[Dict[str, Any]]:
 
         return environments
 
-    def create_existing_vm_environment(self) -> Dict[str, Any]:
-        loader = AgentTestLoader(self.runbook.test_suites, self.runbook.cloud)
+    def create_existing_vm_environment(self, test_suites: List[str]) -> Dict[str, Any]:
+        loader = AgentTestLoader(test_suites, self.runbook.cloud)
 
         vm: VirtualMachineClient = VirtualMachineClient(
             cloud=self.runbook.cloud,
@@ -300,8 +306,8 @@ def create_existing_vm_environment(self) -> Dict[str, Any]:
             "c_test_suites": loader.test_suites,
         }
 
-    def create_existing_vmss_environment(self) -> Dict[str, Any]:
-        loader = AgentTestLoader(self.runbook.test_suites, self.runbook.cloud)
+    def create_existing_vmss_environment(self, test_suites: List[str]) -> Dict[str, Any]:
+        loader = AgentTestLoader(test_suites, self.runbook.cloud)
 
         vmss = VirtualMachineScaleSetClient(
             cloud=self.runbook.cloud,
diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml
index 8b0ef37ec7..722ceba61d 100644
--- a/tests_e2e/orchestrator/runbook.yml
+++ b/tests_e2e/orchestrator/runbook.yml
@@ -26,10 +26,33 @@ variable:
     is_case_visible: true
 
   #
-  # Test suites to execute
+  # Test suites to execute.
+  #
+  # Use "test_suites" to specify from the command-line the test suites to execute. If not specified, the "default_test_suites" are executed.
# - name: test_suites - value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips, agent_ext_workflow, agent_status, multi_config_ext, agent_cgroups, ext_cgroups, agent_firewall, ext_telemetry_pipeline, ext_sequencing, agent_persist_firewall, publish_hostname, agent_update, recover_network_interface" + value: "" + + - name: default_test_suites + value: + - agent_bvt + - agent_cgroups + - agent_ext_workflow + - agent_firewall + - agent_not_provisioned + - agent_persist_firewall + - agent_status + - agent_update + - ext_cgroups + - extensions_disabled + - ext_sequencing + - ext_telemetry_pipeline + - fips + - keyvault_certificates + - multi_config_ext + - no_outbound_connections + - publish_hostname + - recover_network_interface # # Parameters used to create test VMs @@ -183,7 +206,7 @@ environment: $(c_environment) platform: $(c_platform) combinator: - type: agent_test_suites + type: agent_test_suite_combinator allow_ssh: $(allow_ssh) cloud: $(cloud) identity_file: $(identity_file) @@ -193,6 +216,7 @@ combinator: resource_group_name: $(resource_group_name) subscription_id: $(subscription_id) test_suites: $(test_suites) + default_test_suites: $(default_test_suites) user: $(user) vm_name: $(vm_name) vm_size: $(vm_size) From ee6eb7d290e29b547f9c3858dcb81e95e7cf824f Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Thu, 14 Mar 2024 11:10:43 -0700 Subject: [PATCH 186/240] ignore case (#3093) --- tests_e2e/pipeline/pipeline-cleanup.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_e2e/pipeline/pipeline-cleanup.yml b/tests_e2e/pipeline/pipeline-cleanup.yml index 69e929be50..d8d894612f 100644 --- a/tests_e2e/pipeline/pipeline-cleanup.yml +++ b/tests_e2e/pipeline/pipeline-cleanup.yml @@ -52,5 +52,5 @@ steps: --url-parameters api-version=2021-04-01 \$expand=createdTime \ --output json \ --query value \ - | jq --arg date "$date" '.[] | select (.createdTime < 
$date).name | match("'${pattern}'"; "g").string' \ + | jq --arg date "$date" '.[] | select (.createdTime < $date).name | match("'${pattern}'"; "i").string' \ | xargs -l -t -r az group delete --subscription "${subscription_id}" --no-wait -y -n From 5d40813bf13753d10654ef8be14626dcd887ade9 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Mon, 18 Mar 2024 13:31:03 -0700 Subject: [PATCH 187/240] Add retry on keyvault test (#3095) * Add retry on keyvault test * newline --------- Co-authored-by: narrieta --- .../test_suites/keyvault_certificates.yml | 8 +++++- .../keyvault_certificates.py | 27 ++++++++++++++----- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/tests_e2e/test_suites/keyvault_certificates.yml b/tests_e2e/test_suites/keyvault_certificates.yml index 00c51db7d2..c63a4be1f7 100644 --- a/tests_e2e/test_suites/keyvault_certificates.yml +++ b/tests_e2e/test_suites/keyvault_certificates.yml @@ -1,5 +1,10 @@ # -# This test verifies that the Agent can download and extract KeyVault certificates that use different encryption algorithms +# This test verifies that the Agent can download and extract KeyVault certificates that use different encryption +# algorithms (currently RSA and EC). +# +# The test needs exclusive use of the VM because support for EC certificates was added on version 2.10. Daemons +# older than that version will fail to parse the certificates, and go on an infinite loop when fetching the goal +# state. 
# name: "KeyvaultCertificates" tests: @@ -7,3 +12,4 @@ tests: images: - "endorsed" - "endorsed-arm64" +owns_vm: true diff --git a/tests_e2e/tests/keyvault_certificates/keyvault_certificates.py b/tests_e2e/tests/keyvault_certificates/keyvault_certificates.py index 7be3f272c0..0638eda305 100755 --- a/tests_e2e/tests/keyvault_certificates/keyvault_certificates.py +++ b/tests_e2e/tests/keyvault_certificates/keyvault_certificates.py @@ -20,6 +20,9 @@ # # This test verifies that the Agent can download and extract KeyVault certificates that use different encryption algorithms (currently EC and RSA). # +import datetime +import time + from assertpy import fail from tests_e2e.tests.lib.agent_test import AgentVmTest @@ -82,13 +85,23 @@ def run(self): log.info("Reapplying the goal state to ensure the test certificates are downloaded.") self._context.vm.reapply() - try: - output = ssh_client.run_command(f"ls {expected_certificates}", use_sudo=True) - log.info("Found all the expected certificates:\n%s", output) - except CommandError as error: - if error.stdout != "": - log.info("Found some of the expected certificates:\n%s", error.stdout) - fail(f"Failed to find certificates\n{error.stderr}") + # If the goal state includes only the certificates, but no extensions, the update/reapply operations may complete before the Agent has downloaded the certificates + # so we retry for a few minutes to ensure the certificates are downloaded. 
+ timed_out = datetime.datetime.utcnow() + datetime.timedelta(minutes=5) + while True: + try: + output = ssh_client.run_command(f"ls {expected_certificates}", use_sudo=True) + log.info("Found all the expected certificates:\n%s", output) + break + except CommandError as error: + if error.stdout == "": + if datetime.datetime.utcnow() < timed_out: + log.info("The certificates have not been downloaded yet, will retry after a short delay.") + time.sleep(30) + continue + else: + log.info("Found some of the expected certificates:\n%s", error.stdout) + fail(f"Failed to find certificates\n{error.stderr}") if __name__ == "__main__": From d98580381e569f3bd8a17e2fd9ae747491a6d414 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Tue, 19 Mar 2024 13:53:53 -0700 Subject: [PATCH 188/240] Reboot Vm if CSE timesout so logs are collected (#3097) --- .../recover_network_interface.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests_e2e/tests/recover_network_interface/recover_network_interface.py b/tests_e2e/tests/recover_network_interface/recover_network_interface.py index 39799d3752..8ea8f8ea12 100644 --- a/tests_e2e/tests/recover_network_interface/recover_network_interface.py +++ b/tests_e2e/tests/recover_network_interface/recover_network_interface.py @@ -103,7 +103,15 @@ def run(self): log.info("") log.info("Using CSE to bring the primary network interface down and call the OSUtil to bring the interface back up. Command to execute: {0}".format(script)) custom_script = VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.CustomScript, resource_name="CustomScript") - custom_script.enable(protected_settings={'commandToExecute': script}, settings={}) + try: + custom_script.enable(protected_settings={'commandToExecute': script}, settings={}) + except TimeoutError: + # Custom script may timeout if attempt to recover the network interface was not successful. 
The agent won't
+            # be able to report status for the extension if network is down. Reboot the VM to bring the network back up
+            # so logs can be collected.
+            log.info("Custom script did not complete within the timeout. Rebooting the VM in attempt to bring the network interface back up...")
+            self._context.vm.restart(wait_for_boot=True, ssh_client=self._ssh_client)
+            fail("Custom script did not complete within the timeout, which indicates the agent may be unable to report status due to network issues.")
 
         # Check that the interface was down and brought back up in instance view
         log.info("")

From af77271c56f4f173cbb20781d6b3dced2f91b66c Mon Sep 17 00:00:00 2001
From: maddieford <93676569+maddieford@users.noreply.github.com>
Date: Wed, 20 Mar 2024 17:29:02 -0700
Subject: [PATCH 189/240] LogCollector should skip and log warning for files
 that don't exist (#3098)

* Skip collection on files that do not exist

* Fix pylint

* Separate error handling

* log file to collect
---
 azurelinuxagent/ga/logcollector.py | 59 +++++++++++++++++++-----------
 1 file changed, 37 insertions(+), 22 deletions(-)

diff --git a/azurelinuxagent/ga/logcollector.py b/azurelinuxagent/ga/logcollector.py
index e21b1f51f1..eda6106b65 100644
--- a/azurelinuxagent/ga/logcollector.py
+++ b/azurelinuxagent/ga/logcollector.py
@@ -304,23 +304,27 @@ def _get_final_list_for_archive(self, priority_file_queue):
         final_files_to_collect = []
 
         while priority_file_queue:
-            file_path = heappop(priority_file_queue)[1]  # (priority, file_path)
-            file_size = min(os.path.getsize(file_path), _FILE_SIZE_LIMIT)
-
-            if total_uncompressed_size + file_size > _UNCOMPRESSED_ARCHIVE_SIZE_LIMIT:
-                _LOGGER.warning("Archive too big, done with adding files.")
-                break
-
-            if os.path.getsize(file_path) <= _FILE_SIZE_LIMIT:
-                final_files_to_collect.append(file_path)
-                _LOGGER.info("Adding file %s, size %s b", file_path, file_size)
-            else:
-                truncated_file_path = self._truncate_large_file(file_path)
-                if truncated_file_path:
_LOGGER.info("Adding truncated file %s, size %s b", truncated_file_path, file_size) - final_files_to_collect.append(truncated_file_path) - - total_uncompressed_size += file_size + try: + file_path = heappop(priority_file_queue)[1] # (priority, file_path) + file_size = min(os.path.getsize(file_path), _FILE_SIZE_LIMIT) + + if total_uncompressed_size + file_size > _UNCOMPRESSED_ARCHIVE_SIZE_LIMIT: + _LOGGER.warning("Archive too big, done with adding files.") + break + + if os.path.getsize(file_path) <= _FILE_SIZE_LIMIT: + final_files_to_collect.append(file_path) + _LOGGER.info("Adding file %s, size %s b", file_path, file_size) + else: + truncated_file_path = self._truncate_large_file(file_path) + if truncated_file_path: + _LOGGER.info("Adding truncated file %s, size %s b", truncated_file_path, file_size) + final_files_to_collect.append(truncated_file_path) + + total_uncompressed_size += file_size + except IOError as e: + if e.errno == 2: # [Errno 2] No such file or directory + _LOGGER.warning("File %s does not exist, skipping collection for this file", file_path) _LOGGER.info("Uncompressed archive size is %s b", total_uncompressed_size) @@ -357,21 +361,32 @@ def collect_logs_and_get_archive(self): compressed_archive = None + def handle_add_file_to_archive_error(error_count, max_errors, file_to_collect, exception): + error_count += 1 + if error_count >= max_errors: + raise Exception("Too many errors, giving up. 
Last error: {0}".format(ustr(exception))) + else: + _LOGGER.warning("Failed to add file %s to the archive: %s", file_to_collect, ustr(exception)) + return error_count + try: compressed_archive = zipfile.ZipFile(COMPRESSED_ARCHIVE_PATH, "w", compression=zipfile.ZIP_DEFLATED) max_errors = 8 error_count = 0 + for file_to_collect in files_to_collect: try: archive_file_name = LogCollector._convert_file_name_to_archive_name(file_to_collect) compressed_archive.write(file_to_collect.encode("utf-8"), arcname=archive_file_name) - except Exception as e: - error_count += 1 - if error_count >= max_errors: - raise Exception("Too many errors, giving up. Last error: {0}".format(ustr(e))) + except IOError as e: + if e.errno == 2: # [Errno 2] No such file or directory + _LOGGER.warning("File %s does not exist, skipping collection for this file", + file_to_collect) else: - _LOGGER.warning("Failed to add file %s to the archive: %s", file_to_collect, ustr(e)) + error_count = handle_add_file_to_archive_error(error_count, max_errors, file_to_collect, e) + except Exception as e: + error_count = handle_add_file_to_archive_error(error_count, max_errors, file_to_collect, e) compressed_archive_size = os.path.getsize(COMPRESSED_ARCHIVE_PATH) _LOGGER.info("Successfully compressed files. 
Compressed archive size is %s b", compressed_archive_size) From c125edc1bbc93f77fdab0736ccdbc7bb7e662b5a Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Wed, 20 Mar 2024 19:07:30 -0700 Subject: [PATCH 190/240] wait for provision to complete before install test agent (#3094) * wait for provision to complete * address comments --- tests_e2e/orchestrator/scripts/install-agent | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests_e2e/orchestrator/scripts/install-agent b/tests_e2e/orchestrator/scripts/install-agent index d28164f6d3..240be052ad 100755 --- a/tests_e2e/orchestrator/scripts/install-agent +++ b/tests_e2e/orchestrator/scripts/install-agent @@ -85,19 +85,19 @@ echo "Agent Version:" $python "$waagent" --version echo "Service Status:" -# Sometimes the service can take a while to start; give it a few minutes, +# We need to wait for the provisioning code to complete before stopping the agent's service to do the test setup started=false for i in {1..6} do - if service-status $service_name; then + if [[ -f /var/lib/waagent/provisioned ]]; then started=true break fi - echo "Waiting for service to start..." + echo "Waiting for agent to complete provisioning." sleep 30 done if [ $started == false ]; then - echo "Service failed to start." 
+ echo "Provisioning did not complete within the given timeout (cannot find /var/lib/waagent/provisioned)" exit 1 fi From 101cc1ed10dc12af0a370a0e8d0d5981427cc6d8 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Thu, 28 Mar 2024 09:25:17 -0700 Subject: [PATCH 191/240] agent publish refactor (#3091) * agent publish refactor * support arm 64vm * convert dict to str * address comments * pylint * new comments * updated comment --- .../orchestrator/lib/agent_test_suite.py | 26 +++- tests_e2e/orchestrator/runbook.yml | 6 + tests_e2e/pipeline/pipeline.yml | 6 + tests_e2e/pipeline/scripts/execute_tests.sh | 4 + .../tests/agent_publish/agent_publish.py | 127 ++++++++++++++---- tests_e2e/tests/agent_update/rsm_update.py | 75 +---------- tests_e2e/tests/lib/agent_test.py | 2 + tests_e2e/tests/lib/agent_update_helpers.py | 93 +++++++++++++ .../scripts/agent_publish-check_update.py | 31 +++-- 9 files changed, 263 insertions(+), 107 deletions(-) create mode 100644 tests_e2e/tests/lib/agent_update_helpers.py diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index 022bfe8304..799ea441bf 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -149,6 +149,8 @@ def __init__(self, metadata: TestSuiteMetadata) -> None: self._test_suites: List[AgentTestSuite] # Test suites to execute in the environment + self._test_args: Dict[str, str] # Additional arguments pass to the test suite + self._cloud: str # Azure cloud where test VMs are located self._subscription_id: str # Azure subscription where test VMs are located self._location: str # Azure location (region) where test VMs are located @@ -209,6 +211,7 @@ def _initialize(self, environment: Environment, variables: Dict[str, Any], lisa_ self._environment_name = variables["c_env_name"] self._test_suites = variables["c_test_suites"] + self._test_args = 
self._get_test_args(variables["test_args"]) self._cloud = variables["cloud"] self._subscription_id = variables["subscription_id"] @@ -812,12 +815,15 @@ def _create_test_context(self,) -> AgentTestContext: subscription=self._subscription_id, resource_group=self._resource_group_name, name=self._vm_name) - return AgentVmTestContext( + vm_test_context = AgentVmTestContext( working_directory=self._working_directory, vm=vm, ip_address=self._vm_ip_address, username=self._user, identity_file=self._identity_file) + for key in self._test_args: + setattr(vm_test_context, key, self._test_args[key]) + return vm_test_context else: log.info("Creating test context for scale set") if self._create_scale_set: @@ -836,11 +842,27 @@ def _create_test_context(self,) -> AgentTestContext: if self._create_scale_set: self._test_nodes = [_TestNode(name=i.instance_name, ip_address=i.ip_address) for i in scale_set.get_instances_ip_address()] - return AgentVmssTestContext( + vmss_test_context = AgentVmssTestContext( working_directory=self._working_directory, vmss=scale_set, username=self._user, identity_file=self._identity_file) + for key in self._test_args: + setattr(vmss_test_context, key, self._test_args[key]) + return vmss_test_context + + @staticmethod + def _get_test_args(arg_str) -> Dict[str, str]: + """ + Returns the arguments to be passed to the test classes + """ + test_args: Dict[str, str] = {} + if arg_str == "": + return test_args + for arg in arg_str.split(','): + key, value = map(str.strip, arg.split('=')) + test_args[key] = value + return test_args @staticmethod def _mark_log_as_failed(): diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index 722ceba61d..9365e46162 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -55,6 +55,12 @@ variable: - recover_network_interface # + # Additional arguments pass to the test suites + # + - name: test_args + value: "" + is_case_visible: true + # Parameters used to 
create test VMs # - name: subscription_id diff --git a/tests_e2e/pipeline/pipeline.yml b/tests_e2e/pipeline/pipeline.yml index 35d3fe4c14..8a7971456d 100644 --- a/tests_e2e/pipeline/pipeline.yml +++ b/tests_e2e/pipeline/pipeline.yml @@ -19,6 +19,11 @@ parameters: type: string default: "-" + - name: test_args + displayName: Test Args (additional arguments pass to the test suites. Comma-separated list of key=value pairs) + type: string + default: "-" + - name: image displayName: Image (image/image set name, URN, or VHD) type: string @@ -121,6 +126,7 @@ jobs: KEEP_ENVIRONMENT: ${{ parameters.keep_environment }} LOCATION: ${{ parameters.location }} TEST_SUITES: ${{ parameters.test_suites }} + TEST_ARGS: ${{ parameters.test_args }} VM_SIZE: ${{ parameters.vm_size }} - bash: $(Build.SourcesDirectory)/tests_e2e/pipeline/scripts/collect_artifacts.sh diff --git a/tests_e2e/pipeline/scripts/execute_tests.sh b/tests_e2e/pipeline/scripts/execute_tests.sh index 9c185b333c..d2d2f874c4 100755 --- a/tests_e2e/pipeline/scripts/execute_tests.sh +++ b/tests_e2e/pipeline/scripts/execute_tests.sh @@ -54,6 +54,9 @@ if [[ $TEST_SUITES == "-" ]]; then else TEST_SUITES="-v test_suites:\"$TEST_SUITES\"" fi +if [[ $TEST_ARGS == "-" ]]; then + TEST_ARGS="" +fi if [[ $IMAGE == "-" ]]; then IMAGE="" fi @@ -92,4 +95,5 @@ docker run --rm \ -v location:\"$LOCATION\" \ -v vm_size:\"$VM_SIZE\" \ -v allow_ssh:\"$IP_ADDRESS\" \ + -v test_args:\"$TEST_ARGS\" \ $TEST_SUITES" diff --git a/tests_e2e/tests/agent_publish/agent_publish.py b/tests_e2e/tests/agent_publish/agent_publish.py index 0cf51c3311..83c3f71607 100644 --- a/tests_e2e/tests/agent_publish/agent_publish.py +++ b/tests_e2e/tests/agent_publish/agent_publish.py @@ -18,10 +18,13 @@ # import uuid from datetime import datetime -from typing import Any, Dict, List + +from assertpy import fail from tests_e2e.tests.lib.agent_test import AgentVmTest from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext +from 
tests_e2e.tests.lib.agent_update_helpers import request_rsm_update +from tests_e2e.tests.lib.retry import retry_if_false from tests_e2e.tests.lib.vm_extension_identifier import VmExtensionIds, VmExtensionIdentifier from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.ssh_client import SshClient @@ -36,40 +39,126 @@ class AgentPublishTest(AgentVmTest): def __init__(self, context: AgentVmTestContext): super().__init__(context) self._ssh_client: SshClient = self._context.create_ssh_client() + self._published_version = self._get_published_version() def run(self): """ we run the scenario in the following steps: 1. Print the current agent version before the update 2. Prepare the agent for the update - 3. Check for agent update from the log - 4. Print the agent version after the update - 5. Ensure CSE is working + 3. Check for agent update from the log and waagent version + 4. Ensure CSE is working """ self._get_agent_info() - self._prepare_agent() - self._check_update() - self._get_agent_info() + + log.info("Testing rsm update flow....") + self._prepare_agent_for_rsm_update() + self._check_update_from_log() + self._verify_current_agent_version() + self._check_cse() + + log.info("Testing self update flow....") + self._prepare_agent_for_self_update() + self._check_update_from_log() + self._verify_current_agent_version() + self._check_cse() def get_ignore_errors_before_timestamp(self) -> datetime: timestamp = self._ssh_client.run_command("agent_publish-get_agent_log_record_timestamp.py") return datetime.strptime(timestamp.strip(), u'%Y-%m-%d %H:%M:%S.%f') + def _get_published_version(self): + """ + Gets version from test_args if provided, else use the release version from source code version.py + """ + if hasattr(self._context, "published_version"): + return self._context.published_version + + version = self._ssh_client.run_command("pypy3 -c 'from azurelinuxagent.common.version import AGENT_VERSION; print(AGENT_VERSION)'").rstrip() + return version + def 
_get_agent_info(self) -> None: stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True) log.info('Agent info \n%s', stdout) - def _prepare_agent(self) -> None: + def _verify_agent_reported_supported_feature_flag(self): + """ + RSM update rely on supported feature flag that agent sends to CRP.So, checking if GA reports feature flag from reported status + """ + log.info( + "Executing verify_versioning_supported_feature.py remote script to verify agent reported supported feature flag, so that CRP can send RSM update request") + self._run_remote_test(self._ssh_client, "agent_update-verify_versioning_supported_feature.py", use_sudo=True) + log.info("Successfully verified that Agent reported VersioningGovernance supported feature flag") + + def _check_rsm_gs(self, requested_version: str) -> None: + # This checks if RSM GS available to the agent after we send the rsm update request + log.info( + 'Executing wait_for_rsm_gs.py remote script to verify latest GS contain requested version after rsm update requested') + self._run_remote_test(self._ssh_client, f"agent_update-wait_for_rsm_gs.py --version {requested_version}", + use_sudo=True) + log.info('Verified latest GS contain requested version after rsm update requested') + + def _prepare_agent_for_rsm_update(self) -> None: + """ + This method prepares the agent for the RSM update + """ + # First we update the agent to latest version like prod + # Next send RSM update request for new published test version + log.info( + 'Updating agent config flags to allow and download test versions') + output: str = self._ssh_client.run_command( + "update-waagent-conf AutoUpdate.Enabled=y AutoUpdate.UpdateToLatestVersion=y", use_sudo=True) + log.info('Successfully updated agent update config \n %s', output) + + self._verify_agent_reported_supported_feature_flag() + arch_type = self._ssh_client.get_architecture() + request_rsm_update(self._published_version, self._context.vm, arch_type) + 
self._check_rsm_gs(self._published_version) + + output: str = self._ssh_client.run_command( + "update-waagent-conf Debug.EnableGAVersioning=y AutoUpdate.GAFamily=Test", use_sudo=True) + log.info('Successfully enabled rsm updates \n %s', output) + + def _prepare_agent_for_self_update(self) -> None: + """ + This method prepares the agent for the self update + """ log.info("Modifying agent update related config flags and renaming the log file") - self._run_remote_test(self._ssh_client, "sh -c 'agent-service stop && mv /var/log/waagent.log /var/log/waagent.$(date --iso-8601=seconds).log && update-waagent-conf AutoUpdate.UpdateToLatestVersion=y AutoUpdate.GAFamily=Test AutoUpdate.Enabled=y Extensions.Enabled=y'", use_sudo=True) - log.info('Renamed log file and updated agent-update DownloadNewAgents GAFamily config flags') + setup_script = ("agent-service stop && mv /var/log/waagent.log /var/log/waagent.$(date --iso-8601=seconds).log && " + "rm -rf /var/lib/waagent/WALinuxAgent-* && " + "update-waagent-conf AutoUpdate.UpdateToLatestVersion=y AutoUpdate.GAFamily=Test AutoUpdate.Enabled=y Extensions.Enabled=y Debug.EnableGAVersioning=n") + self._run_remote_test(self._ssh_client, f"sh -c '{setup_script}'", use_sudo=True) + log.info('Renamed log file and updated self-update config flags') - def _check_update(self) -> None: + def _check_update_from_log(self) -> None: log.info("Verifying for agent update status") - self._run_remote_test(self._ssh_client, "agent_publish-check_update.py") + self._run_remote_test(self._ssh_client, f"agent_publish-check_update.py --published-version {self._published_version}") log.info('Successfully checked the agent update') + def _verify_current_agent_version(self) -> None: + """ + Verify current agent version running on published version + """ + + def _check_agent_version(version: str) -> bool: + waagent_version: str = self._ssh_client.run_command("waagent-version", use_sudo=True) + expected_version = f"Goal state agent: {version}" + if 
expected_version in waagent_version: + return True + else: + return False + + waagent_version: str = "" + log.info("Verifying agent updated to published version: {0}".format(self._published_version)) + success: bool = retry_if_false(lambda: _check_agent_version(self._published_version)) + if not success: + fail("Guest agent didn't update to published version {0} but found \n {1}. \n ".format( + self._published_version, waagent_version)) + waagent_version: str = self._ssh_client.run_command("waagent-version", use_sudo=True) + log.info( + f"Successfully verified agent updated to published version. Current agent version running:\n {waagent_version}") + def _check_cse(self) -> None: custom_script_2_1 = VirtualMachineExtensionClient( self._context.vm, @@ -86,20 +175,6 @@ def _check_cse(self) -> None: ) custom_script_2_1.assert_instance_view(expected_version="2.1", expected_message=message) - def get_ignore_error_rules(self) -> List[Dict[str, Any]]: - ignore_rules = [ - # - # This is expected as latest version can be the less than test version - # - # WARNING ExtHandler ExtHandler Agent WALinuxAgent-9.9.9.9 is permanently blacklisted - # - { - 'message': r"Agent WALinuxAgent-9.9.9.9 is permanently blacklisted" - } - - ] - return ignore_rules - if __name__ == "__main__": AgentPublishTest.run_from_command_line() diff --git a/tests_e2e/tests/agent_update/rsm_update.py b/tests_e2e/tests/agent_update/rsm_update.py index 86ff7b5e96..ad2222d11e 100644 --- a/tests_e2e/tests/agent_update/rsm_update.py +++ b/tests_e2e/tests/agent_update/rsm_update.py @@ -23,22 +23,16 @@ # The test verifies agent update for rsm workflow. This test covers three scenarios downgrade, upgrade and no update. # For each scenario, we initiate the rsm request with target version and then verify agent updated to that target version. 
# -import json import re from typing import List, Dict, Any -import requests from assertpy import assert_that, fail -from azure.identity import DefaultAzureCredential -from azure.mgmt.compute.models import VirtualMachine -from msrestazure.azure_cloud import Cloud from tests_e2e.tests.lib.agent_test import AgentVmTest from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext -from tests_e2e.tests.lib.azure_clouds import AZURE_CLOUDS +from tests_e2e.tests.lib.agent_update_helpers import request_rsm_update from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.retry import retry_if_false -from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient class RsmUpdateBvt(AgentVmTest): @@ -71,6 +65,7 @@ def get_ignore_error_rules(self) -> List[Dict[str, Any]]: return ignore_rules def run(self) -> None: + arch_type = self._ssh_client.get_architecture() # retrieve the installed agent version in the vm before run the scenario self._retrieve_installed_agent_version() # Allow agent to send supported feature flag @@ -81,7 +76,7 @@ def run(self) -> None: log.info("Current agent version running on the vm before update is \n%s", stdout) self._downgrade_version: str = "2.3.15.0" log.info("Attempting downgrade version %s", self._downgrade_version) - self._request_rsm_update(self._downgrade_version) + request_rsm_update(self._downgrade_version, self._context.vm, arch_type) self._check_rsm_gs(self._downgrade_version) self._prepare_agent() # Verify downgrade scenario @@ -94,7 +89,7 @@ def run(self) -> None: log.info("Current agent version running on the vm before update is \n%s", stdout) upgrade_version: str = "2.3.15.1" log.info("Attempting upgrade version %s", upgrade_version) - self._request_rsm_update(upgrade_version) + request_rsm_update(upgrade_version, self._context.vm, arch_type) self._check_rsm_gs(upgrade_version) self._verify_guest_agent_update(upgrade_version) self._verify_agent_reported_update_status(upgrade_version) @@ -105,7 +100,7 
@@ def run(self) -> None: log.info("Current agent version running on the vm before update is \n%s", stdout) current_version: str = "2.3.15.1" log.info("Attempting update version same as current version %s", current_version) - self._request_rsm_update(current_version) + request_rsm_update(current_version, self._context.vm, arch_type) self._check_rsm_gs(current_version) self._verify_guest_agent_update(current_version) self._verify_agent_reported_update_status(current_version) @@ -117,7 +112,7 @@ def run(self) -> None: log.info("Current agent version running on the vm before update is \n%s", stdout) version: str = "1.5.0.0" log.info("Attempting requested version %s", version) - self._request_rsm_update(version) + request_rsm_update(version, self._context.vm, arch_type) self._check_rsm_gs(version) self._verify_no_guest_agent_update(version) self._verify_agent_reported_update_status(version) @@ -146,64 +141,6 @@ def _prepare_agent(self) -> None: "update-waagent-conf AutoUpdate.UpdateToLatestVersion=y Debug.EnableGAVersioning=y AutoUpdate.GAFamily=Test", use_sudo=True) log.info('Successfully updated agent update config \n %s', output) - @staticmethod - def _verify_agent_update_flag_enabled(vm: VirtualMachineClient) -> bool: - result: VirtualMachine = vm.get_model() - flag: bool = result.os_profile.linux_configuration.enable_vm_agent_platform_updates - if flag is None: - return False - return flag - - def _enable_agent_update_flag(self, vm: VirtualMachineClient) -> None: - osprofile = { - "location": self._context.vm.location, # location is required field - "properties": { - "osProfile": { - "linuxConfiguration": { - "enableVMAgentPlatformUpdates": True - } - } - } - } - log.info("updating the vm with osProfile property:\n%s", osprofile) - vm.update(osprofile) - - def _request_rsm_update(self, requested_version: str) -> None: - """ - This method is to simulate the rsm request. 
- First we ensure the PlatformUpdates enabled in the vm and then make a request using rest api - """ - if not self._verify_agent_update_flag_enabled(self._context.vm): - # enable the flag - log.info("Attempting vm update to set the enableVMAgentPlatformUpdates flag") - self._enable_agent_update_flag(self._context.vm) - log.info("Updated the enableVMAgentPlatformUpdates flag to True") - else: - log.info("Already enableVMAgentPlatformUpdates flag set to True") - - cloud: Cloud = AZURE_CLOUDS[self._context.vm.cloud] - credential: DefaultAzureCredential = DefaultAzureCredential(authority=cloud.endpoints.active_directory) - token = credential.get_token(cloud.endpoints.resource_manager + "/.default") - headers = {'Authorization': 'Bearer ' + token.token, 'Content-Type': 'application/json'} - # Later this api call will be replaced by azure-python-sdk wrapper - base_url = cloud.endpoints.resource_manager - url = base_url + "/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.Compute/virtualMachines/{2}/" \ - "UpgradeVMAgent?api-version=2022-08-01".format(self._context.vm.subscription, - self._context.vm.resource_group, - self._context.vm.name) - data = { - "target": "Microsoft.OSTCLinuxAgent.Test", - "targetVersion": requested_version - } - - log.info("Attempting rsm upgrade post request to endpoint: {0} with data: {1}".format(url, data)) - response = requests.post(url, data=json.dumps(data), headers=headers, timeout=300) - if response.status_code == 202: - log.info("RSM upgrade request accepted") - else: - raise Exception("Error occurred while making RSM upgrade request. 
Status code : {0} and msg: {1}".format( - response.status_code, response.content)) - def _verify_guest_agent_update(self, requested_version: str) -> None: """ Verify current agent version running on rsm requested version diff --git a/tests_e2e/tests/lib/agent_test.py b/tests_e2e/tests/lib/agent_test.py index 0021a8d74b..e4f73d725f 100644 --- a/tests_e2e/tests/lib/agent_test.py +++ b/tests_e2e/tests/lib/agent_test.py @@ -73,6 +73,8 @@ def run_from_command_line(cls): """ Convenience method to execute the test when it is being invoked directly from the command line (as opposed as being invoked from a test framework or library.) + + TODO: Need to implement for reading test specific arguments from command line """ try: if issubclass(cls, AgentVmTest): diff --git a/tests_e2e/tests/lib/agent_update_helpers.py b/tests_e2e/tests/lib/agent_update_helpers.py new file mode 100644 index 0000000000..d48d47bf4e --- /dev/null +++ b/tests_e2e/tests/lib/agent_update_helpers.py @@ -0,0 +1,93 @@ +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import json + +import requests +from azure.identity import DefaultAzureCredential +from msrestazure.azure_cloud import Cloud +from azure.mgmt.compute.models import VirtualMachine + +from tests_e2e.tests.lib.azure_clouds import AZURE_CLOUDS +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient + +# Helper methods for agent update/publish tests + + +def verify_agent_update_flag_enabled(vm: VirtualMachineClient) -> bool: + result: VirtualMachine = vm.get_model() + flag: bool = result.os_profile.linux_configuration.enable_vm_agent_platform_updates + if flag is None: + return False + return flag + + +def enable_agent_update_flag(vm: VirtualMachineClient) -> None: + osprofile = { + "location": vm.location, # location is required field + "properties": { + "osProfile": { + "linuxConfiguration": { + "enableVMAgentPlatformUpdates": True + } + } + } + } + log.info("updating the vm with osProfile property:\n%s", osprofile) + vm.update(osprofile) + + +def request_rsm_update(requested_version: str, vm: VirtualMachineClient, arch_type) -> None: + """ + This method is to simulate the rsm request. 
+ First we ensure the PlatformUpdates enabled in the vm and then make a request using rest api + """ + if not verify_agent_update_flag_enabled(vm): + # enable the flag + log.info("Attempting vm update to set the enableVMAgentPlatformUpdates flag") + enable_agent_update_flag(vm) + log.info("Updated the enableVMAgentPlatformUpdates flag to True") + else: + log.info("Already enableVMAgentPlatformUpdates flag set to True") + + cloud: Cloud = AZURE_CLOUDS[vm.cloud] + credential: DefaultAzureCredential = DefaultAzureCredential(authority=cloud.endpoints.active_directory) + token = credential.get_token(cloud.endpoints.resource_manager + "/.default") + headers = {'Authorization': 'Bearer ' + token.token, 'Content-Type': 'application/json'} + # Later this api call will be replaced by azure-python-sdk wrapper + base_url = cloud.endpoints.resource_manager + url = base_url + "/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.Compute/virtualMachines/{2}/" \ + "UpgradeVMAgent?api-version=2022-08-01".format(vm.subscription, + vm.resource_group, + vm.name) + if arch_type == "aarch64": + data = { + "target": "Microsoft.OSTCLinuxAgent.ARM64Test", + "targetVersion": requested_version + } + else: + data = { + "target": "Microsoft.OSTCLinuxAgent.Test", + "targetVersion": requested_version, + } + + log.info("Attempting rsm upgrade post request to endpoint: {0} with data: {1}".format(url, data)) + response = requests.post(url, data=json.dumps(data), headers=headers, timeout=300) + if response.status_code == 202: + log.info("RSM upgrade request accepted") + else: + raise Exception("Error occurred while making RSM upgrade request. 
Status code : {0} and msg: {1}".format( + response.status_code, response.content)) \ No newline at end of file diff --git a/tests_e2e/tests/scripts/agent_publish-check_update.py b/tests_e2e/tests/scripts/agent_publish-check_update.py index 38ae00a909..ab5eb73569 100755 --- a/tests_e2e/tests/scripts/agent_publish-check_update.py +++ b/tests_e2e/tests/scripts/agent_publish-check_update.py @@ -1,5 +1,5 @@ #!/usr/bin/env pypy3 - +import argparse # Microsoft Azure Linux Agent # # Copyright 2018 Microsoft Corporation @@ -53,6 +53,13 @@ """ _UPDATE_PATTERN_03 = re.compile(r'(.*Agent) update found, exiting current process to (\S*) to the new Agent version (\S*)') +""" +Current Agent 2.8.9.9 completed all update checks, exiting current process to upgrade to the new Agent version 2.10.0.7 +('2.8.9.9', 'upgrade', '2.10.0.7') +""" +_UPDATE_PATTERN_04 = re.compile(r'Current Agent (\S*) completed all update checks, exiting current process to (\S*) to the new Agent version (\S*)') + + """ > Agent WALinuxAgent-2.2.47 is running as the goal state agent ('2.2.47',) @@ -60,7 +67,7 @@ _RUNNING_PATTERN_00 = re.compile(r'.*Agent\sWALinuxAgent-(\S*)\sis running as the goal state agent') -def verify_agent_update_from_log(): +def verify_agent_update_from_log(published_version: str) -> bool: exit_code = 0 detected_update = False @@ -73,16 +80,17 @@ def verify_agent_update_from_log(): if 'TelemetryData' in record.text: continue - for p in [_UPDATE_PATTERN_00, _UPDATE_PATTERN_01, _UPDATE_PATTERN_02, _UPDATE_PATTERN_03]: - update_match = re.match(p, record.text) + for p in [_UPDATE_PATTERN_00, _UPDATE_PATTERN_01, _UPDATE_PATTERN_02, _UPDATE_PATTERN_03, _UPDATE_PATTERN_04]: + update_match = re.match(p, record.message) if update_match: - detected_update = True update_version = update_match.groups()[2] - log.info('found the agent update log: %s', record.text) - break + if update_version == published_version: + detected_update = True + log.info('found the agent update log: %s', record.text) + 
break if detected_update: - running_match = re.match(_RUNNING_PATTERN_00, record.text) + running_match = re.match(_RUNNING_PATTERN_00, record.message) if running_match and update_version == running_match.groups()[0]: update_successful = True log.info('found the agent started new version log: %s', record.text) @@ -95,7 +103,7 @@ def verify_agent_update_from_log(): log.warning('update was not successful') exit_code = 1 else: - log.warning('update was not detected') + log.warning('update was not detected for version: %s', published_version) exit_code = 1 return exit_code == 0 @@ -103,7 +111,10 @@ def verify_agent_update_from_log(): # This method will trace agent update messages in the agent log and determine if the update was successful or not. def main(): - found: bool = retry_if_false(verify_agent_update_from_log) + parser = argparse.ArgumentParser() + parser.add_argument('-p', '--published-version', required=True) + args = parser.parse_args() + found: bool = retry_if_false(lambda: verify_agent_update_from_log(args.published_version)) if not found: fail('update was not found in the logs') From b7cad481f0a0c8ff0080a3ca03186a9753d65e8b Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Thu, 28 Mar 2024 11:07:33 -0700 Subject: [PATCH 192/240] Add EnableFirewall to README (#3100) * Add EnableFirewall to README * change phrasing --------- Co-authored-by: narrieta --- README.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 5a5b126f2f..fd6c9fe5ad 100644 --- a/README.md +++ b/README.md @@ -100,7 +100,7 @@ Waagent depends on some system packages in order to function properly: * Filesystem utilities: sfdisk, fdisk, mkfs, parted * Password tools: chpasswd, sudo * Text processing tools: sed, grep -* Network tools: ip-route +* Network tools: ip-route, iptables ## Installation @@ -568,6 +568,13 @@ OpenSSL commands. 
This signals OpenSSL to use any installed FIPS-compliant libra Note that the agent itself has no FIPS-specific code. _If no FIPS-compliant certificates are installed, then enabling this option will cause all OpenSSL commands to fail._ +#### __OS.EnableFirewall__ + +_Type: Boolean_ +_Default: n (set to 'y' in waagent.conf)_ + +Creates firewall rules to allow communication with the VM Host only by the Agent. + #### __OS.MonitorDhcpClientRestartPeriod__ _Type: Integer_ From 3f49315a57f8d27ac2569193923d33a5550d9401 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Thu, 28 Mar 2024 16:05:26 -0700 Subject: [PATCH 193/240] Add Ubuntu minimal to test run (#3102) * Add ubuntu minimal to test run * typo * suppress warnings --------- Co-authored-by: narrieta --- tests_e2e/test_suites/agent_firewall.yml | 5 ++++- tests_e2e/test_suites/agent_persist_firewall.yml | 4 +++- tests_e2e/test_suites/images.yml | 9 +++++++++ tests_e2e/tests/lib/agent_log.py | 14 +++++++++++++- 4 files changed, 29 insertions(+), 3 deletions(-) diff --git a/tests_e2e/test_suites/agent_firewall.yml b/tests_e2e/test_suites/agent_firewall.yml index 0e095ba39e..787c5173b4 100644 --- a/tests_e2e/test_suites/agent_firewall.yml +++ b/tests_e2e/test_suites/agent_firewall.yml @@ -12,4 +12,7 @@ tests: images: - "endorsed" - "endorsed-arm64" -owns_vm: true # This vm cannot be shared with other tests because it modifies the firewall rules and agent status. \ No newline at end of file +owns_vm: true # This vm cannot be shared with other tests because it modifies the firewall rules and agent status. +skip_on_images: + - "ubuntu_2204_minimal" # TODO: Currently Ubuntu minimal does not include the 'iptables' command. Remove it once this has been addressed. 
+ - "ubuntu_2404_minimal" diff --git a/tests_e2e/test_suites/agent_persist_firewall.yml b/tests_e2e/test_suites/agent_persist_firewall.yml index 137f3af87e..f749046a23 100644 --- a/tests_e2e/test_suites/agent_persist_firewall.yml +++ b/tests_e2e/test_suites/agent_persist_firewall.yml @@ -14,6 +14,8 @@ owns_vm: true # This vm cannot be shared with other tests because it modifies t # so skipping the test run on flatcar distro. # (2023-11-14T19:04:13.738695Z ERROR ExtHandler ExtHandler Unable to setup the persistent firewall rules: [Errno 30] Read-only file system: '/lib/systemd/system/waagent-network-setup.service) skip_on_images: + - "debian_9" # TODO: Reboot is slow on debian_9. Need to investigate further. - "flatcar" - "flatcar_arm64" - - "debian_9" # TODO: Reboot is slow on debian_9. Need to investigate further. \ No newline at end of file + - "ubuntu_2204_minimal" # TODO: Currently Ubuntu minimal does not include the 'iptables' command. Remove it once this has been addressed. + - "ubuntu_2404_minimal" diff --git a/tests_e2e/test_suites/images.yml b/tests_e2e/test_suites/images.yml index 03c1bfd77c..fb6cc7f1fc 100644 --- a/tests_e2e/test_suites/images.yml +++ b/tests_e2e/test_suites/images.yml @@ -28,7 +28,9 @@ image-sets: - "ubuntu_1804" - "ubuntu_2004" - "ubuntu_2204" + - "ubuntu_2204_minimal" - "ubuntu_2404" + - "ubuntu_2404_minimal" # Endorsed distros (ARM64) that are tested on the daily runs endorsed-arm64: @@ -178,9 +180,16 @@ images: locations: AzureChinaCloud: [] AzureUSGovernment: [] + ubuntu_2204_minimal: "Canonical 0001-com-ubuntu-minimal-jammy minimal-22_04-lts-gen2 latest" ubuntu_2404: # TODO: Currently using the daily build, update to the release build once it is available urn: "Canonical 0001-com-ubuntu-server-noble-daily 24_04-daily-lts-gen2 latest" locations: AzureChinaCloud: [] AzureUSGovernment: [] + ubuntu_2404_minimal: + # TODO: Currently using the daily build, update to the release build once it is available + urn: "Canonical 
ubuntu-24_04-lts-daily minimal latest" + locations: + AzureChinaCloud: [] + AzureUSGovernment: [] diff --git a/tests_e2e/tests/lib/agent_log.py b/tests_e2e/tests/lib/agent_log.py index 60d42ec75c..83f77b1ea2 100644 --- a/tests_e2e/tests/lib/agent_log.py +++ b/tests_e2e/tests/lib/agent_log.py @@ -367,7 +367,19 @@ def get_errors(self) -> List[AgentLogRecord]: { 'message': r"AutoUpdate.Enabled property is \*\*Deprecated\*\* now but it's set to different value from AutoUpdate.UpdateToLatestVersion", 'if': lambda r: r.prefix == 'ExtHandler' and r.thread == 'ExtHandler' - } + }, + # + # TODO: Currently Ubuntu minimal does not include the 'iptables' command. Remove this rule once this has been addressed. + # + # We don't have an easy way to distinguish Ubuntu minimal, so this rule suppresses for any Ubuntu. This is OK; if 'iptables' was missing from the regular Ubuntu images, the firewall tests would fail. + # + # 2024-03-27T16:12:35.666460Z ERROR ExtHandler ExtHandler Unable to setup the persistent firewall rules: Unable to determine version of iptables: [Errno 2] No such file or directory: 'iptables' + # 2024-03-27T16:12:35.667253Z WARNING ExtHandler ExtHandler Unable to determine version of iptables: [Errno 2] No such file or directory: 'iptables' + # + { + 'message': r"Unable to determine version of iptables: \[Errno 2\] No such file or directory: 'iptables'", + 'if': lambda r: DISTRO_NAME == 'ubuntu' + }, ] def is_error(r: AgentLogRecord) -> bool: From 782a165242a4b764249901f99db399aec198a30b Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Thu, 4 Apr 2024 10:18:07 -0700 Subject: [PATCH 194/240] check for unexpected process in agent cgroups before cgroups enabled (#3103) * check for unexpected process in cgroup before enable * agent restart * move the process check * fix unit tests * address comments * pylint --- azurelinuxagent/ga/cgroupconfigurator.py | 71 +++++++++---- 
.../data/cgroups/proc_self_cgroup_azure_slice | 13 +++ tests/ga/test_cgroupconfigurator.py | 31 ++++++ tests_e2e/test_suites/agent_cgroups.yml | 4 +- .../agent_cgroups_process_check.py | 77 +++++++++++++++ tests_e2e/tests/lib/cgroup_helpers.py | 23 ++++- ...roups_process_check-cgroups_not_enabled.py | 60 +++++++++++ ...ups_process_check-unknown_process_check.py | 99 +++++++++++++++++++ .../agent_cpu_quota-check_agent_cpu_quota.py | 55 +++++------ 9 files changed, 378 insertions(+), 55 deletions(-) create mode 100644 tests/data/cgroups/proc_self_cgroup_azure_slice create mode 100644 tests_e2e/tests/agent_cgroups/agent_cgroups_process_check.py create mode 100755 tests_e2e/tests/scripts/agent_cgroups_process_check-cgroups_not_enabled.py create mode 100755 tests_e2e/tests/scripts/agent_cgroups_process_check-unknown_process_check.py diff --git a/azurelinuxagent/ga/cgroupconfigurator.py b/azurelinuxagent/ga/cgroupconfigurator.py index 09eb8b55ad..ce86101e07 100644 --- a/azurelinuxagent/ga/cgroupconfigurator.py +++ b/azurelinuxagent/ga/cgroupconfigurator.py @@ -152,23 +152,9 @@ def initialize(self): return # This check is to reset the quotas if agent goes from cgroup supported to unsupported distros later in time. 
if not CGroupsApi.cgroups_supported(): - agent_drop_in_path = systemd.get_agent_drop_in_path() - try: - if os.path.exists(agent_drop_in_path) and os.path.isdir(agent_drop_in_path): - files_to_cleanup = [] - agent_drop_in_file_slice = os.path.join(agent_drop_in_path, _AGENT_DROP_IN_FILE_SLICE) - agent_drop_in_file_cpu_accounting = os.path.join(agent_drop_in_path, - _DROP_IN_FILE_CPU_ACCOUNTING) - agent_drop_in_file_memory_accounting = os.path.join(agent_drop_in_path, - _DROP_IN_FILE_MEMORY_ACCOUNTING) - agent_drop_in_file_cpu_quota = os.path.join(agent_drop_in_path, _DROP_IN_FILE_CPU_QUOTA) - files_to_cleanup.extend([agent_drop_in_file_slice, agent_drop_in_file_cpu_accounting, - agent_drop_in_file_memory_accounting, agent_drop_in_file_cpu_quota]) - self.__cleanup_all_files(files_to_cleanup) - self.__reload_systemd_config() - logger.info("Agent reset the quotas if distro: {0} goes from supported to unsupported list", get_distro()) - except Exception as err: - logger.warn("Unable to delete Agent drop-in files while resetting the quotas: {0}".format(err)) + logger.info("Agent reset the quotas if distro: {0} goes from supported to unsupported list", + get_distro()) + self._reset_agent_cgroup_setup() # check whether cgroup monitoring is supported on the current distro self._cgroups_supported = CGroupsApi.cgroups_supported() @@ -200,6 +186,11 @@ def initialize(self): cpu_controller_root, memory_controller_root) + if conf.get_cgroup_disable_on_process_check_failure() and self._check_fails_if_processes_found_in_agent_cgroup_before_enable(agent_slice): + reason = "Found unexpected processes in the agent cgroup before agent enable cgroups." 
+ self.disable(reason, DisableCgroups.ALL) + return + if self._agent_cpu_cgroup_path is not None or self._agent_memory_cgroup_path is not None: self.enable() @@ -341,6 +332,25 @@ def __setup_azure_slice(): CGroupConfigurator._Impl.__reload_systemd_config() + def _reset_agent_cgroup_setup(self): + try: + agent_drop_in_path = systemd.get_agent_drop_in_path() + if os.path.exists(agent_drop_in_path) and os.path.isdir(agent_drop_in_path): + files_to_cleanup = [] + agent_drop_in_file_slice = os.path.join(agent_drop_in_path, _AGENT_DROP_IN_FILE_SLICE) + agent_drop_in_file_cpu_accounting = os.path.join(agent_drop_in_path, + _DROP_IN_FILE_CPU_ACCOUNTING) + agent_drop_in_file_memory_accounting = os.path.join(agent_drop_in_path, + _DROP_IN_FILE_MEMORY_ACCOUNTING) + agent_drop_in_file_cpu_quota = os.path.join(agent_drop_in_path, _DROP_IN_FILE_CPU_QUOTA) + files_to_cleanup.extend([agent_drop_in_file_slice, agent_drop_in_file_cpu_accounting, + agent_drop_in_file_memory_accounting, agent_drop_in_file_cpu_quota]) + self.__cleanup_all_files(files_to_cleanup) + self.__reload_systemd_config() + except Exception as err: + logger.warn("Unable to delete Agent drop-in files while resetting the quotas: {0}".format(err)) + + @staticmethod def __reload_systemd_config(): # reload the systemd configuration; the new slices will be used once the agent's service restarts @@ -546,6 +556,26 @@ def __try_set_cpu_quota(quota): # pylint: disable=unused-private-member return False return True + def _check_fails_if_processes_found_in_agent_cgroup_before_enable(self, agent_slice): + """ + This check ensures that before we enable the agent's cgroups, there are no unexpected processes in the agent's cgroup already. + + The issue we observed that long running extension processes may be in agent cgroups if agent goes this cycle enabled(1)->disabled(2)->enabled(3). + 1. Agent cgroups enabled in some version + 2. Disabled agent cgroups due to check_cgroups regular check. 
Once we disable the cgroups we don't run the extensions in it's own slice, so they will be in agent cgroups. + 3. When ext_hanlder restart and enable the cgroups again, already running processes from step 2 still be in agent cgroups. This may cause the extensions run with agent limit. + """ + if agent_slice != AZURE_SLICE: + return False + try: + _log_cgroup_info("Checking for unexpected processes in the agent's cgroup before enabling cgroups") + self._check_processes_in_agent_cgroup() + except CGroupsException as exception: + _log_cgroup_warning(ustr(exception)) + return True + + return False + def check_cgroups(self, cgroup_metrics): self._check_cgroups_lock.acquire() try: @@ -591,6 +621,11 @@ def _check_processes_in_agent_cgroup(self): """ unexpected = [] agent_cgroup_proc_names = [] + # Now we call _check_processes_in_agent_cgroup before we enable the cgroups or any one of the controller is not mounted, agent cgroup paths can be None. + # so we need to check both. + cgroup_path = self._agent_cpu_cgroup_path if self._agent_cpu_cgroup_path is not None else self._agent_memory_cgroup_path + if cgroup_path is None: + return try: daemon = os.getppid() extension_handler = os.getpid() @@ -598,7 +633,7 @@ def _check_processes_in_agent_cgroup(self): agent_commands.update(shellutil.get_running_commands()) systemd_run_commands = set() systemd_run_commands.update(self._cgroups_api.get_systemd_run_commands()) - agent_cgroup = CGroupsApi.get_processes_in_cgroup(self._agent_cpu_cgroup_path) + agent_cgroup = CGroupsApi.get_processes_in_cgroup(cgroup_path) # get the running commands again in case new commands started or completed while we were fetching the processes in the cgroup; agent_commands.update(shellutil.get_running_commands()) systemd_run_commands.update(self._cgroups_api.get_systemd_run_commands()) diff --git a/tests/data/cgroups/proc_self_cgroup_azure_slice b/tests/data/cgroups/proc_self_cgroup_azure_slice new file mode 100644 index 0000000000..58df643b24 --- 
/dev/null +++ b/tests/data/cgroups/proc_self_cgroup_azure_slice @@ -0,0 +1,13 @@ +12:blkio:/azure.slice/walinuxagent.service +11:cpu,cpuacct:/azure.slice/walinuxagent.service +10:devices:/azure.slice/walinuxagent.service +9:pids:/azure.slice/walinuxagent.service +8:memory:/azure.slice/walinuxagent.service +7:freezer:/ +6:hugetlb:/ +5:perf_event:/ +4:net_cls,net_prio:/ +3:cpuset:/ +2:rdma:/ +1:name=systemd:/azure.slice/walinuxagent.service +0::/azure.slice/walinuxagent.service diff --git a/tests/ga/test_cgroupconfigurator.py b/tests/ga/test_cgroupconfigurator.py index 82c86c956f..841a4b72d0 100644 --- a/tests/ga/test_cgroupconfigurator.py +++ b/tests/ga/test_cgroupconfigurator.py @@ -46,6 +46,10 @@ def tearDownClass(cls): CGroupConfigurator._instance = None AgentTestCase.tearDownClass() + def tearDown(self): + CGroupConfigurator._instance = None + AgentTestCase.tearDown(self) + @contextlib.contextmanager def _get_cgroup_configurator(self, initialize=True, enable=True, mock_commands=None): CGroupConfigurator._instance = None @@ -905,6 +909,33 @@ def test_check_cgroups_should_disable_cgroups_when_a_check_fails(self): for p in patchers: p.stop() + @patch('azurelinuxagent.ga.cgroupconfigurator.CGroupConfigurator._Impl._check_processes_in_agent_cgroup', side_effect=CGroupsException("Test")) + @patch('azurelinuxagent.ga.cgroupconfigurator.add_event') + def test_agent_should_not_enable_cgroups_if_unexpected_process_already_in_agent_cgroups(self, add_event, _): + command_mocks = [MockCommand(r"^systemctl show walinuxagent\.service --property Slice", +'''Slice=azure.slice +''')] + original_read_file = fileutil.read_file + + def mock_read_file(filepath, **args): + if filepath == "/proc/self/cgroup": + filepath = os.path.join(data_dir, "cgroups", "proc_self_cgroup_azure_slice") + return original_read_file(filepath, **args) + + with self._get_cgroup_configurator(initialize=False, mock_commands=command_mocks) as configurator: + with 
patch("azurelinuxagent.common.utils.fileutil.read_file", side_effect=mock_read_file): + configurator.initialize() + + self.assertFalse(configurator.enabled(), "Cgroups should not be enabled") + disable_events = [kwargs for _, kwargs in add_event.call_args_list if kwargs["op"] == WALAEventOperation.CGroupsDisabled] + self.assertTrue( + len(disable_events) == 1, + "Exactly 1 event should have been emitted. Got: {0}".format(disable_events)) + self.assertIn( + "Found unexpected processes in the agent cgroup before agent enable cgroups", + disable_events[0]["message"], + "The error message is not correct when process check failed") + def test_check_agent_memory_usage_should_raise_a_cgroups_exception_when_the_limit_is_exceeded(self): metrics = [MetricValue(MetricsCategory.MEMORY_CATEGORY, MetricsCounter.TOTAL_MEM_USAGE, AGENT_NAME_TELEMETRY, conf.get_agent_memory_quota() + 1), MetricValue(MetricsCategory.MEMORY_CATEGORY, MetricsCounter.SWAP_MEM_USAGE, AGENT_NAME_TELEMETRY, conf.get_agent_memory_quota() + 1)] diff --git a/tests_e2e/test_suites/agent_cgroups.yml b/tests_e2e/test_suites/agent_cgroups.yml index d6d1fc0f17..7844e606f4 100644 --- a/tests_e2e/test_suites/agent_cgroups.yml +++ b/tests_e2e/test_suites/agent_cgroups.yml @@ -1,9 +1,11 @@ # -# The test suite verify the agent running in expected cgroups and also, checks agent tracking the cgroups for polling resource metrics. Also, it verifies the agent cpu quota is set as expected. +# The test suite verify the agent running in expected cgroups and also, checks agent tracking the cgroups for polling resource metrics, +# checks unexpected processes in the agent cgroups, and it verifies the agent cpu quota is set as expected. 
# name: "AgentCgroups" tests: - "agent_cgroups/agent_cgroups.py" - "agent_cgroups/agent_cpu_quota.py" + - "agent_cgroups/agent_cgroups_process_check.py" images: "cgroups-endorsed" owns_vm: true \ No newline at end of file diff --git a/tests_e2e/tests/agent_cgroups/agent_cgroups_process_check.py b/tests_e2e/tests/agent_cgroups/agent_cgroups_process_check.py new file mode 100644 index 0000000000..d0996caec8 --- /dev/null +++ b/tests_e2e/tests/agent_cgroups/agent_cgroups_process_check.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from typing import List, Dict, Any + +from tests_e2e.tests.lib.agent_test import AgentVmTest +from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient +from tests_e2e.tests.lib.vm_extension_identifier import VmExtensionIds + + +class AgentCgroupsProcessCheck(AgentVmTest): + """ + Tests the agent's ability to detect processes that do not belong to the agent's cgroup + """ + def __init__(self, context: AgentVmTestContext): + super().__init__(context) + self._ssh_client = self._context.create_ssh_client() + + def run(self): + """ + Steps: + 1. Verify that agent detects processes that do not belong to the agent's cgroup and disable the cgroups + 2. 
Run the extension, so that they are run in the agent's cgroup + 3. Restart the ext_handler process to re-initialize the cgroups setup + 4. Verify that agent detects extension processes and will not enable the cgroups + """ + + log.info("=====Validating agent cgroups process check") + self._run_remote_test(self._ssh_client, "agent_cgroups_process_check-unknown_process_check.py", use_sudo=True) + + self._install_ama_extension() + + log.info("=====Validating agent cgroups not enabled") + self._run_remote_test(self._ssh_client, "agent_cgroups_process_check-cgroups_not_enabled.py", use_sudo=True) + + def _install_ama_extension(self): + ama_extension = VirtualMachineExtensionClient( + self._context.vm, VmExtensionIds.AzureMonitorLinuxAgent, + resource_name="AMAAgent") + log.info("Installing %s", ama_extension) + ama_extension.enable() + ama_extension.assert_instance_view() + + def get_ignore_error_rules(self) -> List[Dict[str, Any]]: + + ignore_rules = [ + # This is produced by the test, so it is expected + # Examples: + # 2024-04-01T19:16:11.929000Z INFO MonitorHandler ExtHandler [CGW] Disabling resource usage monitoring. 
Reason: Check on cgroups failed: + # [CGroupsException] The agent's cgroup includes unexpected processes: ['[PID: 2957] dd\x00if=/dev/zero\x00of=/dev/null\x00 '] + # 2024-04-01T19:17:04.995276Z WARNING ExtHandler ExtHandler [CGroupsException] The agent's cgroup includes unexpected processes: ['[PID: 3285] /usr/bin/python3\x00/var/lib/waagent/Microsoft.Azure.Monitor.AzureM', '[PID: 3286] /usr/bin/python3\x00/var/lib/waagent/Microsoft.Azure.Monitor.AzureM'] + {'message': r"The agent's cgroup includes unexpected processes"}, + {'message': r"Found unexpected processes in the agent cgroup before agent enable cgroups"} + ] + return ignore_rules + + +if __name__ == "__main__": + AgentCgroupsProcessCheck.run_from_command_line() diff --git a/tests_e2e/tests/lib/cgroup_helpers.py b/tests_e2e/tests/lib/cgroup_helpers.py index 5c552ef19e..ef49284e15 100644 --- a/tests_e2e/tests/lib/cgroup_helpers.py +++ b/tests_e2e/tests/lib/cgroup_helpers.py @@ -1,3 +1,4 @@ +import datetime import os import re @@ -6,6 +7,7 @@ from azurelinuxagent.common.osutil import systemd from azurelinuxagent.common.utils import shellutil from azurelinuxagent.common.version import DISTRO_NAME, DISTRO_VERSION +from azurelinuxagent.ga.cgroupapi import SystemdCgroupsApi from tests_e2e.tests.lib.agent_log import AgentLog from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.retry import retry_if_false @@ -146,10 +148,25 @@ def check_cgroup_disabled_with_unknown_process(): """ Returns True if the cgroup is disabled with unknown process """ + return check_log_message("Disabling resource usage monitoring. Reason: Check on cgroups failed:.+UNKNOWN") + + +def check_log_message(message, after_timestamp=datetime.datetime.min): + """ + Check if the log message is present after the given timestamp(if provided) in the agent log + """ + log.info("Checking log message: {0}".format(message)) for record in AgentLog().read(): - match = re.search("Disabling resource usage monitoring. 
Reason: Check on cgroups failed:.+UNKNOWN", - record.message, flags=re.DOTALL) - if match is not None: + match = re.search(message, record.message, flags=re.DOTALL) + if match is not None and record.timestamp > after_timestamp: log.info("Found message:\n\t%s", record.text.replace("\n", "\n\t")) return True return False + + +def get_unit_cgroup_paths(unit_name): + """ + Returns the cgroup paths for the given unit + """ + cgroups_api = SystemdCgroupsApi() + return cgroups_api.get_unit_cgroup_paths(unit_name) diff --git a/tests_e2e/tests/scripts/agent_cgroups_process_check-cgroups_not_enabled.py b/tests_e2e/tests/scripts/agent_cgroups_process_check-cgroups_not_enabled.py new file mode 100755 index 0000000000..a8db751e61 --- /dev/null +++ b/tests_e2e/tests/scripts/agent_cgroups_process_check-cgroups_not_enabled.py @@ -0,0 +1,60 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# This script verifies agent detected unexpected processes in the agent cgroup before cgroup initialization + +from assertpy import fail + +from azurelinuxagent.common.utils import shellutil +from tests_e2e.tests.lib.cgroup_helpers import check_agent_quota_disabled, check_log_message +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.retry import retry_if_false + + +def restart_ext_handler(): + log.info("Restarting the extension handler") + shellutil.run_command(["pkill", "-f", "WALinuxAgent.*run-exthandler"]) + + +def verify_agent_cgroups_not_enabled(): + """ + Verifies that the agent cgroups not enabled when ama extension(unexpected) processes are found in the agent cgroup + """ + log.info("Verifying agent cgroups are not enabled") + + ama_process_found: bool = retry_if_false(lambda: check_log_message("The agent's cgroup includes unexpected processes:.+/var/lib/waagent/Microsoft.Azure.Monitor")) + if not ama_process_found: + fail("Agent failed to found ama extension processes in the agent cgroup") + + found: bool = retry_if_false(lambda: check_log_message("Found unexpected processes in the agent cgroup before agent enable cgroups")) + if not found: + fail("Agent failed to found unknown processes in the agent cgroup") + + disabled: bool = retry_if_false(check_agent_quota_disabled) + if not disabled: + fail("The agent failed to disable its CPUQuota when cgroups were not enabled") + + +def main(): + restart_ext_handler() + verify_agent_cgroups_not_enabled() + + +if __name__ == "__main__": + main() diff --git a/tests_e2e/tests/scripts/agent_cgroups_process_check-unknown_process_check.py b/tests_e2e/tests/scripts/agent_cgroups_process_check-unknown_process_check.py new file mode 100755 index 0000000000..d1b3014a03 --- /dev/null +++ b/tests_e2e/tests/scripts/agent_cgroups_process_check-unknown_process_check.py @@ -0,0 +1,99 @@ +#!/usr/bin/env pypy3 +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under 
the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This script forces the process check by putting unknown process in the agent's cgroup + +import os +import subprocess +import datetime + +from assertpy import fail + +from azurelinuxagent.common.utils import shellutil +from tests_e2e.tests.lib.cgroup_helpers import check_agent_quota_disabled, check_log_message, get_unit_cgroup_paths, AGENT_SERVICE_NAME +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.retry import retry_if_false + + +def prepare_agent(): + check_time = datetime.datetime.utcnow() + log.info("Executing script update-waagent-conf to enable agent cgroups config flag") + result = shellutil.run_command(["update-waagent-conf", "Debug.CgroupCheckPeriod=20", "Debug.CgroupLogMetrics=y", + "Debug.CgroupDisableOnProcessCheckFailure=y", + "Debug.CgroupDisableOnQuotaCheckFailure=n"]) + log.info("Successfully enabled agent cgroups config flag: {0}".format(result)) + + found: bool = retry_if_false(lambda: check_log_message(" Agent cgroups enabled: True", after_timestamp=check_time)) + if not found: + fail("Agent cgroups not enabled") + + +def creating_dummy_process(): + log.info("Creating dummy process to add to agent's cgroup") + dd_command = ["sleep", "60m"] + proc = subprocess.Popen(dd_command) + return proc.pid + + +def remove_dummy_process(pid): + log.info("Removing dummy process from agent's cgroup") + shellutil.run_command(["kill", "-9", str(pid)]) + + +def disable_agent_cgroups_with_unknown_process(pid): 
+ """ + Adding dummy process to the agent's cgroup and verifying that the agent detects the unknown process and disables cgroups + + Note: System may kick the added process out of the cgroups, keeps adding until agent detect that process + """ + + def unknown_process_found(cpu_cgroup): + cgroup_procs_path = os.path.join(cpu_cgroup, "cgroup.procs") + log.info("Adding dummy process %s to cgroup.procs file %s", pid, cgroup_procs_path) + try: + with open(cgroup_procs_path, 'a') as f: + f.write("\n") + f.write(str(pid)) + except Exception as e: + log.warning("Error while adding process to cgroup.procs file: {0}".format(e)) + return False + + # The log message indicating the check failed is similar to + # 2021-03-29T23:33:15.603530Z INFO MonitorHandler ExtHandler Disabling resource usage monitoring. Reason: Check on cgroups failed: + # [CGroupsException] The agent's cgroup includes unexpected processes: ['[PID: 25826] python3\x00/home/nam/Compute-Runtime-Tux-Pipeline/dungeon_crawler/s'] + found: bool = retry_if_false(lambda: check_log_message( + "Disabling resource usage monitoring. 
Reason: Check on cgroups failed:.+The agent's cgroup includes unexpected processes:.+{0}".format( + pid)), attempts=3) + return found and retry_if_false(check_agent_quota_disabled, attempts=3) + + cpu_cgroup, _ = get_unit_cgroup_paths(AGENT_SERVICE_NAME) + + found: bool = retry_if_false(lambda: unknown_process_found(cpu_cgroup), attempts=3) + if not found: + fail("The agent did not detect unknown process: {0}".format(pid)) + + +def main(): + prepare_agent() + pid = creating_dummy_process() + disable_agent_cgroups_with_unknown_process(pid) + remove_dummy_process(pid) + + +if __name__ == "__main__": + main() diff --git a/tests_e2e/tests/scripts/agent_cpu_quota-check_agent_cpu_quota.py b/tests_e2e/tests/scripts/agent_cpu_quota-check_agent_cpu_quota.py index c8aad49f59..5dfc55be82 100755 --- a/tests_e2e/tests/scripts/agent_cpu_quota-check_agent_cpu_quota.py +++ b/tests_e2e/tests/scripts/agent_cpu_quota-check_agent_cpu_quota.py @@ -30,7 +30,7 @@ from azurelinuxagent.ga.cgroupconfigurator import _DROP_IN_FILE_CPU_QUOTA from tests_e2e.tests.lib.agent_log import AgentLog from tests_e2e.tests.lib.cgroup_helpers import check_agent_quota_disabled, \ - get_agent_cpu_quota + get_agent_cpu_quota, check_log_message from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.remote_test import run_remote_test from tests_e2e.tests.lib.retry import retry_if_false @@ -146,45 +146,18 @@ def wait_for_log_message(message, timeout=datetime.timedelta(minutes=5)): fail("The agent did not find [{0}] in its log within the allowed timeout".format(message)) -def verify_process_check_on_agent_cgroups(): - """ - This method checks agent detect unexpected processes in its cgroup and disables the CPUQuota - """ - log.info("***Verifying process check on agent cgroups") - log.info("Ensuring agent CPUQuota is enabled and backup the drop-in file to restore later in further tests") - if check_agent_quota_disabled(): - fail("The agent's CPUQuota is not enabled: 
{0}".format(get_agent_cpu_quota())) - quota_drop_in = os.path.join(systemd.get_agent_drop_in_path(), _DROP_IN_FILE_CPU_QUOTA) - quota_drop_in_backup = quota_drop_in + ".bk" - log.info("Backing up %s to %s...", quota_drop_in, quota_drop_in_backup) - shutil.copy(quota_drop_in, quota_drop_in_backup) - # - # Re-enable Process checks on cgroups and verify that the agent detects unexpected processes in its cgroup and disables the CPUQuota wehen - # that happens - # - shellutil.run_command(["update-waagent-conf", "Debug.CgroupDisableOnProcessCheckFailure=y"]) - - # The log message indicating the check failed is similar to - # 2021-03-29T23:33:15.603530Z INFO MonitorHandler ExtHandler Disabling resource usage monitoring. Reason: Check on cgroups failed: - # [CGroupsException] The agent's cgroup includes unexpected processes: ['[PID: 25826] python3\x00/home/nam/Compute-Runtime-Tux-Pipeline/dungeon_crawler/s'] - wait_for_log_message( - "Disabling resource usage monitoring. Reason: Check on cgroups failed:.+The agent's cgroup includes unexpected processes") - disabled: bool = retry_if_false(check_agent_quota_disabled) - if not disabled: - fail("The agent did not disable its CPUQuota: {0}".format(get_agent_cpu_quota())) - - def verify_throttling_time_check_on_agent_cgroups(): """ This method checks agent disables its CPUQuota when it exceeds its throttling limit """ log.info("***Verifying CPU throttling check on agent cgroups") # Now disable the check on unexpected processes and enable the check on throttledtime and verify that the agent disables its CPUQuota when it exceeds its throttling limit - log.info("Re-enabling CPUQuota...") + if check_agent_quota_disabled(): + fail("The agent's CPUQuota is not enabled: {0}".format(get_agent_cpu_quota())) quota_drop_in = os.path.join(systemd.get_agent_drop_in_path(), _DROP_IN_FILE_CPU_QUOTA) quota_drop_in_backup = quota_drop_in + ".bk" - log.info("Restoring %s from %s...", quota_drop_in, quota_drop_in_backup) - 
shutil.copy(quota_drop_in_backup, quota_drop_in) + log.info("Backing up %s to %s...", quota_drop_in, quota_drop_in_backup) + shutil.copy(quota_drop_in, quota_drop_in_backup) shellutil.run_command(["systemctl", "daemon-reload"]) shellutil.run_command(["update-waagent-conf", "Debug.CgroupDisableOnProcessCheckFailure=n", "Debug.CgroupDisableOnQuotaCheckFailure=y", "Debug.AgentCpuThrottledTimeThreshold=5"]) @@ -205,11 +178,27 @@ def verify_throttling_time_check_on_agent_cgroups(): fail("The agent did not disable its CPUQuota: {0}".format(get_agent_cpu_quota())) +def cleanup_test_setup(): + log.info("Cleaning up test setup") + drop_in_file = os.path.join(systemd.get_agent_drop_in_path(), "99-ExecStart.conf") + if os.path.exists(drop_in_file): + log.info("Removing %s...", drop_in_file) + os.remove(drop_in_file) + shellutil.run_command(["systemctl", "daemon-reload"]) + + check_time = datetime.datetime.utcnow() + shellutil.run_command(["agent-service", "restart"]) + + found: bool = retry_if_false(lambda: check_log_message(" Agent cgroups enabled: True", after_timestamp=check_time)) + if not found: + fail("Agent cgroups not enabled yet") + + def main(): prepare_agent() verify_agent_reported_metrics() - verify_process_check_on_agent_cgroups() verify_throttling_time_check_on_agent_cgroups() + cleanup_test_setup() run_remote_test(main) From f84cde24f66d73211721cbab84cc35e24fca5722 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Thu, 4 Apr 2024 17:11:55 -0700 Subject: [PATCH 195/240] Cgroups api refactor for v2 (#3096) * Cgroups api refactor (#6) * Initial changes for log collector cgroups v2 support * Fix pylint issues * Fix pylint issues * Fix pylint issues * Check that both controllers are mounted in the chosen cgroups version for log collector * Fix regex * Update test_agent unit tests * Fix unit tests * Update format strings * Fix broken cgroupconfigurator unit tests * pyling * Fix cgroups api unit tests * Ignore unused args * 
Ignore unused args * Add cgroup configurator tests * v2 required check in parent cgroup * unit tests is_controller_enabled * Fix test failure and pylint: * pylint * Update agent checks * Fix controller enable logic and unit tests * Remove changes to collect logs * Fix pylint * Add e2e test for v2 * Fix log warnings * Add cgroups v2 disabled scenario to daily runbook * Address PR comments (#7) * get_cgroup_api should raise exception when controllers not mounted * Combine cgroups_supported() check * Combine SystemdCgroupsApi and CGroupApi classes * fix pylint and tests with sudo * Rename SystemdCgroupsApi to SystemdCgroupApi * Cgroup should be singular when referring to the APi * Unimpleneted methods should raise NotImplementederror * Check for cpu,cpuacct * v2 start extension command should not be implemented * log_cgorup_info and log_cgroup_warning should be in cgroupapi * Systemd check should come before api * Explicitly check for empty dict * Only check if controllers are enabled at root for v2 * Remove unnecessary mocked paths in mock cgroup env * V2 does not have concept of mounting controllers * Fix super call for python 2 * get_cgroup_api should be function * Move logging functions up * Use stat -f to get cgroup mode * Mock hybrid path * Fix unit tests: * Debug tests * Debug tests * Debug unit tests * Fix unit tests * Fix pylint * Fix e2e test for v2 * Fix e2e test * Fix e2e test * Fix e2e test * Combine common implementations * Improve comments * Pylint * Address PR comments (#8) * Run unit tests * Clean up drop in files if cgroups are disabled * Init values for cgroup apis * Rever test change * get_cgroup_api should check if mountpoints are correct (#9) * Fix conflict after merge * Merge issues (#10) * Fix merge issues * Fix unit tests * get_cgroup_api raises InvalidCroupMountpointException (#11) * get_cgroup_api can raise InvalidCgroupMountpointException * Add unit test for agent * Address PR comments (#12) --- azurelinuxagent/agent.py | 32 +- 
azurelinuxagent/ga/cgroupapi.py | 448 ++++++++++++------ azurelinuxagent/ga/cgroupconfigurator.py | 215 ++++----- .../hybrid/sys_fs_cgroup_cgroup.controllers | 0 .../sys_fs_cgroup_unified_cgroup.controllers | 7 - tests/data/cgroups/{ => v1}/proc_pid_cgroup | 0 tests/data/cgroups/{ => v1}/proc_self_cgroup | 0 tests/data/cgroups/v2/proc_pid_cgroup | 1 + tests/data/cgroups/v2/proc_self_cgroup | 1 + .../v2/sys_fs_cgroup_cgroup.subtree_control | 1 + ...sys_fs_cgroup_cgroup.subtree_control_empty | 0 tests/ga/test_cgroupapi.py | 408 +++++++++++++--- tests/ga/test_cgroupconfigurator.py | 154 ++++-- tests/ga/test_cgroupconfigurator_sudo.py | 6 +- tests/ga/test_update.py | 2 +- tests/lib/cgroups_tools.py | 14 - tests/lib/mock_cgroup_environment.py | 154 ++++-- tests/test_agent.py | 115 ++++- tests_e2e/orchestrator/runbook.yml | 1 + tests_e2e/test_suites/cgroup_v2_disabled.yml | 10 + .../cgroup_v2_disabled/cgroup_v2_disabled.py | 76 +++ tests_e2e/tests/lib/cgroup_helpers.py | 4 +- 22 files changed, 1202 insertions(+), 447 deletions(-) create mode 100644 tests/data/cgroups/hybrid/sys_fs_cgroup_cgroup.controllers delete mode 100644 tests/data/cgroups/sys_fs_cgroup_unified_cgroup.controllers rename tests/data/cgroups/{ => v1}/proc_pid_cgroup (100%) rename tests/data/cgroups/{ => v1}/proc_self_cgroup (100%) create mode 100644 tests/data/cgroups/v2/proc_pid_cgroup create mode 100644 tests/data/cgroups/v2/proc_self_cgroup create mode 100644 tests/data/cgroups/v2/sys_fs_cgroup_cgroup.subtree_control create mode 100644 tests/data/cgroups/v2/sys_fs_cgroup_cgroup.subtree_control_empty create mode 100644 tests_e2e/test_suites/cgroup_v2_disabled.yml create mode 100644 tests_e2e/tests/cgroup_v2_disabled/cgroup_v2_disabled.py diff --git a/azurelinuxagent/agent.py b/azurelinuxagent/agent.py index ee68bd678a..50735b54e4 100644 --- a/azurelinuxagent/agent.py +++ b/azurelinuxagent/agent.py @@ -28,9 +28,11 @@ import subprocess import sys import threading + +from azurelinuxagent.common.exception 
import CGroupsException from azurelinuxagent.ga import logcollector, cgroupconfigurator from azurelinuxagent.ga.cgroup import AGENT_LOG_COLLECTOR, CpuCgroup, MemoryCgroup -from azurelinuxagent.ga.cgroupapi import SystemdCgroupsApi +from azurelinuxagent.ga.cgroupapi import get_cgroup_api, log_cgroup_warning, InvalidCgroupMountpointException import azurelinuxagent.common.conf as conf import azurelinuxagent.common.event as event @@ -206,18 +208,32 @@ def collect_logs(self, is_full_mode): # Check the cgroups unit log_collector_monitor = None - cgroups_api = SystemdCgroupsApi() - cpu_cgroup_path, memory_cgroup_path = cgroups_api.get_process_cgroup_paths("self") + cpu_cgroup_path = None + memory_cgroup_path = None if CollectLogsHandler.is_enabled_monitor_cgroups_check(): - cpu_slice_matches = (cgroupconfigurator.LOGCOLLECTOR_SLICE in cpu_cgroup_path) - memory_slice_matches = (cgroupconfigurator.LOGCOLLECTOR_SLICE in memory_cgroup_path) + try: + cgroup_api = get_cgroup_api() + except InvalidCgroupMountpointException as e: + log_cgroup_warning("The agent does not support cgroups if the default systemd mountpoint is not being used: {0}".format(ustr(e)), send_event=True) + sys.exit(logcollector.INVALID_CGROUPS_ERRCODE) + except CGroupsException as e: + log_cgroup_warning("Unable to determine which cgroup version to use: {0}".format(ustr(e)), send_event=True) + sys.exit(logcollector.INVALID_CGROUPS_ERRCODE) + + cpu_cgroup_path, memory_cgroup_path = cgroup_api.get_process_cgroup_paths("self") + cpu_slice_matches = False + memory_slice_matches = False + if cpu_cgroup_path is not None: + cpu_slice_matches = (cgroupconfigurator.LOGCOLLECTOR_SLICE in cpu_cgroup_path) + if memory_cgroup_path is not None: + memory_slice_matches = (cgroupconfigurator.LOGCOLLECTOR_SLICE in memory_cgroup_path) if not cpu_slice_matches or not memory_slice_matches: - logger.info("The Log Collector process is not in the proper cgroups:") + log_cgroup_warning("The Log Collector process is not in the proper 
cgroups:", send_event=False) if not cpu_slice_matches: - logger.info("\tunexpected cpu slice") + log_cgroup_warning("\tunexpected cpu slice", send_event=False) if not memory_slice_matches: - logger.info("\tunexpected memory slice") + log_cgroup_warning("\tunexpected memory slice", send_event=False) sys.exit(logcollector.INVALID_CGROUPS_ERRCODE) diff --git a/azurelinuxagent/ga/cgroupapi.py b/azurelinuxagent/ga/cgroupapi.py index 6f4bf4ab34..3bce053502 100644 --- a/azurelinuxagent/ga/cgroupapi.py +++ b/azurelinuxagent/ga/cgroupapi.py @@ -14,7 +14,7 @@ # limitations under the License. # # Requires Python 2.6+ and Openssl 1.0+ - +import json import os import re import shutil @@ -23,6 +23,7 @@ import uuid from azurelinuxagent.common import logger +from azurelinuxagent.common.event import WALAEventOperation, add_event from azurelinuxagent.ga.cgroup import CpuCgroup, MemoryCgroup from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.common.conf import get_agent_pid_file_path @@ -36,21 +37,26 @@ from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.version import get_distro -CGROUPS_FILE_SYSTEM_ROOT = '/sys/fs/cgroup' -CGROUP_CONTROLLERS = ["cpu", "memory"] +CGROUP_FILE_SYSTEM_ROOT = '/sys/fs/cgroup' EXTENSION_SLICE_PREFIX = "azure-vmextensions" -class SystemdRunError(CGroupsException): - """ - Raised when systemd-run fails - """ +def log_cgroup_info(formatted_string, op=WALAEventOperation.CGroupsInfo, send_event=True): + logger.info("[CGI] " + formatted_string) + if send_event: + add_event(op=op, message=formatted_string) - def __init__(self, msg=None): - super(SystemdRunError, self).__init__(msg) + +def log_cgroup_warning(formatted_string, op=WALAEventOperation.CGroupsInfo, send_event=True): + logger.info("[CGW] " + formatted_string) # log as INFO for now, in the future it should be logged as WARNING + if send_event: + add_event(op=op, message=formatted_string, is_success=False, 
log_event=False) -class CGroupsApi(object): +class CGroupUtil(object): + """ + Cgroup utility methods which are independent of systemd cgroup api. + """ @staticmethod def cgroups_supported(): distro_info = get_distro() @@ -63,18 +69,18 @@ def cgroups_supported(): (distro_name.lower() in ('centos', 'redhat') and 8 <= distro_version.major < 9) @staticmethod - def track_cgroups(extension_cgroups): - try: - for cgroup in extension_cgroups: - CGroupsTelemetry.track_cgroup(cgroup) - except Exception as exception: - logger.warn("Cannot add cgroup '{0}' to tracking list; resource usage will not be tracked. " - "Error: {1}".format(cgroup.path, ustr(exception))) + def get_extension_slice_name(extension_name, old_slice=False): + # The old slice makes it difficult for user to override the limits because they need to place drop-in files on every upgrade if extension slice is different for each version. + # old slice includes .- + # new slice without version . + if not old_slice: + extension_name = extension_name.rsplit("-", 1)[0] + # Since '-' is used as a separator in systemd unit names, we replace it with '_' to prevent side-effects. 
+ return EXTENSION_SLICE_PREFIX + "-" + extension_name.replace('-', '_') + ".slice" @staticmethod - def get_processes_in_cgroup(cgroup_path): - with open(os.path.join(cgroup_path, "cgroup.procs"), "r") as cgroup_procs: - return [int(pid) for pid in cgroup_procs.read().split()] + def get_daemon_pid(): + return int(fileutil.read_file(get_agent_pid_file_path()).strip()) @staticmethod def _foreach_legacy_cgroup(operation): @@ -92,9 +98,9 @@ def _foreach_legacy_cgroup(operation): """ legacy_cgroups = [] for controller in ['cpu', 'memory']: - cgroup = os.path.join(CGROUPS_FILE_SYSTEM_ROOT, controller, "WALinuxAgent", "WALinuxAgent") + cgroup = os.path.join(CGROUP_FILE_SYSTEM_ROOT, controller, "WALinuxAgent", "WALinuxAgent") if os.path.exists(cgroup): - logger.info('Found legacy cgroup {0}', cgroup) + log_cgroup_info('Found legacy cgroup {0}'.format(cgroup), send_event=False) legacy_cgroups.append((controller, cgroup)) try: @@ -103,28 +109,99 @@ def _foreach_legacy_cgroup(operation): if os.path.exists(procs_file): procs_file_contents = fileutil.read_file(procs_file).strip() - daemon_pid = CGroupsApi.get_daemon_pid() + daemon_pid = CGroupUtil.get_daemon_pid() if ustr(daemon_pid) in procs_file_contents: operation(controller, daemon_pid) finally: for _, cgroup in legacy_cgroups: - logger.info('Removing {0}', cgroup) + log_cgroup_info('Removing {0}'.format(cgroup), send_event=False) shutil.rmtree(cgroup, ignore_errors=True) return len(legacy_cgroups) @staticmethod - def get_daemon_pid(): - return int(fileutil.read_file(get_agent_pid_file_path()).strip()) + def cleanup_legacy_cgroups(): + """ + Previous versions of the daemon (2.2.31-2.2.40) wrote their PID to /sys/fs/cgroup/{cpu,memory}/WALinuxAgent/WALinuxAgent; + starting from version 2.2.41 we track the agent service in walinuxagent.service instead of WALinuxAgent/WALinuxAgent. 
If + we find that any of the legacy groups include the PID of the daemon then we need to disable data collection for this + instance (under systemd, moving PIDs across the cgroup file system can produce unpredictable results) + """ + return CGroupUtil._foreach_legacy_cgroup(lambda *_: None) -class SystemdCgroupsApi(CGroupsApi): +class SystemdRunError(CGroupsException): + """ + Raised when systemd-run fails + """ + + def __init__(self, msg=None): + super(SystemdRunError, self).__init__(msg) + + +class InvalidCgroupMountpointException(CGroupsException): """ - Cgroups interface via systemd + Raised when the cgroup mountpoint is invalid. """ + def __init__(self, msg=None): + super(InvalidCgroupMountpointException, self).__init__(msg) + + +def get_cgroup_api(): + """ + Determines which version of Cgroup should be used for resource enforcement and monitoring by the Agent and returns + the corresponding Api. + + Uses 'stat -f --format=%T /sys/fs/cgroup' to get the cgroup hierarchy in use. + If the result is 'cgroup2fs', cgroup v2 is being used. + If the result is 'tmpfs', cgroup v1 or a hybrid mode is being used. + If the result of 'stat -f --format=%T /sys/fs/cgroup/unified' is 'cgroup2fs', then hybrid mode is being used. + + Raises exception if cgroup filesystem mountpoint is not '/sys/fs/cgroup', or an unknown mode is detected. Also + raises exception if hybrid mode is detected and there are controllers available to be enabled in the unified + hierarchy (the agent does not support cgroups if there are controllers simultaneously attached to v1 and v2 + hierarchies). 
+ """ + if not os.path.exists(CGROUP_FILE_SYSTEM_ROOT): + v1_mount_point = shellutil.run_command(['findmnt', '-t', 'cgroup', '--noheadings']) + v2_mount_point = shellutil.run_command(['findmnt', '-t', 'cgroup2', '--noheadings']) + raise InvalidCgroupMountpointException("Expected cgroup filesystem to be mounted at '{0}', but it is not.\n v1 mount point: \n{1}\n v2 mount point: \n{2}".format(CGROUP_FILE_SYSTEM_ROOT, v1_mount_point, v2_mount_point)) + + root_hierarchy_mode = shellutil.run_command(["stat", "-f", "--format=%T", CGROUP_FILE_SYSTEM_ROOT]).rstrip() + + if root_hierarchy_mode == "cgroup2fs": + log_cgroup_info("Using cgroup v2 for resource enforcement and monitoring") + return SystemdCgroupApiv2() + + elif root_hierarchy_mode == "tmpfs": + # Check if a hybrid mode is being used + unified_hierarchy_path = os.path.join(CGROUP_FILE_SYSTEM_ROOT, "unified") + if os.path.exists(unified_hierarchy_path) and shellutil.run_command(["stat", "-f", "--format=%T", unified_hierarchy_path]).rstrip() == "cgroup2fs": + # Hybrid mode is being used. Check if any controllers are available to be enabled in the unified hierarchy. + available_unified_controllers_file = os.path.join(unified_hierarchy_path, "cgroup.controllers") + if os.path.exists(available_unified_controllers_file): + available_unified_controllers = fileutil.read_file(available_unified_controllers_file).rstrip() + if available_unified_controllers != "": + raise CGroupsException("Detected hybrid cgroup mode, but there are controllers available to be enabled in unified hierarchy: {0}".format(available_unified_controllers)) + + cgroup_api = SystemdCgroupApiv1() + # Previously the agent supported users mounting cgroup v1 controllers in locations other than the systemd + # default ('/sys/fs/cgroup'). The agent no longer supports this scenario. If either the cpu or memory + # controller is mounted in a location other than the systemd default, raise Exception. 
+ if not cgroup_api.are_mountpoints_systemd_created(): + raise InvalidCgroupMountpointException("Expected cgroup controllers to be mounted at '{0}', but at least one is not. v1 mount points: \n{1}".format(CGROUP_FILE_SYSTEM_ROOT, json.dumps(cgroup_api.get_controller_root_paths()))) + log_cgroup_info("Using cgroup v1 for resource enforcement and monitoring") + return cgroup_api + + raise CGroupsException("{0} has an unexpected file type: {1}".format(CGROUP_FILE_SYSTEM_ROOT, root_hierarchy_mode)) + + +class _SystemdCgroupApi(object): + """ + Cgroup interface via systemd. Contains common api implementations between cgroup v1 and v2. + """ def __init__(self): - self._cgroup_mountpoints = None self._agent_unit_name = None self._systemd_run_commands = [] self._systemd_run_commands_lock = threading.RLock() @@ -136,115 +213,64 @@ def get_systemd_run_commands(self): with self._systemd_run_commands_lock: return self._systemd_run_commands[:] - def get_cgroup_mount_points(self): + def get_controller_root_paths(self): """ - Returns a tuple with the mount points for the cpu and memory controllers; the values can be None - if the corresponding controller is not mounted + Cgroup version specific. Returns a tuple with the root paths for the cpu and memory controllers; the values can + be None if the corresponding controller is not mounted or enabled at the root cgroup. 
""" - # the output of mount is similar to - # $ mount -t cgroup - # cgroup on /sys/fs/cgroup/systemd type cgroup (rw,nosuid,nodev,noexec,relatime,xattr,name=systemd) - # cgroup on /sys/fs/cgroup/cpu,cpuacct type cgroup (rw,nosuid,nodev,noexec,relatime,cpu,cpuacct) - # cgroup on /sys/fs/cgroup/memory type cgroup (rw,nosuid,nodev,noexec,relatime,memory) - # etc - # - if self._cgroup_mountpoints is None: - cpu = None - memory = None - for line in shellutil.run_command(['mount', '-t', 'cgroup']).splitlines(): - match = re.search(r'on\s+(?P/\S+(memory|cpuacct))\s', line) - if match is not None: - path = match.group('path') - if 'cpuacct' in path: - cpu = path - else: - memory = path - self._cgroup_mountpoints = {'cpu': cpu, 'memory': memory} - - return self._cgroup_mountpoints['cpu'], self._cgroup_mountpoints['memory'] + raise NotImplementedError() - @staticmethod - def get_process_cgroup_relative_paths(process_id): + def get_unit_cgroup_paths(self, unit_name): """ - Returns a tuple with the path of the cpu and memory cgroups for the given process (relative to the mount point of the corresponding - controller). - The 'process_id' can be a numeric PID or the string "self" for the current process. - The values returned can be None if the process is not in a cgroup for that controller (e.g. the controller is not mounted). + Returns a tuple with the path of the cpu and memory cgroups for the given unit. + The values returned can be None if the controller is not mounted or enabled. 
""" - # The contents of the file are similar to - # # cat /proc/1218/cgroup - # 10:memory:/system.slice/walinuxagent.service - # 3:cpu,cpuacct:/system.slice/walinuxagent.service - # etc - cpu_path = None - memory_path = None - for line in fileutil.read_file("/proc/{0}/cgroup".format(process_id)).splitlines(): - match = re.match(r'\d+:(?P(memory|.*cpuacct.*)):(?P.+)', line) - if match is not None: - controller = match.group('controller') - path = match.group('path').lstrip('/') if match.group('path') != '/' else None - if controller == 'memory': - memory_path = path - else: - cpu_path = path + # Ex: ControlGroup=/azure.slice/walinuxagent.service + # controlgroup_path[1:] = azure.slice/walinuxagent.service + controlgroup_path = systemd.get_unit_property(unit_name, "ControlGroup") + cpu_root_path, memory_root_path = self.get_controller_root_paths() - return cpu_path, memory_path + cpu_cgroup_path = os.path.join(cpu_root_path, controlgroup_path[1:]) \ + if cpu_root_path is not None else None + + memory_cgroup_path = os.path.join(memory_root_path, controlgroup_path[1:]) \ + if memory_root_path is not None else None + + return cpu_cgroup_path, memory_cgroup_path def get_process_cgroup_paths(self, process_id): """ - Returns a tuple with the path of the cpu and memory cgroups for the given process. The 'process_id' can be a numeric PID or the string "self" for the current process. - The values returned can be None if the process is not in a cgroup for that controller (e.g. the controller is not mounted). + Returns a tuple with the path of the cpu and memory cgroups for the given process. + The 'process_id' can be a numeric PID or the string "self" for the current process. + The values returned can be None if the controller is not mounted or enabled. 
""" cpu_cgroup_relative_path, memory_cgroup_relative_path = self.get_process_cgroup_relative_paths(process_id) - cpu_mount_point, memory_mount_point = self.get_cgroup_mount_points() + cpu_root_path, memory_root_path = self.get_controller_root_paths() - cpu_cgroup_path = os.path.join(cpu_mount_point, cpu_cgroup_relative_path) \ - if cpu_mount_point is not None and cpu_cgroup_relative_path is not None else None + cpu_cgroup_path = os.path.join(cpu_root_path, cpu_cgroup_relative_path) \ + if cpu_root_path is not None and cpu_cgroup_relative_path is not None else None - memory_cgroup_path = os.path.join(memory_mount_point, memory_cgroup_relative_path) \ - if memory_mount_point is not None and memory_cgroup_relative_path is not None else None + memory_cgroup_path = os.path.join(memory_root_path, memory_cgroup_relative_path) \ + if memory_root_path is not None and memory_cgroup_relative_path is not None else None return cpu_cgroup_path, memory_cgroup_path - def get_unit_cgroup_paths(self, unit_name): + def get_process_cgroup_relative_paths(self, process_id): """ - Returns a tuple with the path of the cpu and memory cgroups for the given unit. - The values returned can be None if the controller is not mounted. - Ex: ControlGroup=/azure.slice/walinuxagent.service - controlgroup_path[1:] = azure.slice/walinuxagent.service + Cgroup version specific. Returns a tuple with the path of the cpu and memory cgroups for the given process + (relative to the root path of the corresponding controller). + The 'process_id' can be a numeric PID or the string "self" for the current process. + The values returned can be None if the controller is not mounted or enabled. 
""" - controlgroup_path = systemd.get_unit_property(unit_name, "ControlGroup") - cpu_mount_point, memory_mount_point = self.get_cgroup_mount_points() + raise NotImplementedError() - cpu_cgroup_path = os.path.join(cpu_mount_point, controlgroup_path[1:]) \ - if cpu_mount_point is not None else None - - memory_cgroup_path = os.path.join(memory_mount_point, controlgroup_path[1:]) \ - if memory_mount_point is not None else None - - return cpu_cgroup_path, memory_cgroup_path - - @staticmethod - def get_cgroup2_controllers(): + def start_extension_command(self, extension_name, command, cmd_name, timeout, shell, cwd, env, stdout, stderr, + error_code=ExtensionErrorCodes.PluginUnknownFailure): """ - Returns a tuple with the mount point for the cgroups v2 controllers, and the currently mounted controllers; - either value can be None if cgroups v2 or its controllers are not mounted + Cgroup version specific. Starts extension command. """ - # the output of mount is similar to - # $ mount -t cgroup2 - # cgroup2 on /sys/fs/cgroup/unified type cgroup2 (rw,nosuid,nodev,noexec,relatime,nsdelegate) - # - for line in shellutil.run_command(['mount', '-t', 'cgroup2']).splitlines(): - match = re.search(r'on\s+(?P/\S+)\s', line) - if match is not None: - mount_point = match.group('path') - controllers = None - controllers_file = os.path.join(mount_point, 'cgroup.controllers') - if os.path.exists(controllers_file): - controllers = fileutil.read_file(controllers_file) - return mount_point, controllers - return None, None + raise NotImplementedError() @staticmethod def _is_systemd_failure(scope_name, stderr): @@ -254,19 +280,91 @@ def _is_systemd_failure(scope_name, stderr): return unit_not_found in stderr or scope_name not in stderr @staticmethod - def get_extension_slice_name(extension_name, old_slice=False): - # The old slice makes it difficult for user to override the limits because they need to place drop-in files on every upgrade if extension slice is different for each version. 
- # old slice includes .- - # new slice without version . - if not old_slice: - extension_name = extension_name.rsplit("-", 1)[0] - # Since '-' is used as a separator in systemd unit names, we replace it with '_' to prevent side-effects. - return EXTENSION_SLICE_PREFIX + "-" + extension_name.replace('-', '_') + ".slice" + def get_processes_in_cgroup(cgroup_path): + with open(os.path.join(cgroup_path, "cgroup.procs"), "r") as cgroup_procs: + return [int(pid) for pid in cgroup_procs.read().split()] + + +class SystemdCgroupApiv1(_SystemdCgroupApi): + """ + Cgroup v1 interface via systemd + """ + def __init__(self): + super(SystemdCgroupApiv1, self).__init__() + self._cgroup_mountpoints = self._get_controller_mountpoints() + + def _get_controller_mountpoints(self): + """ + In v1, each controller is mounted at a different path. Use findmnt to get each path. + + the output of findmnt is similar to + $ findmnt -t cgroup --noheadings + /sys/fs/cgroup/systemd cgroup cgroup rw,nosuid,nodev,noexec,relatime,xattr,name=systemd + /sys/fs/cgroup/memory cgroup cgroup rw,nosuid,nodev,noexec,relatime,memory + /sys/fs/cgroup/cpu,cpuacct cgroup cgroup rw,nosuid,nodev,noexec,relatime,cpu,cpuacct + etc + + Returns a dictionary of the controller-path mappings. + """ + mount_points = {} + for line in shellutil.run_command(['findmnt', '-t', 'cgroup', '--noheadings']).splitlines(): + # In v2, we match only the systemd default mountpoint ('/sys/fs/cgroup'). In v1, we match any path. This + # is because the agent previously supported users mounting controllers at locations other than the systemd + # default in v1. + match = re.search(r'(?P\S+\/(?P\S+))\s+cgroup', line) + if match is not None: + path = match.group('path') + controller = match.group('controller') + if controller is not None and path is not None: + mount_points[controller] = path + return mount_points + + def are_mountpoints_systemd_created(self): + """ + Systemd mounts each controller at '/sys/fs/cgroup/'. 
Returns True if both cpu and memory + mountpoints match this pattern, False otherwise. + + The agent does not support cgroup usage if the default root systemd mountpoint (/sys/fs/cgroup) is not used. + This method is used to check if any users are using non-systemd mountpoints. If they are, the agent drop-in + files will be cleaned up in cgroupconfigurator. + """ + cpu_mountpoint = self._cgroup_mountpoints.get('cpu,cpuacct') + memory_mountpoint = self._cgroup_mountpoints.get('memory') + if cpu_mountpoint is not None and cpu_mountpoint != os.path.join(CGROUP_FILE_SYSTEM_ROOT, 'cpu,cpuacct'): + return False + if memory_mountpoint is not None and memory_mountpoint != os.path.join(CGROUP_FILE_SYSTEM_ROOT, 'memory'): + return False + return True + + def get_controller_root_paths(self): + # Return a tuple representing the mountpoints for cpu and memory. Either should be None if the corresponding + # controller is not mounted. + return self._cgroup_mountpoints.get('cpu,cpuacct'), self._cgroup_mountpoints.get('memory') + + def get_process_cgroup_relative_paths(self, process_id): + # The contents of the file are similar to + # # cat /proc/1218/cgroup + # 10:memory:/system.slice/walinuxagent.service + # 3:cpu,cpuacct:/system.slice/walinuxagent.service + # etc + cpu_path = None + memory_path = None + for line in fileutil.read_file("/proc/{0}/cgroup".format(process_id)).splitlines(): + match = re.match(r'\d+:(?P(memory|.*cpuacct.*)):(?P.+)', line) + if match is not None: + controller = match.group('controller') + path = match.group('path').lstrip('/') if match.group('path') != '/' else None + if controller == 'memory': + memory_path = path + else: + cpu_path = path + + return cpu_path, memory_path def start_extension_command(self, extension_name, command, cmd_name, timeout, shell, cwd, env, stdout, stderr, error_code=ExtensionErrorCodes.PluginUnknownFailure): scope = "{0}_{1}".format(cmd_name, uuid.uuid4()) - extension_slice_name = self.get_extension_slice_name(extension_name) 
+ extension_slice_name = CGroupUtil.get_extension_slice_name(extension_name) with self._systemd_run_commands_lock: process = subprocess.Popen( # pylint: disable=W1509 # Some distros like ubuntu20 by default cpu and memory accounting enabled. Thus create nested cgroups under the extension slice @@ -285,23 +383,23 @@ def start_extension_command(self, extension_name, command, cmd_name, timeout, sh scope_name = scope + '.scope' - logger.info("Started extension in unit '{0}'", scope_name) + log_cgroup_info("Started extension in unit '{0}'".format(scope_name), send_event=False) cpu_cgroup = None try: cgroup_relative_path = os.path.join('azure.slice/azure-vmextensions.slice', extension_slice_name) - cpu_cgroup_mountpoint, memory_cgroup_mountpoint = self.get_cgroup_mount_points() + cpu_cgroup_mountpoint, memory_cgroup_mountpoint = self.get_controller_root_paths() if cpu_cgroup_mountpoint is None: - logger.info("The CPU controller is not mounted; will not track resource usage") + log_cgroup_info("The CPU controller is not mounted; will not track resource usage", send_event=False) else: cpu_cgroup_path = os.path.join(cpu_cgroup_mountpoint, cgroup_relative_path) cpu_cgroup = CpuCgroup(extension_name, cpu_cgroup_path) CGroupsTelemetry.track_cgroup(cpu_cgroup) if memory_cgroup_mountpoint is None: - logger.info("The Memory controller is not mounted; will not track resource usage") + log_cgroup_info("The Memory controller is not mounted; will not track resource usage", send_event=False) else: memory_cgroup_path = os.path.join(memory_cgroup_mountpoint, cgroup_relative_path) memory_cgroup = MemoryCgroup(extension_name, memory_cgroup_path) @@ -309,10 +407,10 @@ def start_extension_command(self, extension_name, command, cmd_name, timeout, sh except IOError as e: if e.errno == 2: # 'No such file or directory' - logger.info("The extension command already completed; will not track resource usage") - logger.info("Failed to start tracking resource usage for the extension: {0}", ustr(e)) + 
log_cgroup_info("The extension command already completed; will not track resource usage", send_event=False) + log_cgroup_info("Failed to start tracking resource usage for the extension: {0}".format(ustr(e)), send_event=False) except Exception as e: - logger.info("Failed to start tracking resource usage for the extension: {0}", ustr(e)) + log_cgroup_info("Failed to start tracking resource usage for the extension: {0}".format(ustr(e)), send_event=False) # Wait for process completion or timeout try: @@ -342,11 +440,85 @@ def start_extension_command(self, extension_name, command, cmd_name, timeout, sh with self._systemd_run_commands_lock: self._systemd_run_commands.remove(process.pid) - def cleanup_legacy_cgroups(self): + +class SystemdCgroupApiv2(_SystemdCgroupApi): + """ + Cgroup v2 interface via systemd + """ + def __init__(self): + super(SystemdCgroupApiv2, self).__init__() + self._root_cgroup_path = self._get_root_cgroup_path() + self._controllers_enabled_at_root = self._get_controllers_enabled_at_root(self._root_cgroup_path) if self._root_cgroup_path is not None else [] + + @staticmethod + def _get_root_cgroup_path(): """ - Previous versions of the daemon (2.2.31-2.2.40) wrote their PID to /sys/fs/cgroup/{cpu,memory}/WALinuxAgent/WALinuxAgent; - starting from version 2.2.41 we track the agent service in walinuxagent.service instead of WALinuxAgent/WALinuxAgent. If - we find that any of the legacy groups include the PID of the daemon then we need to disable data collection for this - instance (under systemd, moving PIDs across the cgroup file system can produce unpredictable results) + In v2, there is a unified mount point shared by all controllers. Use findmnt to get the unified mount point. + + The output of findmnt is similar to + $ findmnt -t cgroup2 --noheadings + /sys/fs/cgroup cgroup2 cgroup2 rw,nosuid,nodev,noexec,relatime,nsdelegate,memory_recursiveprot + + Returns None if the root cgroup cannot be determined from the output above. 
""" - return CGroupsApi._foreach_legacy_cgroup(lambda *_: None) + # + for line in shellutil.run_command(['findmnt', '-t', 'cgroup2', '--noheadings']).splitlines(): + # Systemd mounts the cgroup filesystem at '/sys/fs/cgroup'. The agent does not support cgroups if the + # filesystem is mounted elsewhere, so search specifically for '/sys/fs/cgroup' in the findmnt output. + match = re.search(r'(?P\/sys\/fs\/cgroup)\s+cgroup2', line) + if match is not None: + root_cgroup_path = match.group('path') + if root_cgroup_path is not None: + return root_cgroup_path + return None + + @staticmethod + def _get_controllers_enabled_at_root(root_cgroup_path): + """ + Returns a list of the controllers enabled at the root cgroup. The cgroup.subtree_control file at the root shows + a space separated list of the controllers which are enabled to control resource distribution from the root + cgroup to its children. If a controller is listed here, then that controller is available to enable in children + cgroups. + + $ cat /sys/fs/cgroup/cgroup.subtree_control + cpuset cpu io memory hugetlb pids rdma misc + """ + controllers_enabled_at_root = [] + enabled_controllers_file = os.path.join(root_cgroup_path, 'cgroup.subtree_control') + if os.path.exists(enabled_controllers_file): + controllers_enabled_at_root = fileutil.read_file(enabled_controllers_file).rstrip().split() + return controllers_enabled_at_root + + def get_controller_root_paths(self): + # Return a tuple representing the root cgroups for cpu and memory. Either should be None if the corresponding + # controller is not enabled at the root. This check is necessary because all non-root "cgroup.subtree_control" + # files can only contain controllers which are enabled in the parent's "cgroup.subtree_control" file. 
+ + root_cpu_path = None + root_memory_path = None + if self._root_cgroup_path is not None: + if 'cpu' in self._controllers_enabled_at_root: + root_cpu_path = self._root_cgroup_path + if 'memory' in self._controllers_enabled_at_root: + root_memory_path = self._root_cgroup_path + + return root_cpu_path, root_memory_path + + def get_process_cgroup_relative_paths(self, process_id): + # The contents of the file are similar to + # # cat /proc/1218/cgroup + # 0::/azure.slice/walinuxagent.service + cpu_path = None + memory_path = None + for line in fileutil.read_file("/proc/{0}/cgroup".format(process_id)).splitlines(): + match = re.match(r'0::(?P\S+)', line) + if match is not None: + path = match.group('path').lstrip('/') if match.group('path') != '/' else None + memory_path = path + cpu_path = path + + return cpu_path, memory_path + + def start_extension_command(self, extension_name, command, cmd_name, timeout, shell, cwd, env, stdout, stderr, + error_code=ExtensionErrorCodes.PluginUnknownFailure): + raise NotImplementedError() diff --git a/azurelinuxagent/ga/cgroupconfigurator.py b/azurelinuxagent/ga/cgroupconfigurator.py index ce86101e07..72d5329f92 100644 --- a/azurelinuxagent/ga/cgroupconfigurator.py +++ b/azurelinuxagent/ga/cgroupconfigurator.py @@ -24,7 +24,8 @@ from azurelinuxagent.common import conf from azurelinuxagent.common import logger from azurelinuxagent.ga.cgroup import CpuCgroup, AGENT_NAME_TELEMETRY, MetricsCounter, MemoryCgroup -from azurelinuxagent.ga.cgroupapi import CGroupsApi, SystemdCgroupsApi, SystemdRunError, EXTENSION_SLICE_PREFIX +from azurelinuxagent.ga.cgroupapi import SystemdRunError, EXTENSION_SLICE_PREFIX, CGroupUtil, SystemdCgroupApiv2, \ + log_cgroup_info, log_cgroup_warning, get_cgroup_api, InvalidCgroupMountpointException from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.common.exception import ExtensionErrorCodes, CGroupsException, AgentMemoryExceededException from azurelinuxagent.common.future 
import ustr @@ -114,18 +115,6 @@ class DisableCgroups(object): EXTENSIONS = "extensions" -def _log_cgroup_info(format_string, *args): - message = format_string.format(*args) - logger.info("[CGI] " + message) - add_event(op=WALAEventOperation.CGroupsInfo, message=message) - - -def _log_cgroup_warning(format_string, *args): - message = format_string.format(*args) - logger.info("[CGW] " + message) # log as INFO for now, in the future it should be logged as WARNING - add_event(op=WALAEventOperation.CGroupsInfo, message=message, is_success=False, log_event=False) - - class CGroupConfigurator(object): """ This class implements the high-level operations on CGroups (e.g. initialization, creation, etc) @@ -150,39 +139,58 @@ def initialize(self): try: if self._initialized: return - # This check is to reset the quotas if agent goes from cgroup supported to unsupported distros later in time. - if not CGroupsApi.cgroups_supported(): - logger.info("Agent reset the quotas if distro: {0} goes from supported to unsupported list", - get_distro()) - self._reset_agent_cgroup_setup() - # check whether cgroup monitoring is supported on the current distro - self._cgroups_supported = CGroupsApi.cgroups_supported() + self._cgroups_supported = CGroupUtil.cgroups_supported() if not self._cgroups_supported: - logger.info("Cgroup monitoring is not supported on {0}", get_distro()) + log_cgroup_info("Cgroup monitoring is not supported on {0}".format(get_distro()), send_event=True) + # If a distro is not supported, attempt to clean up any existing drop in files in case it was + # previously supported. It is necessary to cleanup in this scenario in case the OS hits any bugs on + # the kernel related to cgroups. 
+ log_cgroup_info("Agent will reset the quotas in case distro: {0} went from supported to unsupported".format(get_distro()), send_event=False) + self._reset_agent_cgroup_setup() return # check that systemd is detected correctly - self._cgroups_api = SystemdCgroupsApi() if not systemd.is_systemd(): - _log_cgroup_warning("systemd was not detected on {0}", get_distro()) + log_cgroup_warning("systemd was not detected on {0}".format(get_distro())) return - _log_cgroup_info("systemd version: {0}", systemd.get_version()) + log_cgroup_info("systemd version: {0}".format(systemd.get_version())) if not self.__check_no_legacy_cgroups(): return + # Determine which version of the Cgroup Api should be used. If the correct version can't be determined, + # do not enable resource monitoring/enforcement. + try: + self._cgroups_api = get_cgroup_api() + except InvalidCgroupMountpointException as e: + # Systemd mounts the cgroup file system at '/sys/fs/cgroup'. Previously, the agent supported cgroup + # usage if a user mounted the cgroup filesystem elsewhere. The agent no longer supports that + # scenario. Cleanup any existing drop in files in case the agent previously supported cgroups on + # this machine. 
+ log_cgroup_warning("The agent does not support cgroups if the default systemd mountpoint is not being used: {0}".format(ustr(e)), send_event=True) + log_cgroup_info("Agent will reset the quotas in case cgroup usage went from enabled to disabled") + self._reset_agent_cgroup_setup() + return + except CGroupsException as e: + log_cgroup_warning("Unable to determine which cgroup version to use: {0}".format(ustr(e)), send_event=True) + return + + if self.using_cgroup_v2(): + log_cgroup_info("Agent and extensions resource monitoring is not currently supported on cgroup v2") + return + agent_unit_name = systemd.get_agent_unit_name() agent_slice = systemd.get_unit_property(agent_unit_name, "Slice") if agent_slice not in (AZURE_SLICE, "system.slice"): - _log_cgroup_warning("The agent is within an unexpected slice: {0}", agent_slice) + log_cgroup_warning("The agent is within an unexpected slice: {0}".format(agent_slice)) return self.__setup_azure_slice() - cpu_controller_root, memory_controller_root = self.__get_cgroup_controllers() - self._agent_cpu_cgroup_path, self._agent_memory_cgroup_path = self.__get_agent_cgroups(agent_slice, + cpu_controller_root, memory_controller_root = self.__get_cgroup_controller_roots() + self._agent_cpu_cgroup_path, self._agent_memory_cgroup_path = self.__get_agent_cgroup_paths(agent_slice, cpu_controller_root, memory_controller_root) @@ -195,20 +203,19 @@ def initialize(self): self.enable() if self._agent_cpu_cgroup_path is not None: - _log_cgroup_info("Agent CPU cgroup: {0}", self._agent_cpu_cgroup_path) + log_cgroup_info("Agent CPU cgroup: {0}".format(self._agent_cpu_cgroup_path)) self.__set_cpu_quota(conf.get_agent_cpu_quota()) CGroupsTelemetry.track_cgroup(CpuCgroup(AGENT_NAME_TELEMETRY, self._agent_cpu_cgroup_path)) if self._agent_memory_cgroup_path is not None: - _log_cgroup_info("Agent Memory cgroup: {0}", self._agent_memory_cgroup_path) + log_cgroup_info("Agent Memory cgroup: {0}".format(self._agent_memory_cgroup_path)) 
self._agent_memory_cgroup = MemoryCgroup(AGENT_NAME_TELEMETRY, self._agent_memory_cgroup_path) CGroupsTelemetry.track_cgroup(self._agent_memory_cgroup) - _log_cgroup_info('Agent cgroups enabled: {0}', self._agent_cgroups_enabled) - except Exception as exception: - _log_cgroup_warning("Error initializing cgroups: {0}", ustr(exception)) + log_cgroup_warning("Error initializing cgroups: {0}".format(ustr(exception))) finally: + log_cgroup_info('Agent cgroups enabled: {0}'.format(self._agent_cgroups_enabled)) self._initialized = True def __check_no_legacy_cgroups(self): @@ -216,35 +223,24 @@ def __check_no_legacy_cgroups(self): Older versions of the daemon (2.2.31-2.2.40) wrote their PID to /sys/fs/cgroup/{cpu,memory}/WALinuxAgent/WALinuxAgent. When running under systemd this could produce invalid resource usage data. Cgroups should not be enabled under this condition. """ - legacy_cgroups = self._cgroups_api.cleanup_legacy_cgroups() + legacy_cgroups = CGroupUtil.cleanup_legacy_cgroups() if legacy_cgroups > 0: - _log_cgroup_warning("The daemon's PID was added to a legacy cgroup; will not monitor resource usage.") + log_cgroup_warning("The daemon's PID was added to a legacy cgroup; will not monitor resource usage.") return False return True - def __get_cgroup_controllers(self): - # - # check v1 controllers - # - cpu_controller_root, memory_controller_root = self._cgroups_api.get_cgroup_mount_points() + def __get_cgroup_controller_roots(self): + cpu_controller_root, memory_controller_root = self._cgroups_api.get_controller_root_paths() if cpu_controller_root is not None: - logger.info("The CPU cgroup controller is mounted at {0}", cpu_controller_root) + log_cgroup_info("The CPU cgroup controller root path is {0}".format(cpu_controller_root), send_event=False) else: - _log_cgroup_warning("The CPU cgroup controller is not mounted") + log_cgroup_warning("The CPU cgroup controller is not mounted or enabled") if memory_controller_root is not None: - logger.info("The memory 
cgroup controller is mounted at {0}", memory_controller_root) + log_cgroup_info("The memory cgroup controller root path is {0}".format(memory_controller_root), send_event=False) else: - _log_cgroup_warning("The memory cgroup controller is not mounted") - - # - # check v2 controllers - # - cgroup2_mount_point, cgroup2_controllers = self._cgroups_api.get_cgroup2_controllers() - if cgroup2_mount_point is not None: - _log_cgroup_info("cgroups v2 mounted at {0}. Controllers: [{1}]", cgroup2_mount_point, - cgroup2_controllers) + log_cgroup_warning("The memory cgroup controller is not mounted or enabled") return cpu_controller_root, memory_controller_root @@ -325,7 +321,7 @@ def __setup_azure_slice(): for path, contents in files_to_create: CGroupConfigurator._Impl.__create_unit_file(path, contents) except Exception as exception: - _log_cgroup_warning("Failed to create unit files for the azure slice: {0}", ustr(exception)) + log_cgroup_warning("Failed to create unit files for the azure slice: {0}".format(ustr(exception))) for unit_file in files_to_create: CGroupConfigurator._Impl.__cleanup_unit_file(unit_file) return @@ -350,15 +346,14 @@ def _reset_agent_cgroup_setup(self): except Exception as err: logger.warn("Unable to delete Agent drop-in files while resetting the quotas: {0}".format(err)) - @staticmethod def __reload_systemd_config(): # reload the systemd configuration; the new slices will be used once the agent's service restarts try: - logger.info("Executing systemctl daemon-reload...") + log_cgroup_info("Executing systemctl daemon-reload...", send_event=False) shellutil.run_command(["systemctl", "daemon-reload"]) except Exception as exception: - _log_cgroup_warning("daemon-reload failed (create azure slice): {0}", ustr(exception)) + log_cgroup_warning("daemon-reload failed (create azure slice): {0}".format(ustr(exception))) # W0238: Unused private member `_Impl.__create_unit_file(path, contents)` (unused-private-member) @staticmethod @@ -368,7 +363,7 @@ def 
__create_unit_file(path, contents): # pylint: disable=unused-private-member fileutil.mkdir(parent, mode=0o755) exists = os.path.exists(path) fileutil.write_file(path, contents) - _log_cgroup_info("{0} {1}", "Updated" if exists else "Created", path) + log_cgroup_info("{0} {1}".format("Updated" if exists else "Created", path)) # W0238: Unused private member `_Impl.__cleanup_unit_file(path)` (unused-private-member) @staticmethod @@ -376,9 +371,9 @@ def __cleanup_unit_file(path): # pylint: disable=unused-private-member if os.path.exists(path): try: os.remove(path) - _log_cgroup_info("Removed {0}", path) + log_cgroup_info("Removed {0}".format(path)) except Exception as exception: - _log_cgroup_warning("Failed to remove {0}: {1}", path, ustr(exception)) + log_cgroup_warning("Failed to remove {0}: {1}".format(path, ustr(exception))) @staticmethod def __cleanup_all_files(files_to_cleanup): @@ -386,9 +381,9 @@ def __cleanup_all_files(files_to_cleanup): if os.path.exists(path): try: os.remove(path) - _log_cgroup_info("Removed {0}", path) + log_cgroup_info("Removed {0}".format(path)) except Exception as exception: - _log_cgroup_warning("Failed to remove {0}: {1}", path, ustr(exception)) + log_cgroup_warning("Failed to remove {0}: {1}".format(path, ustr(exception))) @staticmethod def __create_all_files(files_to_create): @@ -397,20 +392,20 @@ def __create_all_files(files_to_create): for path, contents in files_to_create: CGroupConfigurator._Impl.__create_unit_file(path, contents) except Exception as exception: - _log_cgroup_warning("Failed to create unit files : {0}", ustr(exception)) + log_cgroup_warning("Failed to create unit files : {0}".format(ustr(exception))) for unit_file in files_to_create: CGroupConfigurator._Impl.__cleanup_unit_file(unit_file) return def is_extension_resource_limits_setup_completed(self, extension_name, cpu_quota=None): unit_file_install_path = systemd.get_unit_file_install_path() - old_extension_slice_path = os.path.join(unit_file_install_path, 
SystemdCgroupsApi.get_extension_slice_name(extension_name, old_slice=True)) + old_extension_slice_path = os.path.join(unit_file_install_path, CGroupUtil.get_extension_slice_name(extension_name, old_slice=True)) # clean up the old slice from the disk if os.path.exists(old_extension_slice_path): CGroupConfigurator._Impl.__cleanup_unit_file(old_extension_slice_path) extension_slice_path = os.path.join(unit_file_install_path, - SystemdCgroupsApi.get_extension_slice_name(extension_name)) + CGroupUtil.get_extension_slice_name(extension_name)) cpu_quota = str( cpu_quota) + "%" if cpu_quota is not None else "" # setting an empty value resets to the default (infinity) slice_contents = _EXTENSION_SLICE_CONTENTS.format(extension_name=extension_name, @@ -421,7 +416,7 @@ def is_extension_resource_limits_setup_completed(self, extension_name, cpu_quota return True return False - def __get_agent_cgroups(self, agent_slice, cpu_controller_root, memory_controller_root): + def __get_agent_cgroup_paths(self, agent_slice, cpu_controller_root, memory_controller_root): agent_unit_name = systemd.get_agent_unit_name() expected_relative_path = os.path.join(agent_slice, agent_unit_name) @@ -429,29 +424,25 @@ def __get_agent_cgroups(self, agent_slice, cpu_controller_root, memory_controlle "self") if cpu_cgroup_relative_path is None: - _log_cgroup_warning("The agent's process is not within a CPU cgroup") + log_cgroup_warning("The agent's process is not within a CPU cgroup") else: if cpu_cgroup_relative_path == expected_relative_path: - _log_cgroup_info('CPUAccounting: {0}', systemd.get_unit_property(agent_unit_name, "CPUAccounting")) - _log_cgroup_info('CPUQuota: {0}', systemd.get_unit_property(agent_unit_name, "CPUQuotaPerSecUSec")) + log_cgroup_info('CPUAccounting: {0}'.format(systemd.get_unit_property(agent_unit_name, "CPUAccounting"))) + log_cgroup_info('CPUQuota: {0}'.format(systemd.get_unit_property(agent_unit_name, "CPUQuotaPerSecUSec"))) else: - _log_cgroup_warning( - "The Agent is not 
in the expected CPU cgroup; will not enable monitoring. Cgroup:[{0}] Expected:[{1}]", - cpu_cgroup_relative_path, - expected_relative_path) + log_cgroup_warning( + "The Agent is not in the expected CPU cgroup; will not enable monitoring. Cgroup:[{0}] Expected:[{1}]".format(cpu_cgroup_relative_path, expected_relative_path)) cpu_cgroup_relative_path = None # Set the path to None to prevent monitoring if memory_cgroup_relative_path is None: - _log_cgroup_warning("The agent's process is not within a memory cgroup") + log_cgroup_warning("The agent's process is not within a memory cgroup") else: if memory_cgroup_relative_path == expected_relative_path: memory_accounting = systemd.get_unit_property(agent_unit_name, "MemoryAccounting") - _log_cgroup_info('MemoryAccounting: {0}', memory_accounting) + log_cgroup_info('MemoryAccounting: {0}'.format(memory_accounting)) else: - _log_cgroup_info( - "The Agent is not in the expected memory cgroup; will not enable monitoring. CGroup:[{0}] Expected:[{1}]", - memory_cgroup_relative_path, - expected_relative_path) + log_cgroup_warning( + "The Agent is not in the expected memory cgroup; will not enable monitoring. 
CGroup:[{0}] Expected:[{1}]".format(memory_cgroup_relative_path, expected_relative_path)) memory_cgroup_relative_path = None # Set the path to None to prevent monitoring if cpu_controller_root is not None and cpu_cgroup_relative_path is not None: @@ -478,6 +469,9 @@ def agent_enabled(self): def extensions_enabled(self): return self._extensions_cgroups_enabled + def using_cgroup_v2(self): + return isinstance(self._cgroups_api, SystemdCgroupApiv2) + def enable(self): if not self.supported(): raise CGroupsException( @@ -491,7 +485,7 @@ def disable(self, reason, disable_cgroups): self.__reset_agent_cpu_quota() extension_services = self.get_extension_services_list() for extension in extension_services: - logger.info("Resetting extension : {0} and it's services: {1} CPUQuota".format(extension, extension_services[extension])) + log_cgroup_info("Resetting extension : {0} and it's services: {1} CPUQuota".format(extension, extension_services[extension]), send_event=False) self.__reset_extension_cpu_quota(extension_name=extension) self.__reset_extension_services_cpu_quota(extension_services[extension]) self.__reload_systemd_config() @@ -504,9 +498,7 @@ def disable(self, reason, disable_cgroups): self.__reset_agent_cpu_quota() CGroupsTelemetry.stop_tracking(CpuCgroup(AGENT_NAME_TELEMETRY, self._agent_cpu_cgroup_path)) - message = "[CGW] Disabling resource usage monitoring. Reason: {0}".format(reason) - logger.info(message) # log as INFO for now, in the future it should be logged as WARNING - add_event(op=WALAEventOperation.CGroupsDisabled, message=message, is_success=False, log_event=False) + log_cgroup_warning("Disabling resource usage monitoring. Reason: {0}".format(reason), op=WALAEventOperation.CGroupsDisabled) @staticmethod def __set_cpu_quota(quota): @@ -517,7 +509,7 @@ def __set_cpu_quota(quota): over this setting. 
""" quota_percentage = "{0}%".format(quota) - _log_cgroup_info("Ensuring the agent's CPUQuota is {0}", quota_percentage) + log_cgroup_info("Ensuring the agent's CPUQuota is {0}".format(quota_percentage)) if CGroupConfigurator._Impl.__try_set_cpu_quota(quota_percentage): CGroupsTelemetry.set_track_throttled_time(True) @@ -529,10 +521,9 @@ def __reset_agent_cpu_quota(): NOTE: This resets the quota on the agent's default dropin file; any local overrides on the VM will take precedence over this setting. """ - logger.info("Resetting agent's CPUQuota") + log_cgroup_info("Resetting agent's CPUQuota", send_event=False) if CGroupConfigurator._Impl.__try_set_cpu_quota(''): # setting an empty value resets to the default (infinity) - _log_cgroup_info('CPUQuota: {0}', - systemd.get_unit_property(systemd.get_agent_unit_name(), "CPUQuotaPerSecUSec")) + log_cgroup_info('CPUQuota: {0}'.format(systemd.get_unit_property(systemd.get_agent_unit_name(), "CPUQuotaPerSecUSec"))) # W0238: Unused private member `_Impl.__try_set_cpu_quota(quota)` (unused-private-member) @staticmethod @@ -546,13 +537,13 @@ def __try_set_cpu_quota(quota): # pylint: disable=unused-private-member return True # no need to update the file; return here to avoid doing a daemon-reload CGroupConfigurator._Impl.__create_unit_file(drop_in_file, contents) except Exception as exception: - _log_cgroup_warning('Failed to set CPUQuota: {0}', ustr(exception)) + log_cgroup_warning('Failed to set CPUQuota: {0}'.format(ustr(exception))) return False try: - logger.info("Executing systemctl daemon-reload...") + log_cgroup_info("Executing systemctl daemon-reload...", send_event=False) shellutil.run_command(["systemctl", "daemon-reload"]) except Exception as exception: - _log_cgroup_warning("daemon-reload failed (set quota): {0}", ustr(exception)) + log_cgroup_warning("daemon-reload failed (set quota): {0}".format(ustr(exception))) return False return True @@ -568,10 +559,10 @@ def 
_check_fails_if_processes_found_in_agent_cgroup_before_enable(self, agent_sl if agent_slice != AZURE_SLICE: return False try: - _log_cgroup_info("Checking for unexpected processes in the agent's cgroup before enabling cgroups") + log_cgroup_info("Checking for unexpected processes in the agent's cgroup before enabling cgroups") self._check_processes_in_agent_cgroup() except CGroupsException as exception: - _log_cgroup_warning(ustr(exception)) + log_cgroup_warning(ustr(exception)) return True return False @@ -633,7 +624,7 @@ def _check_processes_in_agent_cgroup(self): agent_commands.update(shellutil.get_running_commands()) systemd_run_commands = set() systemd_run_commands.update(self._cgroups_api.get_systemd_run_commands()) - agent_cgroup = CGroupsApi.get_processes_in_cgroup(cgroup_path) + agent_cgroup = self._cgroups_api.get_processes_in_cgroup(cgroup_path) # get the running commands again in case new commands started or completed while we were fetching the processes in the cgroup; agent_commands.update(shellutil.get_running_commands()) systemd_run_commands.update(self._cgroups_api.get_systemd_run_commands()) @@ -661,7 +652,7 @@ def _check_processes_in_agent_cgroup(self): if len(unexpected) >= 5: # collect just a small sample break except Exception as exception: - _log_cgroup_warning("Error checking the processes in the agent's cgroup: {0}".format(ustr(exception))) + log_cgroup_warning("Error checking the processes in the agent's cgroup: {0}".format(ustr(exception))) if len(unexpected) > 0: self._report_agent_cgroups_procs(agent_cgroup_proc_names, unexpected) @@ -796,17 +787,17 @@ def start_tracking_unit_cgroups(self, unit_name): cpu_cgroup_path, memory_cgroup_path = self._cgroups_api.get_unit_cgroup_paths(unit_name) if cpu_cgroup_path is None: - logger.info("The CPU controller is not mounted; will not track resource usage") + log_cgroup_info("The CPU controller is not mounted or enabled; will not track resource usage", send_event=False) else: 
CGroupsTelemetry.track_cgroup(CpuCgroup(unit_name, cpu_cgroup_path)) if memory_cgroup_path is None: - logger.info("The Memory controller is not mounted; will not track resource usage") + log_cgroup_info("The Memory controller is not mounted or enabled; will not track resource usage", send_event=False) else: CGroupsTelemetry.track_cgroup(MemoryCgroup(unit_name, memory_cgroup_path)) except Exception as exception: - logger.info("Failed to start tracking resource usage for the extension: {0}", ustr(exception)) + log_cgroup_info("Failed to start tracking resource usage for the extension: {0}".format(ustr(exception)), send_event=False) def stop_tracking_unit_cgroups(self, unit_name): """ @@ -822,20 +813,20 @@ def stop_tracking_unit_cgroups(self, unit_name): CGroupsTelemetry.stop_tracking(MemoryCgroup(unit_name, memory_cgroup_path)) except Exception as exception: - logger.info("Failed to stop tracking resource usage for the extension service: {0}", ustr(exception)) + log_cgroup_info("Failed to stop tracking resource usage for the extension service: {0}".format(ustr(exception)), send_event=False) def stop_tracking_extension_cgroups(self, extension_name): """ TODO: remove extension Memory cgroups from tracked list """ try: - extension_slice_name = SystemdCgroupsApi.get_extension_slice_name(extension_name) + extension_slice_name = CGroupUtil.get_extension_slice_name(extension_name) cgroup_relative_path = os.path.join(_AZURE_VMEXTENSIONS_SLICE, extension_slice_name) - cpu_cgroup_mountpoint, memory_cgroup_mountpoint = self._cgroups_api.get_cgroup_mount_points() - cpu_cgroup_path = os.path.join(cpu_cgroup_mountpoint, cgroup_relative_path) - memory_cgroup_path = os.path.join(memory_cgroup_mountpoint, cgroup_relative_path) + cpu_root_path, memory_root_path = self._cgroups_api.get_controller_root_paths() + cpu_cgroup_path = os.path.join(cpu_root_path, cgroup_relative_path) + memory_cgroup_path = os.path.join(memory_root_path, cgroup_relative_path) if cpu_cgroup_path is not None: 
CGroupsTelemetry.stop_tracking(CpuCgroup(extension_name, cpu_cgroup_path)) @@ -844,7 +835,7 @@ def stop_tracking_extension_cgroups(self, extension_name): CGroupsTelemetry.stop_tracking(MemoryCgroup(extension_name, memory_cgroup_path)) except Exception as exception: - logger.info("Failed to stop tracking resource usage for the extension service: {0}", ustr(exception)) + log_cgroup_info("Failed to stop tracking resource usage for the extension service: {0}".format(ustr(exception)), send_event=False) def start_extension_command(self, extension_name, command, cmd_name, timeout, shell, cwd, env, stdout, stderr, error_code=ExtensionErrorCodes.PluginUnknownFailure): @@ -898,19 +889,19 @@ def setup_extension_slice(self, extension_name, cpu_quota): if self.enabled(): unit_file_install_path = systemd.get_unit_file_install_path() extension_slice_path = os.path.join(unit_file_install_path, - SystemdCgroupsApi.get_extension_slice_name(extension_name)) + CGroupUtil.get_extension_slice_name(extension_name)) try: cpu_quota = str(cpu_quota) + "%" if cpu_quota is not None else "" # setting an empty value resets to the default (infinity) if cpu_quota == "": - _log_cgroup_info("CPUQuota not set for {0}", extension_name) + log_cgroup_info("CPUQuota not set for {0}".format(extension_name)) else: - _log_cgroup_info("Ensuring the {0}'s CPUQuota is {1}", extension_name, cpu_quota) + log_cgroup_info("Ensuring the {0}'s CPUQuota is {1}".format(extension_name, cpu_quota)) slice_contents = _EXTENSION_SLICE_CONTENTS.format(extension_name=extension_name, cpu_quota=cpu_quota) CGroupConfigurator._Impl.__create_unit_file(extension_slice_path, slice_contents) except Exception as exception: - _log_cgroup_warning("Failed to set the extension {0} slice and quotas: {1}", extension_name, - ustr(exception)) + log_cgroup_warning("Failed to set the extension {0} slice and quotas: {1}".format(extension_name, + ustr(exception))) CGroupConfigurator._Impl.__cleanup_unit_file(extension_slice_path) def 
remove_extension_slice(self, extension_name): @@ -920,7 +911,7 @@ def remove_extension_slice(self, extension_name): """ if self.enabled(): unit_file_install_path = systemd.get_unit_file_install_path() - extension_slice_name = SystemdCgroupsApi.get_extension_slice_name(extension_name) + extension_slice_name = CGroupUtil.get_extension_slice_name(extension_name) extension_slice_path = os.path.join(unit_file_install_path, extension_slice_name) if os.path.exists(extension_slice_path): self.stop_tracking_extension_cgroups(extension_name) @@ -951,7 +942,7 @@ def set_extension_services_cpu_memory_quota(self, services_list): cpu_quota = service.get('cpuQuotaPercentage', None) if cpu_quota is not None: cpu_quota = str(cpu_quota) + "%" - _log_cgroup_info("Ensuring the {0}'s CPUQuota is {1}", service_name, cpu_quota) + log_cgroup_info("Ensuring the {0}'s CPUQuota is {1}".format(service_name, cpu_quota)) drop_in_file_cpu_quota = os.path.join(drop_in_path, _DROP_IN_FILE_CPU_QUOTA) cpu_quota_contents = _DROP_IN_FILE_CPU_QUOTA_CONTENTS_FORMAT.format(cpu_quota) files_to_create.append((drop_in_file_cpu_quota, cpu_quota_contents)) @@ -985,7 +976,7 @@ def __reset_extension_services_cpu_quota(self, services_list): files_to_create.append((drop_in_file_cpu_quota, cpu_quota_contents)) self.__create_all_files(files_to_create) except Exception as exception: - _log_cgroup_warning('Failed to reset CPUQuota for {0} : {1}', service_name, ustr(exception)) + log_cgroup_warning('Failed to reset CPUQuota for {0} : {1}'.format(service_name, ustr(exception))) def remove_extension_services_drop_in_files(self, services_list): """ @@ -1010,7 +1001,7 @@ def remove_extension_services_drop_in_files(self, services_list): files_to_cleanup.append(drop_in_file_cpu_quota) CGroupConfigurator._Impl.__cleanup_all_files(files_to_cleanup) - _log_cgroup_info("Drop in files removed for {0}".format(service_name)) + log_cgroup_info("Drop in files removed for {0}".format(service_name)) def 
stop_tracking_extension_services_cgroups(self, services_list): """ @@ -1051,10 +1042,10 @@ def get_extension_services_list(): services = resource_limits.get('services') if resource_limits else None extensions_services[extensions_name] = services except (IOError, OSError) as e: - _log_cgroup_warning( + log_cgroup_warning( 'Failed to load manifest file ({0}): {1}'.format(manifest_path, e.strerror)) except ValueError: - _log_cgroup_warning('Malformed manifest file ({0}).'.format(manifest_path)) + log_cgroup_warning('Malformed manifest file ({0}).'.format(manifest_path)) return extensions_services # unique instance for the singleton diff --git a/tests/data/cgroups/hybrid/sys_fs_cgroup_cgroup.controllers b/tests/data/cgroups/hybrid/sys_fs_cgroup_cgroup.controllers new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/data/cgroups/sys_fs_cgroup_unified_cgroup.controllers b/tests/data/cgroups/sys_fs_cgroup_unified_cgroup.controllers deleted file mode 100644 index 2a03d239de..0000000000 --- a/tests/data/cgroups/sys_fs_cgroup_unified_cgroup.controllers +++ /dev/null @@ -1,7 +0,0 @@ -io -memory -pids -perf_event -rdma -cpu -freezer \ No newline at end of file diff --git a/tests/data/cgroups/proc_pid_cgroup b/tests/data/cgroups/v1/proc_pid_cgroup similarity index 100% rename from tests/data/cgroups/proc_pid_cgroup rename to tests/data/cgroups/v1/proc_pid_cgroup diff --git a/tests/data/cgroups/proc_self_cgroup b/tests/data/cgroups/v1/proc_self_cgroup similarity index 100% rename from tests/data/cgroups/proc_self_cgroup rename to tests/data/cgroups/v1/proc_self_cgroup diff --git a/tests/data/cgroups/v2/proc_pid_cgroup b/tests/data/cgroups/v2/proc_pid_cgroup new file mode 100644 index 0000000000..8a1f8d0bed --- /dev/null +++ b/tests/data/cgroups/v2/proc_pid_cgroup @@ -0,0 +1 @@ +0::/system.slice/Microsoft.A.Sample.Extension_1.0.1_aeac05dc-8c24-4542-95f2-a0d6be1c5ba7.scope diff --git a/tests/data/cgroups/v2/proc_self_cgroup b/tests/data/cgroups/v2/proc_self_cgroup 
new file mode 100644 index 0000000000..0027b4040a --- /dev/null +++ b/tests/data/cgroups/v2/proc_self_cgroup @@ -0,0 +1 @@ +0::/system.slice/walinuxagent.service diff --git a/tests/data/cgroups/v2/sys_fs_cgroup_cgroup.subtree_control b/tests/data/cgroups/v2/sys_fs_cgroup_cgroup.subtree_control new file mode 100644 index 0000000000..c94e05c420 --- /dev/null +++ b/tests/data/cgroups/v2/sys_fs_cgroup_cgroup.subtree_control @@ -0,0 +1 @@ +cpuset cpu io memory pids diff --git a/tests/data/cgroups/v2/sys_fs_cgroup_cgroup.subtree_control_empty b/tests/data/cgroups/v2/sys_fs_cgroup_cgroup.subtree_control_empty new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/ga/test_cgroupapi.py b/tests/ga/test_cgroupapi.py index ad8ef80c2c..ec077c90a0 100644 --- a/tests/ga/test_cgroupapi.py +++ b/tests/ga/test_cgroupapi.py @@ -22,14 +22,19 @@ import subprocess import tempfile -from azurelinuxagent.ga.cgroupapi import CGroupsApi, SystemdCgroupsApi +from azurelinuxagent.common.exception import CGroupsException +from azurelinuxagent.ga.cgroupapi import SystemdCgroupApiv1, SystemdCgroupApiv2, CGroupUtil, get_cgroup_api, \ + InvalidCgroupMountpointException from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.common.osutil import systemd from azurelinuxagent.common.utils import fileutil -from tests.lib.mock_cgroup_environment import mock_cgroup_environment +from tests.lib.mock_cgroup_environment import mock_cgroup_v1_environment, mock_cgroup_v2_environment, \ + mock_cgroup_hybrid_environment +from tests.lib.mock_environment import MockCommand from tests.lib.tools import AgentTestCase, patch, mock_sleep from tests.lib.cgroups_tools import CGroupsTools + class _MockedFileSystemTestCase(AgentTestCase): def setUp(self): AgentTestCase.setUp(self) @@ -39,7 +44,7 @@ def setUp(self): os.mkdir(os.path.join(self.cgroups_file_system_root, "cpu")) os.mkdir(os.path.join(self.cgroups_file_system_root, "memory")) - self.mock_cgroups_file_system_root = 
patch("azurelinuxagent.ga.cgroupapi.CGROUPS_FILE_SYSTEM_ROOT", self.cgroups_file_system_root) + self.mock_cgroups_file_system_root = patch("azurelinuxagent.ga.cgroupapi.CGROUP_FILE_SYSTEM_ROOT", self.cgroups_file_system_root) self.mock_cgroups_file_system_root.start() def tearDown(self): @@ -47,7 +52,7 @@ def tearDown(self): AgentTestCase.tearDown(self) -class CGroupsApiTestCase(_MockedFileSystemTestCase): +class CGroupUtilTestCase(AgentTestCase): def test_cgroups_should_be_supported_only_on_ubuntu16_centos7dot4_redhat7dot4_and_later_versions(self): test_cases = [ (['ubuntu', '16.04', 'xenial'], True), @@ -76,84 +81,218 @@ def test_cgroups_should_be_supported_only_on_ubuntu16_centos7dot4_redhat7dot4_an for (distro, supported) in test_cases: with patch("azurelinuxagent.ga.cgroupapi.get_distro", return_value=distro): - self.assertEqual(CGroupsApi.cgroups_supported(), supported, "cgroups_supported() failed on {0}".format(distro)) + self.assertEqual(CGroupUtil.cgroups_supported(), supported, "cgroups_supported() failed on {0}".format(distro)) class SystemdCgroupsApiTestCase(AgentTestCase): - def test_get_systemd_version_should_return_a_version_number(self): - with mock_cgroup_environment(self.tmp_dir): - version_info = systemd.get_version() - found = re.search(r"systemd \d+", version_info) is not None - self.assertTrue(found, "Could not determine the systemd version: {0}".format(version_info)) - - def test_get_cpu_and_memory_mount_points_should_return_the_cgroup_mount_points(self): - with mock_cgroup_environment(self.tmp_dir): - cpu, memory = SystemdCgroupsApi().get_cgroup_mount_points() - self.assertEqual(cpu, '/sys/fs/cgroup/cpu,cpuacct', "The mount point for the CPU controller is incorrect") - self.assertEqual(memory, '/sys/fs/cgroup/memory', "The mount point for the memory controller is incorrect") - - def test_get_service_cgroup_paths_should_return_the_cgroup_mount_points(self): - with mock_cgroup_environment(self.tmp_dir): - cpu, memory = 
SystemdCgroupsApi().get_unit_cgroup_paths("extension.service") - self.assertIn(cpu, '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service', - "The mount point for the CPU controller is incorrect") - self.assertIn(memory, '/sys/fs/cgroup/memory/system.slice/extension.service', - "The mount point for the memory controller is incorrect") + def test_get_cgroup_api_raises_exception_when_systemd_mount_point_does_not_exist(self): + with mock_cgroup_v1_environment(self.tmp_dir): + # Mock os.path.exists to return False for the os.path.exists(CGROUP_FILE_SYSTEM_ROOT) check + with patch("os.path.exists", return_value=False): + with self.assertRaises(InvalidCgroupMountpointException) as context: + get_cgroup_api() + self.assertTrue("Expected cgroup filesystem to be mounted at '/sys/fs/cgroup', but it is not" in str(context.exception)) + + def test_get_cgroup_api_is_v2_when_v2_in_use(self): + with mock_cgroup_v2_environment(self.tmp_dir): + self.assertIsInstance(get_cgroup_api(), SystemdCgroupApiv2) + + def test_get_cgroup_api_raises_exception_when_hybrid_in_use_and_controllers_available_in_unified_hierarchy(self): + with mock_cgroup_hybrid_environment(self.tmp_dir): + # Mock /sys/fs/cgroup/unified/cgroup.controllers file to have available controllers + with patch("os.path.exists", return_value=True): + with patch('azurelinuxagent.common.utils.fileutil.read_file', return_value="cpu memory"): + with self.assertRaises(CGroupsException) as context: + get_cgroup_api() + self.assertTrue("Detected hybrid cgroup mode, but there are controllers available to be enabled in unified hierarchy: cpu memory" in str(context.exception)) + + def test_get_cgroup_api_raises_exception_when_v1_in_use_and_controllers_have_non_sytemd_mountpoints(self): + with mock_cgroup_v1_environment(self.tmp_dir): + # Mock /sys/fs/cgroup/unified/cgroup.controllers file to have available controllers + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.are_mountpoints_systemd_created', 
return_value=False): + with self.assertRaises(InvalidCgroupMountpointException) as context: + get_cgroup_api() + self.assertTrue("Expected cgroup controllers to be mounted at '/sys/fs/cgroup', but at least one is not." in str(context.exception)) + + def test_get_cgroup_api_is_v1_when_v1_in_use(self): + with mock_cgroup_v1_environment(self.tmp_dir): + self.assertIsInstance(get_cgroup_api(), SystemdCgroupApiv1) + + def test_get_cgroup_api_is_v1_when_hybrid_in_use(self): + with mock_cgroup_hybrid_environment(self.tmp_dir): + # Mock os.path.exists to return True for the os.path.exists('/sys/fs/cgroup/cgroup.controllers') check + with patch("os.path.exists", return_value=True): + self.assertIsInstance(get_cgroup_api(), SystemdCgroupApiv1) + + def test_get_cgroup_api_raises_exception_when_cgroup_mode_cannot_be_determined(self): + unknown_cgroup_type = "unknown_cgroup_type" + with patch('azurelinuxagent.common.utils.shellutil.run_command', return_value=unknown_cgroup_type): + with self.assertRaises(CGroupsException) as context: + get_cgroup_api() + self.assertTrue("/sys/fs/cgroup has an unexpected file type: {0}".format(unknown_cgroup_type) in str(context.exception)) - def test_get_cpu_and_memory_cgroup_relative_paths_for_process_should_return_the_cgroup_relative_paths(self): - with mock_cgroup_environment(self.tmp_dir): - cpu, memory = SystemdCgroupsApi.get_process_cgroup_relative_paths('self') - self.assertEqual(cpu, "system.slice/walinuxagent.service", "The relative path for the CPU cgroup is incorrect") - self.assertEqual(memory, "system.slice/walinuxagent.service", "The relative memory for the CPU cgroup is incorrect") - - def test_get_cgroup2_controllers_should_return_the_v2_cgroup_controllers(self): - with mock_cgroup_environment(self.tmp_dir): - mount_point, controllers = SystemdCgroupsApi.get_cgroup2_controllers() - - self.assertEqual(mount_point, "/sys/fs/cgroup/unified", "Invalid mount point for V2 cgroups") - self.assertIn("cpu", controllers, "The CPU 
controller is not in the list of V2 controllers") - self.assertIn("memory", controllers, "The memory controller is not in the list of V2 controllers") + def test_get_systemd_version_should_return_a_version_number(self): + # We expect same behavior for v1 and v2 + mock_envs = [mock_cgroup_v1_environment(self.tmp_dir), mock_cgroup_v2_environment(self.tmp_dir)] + for env in mock_envs: + with env: + version_info = systemd.get_version() + found = re.search(r"systemd \d+", version_info) is not None + self.assertTrue(found, "Could not determine the systemd version: {0}".format(version_info)) def test_get_unit_property_should_return_the_value_of_the_given_property(self): - with mock_cgroup_environment(self.tmp_dir): - cpu_accounting = systemd.get_unit_property("walinuxagent.service", "CPUAccounting") - - self.assertEqual(cpu_accounting, "no", "Property {0} of {1} is incorrect".format("CPUAccounting", "walinuxagent.service")) - - def assert_cgroups_created(self, extension_cgroups): - self.assertEqual(len(extension_cgroups), 2, - 'start_extension_command did not return the expected number of cgroups') + # We expect same behavior for v1 and v2 + mock_envs = [mock_cgroup_v1_environment(self.tmp_dir), mock_cgroup_v2_environment(self.tmp_dir)] + for env in mock_envs: + with env: + cpu_accounting = systemd.get_unit_property("walinuxagent.service", "CPUAccounting") - cpu_found = memory_found = False + self.assertEqual(cpu_accounting, "no", "Property {0} of {1} is incorrect".format("CPUAccounting", "walinuxagent.service")) - for cgroup in extension_cgroups: - match = re.match( - r'^/sys/fs/cgroup/(cpu|memory)/system.slice/Microsoft.Compute.TestExtension_1\.2\.3\_([a-f0-9-]+)\.scope$', - cgroup.path) - self.assertTrue(match is not None, "Unexpected path for cgroup: {0}".format(cgroup.path)) +class SystemdCgroupsApiv1TestCase(AgentTestCase): + def test_get_unit_cgroup_paths_should_return_the_cgroup_v1_mount_points(self): + with mock_cgroup_v1_environment(self.tmp_dir): + cpu, memory 
= get_cgroup_api().get_unit_cgroup_paths("extension.service") + self.assertIn(cpu, '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service', + "The mount point for the CPU controller is incorrect") + self.assertIn(memory, '/sys/fs/cgroup/memory/system.slice/extension.service', + "The mount point for the memory controller is incorrect") - if match.group(1) == 'cpu': - cpu_found = True - if match.group(1) == 'memory': - memory_found = True + def test_get_unit_cgroup_path_should_return_None_if_either_cgroup_v1_controller_not_mounted(self): + with mock_cgroup_v1_environment(self.tmp_dir): + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_controller_root_paths', return_value=('/sys/fs/cgroup/cpu,cpuacct', None)): + cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service") + self.assertIn(cpu, '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service', + "The mount point for the CPU controller is incorrect") + self.assertIsNone(memory, + "The mount point for the memory controller is None so unit cgroup should be None") + + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_controller_root_paths', return_value=(None, '/sys/fs/cgroup/memory')): + cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service") + self.assertIsNone(cpu, "The mount point for the cpu controller is None so unit cgroup should be None") + self.assertIn(memory, '/sys/fs/cgroup/memory/system.slice/extension.service', + "The mount point for the memory controller is incorrect") + + def test_get_process_cgroup_paths_should_return_the_cgroup_v1_mount_points(self): + with mock_cgroup_v1_environment(self.tmp_dir): + cpu, memory = get_cgroup_api().get_process_cgroup_paths("self") + self.assertIn(cpu, '/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service', + "The mount point for the CPU controller is incorrect") + self.assertIn(memory, '/sys/fs/cgroup/memory/system.slice/walinuxagent.service', + "The mount point for the memory controller is 
incorrect") - self.assertTrue(cpu_found, 'start_extension_command did not return a cpu cgroup') - self.assertTrue(memory_found, 'start_extension_command did not return a memory cgroup') + def test_get_process_cgroup_path_should_return_None_if_either_cgroup_v1_controller_not_mounted(self): + with mock_cgroup_v1_environment(self.tmp_dir): + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_controller_root_paths', return_value=('/sys/fs/cgroup/cpu,cpuacct', None)): + cpu, memory = get_cgroup_api().get_process_cgroup_paths("self") + self.assertIn(cpu, '/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service', + "The mount point for the CPU controller is incorrect") + self.assertIsNone(memory, + "The mount point for the memory controller is None so unit cgroup should be None") + + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_controller_root_paths', return_value=(None, '/sys/fs/cgroup/memory')): + cpu, memory = get_cgroup_api().get_process_cgroup_paths("self") + self.assertIsNone(cpu, "The mount point for the CPU controller is None so unit cgroup should be None") + self.assertIn(memory, '/sys/fs/cgroup/memory/system.slice/walinuxagent.service', + "The mount point for the memory controller is incorrect") + + def test_get_process_cgroup_v1_path_should_return_None_if_either_relative_path_is_None(self): + with mock_cgroup_v1_environment(self.tmp_dir): + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup_relative_paths', return_value=('system.slice/walinuxagent.service', None)): + cpu, memory = get_cgroup_api().get_process_cgroup_paths("self") + self.assertIn(cpu, '/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service', + "The mount point for the CPU controller is incorrect") + self.assertIsNone(memory, + "The relative cgroup path for the memory controller is None so unit cgroup should be None") + + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup_relative_paths', 
return_value=(None, 'system.slice/walinuxagent.service')): + cpu, memory = get_cgroup_api().get_process_cgroup_paths("self") + self.assertIsNone(cpu, "The relative cgroup path for the cpu controller is None so unit cgroup should be None") + self.assertIn(memory, '/sys/fs/cgroup/memory/system.slice/walinuxagent.service', + "The mount point for the memory controller is incorrect") + + def test_get_controller_root_paths_should_return_the_cgroup_v1_controller_mount_points(self): + with mock_cgroup_v1_environment(self.tmp_dir): + cpu, memory = get_cgroup_api().get_controller_root_paths() + self.assertEqual(cpu, '/sys/fs/cgroup/cpu,cpuacct', "The root cgroup for the CPU controller is incorrect") + self.assertEqual(memory, '/sys/fs/cgroup/memory', "The root cgroup for the memory controller is incorrect") + + def test_get_controller_root_paths_should_return_None_if_either_controller_not_mounted(self): + with mock_cgroup_v1_environment(self.tmp_dir): + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'memory': '/sys/fs/cgroup/memory', 'io': '/sys/fs/cgroup/io'}): + cpu, memory = get_cgroup_api().get_controller_root_paths() + self.assertIsNone(cpu, "The CPU controller is mot mounted, so the cpu controller path should be None") + self.assertEqual(memory, '/sys/fs/cgroup/memory', "The root cgroup for the memory controller is incorrect") + + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct', 'io': '/sys/fs/cgroup/io'}): + cpu, memory = get_cgroup_api().get_controller_root_paths() + self.assertIsNone(memory, "The memory controller is mot mounted, so the memory controller path should be None") + self.assertEqual(cpu, '/sys/fs/cgroup/cpu,cpuacct', "The root cgroup for the cpu controller is incorrect") + + def test_get_controller_mountpoints_should_return_all_controller_mount_points(self): + with 
mock_cgroup_v1_environment(self.tmp_dir): + cgroup_api = get_cgroup_api() + # Expected value comes from findmnt output in the mocked environment + self.assertEqual(cgroup_api._get_controller_mountpoints(), { + 'systemd': '/sys/fs/cgroup/systemd', + 'devices': '/sys/fs/cgroup/devices', + 'rdma': '/sys/fs/cgroup/rdma', + 'perf_event': '/sys/fs/cgroup/perf_event', + 'net_cls,net_prio': '/sys/fs/cgroup/net_cls,net_prio', + 'blkio': '/sys/fs/cgroup/blkio', + 'cpuset': '/sys/fs/cgroup/cpuset', + 'misc': '/sys/fs/cgroup/misc', + 'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct', + 'memory': '/sys/fs/cgroup/memory', + 'freezer': '/sys/fs/cgroup/freezer', + 'hugetlb': '/sys/fs/cgroup/hugetlb', + 'pids': '/sys/fs/cgroup/pids', + }, "The controller mountpoints are not correct") + + def test_are_mountpoints_systemd_created_should_return_False_if_cpu_or_memory_are_not_systemd_mountpoints(self): + with mock_cgroup_v1_environment(self.tmp_dir): + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/custom/mountpoint/path', 'memory': '/custom/mountpoint/path'}): + self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created()) + + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/custom/mountpoint/path'}): + self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created()) + + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'memory': '/custom/mountpoint/path'}): + self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created()) + + def test_are_mountpoints_systemd_created_should_return_True_if_cpu_and_memory_are_systemd_mountpoints(self): + with mock_cgroup_v1_environment(self.tmp_dir): + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup', 'memory': '/sys/fs/cgroup'}): + 
self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created()) + + # are_mountpoints_systemd_created should only check controllers which are mounted + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup'}): + self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created()) + + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'memory': '/sys/fs/cgroup'}): + self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created()) + + def test_get_cpu_and_memory_cgroup_relative_paths_for_process_should_return_the_cgroup_v1_relative_paths(self): + with mock_cgroup_v1_environment(self.tmp_dir): + cpu, memory = get_cgroup_api().get_process_cgroup_relative_paths('self') + self.assertEqual(cpu, "system.slice/walinuxagent.service", "The relative path for the CPU cgroup is incorrect") + self.assertEqual(memory, "system.slice/walinuxagent.service", "The relative memory for the CPU cgroup is incorrect") @patch('time.sleep', side_effect=lambda _: mock_sleep()) - def test_start_extension_command_should_return_the_command_output(self, _): - original_popen = subprocess.Popen + def test_start_extension_cgroups_v1_command_should_return_the_command_output(self, _): + with mock_cgroup_v1_environment(self.tmp_dir): + original_popen = subprocess.Popen - def mock_popen(command, *args, **kwargs): - if command.startswith('systemd-run --property'): - command = "echo TEST_OUTPUT" - return original_popen(command, *args, **kwargs) + def mock_popen(command, *args, **kwargs): + if isinstance(command, str) and command.startswith('systemd-run --property'): + command = "echo TEST_OUTPUT" + return original_popen(command, *args, **kwargs) - with mock_cgroup_environment(self.tmp_dir): with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as output_file: - with patch("subprocess.Popen", side_effect=mock_popen) as popen_patch: # pylint: 
disable=unused-variable - command_output = SystemdCgroupsApi().start_extension_command( + with patch("subprocess.Popen", + side_effect=mock_popen) as popen_patch: # pylint: disable=unused-variable + command_output = get_cgroup_api().start_extension_command( extension_name="Microsoft.Compute.TestExtension-1.2.3", command="A_TEST_COMMAND", cmd_name="test", @@ -167,9 +306,9 @@ def mock_popen(command, *args, **kwargs): self.assertIn("[stdout]\nTEST_OUTPUT\n", command_output, "The test output was not captured") @patch('time.sleep', side_effect=lambda _: mock_sleep()) - def test_start_extension_command_should_execute_the_command_in_a_cgroup(self, _): - with mock_cgroup_environment(self.tmp_dir): - SystemdCgroupsApi().start_extension_command( + def test_start_extension_cgroups_v1_command_should_execute_the_command_in_a_cgroup(self, _): + with mock_cgroup_v1_environment(self.tmp_dir): + get_cgroup_api().start_extension_command( extension_name="Microsoft.Compute.TestExtension-1.2.3", command="test command", cmd_name="test", @@ -183,18 +322,20 @@ def test_start_extension_command_should_execute_the_command_in_a_cgroup(self, _) tracked = CGroupsTelemetry._tracked self.assertTrue( - any(cg for cg in tracked.values() if cg.name == 'Microsoft.Compute.TestExtension-1.2.3' and 'cpu' in cg.path), + any(cg for cg in tracked.values() if + cg.name == 'Microsoft.Compute.TestExtension-1.2.3' and 'cpu' in cg.path), "The extension's CPU is not being tracked") self.assertTrue( - any(cg for cg in tracked.values() if cg.name == 'Microsoft.Compute.TestExtension-1.2.3' and 'memory' in cg.path), + any(cg for cg in tracked.values() if + cg.name == 'Microsoft.Compute.TestExtension-1.2.3' and 'memory' in cg.path), "The extension's Memory is not being tracked") @patch('time.sleep', side_effect=lambda _: mock_sleep()) - def test_start_extension_command_should_use_systemd_to_execute_the_command(self, _): - with mock_cgroup_environment(self.tmp_dir): + def 
test_start_extension_cgroups_v1_command_should_use_systemd_to_execute_the_command(self, _): + with mock_cgroup_v1_environment(self.tmp_dir): with patch("subprocess.Popen", wraps=subprocess.Popen) as popen_patch: - SystemdCgroupsApi().start_extension_command( + get_cgroup_api().start_extension_command( extension_name="Microsoft.Compute.TestExtension-1.2.3", command="the-test-extension-command", cmd_name="test", @@ -205,12 +346,127 @@ def test_start_extension_command_should_use_systemd_to_execute_the_command(self, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - extension_calls = [args[0] for (args, _) in popen_patch.call_args_list if "the-test-extension-command" in args[0]] + extension_calls = [args[0] for (args, _) in popen_patch.call_args_list if + "the-test-extension-command" in args[0]] self.assertEqual(1, len(extension_calls), "The extension should have been invoked exactly once") self.assertIn("systemd-run", extension_calls[0], "The extension should have been invoked using systemd") +class SystemdCgroupsApiv2TestCase(AgentTestCase): + def test_get_controllers_enabled_at_root_should_return_list_of_enabled_controllers(self): + with mock_cgroup_v2_environment(self.tmp_dir): + cgroup_api = get_cgroup_api() + self.assertEqual(cgroup_api._get_controllers_enabled_at_root('/sys/fs/cgroup'), ['cpuset', 'cpu', 'io', 'memory', 'pids']) + + def test_get_controllers_enabled_at_root_should_return_empty_list_if_root_cgroup_path_is_None(self): + with mock_cgroup_v2_environment(self.tmp_dir): + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path', return_value=None): + cgroup_api = get_cgroup_api() + self.assertEqual(cgroup_api._controllers_enabled_at_root, []) + + def test_get_root_cgroup_path_should_return_v2_cgroup_root(self): + with mock_cgroup_v2_environment(self.tmp_dir): + cgroup_api = get_cgroup_api() + self.assertEqual(cgroup_api._get_root_cgroup_path(), '/sys/fs/cgroup') + + def 
test_get_root_cgroup_path_should_only_match_systemd_mountpoint(self): + with mock_cgroup_v2_environment(self.tmp_dir) as env: + # Mock an environment which has multiple v2 mountpoints + env.add_command(MockCommand(r"^findmnt -t cgroup2 --noheadings$", +'''/custom/mountpoint/path1 cgroup2 cgroup2 rw,relatime +/sys/fs/cgroup cgroup2 cgroup2 rw,nosuid,nodev,noexec,relatime +/custom/mountpoint/path2 none cgroup2 rw,relatime +''')) + cgroup_api = get_cgroup_api() + self.assertEqual(cgroup_api._get_root_cgroup_path(), '/sys/fs/cgroup') + + def test_get_unit_cgroup_paths_should_return_the_cgroup_v2_cgroup_paths(self): + with mock_cgroup_v2_environment(self.tmp_dir): + cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service") + self.assertEqual(cpu, '/sys/fs/cgroup/system.slice/extension.service', + "The cgroup path for the CPU controller is incorrect") + self.assertEqual(memory, '/sys/fs/cgroup/system.slice/extension.service', + "The cgroup path for the memory controller is incorrect") + + def test_get_unit_cgroup_path_should_return_None_if_either_cgroup_v2_controller_not_enabled(self): + with mock_cgroup_v2_environment(self.tmp_dir): + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.get_controller_root_paths', return_value=('/sys/fs/cgroup', None)): + cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service") + self.assertIn(cpu, '/sys/fs/cgroup/system.slice/extension.service', + "The cgroup path for the CPU controller is incorrect") + self.assertIsNone(memory, + "The cgroup path for the memory controller is None so unit cgroup should be None") + + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.get_controller_root_paths', return_value=(None, '/sys/fs/cgroup')): + cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service") + self.assertIsNone(cpu, "The cgroup path for the cpu controller is None so unit cgroup should be None") + self.assertIn(memory, '/sys/fs/cgroup/system.slice/extension.service', + 
"The cgroup path for the memory controller is incorrect") + + def test_get_process_cgroup_paths_should_return_the_cgroup_v2_cgroup_paths(self): + with mock_cgroup_v2_environment(self.tmp_dir): + cpu, memory = get_cgroup_api().get_process_cgroup_paths("self") + self.assertIn(cpu, '/sys/fs/cgroup/system.slice/walinuxagent.service', + "The cgroup path for the CPU controller is incorrect") + self.assertIn(memory, '/sys/fs/cgroup/system.slice/walinuxagent.service', + "The cgroup path for the memory controller is incorrect") + + def test_get_process_cgroup_path_should_return_None_if_either_cgroup_v2_controller_not_enabled(self): + with mock_cgroup_v2_environment(self.tmp_dir): + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.get_controller_root_paths', return_value=('/sys/fs/cgroup', None)): + cpu, memory = get_cgroup_api().get_process_cgroup_paths("self") + self.assertIn(cpu, '/sys/fs/cgroup/system.slice/walinuxagent.service', + "The cgroup path for the CPU controller is incorrect") + self.assertIsNone(memory, + "The cgroup path for the memory controller is None so unit cgroup should be None") + + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.get_controller_root_paths', return_value=(None, '/sys/fs/cgroup')): + cpu, memory = get_cgroup_api().get_process_cgroup_paths("self") + self.assertIsNone(cpu, "The cgroup path for the CPU controller is None so unit cgroup should be None") + self.assertIn(memory, '/sys/fs/cgroup/system.slice/walinuxagent.service', + "The cgroup path for the memory controller is incorrect") + + def test_get_process_cgroup_v2_path_should_return_None_if_relative_path_is_None(self): + with mock_cgroup_v2_environment(self.tmp_dir): + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.get_process_cgroup_relative_paths', return_value=(None, None)): + cpu, memory = get_cgroup_api().get_process_cgroup_paths("self") + self.assertIsNone(cpu, "The relative cgroup path for the cpu controller is None so unit cgroup should be 
None") + self.assertIsNone(memory, + "The relative cgroup path for the memory controller is None so unit cgroup should be None") + + def test_get_controller_root_paths_should_return_the_cgroup_v2_root_cgroup_path(self): + with mock_cgroup_v2_environment(self.tmp_dir): + cpu, memory = get_cgroup_api().get_controller_root_paths() + self.assertEqual(cpu, '/sys/fs/cgroup', "The root cgroup for the CPU controller is incorrect") + self.assertEqual(memory, '/sys/fs/cgroup', "The root cgroup for the memory controller is incorrect") + + def test_get_controller_root_paths_should_return_None_if_root_cgroup_path_is_None(self): + with mock_cgroup_v2_environment(self.tmp_dir): + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path', return_value=None): + cpu, memory = get_cgroup_api().get_controller_root_paths() + self.assertIsNone(cpu, "The root cgroup path is None, so the CPU controller path should be None") + self.assertIsNone(memory, "The root cgroup path is None, so the memory controller path should be None") + + def test_get_controller_root_paths_should_return_None_if_either_controller_not_enabled(self): + with mock_cgroup_v2_environment(self.tmp_dir): + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_controllers_enabled_at_root', return_value=['io', 'memory']): + cpu, memory = get_cgroup_api().get_controller_root_paths() + self.assertIsNone(cpu, "The CPU controller is not enabled, so the CPU controller path should be None") + self.assertEqual(memory, '/sys/fs/cgroup', "The root cgroup for the memory controller is incorrect") + + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_controllers_enabled_at_root', return_value=['cpu', 'io']): + cpu, memory = get_cgroup_api().get_controller_root_paths() + self.assertEqual(cpu, '/sys/fs/cgroup', "The root cgroup for the CPU controller is incorrect") + self.assertIsNone(memory, "The memory controller is not enabled, so the memory controller path should be None") + + def 
test_get_cpu_and_memory_cgroup_relative_paths_for_process_should_return_the_cgroup_v2_relative_paths(self): + with mock_cgroup_v2_environment(self.tmp_dir): + cpu, memory = get_cgroup_api().get_process_cgroup_relative_paths('self') + self.assertEqual(cpu, "system.slice/walinuxagent.service", "The relative path for the CPU cgroup is incorrect") + self.assertEqual(memory, "system.slice/walinuxagent.service", "The relative memory for the CPU cgroup is incorrect") + + class SystemdCgroupsApiMockedFileSystemTestCase(_MockedFileSystemTestCase): def test_cleanup_legacy_cgroups_should_remove_legacy_cgroups(self): # Set up a mock /var/run/waagent.pid file @@ -222,7 +478,7 @@ def test_cleanup_legacy_cgroups_should_remove_legacy_cgroups(self): legacy_memory_cgroup = CGroupsTools.create_legacy_agent_cgroup(self.cgroups_file_system_root, "memory", '') with patch("azurelinuxagent.ga.cgroupapi.get_agent_pid_file_path", return_value=daemon_pid_file): - legacy_cgroups = SystemdCgroupsApi().cleanup_legacy_cgroups() + legacy_cgroups = CGroupUtil.cleanup_legacy_cgroups() self.assertEqual(legacy_cgroups, 2, "cleanup_legacy_cgroups() did not find all the expected cgroups") self.assertFalse(os.path.exists(legacy_cpu_cgroup), "cleanup_legacy_cgroups() did not remove the CPU legacy cgroup") diff --git a/tests/ga/test_cgroupconfigurator.py b/tests/ga/test_cgroupconfigurator.py index 841a4b72d0..5b4b0976e9 100644 --- a/tests/ga/test_cgroupconfigurator.py +++ b/tests/ga/test_cgroupconfigurator.py @@ -35,7 +35,7 @@ from azurelinuxagent.common.future import ustr from azurelinuxagent.common.utils import shellutil, fileutil from tests.lib.mock_environment import MockCommand -from tests.lib.mock_cgroup_environment import mock_cgroup_environment, UnitFilePaths +from tests.lib.mock_cgroup_environment import mock_cgroup_v1_environment, UnitFilePaths, mock_cgroup_v2_environment from tests.lib.tools import AgentTestCase, patch, mock_sleep, data_dir, is_python_version_26_or_34, skip_if_predicate_true 
from tests.lib.miscellaneous_tools import format_processes, wait_for @@ -55,7 +55,7 @@ def _get_cgroup_configurator(self, initialize=True, enable=True, mock_commands=N CGroupConfigurator._instance = None configurator = CGroupConfigurator.get_instance() CGroupsTelemetry.reset() - with mock_cgroup_environment(self.tmp_dir) as mock_environment: + with mock_cgroup_v1_environment(self.tmp_dir) as mock_environment: if mock_commands is not None: for command in mock_commands: mock_environment.add_command(command) @@ -68,10 +68,41 @@ def _get_cgroup_configurator(self, initialize=True, enable=True, mock_commands=N configurator.initialize() yield configurator - def test_initialize_should_enable_cgroups(self): + @contextlib.contextmanager + def _get_cgroup_configurator_v2(self, initialize=True, enable=True, mock_commands=None): + CGroupConfigurator._instance = None + configurator = CGroupConfigurator.get_instance() + CGroupsTelemetry.reset() + with mock_cgroup_v2_environment(self.tmp_dir) as mock_environment: + if mock_commands is not None: + for command in mock_commands: + mock_environment.add_command(command) + configurator.mocks = mock_environment + if initialize: + if not enable: + with patch.object(configurator, "enable"): + configurator.initialize() + else: + configurator.initialize() + yield configurator + + def test_initialize_should_enable_cgroups_v1(self): with self._get_cgroup_configurator() as configurator: self.assertTrue(configurator.enabled(), "cgroups were not enabled") + def test_initialize_should_not_enable_cgroups_v2(self): + with self._get_cgroup_configurator_v2() as configurator: + self.assertFalse(configurator.enabled(), "cgroups were enabled") + + def test_initialize_should_not_enable_when_cgroup_api_cannot_be_determined(self): + # Mock cgroup api to raise CGroupsException + def mock_get_cgroup_api(): + raise CGroupsException("") + + with patch('azurelinuxagent.ga.cgroupconfigurator.get_cgroup_api', side_effect=mock_get_cgroup_api): + with 
self._get_cgroup_configurator() as configurator: + self.assertFalse(configurator.enabled(), "cgroups were enabled") + def test_initialize_should_start_tracking_the_agent_cgroups(self): with self._get_cgroup_configurator() as configurator: tracked = CGroupsTelemetry._tracked @@ -83,18 +114,18 @@ def test_initialize_should_start_tracking_the_agent_cgroups(self): "The Agent's Memory is not being tracked. Tracked: {0}".format(tracked)) def test_initialize_should_start_tracking_other_controllers_when_one_is_not_present(self): - command_mocks = [MockCommand(r"^mount -t cgroup$", -'''cgroup on /sys/fs/cgroup/systemd type cgroup (rw,nosuid,nodev,noexec,relatime,xattr,name=systemd) -cgroup on /sys/fs/cgroup/rdma type cgroup (rw,nosuid,nodev,noexec,relatime,rdma) -cgroup on /sys/fs/cgroup/cpuset type cgroup (rw,nosuid,nodev,noexec,relatime,cpuset) -cgroup on /sys/fs/cgroup/net_cls,net_prio type cgroup (rw,nosuid,nodev,noexec,relatime,net_cls,net_prio) -cgroup on /sys/fs/cgroup/perf_event type cgroup (rw,nosuid,nodev,noexec,relatime,perf_event) -cgroup on /sys/fs/cgroup/hugetlb type cgroup (rw,nosuid,nodev,noexec,relatime,hugetlb) -cgroup on /sys/fs/cgroup/freezer type cgroup (rw,nosuid,nodev,noexec,relatime,freezer) -cgroup on /sys/fs/cgroup/pids type cgroup (rw,nosuid,nodev,noexec,relatime,pids) -cgroup on /sys/fs/cgroup/devices type cgroup (rw,nosuid,nodev,noexec,relatime,devices) -cgroup on /sys/fs/cgroup/cpu,cpuacct type cgroup (rw,nosuid,nodev,noexec,relatime,cpu,cpuacct) -cgroup on /sys/fs/cgroup/blkio type cgroup (rw,nosuid,nodev,noexec,relatime,blkio) + command_mocks = [MockCommand(r"^findmnt -t cgroup --noheadings$", +'''/sys/fs/cgroup/systemd cgroup cgroup rw,nosuid,nodev,noexec,relatime,xattr,name=systemd +/sys/fs/cgroup/devices cgroup cgroup rw,nosuid,nodev,noexec,relatime,devices +/sys/fs/cgroup/rdma cgroup cgroup rw,nosuid,nodev,noexec,relatime,rdma +/sys/fs/cgroup/perf_event cgroup cgroup rw,nosuid,nodev,noexec,relatime,perf_event 
+/sys/fs/cgroup/net_cls,net_prio cgroup cgroup rw,nosuid,nodev,noexec,relatime,net_cls,net_prio +/sys/fs/cgroup/blkio cgroup cgroup rw,nosuid,nodev,noexec,relatime,blkio +/sys/fs/cgroup/cpuset cgroup cgroup rw,nosuid,nodev,noexec,relatime,cpuset +/sys/fs/cgroup/cpu,cpuacct cgroup cgroup rw,nosuid,nodev,noexec,relatime,cpu,cpuacct +/sys/fs/cgroup/freezer cgroup cgroup rw,nosuid,nodev,noexec,relatime,freezer +/sys/fs/cgroup/hugetlb cgroup cgroup rw,nosuid,nodev,noexec,relatime,hugetlb +/sys/fs/cgroup/pids cgroup cgroup rw,nosuid,nodev,noexec,relatime,pids ''')] with self._get_cgroup_configurator(mock_commands=command_mocks) as configurator: tracked = CGroupsTelemetry._tracked @@ -104,17 +135,17 @@ def test_initialize_should_start_tracking_other_controllers_when_one_is_not_pres "The Agent's memory should not be tracked. Tracked: {0}".format(tracked)) def test_initialize_should_not_enable_cgroups_when_the_cpu_and_memory_controllers_are_not_present(self): - command_mocks = [MockCommand(r"^mount -t cgroup$", -'''cgroup on /sys/fs/cgroup/systemd type cgroup (rw,nosuid,nodev,noexec,relatime,xattr,name=systemd) -cgroup on /sys/fs/cgroup/rdma type cgroup (rw,nosuid,nodev,noexec,relatime,rdma) -cgroup on /sys/fs/cgroup/cpuset type cgroup (rw,nosuid,nodev,noexec,relatime,cpuset) -cgroup on /sys/fs/cgroup/net_cls,net_prio type cgroup (rw,nosuid,nodev,noexec,relatime,net_cls,net_prio) -cgroup on /sys/fs/cgroup/perf_event type cgroup (rw,nosuid,nodev,noexec,relatime,perf_event) -cgroup on /sys/fs/cgroup/hugetlb type cgroup (rw,nosuid,nodev,noexec,relatime,hugetlb) -cgroup on /sys/fs/cgroup/freezer type cgroup (rw,nosuid,nodev,noexec,relatime,freezer) -cgroup on /sys/fs/cgroup/pids type cgroup (rw,nosuid,nodev,noexec,relatime,pids) -cgroup on /sys/fs/cgroup/devices type cgroup (rw,nosuid,nodev,noexec,relatime,devices) -cgroup on /sys/fs/cgroup/blkio type cgroup (rw,nosuid,nodev,noexec,relatime,blkio) + command_mocks = [MockCommand(r"^findmnt -t cgroup --noheadings$", 
+'''/sys/fs/cgroup/systemd cgroup cgroup rw,nosuid,nodev,noexec,relatime,xattr,name=systemd +/sys/fs/cgroup/devices cgroup cgroup rw,nosuid,nodev,noexec,relatime,devices +/sys/fs/cgroup/rdma cgroup cgroup rw,nosuid,nodev,noexec,relatime,rdma +/sys/fs/cgroup/perf_event cgroup cgroup rw,nosuid,nodev,noexec,relatime,perf_event +/sys/fs/cgroup/net_cls,net_prio cgroup cgroup rw,nosuid,nodev,noexec,relatime,net_cls,net_prio +/sys/fs/cgroup/blkio cgroup cgroup rw,nosuid,nodev,noexec,relatime,blkio +/sys/fs/cgroup/cpuset cgroup cgroup rw,nosuid,nodev,noexec,relatime,cpuset +/sys/fs/cgroup/freezer cgroup cgroup rw,nosuid,nodev,noexec,relatime,freezer +/sys/fs/cgroup/hugetlb cgroup cgroup rw,nosuid,nodev,noexec,relatime,hugetlb +/sys/fs/cgroup/pids cgroup cgroup rw,nosuid,nodev,noexec,relatime,pids ''')] with self._get_cgroup_configurator(mock_commands=command_mocks) as configurator: tracked = CGroupsTelemetry._tracked @@ -123,17 +154,17 @@ def test_initialize_should_not_enable_cgroups_when_the_cpu_and_memory_controller self.assertEqual(len(tracked), 0, "No cgroups should be tracked. 
Tracked: {0}".format(tracked)) def test_initialize_should_not_enable_cgroups_when_the_agent_is_not_in_the_system_slice(self): - command_mocks = [MockCommand(r"^mount -t cgroup$", -'''cgroup on /sys/fs/cgroup/systemd type cgroup (rw,nosuid,nodev,noexec,relatime,xattr,name=systemd) -cgroup on /sys/fs/cgroup/rdma type cgroup (rw,nosuid,nodev,noexec,relatime,rdma) -cgroup on /sys/fs/cgroup/cpuset type cgroup (rw,nosuid,nodev,noexec,relatime,cpuset) -cgroup on /sys/fs/cgroup/net_cls,net_prio type cgroup (rw,nosuid,nodev,noexec,relatime,net_cls,net_prio) -cgroup on /sys/fs/cgroup/perf_event type cgroup (rw,nosuid,nodev,noexec,relatime,perf_event) -cgroup on /sys/fs/cgroup/hugetlb type cgroup (rw,nosuid,nodev,noexec,relatime,hugetlb) -cgroup on /sys/fs/cgroup/freezer type cgroup (rw,nosuid,nodev,noexec,relatime,freezer) -cgroup on /sys/fs/cgroup/pids type cgroup (rw,nosuid,nodev,noexec,relatime,pids) -cgroup on /sys/fs/cgroup/devices type cgroup (rw,nosuid,nodev,noexec,relatime,devices) -cgroup on /sys/fs/cgroup/blkio type cgroup (rw,nosuid,nodev,noexec,relatime,blkio) + command_mocks = [MockCommand(r"^findmnt -t cgroup --noheadings$", +'''/sys/fs/cgroup/systemd cgroup cgroup rw,nosuid,nodev,noexec,relatime,xattr,name=systemd* +/sys/fs/cgroup/devices cgroup cgroup rw,nosuid,nodev,noexec,relatime,devices +/sys/fs/cgroup/rdma cgroup cgroup rw,nosuid,nodev,noexec,relatime,rdma +/sys/fs/cgroup/perf_event cgroup cgroup rw,nosuid,nodev,noexec,relatime,perf_event +/sys/fs/cgroup/net_cls,net_prio cgroup cgroup rw,nosuid,nodev,noexec,relatime,net_cls,net_prio +/sys/fs/cgroup/blkio cgroup cgroup rw,nosuid,nodev,noexec,relatime,blkio +/sys/fs/cgroup/cpuset cgroup cgroup rw,nosuid,nodev,noexec,relatime,cpuset +/sys/fs/cgroup/freezer cgroup cgroup rw,nosuid,nodev,noexec,relatime,freezer +/sys/fs/cgroup/hugetlb cgroup cgroup rw,nosuid,nodev,noexec,relatime,hugetlb +/sys/fs/cgroup/pids cgroup cgroup rw,nosuid,nodev,noexec,relatime,pids ''')] with 
self._get_cgroup_configurator(mock_commands=command_mocks) as configurator: @@ -290,6 +321,17 @@ def test_enable_should_not_track_throttled_time_when_setting_the_cpu_quota_fails self.assertFalse(CGroupsTelemetry.get_track_throttled_time(), "Throttle time should not be tracked") + def test_enable_should_not_track_throttled_time_when_cgroups_v2_enabled(self): + with self._get_cgroup_configurator_v2(initialize=False) as configurator: + if CGroupsTelemetry.get_track_throttled_time(): + raise Exception("Test setup should not start tracking Throttle Time") + + configurator.mocks.add_file(UnitFilePaths.cpu_quota, Exception("A TEST EXCEPTION")) + + configurator.initialize() + + self.assertFalse(CGroupsTelemetry.get_track_throttled_time(), "Throttle time should not be tracked when using cgroups v2") + def test_disable_should_reset_cpu_quota(self): with self._get_cgroup_configurator() as configurator: if len(CGroupsTelemetry._tracked) == 0: @@ -380,7 +422,7 @@ def test_start_extension_command_should_not_use_systemd_when_cgroups_are_not_ena self.assertEqual(command_calls[0], "date", "The command line should not have been modified") @patch('time.sleep', side_effect=lambda _: mock_sleep()) - def test_start_extension_command_should_use_systemd_run_when_cgroups_are_enabled(self, _): + def test_start_extension_command_should_use_systemd_run_when_cgroups_v1_are_enabled(self, _): with self._get_cgroup_configurator() as configurator: with patch("azurelinuxagent.ga.cgroupapi.subprocess.Popen", wraps=subprocess.Popen) as popen_patch: configurator.start_extension_command( @@ -448,6 +490,34 @@ def mock_popen(command_arg, *args, **kwargs): self.assertIn("A TEST EXCEPTION", str(context_manager.exception)) + @patch('time.sleep', side_effect=lambda _: mock_sleep()) + def test_start_extension_command_should_not_use_systemd_when_cgroup_v2_enabled(self, _): + with self._get_cgroup_configurator_v2() as configurator: + self.assertFalse(configurator.enabled()) + + with 
patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.start_extension_command") as v2_extension_start_command: + with patch("azurelinuxagent.ga.cgroupapi.subprocess.Popen", wraps=subprocess.Popen) as patcher: + configurator.start_extension_command( + extension_name="Microsoft.Compute.TestExtension-1.2.3", + command="date", + cmd_name="test", + timeout=300, + shell=False, + cwd=self.tmp_dir, + env={}, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + + command_calls = [args[0] for args, _ in patcher.call_args_list if + len(args) > 0 and "date" in args[0]] + self.assertFalse(v2_extension_start_command.called) + self.assertEqual(len(command_calls), 1, + "The test command should have been called exactly once [{0}]".format( + command_calls)) + self.assertNotIn("systemd-run", command_calls[0], + "The command should not have been invoked using systemd") + self.assertEqual(command_calls[0], "date", "The command line should not have been modified") + @patch('time.sleep', side_effect=lambda _: mock_sleep()) def test_start_extension_command_should_disable_cgroups_and_invoke_the_command_directly_if_systemd_fails(self, _): with self._get_cgroup_configurator() as configurator: @@ -455,7 +525,7 @@ def test_start_extension_command_should_disable_cgroups_and_invoke_the_command_d configurator.mocks.add_command(MockCommand("systemd-run", return_value=1, stdout='', stderr='Failed to start transient scope unit: syntax error')) with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as output_file: - with patch("azurelinuxagent.ga.cgroupconfigurator.add_event") as mock_add_event: + with patch("azurelinuxagent.ga.cgroupapi.add_event") as mock_add_event: with patch("subprocess.Popen", wraps=subprocess.Popen) as popen_patch: CGroupsTelemetry.reset() @@ -841,7 +911,7 @@ def get_completed_process(): agent_processes = [os.getppid(), os.getpid()] + agent_command_processes + [start_extension.systemd_run_pid] other_processes = [1, get_completed_process()] + extension_processes - with 
patch("azurelinuxagent.ga.cgroupconfigurator.CGroupsApi.get_processes_in_cgroup", return_value=agent_processes + other_processes): + with patch("azurelinuxagent.ga.cgroupapi._SystemdCgroupApi.get_processes_in_cgroup", return_value=agent_processes + other_processes): with self.assertRaises(CGroupsException) as context_manager: configurator._check_processes_in_agent_cgroup() @@ -885,7 +955,7 @@ def test_check_cgroups_should_disable_cgroups_when_a_check_fails(self): patchers.append(p) p.start() - with patch("azurelinuxagent.ga.cgroupconfigurator.add_event") as add_event: + with patch("azurelinuxagent.ga.cgroupapi.add_event") as add_event: configurator.enable() tracked_metrics = [ @@ -910,7 +980,7 @@ def test_check_cgroups_should_disable_cgroups_when_a_check_fails(self): p.stop() @patch('azurelinuxagent.ga.cgroupconfigurator.CGroupConfigurator._Impl._check_processes_in_agent_cgroup', side_effect=CGroupsException("Test")) - @patch('azurelinuxagent.ga.cgroupconfigurator.add_event') + @patch('azurelinuxagent.ga.cgroupapi.add_event') def test_agent_should_not_enable_cgroups_if_unexpected_process_already_in_agent_cgroups(self, add_event, _): command_mocks = [MockCommand(r"^systemctl show walinuxagent\.service --property Slice", '''Slice=azure.slice diff --git a/tests/ga/test_cgroupconfigurator_sudo.py b/tests/ga/test_cgroupconfigurator_sudo.py index 30db194086..14b544f5b4 100644 --- a/tests/ga/test_cgroupconfigurator_sudo.py +++ b/tests/ga/test_cgroupconfigurator_sudo.py @@ -25,7 +25,7 @@ from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.common.exception import ExtensionError, ExtensionErrorCodes from azurelinuxagent.common.future import ustr -from tests.lib.mock_cgroup_environment import mock_cgroup_environment +from tests.lib.mock_cgroup_environment import mock_cgroup_v1_environment from tests.lib.tools import AgentTestCase, patch, mock_sleep, i_am_root, is_python_version_26_or_34, skip_if_predicate_true @@ -40,7 +40,7 @@ def 
_get_cgroup_configurator(self, initialize=True, enable=True, mock_commands=N CGroupConfigurator._instance = None configurator = CGroupConfigurator.get_instance() CGroupsTelemetry.reset() - with mock_cgroup_environment(self.tmp_dir) as mock_environment: + with mock_cgroup_v1_environment(self.tmp_dir) as mock_environment: if mock_commands is not None: for command in mock_commands: mock_environment.add_command(command) @@ -139,7 +139,7 @@ def test_start_extension_command_should_not_use_fallback_option_if_extension_tim with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stderr: with patch("azurelinuxagent.ga.extensionprocessutil.wait_for_process_completion_or_timeout", return_value=[True, None, 0]): - with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupsApi._is_systemd_failure", + with patch("azurelinuxagent.ga.cgroupapi._SystemdCgroupApi._is_systemd_failure", return_value=False): with self.assertRaises(ExtensionError) as context_manager: configurator.start_extension_command( diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index c257cefed0..58d58505bd 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -1955,7 +1955,7 @@ def iterator(*_, **__): with patch('azurelinuxagent.ga.remoteaccess.get_remote_access_handler'): with patch('azurelinuxagent.ga.agent_update_handler.get_agent_update_handler'): with patch('azurelinuxagent.ga.update.initialize_event_logger_vminfo_common_parameters'): - with patch('azurelinuxagent.ga.cgroupapi.CGroupsApi.cgroups_supported', return_value=False): # skip all cgroup stuff + with patch('azurelinuxagent.ga.cgroupapi.CGroupUtil.cgroups_supported', return_value=False): # skip all cgroup stuff with patch('azurelinuxagent.ga.update.is_log_collection_allowed', return_value=True): with patch('time.sleep'): with patch('sys.exit'): diff --git a/tests/lib/cgroups_tools.py b/tests/lib/cgroups_tools.py index 45b8174474..cb29ee9bfc 100644 --- a/tests/lib/cgroups_tools.py +++ b/tests/lib/cgroups_tools.py @@ 
-33,17 +33,3 @@ def create_legacy_agent_cgroup(cgroups_file_system_root, controller, daemon_pid) fileutil.append_file(os.path.join(legacy_cgroup, "cgroup.procs"), daemon_pid + "\n") return legacy_cgroup - @staticmethod - def create_agent_cgroup(cgroups_file_system_root, controller, extension_handler_pid): - """ - Previous versions of the daemon (2.2.31-2.2.40) wrote their PID to /sys/fs/cgroup/{cpu,memory}/WALinuxAgent/WALinuxAgent; - starting from version 2.2.41 we track the agent service in walinuxagent.service instead of WALinuxAgent/WALinuxAgent. - - This method creates a mock cgroup using the newer path and adds the given PID to it. - """ - new_cgroup = os.path.join(cgroups_file_system_root, controller, "walinuxagent.service") - if not os.path.exists(new_cgroup): - os.makedirs(new_cgroup) - fileutil.append_file(os.path.join(new_cgroup, "cgroup.procs"), extension_handler_pid + "\n") - return new_cgroup - diff --git a/tests/lib/mock_cgroup_environment.py b/tests/lib/mock_cgroup_environment.py index 3b51dce8fe..d9f79cb6a1 100644 --- a/tests/lib/mock_cgroup_environment.py +++ b/tests/lib/mock_cgroup_environment.py @@ -20,29 +20,11 @@ from tests.lib.tools import patch, data_dir from tests.lib.mock_environment import MockEnvironment, MockCommand -_MOCKED_COMMANDS = [ +# Mocked commands which are common between v1, v2, and hybrid cgroup environments +_MOCKED_COMMANDS_COMMON = [ MockCommand(r"^systemctl --version$", '''systemd 237 +PAM +AUDIT +SELINUX +IMA +APPARMOR +SMACK +SYSVINIT +UTMP +LIBCRYPTSETUP +GCRYPT +GNUTLS +ACL +XZ +LZ4 +SECCOMP +BLKID +ELFUTILS +KMOD -IDN2 +IDN -PCRE2 default-hierarchy=hybrid -'''), - - MockCommand(r"^mount -t cgroup$", -'''cgroup on /sys/fs/cgroup/systemd type cgroup (rw,nosuid,nodev,noexec,relatime,xattr,name=systemd) -cgroup on /sys/fs/cgroup/rdma type cgroup (rw,nosuid,nodev,noexec,relatime,rdma) -cgroup on /sys/fs/cgroup/cpuset type cgroup (rw,nosuid,nodev,noexec,relatime,cpuset) -cgroup on /sys/fs/cgroup/net_cls,net_prio type 
cgroup (rw,nosuid,nodev,noexec,relatime,net_cls,net_prio) -cgroup on /sys/fs/cgroup/perf_event type cgroup (rw,nosuid,nodev,noexec,relatime,perf_event) -cgroup on /sys/fs/cgroup/hugetlb type cgroup (rw,nosuid,nodev,noexec,relatime,hugetlb) -cgroup on /sys/fs/cgroup/freezer type cgroup (rw,nosuid,nodev,noexec,relatime,freezer) -cgroup on /sys/fs/cgroup/memory type cgroup (rw,nosuid,nodev,noexec,relatime,memory) -cgroup on /sys/fs/cgroup/pids type cgroup (rw,nosuid,nodev,noexec,relatime,pids) -cgroup on /sys/fs/cgroup/devices type cgroup (rw,nosuid,nodev,noexec,relatime,devices) -cgroup on /sys/fs/cgroup/cpu,cpuacct type cgroup (rw,nosuid,nodev,noexec,relatime,cpu,cpuacct) -cgroup on /sys/fs/cgroup/blkio type cgroup (rw,nosuid,nodev,noexec,relatime,blkio) -'''), - - MockCommand(r"^mount -t cgroup2$", -'''cgroup on /sys/fs/cgroup/unified type cgroup2 (rw,nosuid,nodev,noexec,relatime) '''), MockCommand(r"^systemctl show walinuxagent\.service --property Slice", @@ -77,10 +59,84 @@ ] -_MOCKED_FILES = [ - ("/proc/self/cgroup", os.path.join(data_dir, 'cgroups', 'proc_self_cgroup')), - (r"/proc/[0-9]+/cgroup", os.path.join(data_dir, 'cgroups', 'proc_pid_cgroup')), - ("/sys/fs/cgroup/unified/cgroup.controllers", os.path.join(data_dir, 'cgroups', 'sys_fs_cgroup_unified_cgroup.controllers')) +_MOCKED_COMMANDS_V1 = [ + MockCommand(r"^findmnt -t cgroup --noheadings$", +'''/sys/fs/cgroup/systemd cgroup cgroup rw,nosuid,nodev,noexec,relatime,xattr,name=systemd +/sys/fs/cgroup/devices cgroup cgroup rw,nosuid,nodev,noexec,relatime,devices +/sys/fs/cgroup/rdma cgroup cgroup rw,nosuid,nodev,noexec,relatime,rdma +/sys/fs/cgroup/perf_event cgroup cgroup rw,nosuid,nodev,noexec,relatime,perf_event +/sys/fs/cgroup/net_cls,net_prio cgroup cgroup rw,nosuid,nodev,noexec,relatime,net_cls,net_prio +/sys/fs/cgroup/blkio cgroup cgroup rw,nosuid,nodev,noexec,relatime,blkio +/sys/fs/cgroup/cpuset cgroup cgroup rw,nosuid,nodev,noexec,relatime,cpuset +/sys/fs/cgroup/misc cgroup cgroup 
rw,nosuid,nodev,noexec,relatime,misc +/sys/fs/cgroup/cpu,cpuacct cgroup cgroup rw,nosuid,nodev,noexec,relatime,cpu,cpuacct +/sys/fs/cgroup/memory cgroup cgroup rw,nosuid,nodev,noexec,relatime,memory +/sys/fs/cgroup/freezer cgroup cgroup rw,nosuid,nodev,noexec,relatime,freezer +/sys/fs/cgroup/hugetlb cgroup cgroup rw,nosuid,nodev,noexec,relatime,hugetlb +/sys/fs/cgroup/pids cgroup cgroup rw,nosuid,nodev,noexec,relatime,pids +'''), + + MockCommand(r"^findmnt -t cgroup2 --noheadings$", ''), + + MockCommand(r"^stat -f --format=%T /sys/fs/cgroup$", 'tmpfs'), + +] + +_MOCKED_COMMANDS_V2 = [ + MockCommand(r"^findmnt -t cgroup2 --noheadings$", +'''/sys/fs/cgroup cgroup2 cgroup2 rw,nosuid,nodev,noexec,relatime,nsdelegate,memory_recursiveprot +'''), + + MockCommand(r"^findmnt -t cgroup --noheadings$", ''), + + MockCommand(r"^stat -f --format=%T /sys/fs/cgroup$", 'cgroup2fs'), + +] + +_MOCKED_COMMANDS_HYBRID = [ + MockCommand(r"^findmnt -t cgroup --noheadings$", +'''/sys/fs/cgroup/systemd cgroup cgroup rw,nosuid,nodev,noexec,relatime,xattr,name=systemd +/sys/fs/cgroup/devices cgroup cgroup rw,nosuid,nodev,noexec,relatime,devices +/sys/fs/cgroup/rdma cgroup cgroup rw,nosuid,nodev,noexec,relatime,rdma +/sys/fs/cgroup/perf_event cgroup cgroup rw,nosuid,nodev,noexec,relatime,perf_event +/sys/fs/cgroup/net_cls,net_prio cgroup cgroup rw,nosuid,nodev,noexec,relatime,net_cls,net_prio +/sys/fs/cgroup/blkio cgroup cgroup rw,nosuid,nodev,noexec,relatime,blkio +/sys/fs/cgroup/cpuset cgroup cgroup rw,nosuid,nodev,noexec,relatime,cpuset +/sys/fs/cgroup/misc cgroup cgroup rw,nosuid,nodev,noexec,relatime,misc +/sys/fs/cgroup/cpu,cpuacct cgroup cgroup rw,nosuid,nodev,noexec,relatime,cpu,cpuacct +/sys/fs/cgroup/memory cgroup cgroup rw,nosuid,nodev,noexec,relatime,memory +/sys/fs/cgroup/freezer cgroup cgroup rw,nosuid,nodev,noexec,relatime,freezer +/sys/fs/cgroup/hugetlb cgroup cgroup rw,nosuid,nodev,noexec,relatime,hugetlb +/sys/fs/cgroup/pids cgroup cgroup rw,nosuid,nodev,noexec,relatime,pids 
+'''), + + MockCommand(r"^findmnt -t cgroup2 --noheadings$", +'''/sys/fs/cgroup/unified cgroup2 cgroup2 rw,nosuid,nodev,noexec,relatime,nsdelegate +'''), + + MockCommand(r"^stat -f --format=%T /sys/fs/cgroup$", 'tmpfs'), + + MockCommand(r"^stat -f --format=%T /sys/fs/cgroup/unified$", 'cgroup2fs'), + +] + +_MOCKED_FILES_V1 = [ + ("/proc/self/cgroup", os.path.join(data_dir, 'cgroups', 'v1', 'proc_self_cgroup')), + (r"/proc/[0-9]+/cgroup", os.path.join(data_dir, 'cgroups', 'v1', 'proc_pid_cgroup')) +] + +_MOCKED_FILES_V2 = [ + ("/proc/self/cgroup", os.path.join(data_dir, 'cgroups', 'v2', 'proc_self_cgroup')), + (r"/proc/[0-9]+/cgroup", os.path.join(data_dir, 'cgroups', 'v2', 'proc_pid_cgroup')), + ("/sys/fs/cgroup/cgroup.subtree_control", os.path.join(data_dir, 'cgroups', 'v2', 'sys_fs_cgroup_cgroup.subtree_control')), + ("/sys/fs/cgroup/azure.slice/cgroup.subtree_control", os.path.join(data_dir, 'cgroups', 'v2', 'sys_fs_cgroup_cgroup.subtree_control')), + ("/sys/fs/cgroup/azure.slice/walinuxagent.service/cgroup.subtree_control", os.path.join(data_dir, 'cgroups', 'v2', 'sys_fs_cgroup_cgroup.subtree_control_empty')) +] + +_MOCKED_FILES_HYBRID = [ + ("/proc/self/cgroup", os.path.join(data_dir, 'cgroups', 'v1', 'proc_self_cgroup')), + (r"/proc/[0-9]+/cgroup", os.path.join(data_dir, 'cgroups', 'v1', 'proc_pid_cgroup')), + ("/sys/fs/cgroup/unified/cgroup.controllers", os.path.join(data_dir, 'cgroups', 'hybrid', 'sys_fs_cgroup_cgroup.controllers')) ] _MOCKED_PATHS = [ @@ -106,18 +162,56 @@ class UnitFilePaths: @contextlib.contextmanager -def mock_cgroup_environment(tmp_dir): +def mock_cgroup_v1_environment(tmp_dir): + """ + Creates a mock environment for cgroup v1 hierarchy used by the tests related to cgroups (currently it only + provides support for systemd platforms). + The command output used in __MOCKED_COMMANDS comes from an Ubuntu 20 system. 
+ """ + data_files = [ + (os.path.join(data_dir, 'init', 'walinuxagent.service'), UnitFilePaths.walinuxagent), + (os.path.join(data_dir, 'init', 'azure.slice'), UnitFilePaths.azure), + (os.path.join(data_dir, 'init', 'azure-vmextensions.slice'), UnitFilePaths.vmextensions) + ] + + with patch('azurelinuxagent.ga.cgroupapi.CGroupUtil.cgroups_supported', return_value=True): + with patch('azurelinuxagent.common.osutil.systemd.is_systemd', return_value=True): + with MockEnvironment(tmp_dir, commands=_MOCKED_COMMANDS_COMMON + _MOCKED_COMMANDS_V1, paths=_MOCKED_PATHS, files=_MOCKED_FILES_V1, data_files=data_files) as mock: + yield mock + + +@contextlib.contextmanager +def mock_cgroup_v2_environment(tmp_dir): + """ + Creates a mock environment for cgroup v2 hierarchy used by the tests related to cgroups (currently it only + provides support for systemd platforms). + The command output used in __MOCKED_COMMANDS comes from an Ubuntu 22 system. + """ + data_files = [ + (os.path.join(data_dir, 'init', 'walinuxagent.service'), UnitFilePaths.walinuxagent), + (os.path.join(data_dir, 'init', 'azure.slice'), UnitFilePaths.azure), + (os.path.join(data_dir, 'init', 'azure-vmextensions.slice'), UnitFilePaths.vmextensions) + ] + + with patch('azurelinuxagent.ga.cgroupapi.CGroupUtil.cgroups_supported', return_value=True): + with patch('azurelinuxagent.common.osutil.systemd.is_systemd', return_value=True): + with MockEnvironment(tmp_dir, commands=_MOCKED_COMMANDS_COMMON + _MOCKED_COMMANDS_V2, paths=_MOCKED_PATHS, files=_MOCKED_FILES_V2, data_files=data_files) as mock: + yield mock + + +@contextlib.contextmanager +def mock_cgroup_hybrid_environment(tmp_dir): + """ + Creates a mock environment for cgroup hybrid hierarchy used by the tests related to cgroups (currently it only + provides support for systemd platforms). """ - Creates a mocks environment used by the tests related to cgroups (currently it only provides support for systemd platforms). 
- The command output used in __MOCKED_COMMANDS comes from an Ubuntu 18 system. - """ data_files = [ (os.path.join(data_dir, 'init', 'walinuxagent.service'), UnitFilePaths.walinuxagent), (os.path.join(data_dir, 'init', 'azure.slice'), UnitFilePaths.azure), (os.path.join(data_dir, 'init', 'azure-vmextensions.slice'), UnitFilePaths.vmextensions) ] - with patch('azurelinuxagent.ga.cgroupapi.CGroupsApi.cgroups_supported', return_value=True): + with patch('azurelinuxagent.ga.cgroupapi.CGroupUtil.cgroups_supported', return_value=True): with patch('azurelinuxagent.common.osutil.systemd.is_systemd', return_value=True): - with MockEnvironment(tmp_dir, commands=_MOCKED_COMMANDS, paths=_MOCKED_PATHS, files=_MOCKED_FILES, data_files=data_files) as mock: + with MockEnvironment(tmp_dir, commands=_MOCKED_COMMANDS_COMMON + _MOCKED_COMMANDS_HYBRID, paths=_MOCKED_PATHS, files=_MOCKED_FILES_HYBRID, data_files=data_files) as mock: yield mock diff --git a/tests/test_agent.py b/tests/test_agent.py index cbf223aa52..4b643ca36f 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -21,10 +21,12 @@ from azurelinuxagent.agent import parse_args, Agent, usage, AgentCommands from azurelinuxagent.common import conf +from azurelinuxagent.common.exception import CGroupsException from azurelinuxagent.ga import logcollector, cgroupconfigurator -from azurelinuxagent.ga.cgroupapi import SystemdCgroupsApi from azurelinuxagent.common.utils import fileutil +from azurelinuxagent.ga.cgroupapi import get_cgroup_api, InvalidCgroupMountpointException from azurelinuxagent.ga.collect_logs import CollectLogsHandler +from tests.lib.mock_cgroup_environment import mock_cgroup_v1_environment from tests.lib.tools import AgentTestCase, data_dir, Mock, patch EXPECTED_CONFIGURATION = \ @@ -240,46 +242,131 @@ def test_calls_collect_logs_with_proper_mode(self, mock_log_collector, *args): self.assertFalse(full_mode) @patch("azurelinuxagent.agent.LogCollector") - def test_calls_collect_logs_on_valid_cgroups(self, 
mock_log_collector): + def test_calls_collect_logs_on_valid_cgroups_v1(self, mock_log_collector): try: CollectLogsHandler.enable_monitor_cgroups_check() mock_log_collector.run = Mock() + # Mock cgroup paths so process is in the log collector slice def mock_cgroup_paths(*args, **kwargs): if args and args[0] == "self": relative_path = "{0}/{1}".format(cgroupconfigurator.LOGCOLLECTOR_SLICE, logcollector.CGROUPS_UNIT) - return (cgroupconfigurator.LOGCOLLECTOR_SLICE, relative_path) - return SystemdCgroupsApi.get_process_cgroup_relative_paths(*args, **kwargs) + return (relative_path, relative_path) + return get_cgroup_api().get_process_cgroup_relative_paths(*args, **kwargs) - with patch("azurelinuxagent.agent.SystemdCgroupsApi.get_process_cgroup_paths", side_effect=mock_cgroup_paths): + with mock_cgroup_v1_environment(self.tmp_dir): + with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup_paths", + side_effect=mock_cgroup_paths): + agent = Agent(False, conf_file_path=os.path.join(data_dir, "test_waagent.conf")) + agent.collect_logs(is_full_mode=True) + + mock_log_collector.assert_called_once() + + finally: + CollectLogsHandler.disable_monitor_cgroups_check() + + @patch("azurelinuxagent.agent.LogCollector") + def test_doesnt_call_collect_logs_when_cgroup_api_cannot_be_determined(self, mock_log_collector): + try: + CollectLogsHandler.enable_monitor_cgroups_check() + mock_log_collector.run = Mock() + + # Mock cgroup api to raise CGroupsException + def mock_get_cgroup_api(): + raise CGroupsException("") + + def raise_on_sys_exit(*args): + raise RuntimeError(args[0] if args else "Exiting") + + with patch("azurelinuxagent.agent.get_cgroup_api", side_effect=mock_get_cgroup_api): agent = Agent(False, conf_file_path=os.path.join(data_dir, "test_waagent.conf")) - agent.collect_logs(is_full_mode=True) - - mock_log_collector.assert_called_once() + + with patch("sys.exit", side_effect=raise_on_sys_exit) as mock_exit: + try: + 
agent.collect_logs(is_full_mode=True) + except RuntimeError as re: + self.assertEqual(logcollector.INVALID_CGROUPS_ERRCODE, re.args[0]) + mock_exit.assert_called_once_with(logcollector.INVALID_CGROUPS_ERRCODE) finally: CollectLogsHandler.disable_monitor_cgroups_check() @patch("azurelinuxagent.agent.LogCollector") - def test_doesnt_call_collect_logs_on_invalid_cgroups(self, mock_log_collector): + def test_doesnt_call_collect_logs_on_invalid_cgroups_v1(self, mock_log_collector): try: CollectLogsHandler.enable_monitor_cgroups_check() mock_log_collector.run = Mock() + # Mock cgroup paths so process is in incorrect slice def mock_cgroup_paths(*args, **kwargs): if args and args[0] == "self": return ("NOT_THE_CORRECT_PATH", "NOT_THE_CORRECT_PATH") - return SystemdCgroupsApi.get_process_cgroup_relative_paths(*args, **kwargs) + return get_cgroup_api().get_process_cgroup_relative_paths(*args, **kwargs) + + def raise_on_sys_exit(*args): + raise RuntimeError(args[0] if args else "Exiting") + + with mock_cgroup_v1_environment(self.tmp_dir): + with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup_paths", side_effect=mock_cgroup_paths): + agent = Agent(False, conf_file_path=os.path.join(data_dir, "test_waagent.conf")) - with patch("azurelinuxagent.agent.SystemdCgroupsApi.get_process_cgroup_paths", side_effect=mock_cgroup_paths): + with patch("sys.exit", side_effect=raise_on_sys_exit) as mock_exit: + try: + agent.collect_logs(is_full_mode=True) + except RuntimeError as re: + self.assertEqual(logcollector.INVALID_CGROUPS_ERRCODE, re.args[0]) + mock_exit.assert_called_once_with(logcollector.INVALID_CGROUPS_ERRCODE) + finally: + CollectLogsHandler.disable_monitor_cgroups_check() + + @patch('azurelinuxagent.agent.get_cgroup_api', side_effect=InvalidCgroupMountpointException("Test")) + @patch("azurelinuxagent.agent.LogCollector") + def test_doesnt_call_collect_logs_on_non_systemd_cgroups_v1_mountpoints(self, mock_log_collector, _): + try: + 
CollectLogsHandler.enable_monitor_cgroups_check() + mock_log_collector.run = Mock() + + def raise_on_sys_exit(*args): + raise RuntimeError(args[0] if args else "Exiting") + + with mock_cgroup_v1_environment(self.tmp_dir): agent = Agent(False, conf_file_path=os.path.join(data_dir, "test_waagent.conf")) - exit_error = RuntimeError("Exiting") - with patch("sys.exit", return_value=exit_error) as mock_exit: + with patch("sys.exit", side_effect=raise_on_sys_exit) as mock_exit: try: agent.collect_logs(is_full_mode=True) except RuntimeError as re: + self.assertEqual(logcollector.INVALID_CGROUPS_ERRCODE, re.args[0]) + mock_exit.assert_called_once_with(logcollector.INVALID_CGROUPS_ERRCODE) + finally: + CollectLogsHandler.disable_monitor_cgroups_check() + + @patch("azurelinuxagent.agent.LogCollector") + def test_doesnt_call_collect_logs_if_either_controller_not_mounted(self, mock_log_collector): + try: + CollectLogsHandler.enable_monitor_cgroups_check() + mock_log_collector.run = Mock() + + # Mock cgroup paths so process is in the log collector slice and cpu is not mounted + def mock_cgroup_paths(*args, **kwargs): + if args and args[0] == "self": + relative_path = "{0}/{1}".format(cgroupconfigurator.LOGCOLLECTOR_SLICE, logcollector.CGROUPS_UNIT) + return (None, relative_path) + return get_cgroup_api().get_process_cgroup_relative_paths(*args, **kwargs) + + def raise_on_sys_exit(*args): + raise RuntimeError(args[0] if args else "Exiting") + + with mock_cgroup_v1_environment(self.tmp_dir): + with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup_paths", + side_effect=mock_cgroup_paths): + agent = Agent(False, conf_file_path=os.path.join(data_dir, "test_waagent.conf")) + + with patch("sys.exit", side_effect=raise_on_sys_exit) as mock_exit: + try: + agent.collect_logs(is_full_mode=True) + except RuntimeError as re: + self.assertEqual(logcollector.INVALID_CGROUPS_ERRCODE, re.args[0]) mock_exit.assert_called_once_with(logcollector.INVALID_CGROUPS_ERRCODE) - 
self.assertEqual(exit_error, re) finally: CollectLogsHandler.disable_monitor_cgroups_check() diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index 9365e46162..dfed709afe 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -53,6 +53,7 @@ variable: - no_outbound_connections - publish_hostname - recover_network_interface + - cgroup_v2_disabled # # Additional arguments pass to the test suites diff --git a/tests_e2e/test_suites/cgroup_v2_disabled.yml b/tests_e2e/test_suites/cgroup_v2_disabled.yml new file mode 100644 index 0000000000..cf25ecdcfc --- /dev/null +++ b/tests_e2e/test_suites/cgroup_v2_disabled.yml @@ -0,0 +1,10 @@ +# +# The test suite verifies that the agent does not enable resource enforcement and monitoring on machines which are +# using cgroup v2. This suite will be removed once cgroup v2 is supported. +# +name: "Cgroupv2Disabled" +tests: + - "cgroup_v2_disabled/cgroup_v2_disabled.py" +images: + - "ubuntu_2204" + - "ubuntu_2404" \ No newline at end of file diff --git a/tests_e2e/tests/cgroup_v2_disabled/cgroup_v2_disabled.py b/tests_e2e/tests/cgroup_v2_disabled/cgroup_v2_disabled.py new file mode 100644 index 0000000000..7ab0ca0ff8 --- /dev/null +++ b/tests_e2e/tests/cgroup_v2_disabled/cgroup_v2_disabled.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import time + +from assertpy import fail + +from tests_e2e.tests.lib.agent_test import AgentVmTest +from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.shell import CommandError +from tests_e2e.tests.lib.ssh_client import SshClient + + +class Cgroupv2Disabled(AgentVmTest): + """ + The test verifies that the agent does not enable resource enforcement and monitoring on machines which are using + cgroup v2. It also checks that the agent correctly determined the controller mount points. This test will be + removed once cgroup v2 is supported. + """ + + def __init__(self, context: AgentVmTestContext): + super().__init__(context) + self._ssh_client: SshClient = self._context.create_ssh_client() + + def check_agent_log_contains(self, data, assertion): + try: + self._ssh_client.run_command("grep \"{0}\" /var/log/waagent.log".format(data)) + except CommandError: + fail("{0}".format(assertion)) + + def run(self): + # Cgroup configurator is initialized when agent is started, and before the goal state processing period is + # logged. Wait until the agent logs the goal state period before checking for cgroup initialization logs. 
+ log.info("Wait for cgroup configurator to be initialized...") + for _ in range(15): + try: + self._ssh_client.run_command("grep 'Goal State Period:' /var/log/waagent.log") + break + except CommandError: + log.info("The Agent has not initialized cgroups yet, will check again after a short delay") + time.sleep(60) + else: + raise Exception("Timeout while waiting for the Agent to initialize cgroups") + + # Verify that the agent chose v2 for resource enforcement and monitoring + log.info("") + log.info("Checking that the agent chose cgroup v2 api for resource enforcement and monitoring...") + self.check_agent_log_contains('Using cgroup v2 for resource enforcement and monitoring', 'The agent should choose v2 for api resource enforcement and monitoring') + + # Verify that the agent does not support cgroup v2 + log.info("") + log.info("Checking that the agent does not use cgroup v2 for resource enforcement and monitoring...") + self.check_agent_log_contains('Agent and extensions resource monitoring is not currently supported on cgroup v2', + 'The agent should not attempt to use cgroup v2 for resource enforcement and monitoring') + self.check_agent_log_contains('Agent cgroups enabled: False', + 'The agent should not enable cgroups when system is using v2') + + +if __name__ == "__main__": + Cgroupv2Disabled.run_from_command_line() diff --git a/tests_e2e/tests/lib/cgroup_helpers.py b/tests_e2e/tests/lib/cgroup_helpers.py index ef49284e15..1fe21c329a 100644 --- a/tests_e2e/tests/lib/cgroup_helpers.py +++ b/tests_e2e/tests/lib/cgroup_helpers.py @@ -7,7 +7,7 @@ from azurelinuxagent.common.osutil import systemd from azurelinuxagent.common.utils import shellutil from azurelinuxagent.common.version import DISTRO_NAME, DISTRO_VERSION -from azurelinuxagent.ga.cgroupapi import SystemdCgroupsApi +from azurelinuxagent.ga.cgroupapi import get_cgroup_api from tests_e2e.tests.lib.agent_log import AgentLog from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.retry import 
retry_if_false @@ -168,5 +168,5 @@ def get_unit_cgroup_paths(unit_name): """ Returns the cgroup paths for the given unit """ - cgroups_api = SystemdCgroupsApi() + cgroups_api = get_cgroup_api() return cgroups_api.get_unit_cgroup_paths(unit_name) From eb42b8f8a73ac3fe63d7d0c1f823e9e54c427b45 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Mon, 8 Apr 2024 13:43:32 -0700 Subject: [PATCH 196/240] Increase timeout for agent to start and provisioning to complete (#3105) --- tests_e2e/orchestrator/scripts/install-agent | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_e2e/orchestrator/scripts/install-agent b/tests_e2e/orchestrator/scripts/install-agent index 240be052ad..cf5a9e8106 100755 --- a/tests_e2e/orchestrator/scripts/install-agent +++ b/tests_e2e/orchestrator/scripts/install-agent @@ -87,7 +87,7 @@ echo "Service Status:" # We need to wait for the provisioning code to complete before stopping the agent's service to do the test setup started=false -for i in {1..6} +for i in {1..12} do if [[ -f /var/lib/waagent/provisioned ]]; then started=true From 5373e9de8cdddf64e2174494f41c849e82d77f3e Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Mon, 8 Apr 2024 13:54:29 -0700 Subject: [PATCH 197/240] Keep whole goal state in log (#3104) --- tests_e2e/tests/lib/virtual_machine_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_e2e/tests/lib/virtual_machine_client.py b/tests_e2e/tests/lib/virtual_machine_client.py index 5d6e471b9c..c4181be5a2 100644 --- a/tests_e2e/tests/lib/virtual_machine_client.py +++ b/tests_e2e/tests/lib/virtual_machine_client.py @@ -171,7 +171,7 @@ def restart( instance_view = self.get_instance_view() power_state = [s.code for s in instance_view.statuses if "PowerState" in s.code] if len(power_state) != 1: - raise Exception(f"Could not find PowerState in the instance view 
statuses:\n{json.dumps(instance_view.statuses)}") + raise Exception(f"Could not find PowerState in the instance view statuses:\n{json.dumps(instance_view.serialize(), indent=2)}") log.info("VM's Power State: %s", power_state[0]) if power_state[0] == "PowerState/running": # We may get an instance view captured before the reboot actually happened; verify From add195ca7474a802d29f53cee0e6a737aa9cf03f Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Tue, 9 Apr 2024 13:23:29 -0700 Subject: [PATCH 198/240] Log cgroup if process found in unexpected slice (#3107) --- azurelinuxagent/agent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azurelinuxagent/agent.py b/azurelinuxagent/agent.py index 50735b54e4..c0ebdbb42e 100644 --- a/azurelinuxagent/agent.py +++ b/azurelinuxagent/agent.py @@ -231,9 +231,9 @@ def collect_logs(self, is_full_mode): if not cpu_slice_matches or not memory_slice_matches: log_cgroup_warning("The Log Collector process is not in the proper cgroups:", send_event=False) if not cpu_slice_matches: - log_cgroup_warning("\tunexpected cpu slice", send_event=False) + log_cgroup_warning("\tunexpected cpu slice: {0}".format(cpu_cgroup_path), send_event=False) if not memory_slice_matches: - log_cgroup_warning("\tunexpected memory slice", send_event=False) + log_cgroup_warning("\tunexpected memory slice: {0}".format(memory_cgroup_path), send_event=False) sys.exit(logcollector.INVALID_CGROUPS_ERRCODE) From b986425c23373333a9e675ffc7399ef63508ea1f Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Wed, 10 Apr 2024 10:41:13 -0700 Subject: [PATCH 199/240] Allow retries for ifdown and add comments (#3106) --- .../recover_network_interface.py | 48 +++++++++++++++---- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/tests_e2e/tests/recover_network_interface/recover_network_interface.py 
b/tests_e2e/tests/recover_network_interface/recover_network_interface.py index 8ea8f8ea12..2d03077caf 100644 --- a/tests_e2e/tests/recover_network_interface/recover_network_interface.py +++ b/tests_e2e/tests/recover_network_interface/recover_network_interface.py @@ -91,14 +91,46 @@ def run(self): # The script should bring the primary network interface down and use the agent to recover the interface. These # commands will bring the network down, so they should be executed on the machine using CSE instead of ssh. script = f""" - set -euxo pipefail - ifdown {ifname}; - nic_state=$(nmcli -g general.state device show {ifname}) - echo Primary network interface state before recovering: $nic_state - source /home/{self._context.username}/bin/set-agent-env; - pypy3 -c 'from azurelinuxagent.common.osutil.redhat import RedhatOSUtil; RedhatOSUtil().check_and_recover_nic_state({formatted_ifname})'; - nic_state=$(nmcli -g general.state device show {ifname}); - echo Primary network interface state after recovering: $nic_state + set -uxo pipefail + + # The 'ifdown' network script is used to bring the network interface down. For some distros, this script + # executes nmcli commands which can timeout and return non-zero exit codes. Allow 3 retries in case 'ifdown' + # returns non-zero exit code. This is the same number of retries the agent allows in DefaultOSUtil.restart_if + retries=3; + ifdown_success=false + while [ $retries -gt 0 ] + do + echo Attempting to bring network interface down with ifdown... + ifdown {ifname}; + exit_code=$? + if [ $exit_code -eq 0 ]; then + echo ifdown succeeded + ifdown_success=true + break + fi + echo ifdown failed with exit code $exit_code, try again after 5 seconds... + sleep 5 + ((retries=retries-1)) + done + + # Verify the agent network interface recovery logic only if 'ifdown' succeeded + if ! 
$ifdown_success ; then + # Fail the script if 'ifdown' command didn't succeed + exit 1 + else + # Log the network interface state before attempting to recover the interface + nic_state=$(nmcli -g general.state device show {ifname}) + echo Primary network interface state before recovering: $nic_state + + # Use the agent OSUtil to bring the network interface back up + source /home/{self._context.username}/bin/set-agent-env; + echo Attempting to recover the network interface with the agent... + pypy3 -c 'from azurelinuxagent.common.osutil.redhat import RedhatOSUtil; RedhatOSUtil().check_and_recover_nic_state({formatted_ifname})'; + + # Log the network interface state after attempting to recover the interface + nic_state=$(nmcli -g general.state device show {ifname}); + echo Primary network interface state after recovering: $nic_state + fi """ log.info("") log.info("Using CSE to bring the primary network interface down and call the OSUtil to bring the interface back up. Command to execute: {0}".format(script)) From 569e997c035eeb24e3c50934e1e4e52a575029c3 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Fri, 26 Apr 2024 14:24:13 -0700 Subject: [PATCH 200/240] Collect telemetry for firewall settings changed (#3110) (#3112) Co-authored-by: narrieta (cherry picked from commit 468cf813af188854dd8f28d20fa4591bd8ed35ff) --- azurelinuxagent/common/event.py | 1 + azurelinuxagent/ga/env.py | 25 ++++++++++++++++++++----- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/azurelinuxagent/common/event.py b/azurelinuxagent/common/event.py index dba95dd9de..830dd6fc9a 100644 --- a/azurelinuxagent/common/event.py +++ b/azurelinuxagent/common/event.py @@ -121,6 +121,7 @@ class WALAEventOperation: ReportEventUnicodeErrors = "ReportEventUnicodeErrors" ReportStatus = "ReportStatus" ReportStatusExtended = "ReportStatusExtended" + ResetFirewall = "ResetFirewall" Restart = "Restart" SequenceNumberMismatch = "SequenceNumberMismatch" SetCGroupsLimits = "SetCGroupsLimits" diff 
--git a/azurelinuxagent/ga/env.py b/azurelinuxagent/ga/env.py index fa02b64ae1..f03bdb2063 100644 --- a/azurelinuxagent/ga/env.py +++ b/azurelinuxagent/ga/env.py @@ -104,6 +104,7 @@ def __init__(self, osutil, protocol): self._osutil = osutil self._protocol = protocol self._try_remove_legacy_firewall_rule = False + self._is_first_setup = True def _operation(self): # If the rules ever change we must reset all rules and start over again. @@ -117,13 +118,19 @@ def _operation(self): self._osutil.remove_legacy_firewall_rule(dst_ip=self._protocol.get_endpoint()) self._try_remove_legacy_firewall_rule = True - success, is_firewall_rules_updated = self._osutil.enable_firewall(dst_ip=self._protocol.get_endpoint(), - uid=os.getuid()) + firewall_state = self._get_firewall_state() + + success, is_firewall_rules_updated = self._osutil.enable_firewall(dst_ip=self._protocol.get_endpoint(), uid=os.getuid()) if is_firewall_rules_updated: - msg = "Successfully added Azure fabric firewall rules. Current Firewall rules:\n{0}".format(self._osutil.get_firewall_list()) - logger.info(msg) - add_event(AGENT_NAME, version=CURRENT_VERSION, op=WALAEventOperation.Firewall, message=msg, log_event=False) + if self._is_first_setup: + msg = "Created Azure fabric firewall rules:\n{0}".format(self._get_firewall_state()) + logger.info(msg) + add_event(op=WALAEventOperation.Firewall, message=msg) + else: + msg = "Reset Azure fabric firewall rules.\nInitial state:\n{0}\nCurrent state:\n{1}".format(firewall_state, self._get_firewall_state()) + logger.info(msg) + add_event(op=WALAEventOperation.ResetFirewall, message=msg) add_periodic( logger.EVERY_HOUR, @@ -133,6 +140,14 @@ def _operation(self): is_success=success, log_event=False) + self._is_first_setup = False + + def _get_firewall_state(self): + try: + return self._osutil.get_firewall_list() + except Exception as e: + return "Failed to get the firewall state: {0}".format(ustr(e)) + class LogFirewallRules(PeriodicOperation): """ From 
7adb8d42d768743efdf01f139127176b6fac12ee Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Mon, 6 May 2024 09:55:42 -0700 Subject: [PATCH 201/240] Update agent_publish test to check for new agent update pattern (#3114) (#3119) * Add new agent update pattern * Use record message * Need to update log record timestamp (cherry picked from commit 1d91c149d2ffd57f70f89daf930e5e379ec3c5af) --- .../agent_publish-get_agent_log_record_timestamp.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests_e2e/tests/scripts/agent_publish-get_agent_log_record_timestamp.py b/tests_e2e/tests/scripts/agent_publish-get_agent_log_record_timestamp.py index d055fc6c25..4fbe2365b8 100755 --- a/tests_e2e/tests/scripts/agent_publish-get_agent_log_record_timestamp.py +++ b/tests_e2e/tests/scripts/agent_publish-get_agent_log_record_timestamp.py @@ -48,6 +48,11 @@ _UPDATE_PATTERN_03 = re.compile( r'(.*Agent) update found, exiting current process to (\S*) to the new Agent version (\S*)') +""" +Current Agent 2.8.9.9 completed all update checks, exiting current process to upgrade to the new Agent version 2.10.0.7 +('2.8.9.9', 'upgrade', '2.10.0.7') +""" +_UPDATE_PATTERN_04 = re.compile(r'Current Agent (\S*) completed all update checks, exiting current process to (\S*) to the new Agent version (\S*)') """ This script return timestamp of update message in the agent log @@ -60,8 +65,8 @@ def main(): for record in agentlog.read(): - for p in [_UPDATE_PATTERN_00, _UPDATE_PATTERN_01, _UPDATE_PATTERN_02, _UPDATE_PATTERN_03]: - update_match = re.match(p, record.text) + for p in [_UPDATE_PATTERN_00, _UPDATE_PATTERN_01, _UPDATE_PATTERN_02, _UPDATE_PATTERN_03, _UPDATE_PATTERN_04]: + update_match = re.match(p, record.message) if update_match: return record.timestamp From 6e5cde0144e573ac0cb1448b6dbf78cf608fd625 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 7 May 2024 14:31:08 
-0700 Subject: [PATCH 202/240] remove secret and use cert for aad app in e2e pipeline (#3116) * remove secret and use cert * address comment --- tests_e2e/pipeline/pipeline-cleanup.yml | 6 ++--- tests_e2e/pipeline/pipeline.yml | 25 +++++++++++++++------ tests_e2e/pipeline/scripts/execute_tests.sh | 6 ++++- 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/tests_e2e/pipeline/pipeline-cleanup.yml b/tests_e2e/pipeline/pipeline-cleanup.yml index d8d894612f..c673f0378d 100644 --- a/tests_e2e/pipeline/pipeline-cleanup.yml +++ b/tests_e2e/pipeline/pipeline-cleanup.yml @@ -18,9 +18,9 @@ parameters: - name: service_connections type: object default: - - azuremanagement - - azuremanagement.china - - azuremanagement.government + - waagenttests.public + - waagenttests.china + - waagenttests.gov pool: name: waagent-pool diff --git a/tests_e2e/pipeline/pipeline.yml b/tests_e2e/pipeline/pipeline.yml index 8a7971456d..bccee67124 100644 --- a/tests_e2e/pipeline/pipeline.yml +++ b/tests_e2e/pipeline/pipeline.yml @@ -1,7 +1,7 @@ # variables: # # NOTE: When creating the pipeline, "connection_info" must be added as a variable pointing to the - # corresponding key vault; see wiki for details. + # cloud specific service connection; see wiki for details. # parameters: @@ -78,16 +78,17 @@ jobs: architecture: 'x64' # Extract the Azure cloud from the "connection_info" variable. Its value includes one of - # 'public', 'china', or 'government' as a suffix (the suffix comes after the last '-'). + # 'public', 'china', or 'gov' as a suffix (the suffix comes after the '.'). 
- bash: | - case $(echo $CONNECTION_INFO | sed 's/^.*-//') in + case $(echo $CONNECTION_INFO | sed 's/.*\.//') in public) echo "##vso[task.setvariable variable=cloud]AzureCloud" ;; china) echo "##vso[task.setvariable variable=cloud]AzureChinaCloud" + ;; - government) + gov) echo "##vso[task.setvariable variable=cloud]AzureUSGovernment" ;; *) @@ -106,17 +107,27 @@ jobs: - task: AzureKeyVault@2 displayName: "Fetch connection info" inputs: - azureSubscription: 'azuremanagement' - KeyVaultName: '$(connection_info)' + azureSubscription: $(connection_info) + KeyVaultName: 'waagenttests' SecretsFilter: '*' + - task: AzureCLI@2 + displayName: "Download connection certificate" + inputs: + azureSubscription: $(connection_info) + scriptType: bash + scriptLocation: inlineScript + inlineScript: | + # This temporary directory removed after the pipeline execution + mkdir -p $(Agent.TempDirectory)/app + az keyvault secret download --file $(Agent.TempDirectory)/app/cert.pem --vault-name waagenttests --name AZURE-CLIENT-CERTIFICATE + - bash: $(Build.SourcesDirectory)/tests_e2e/pipeline/scripts/execute_tests.sh displayName: "Execute tests" continueOnError: true env: SUBSCRIPTION_ID: $(SUBSCRIPTION-ID) AZURE_CLIENT_ID: $(AZURE-CLIENT-ID) - AZURE_CLIENT_SECRET: $(AZURE-CLIENT-SECRET) AZURE_TENANT_ID: $(AZURE-TENANT-ID) CR_USER: $(CR-USER) CR_SECRET: $(CR-SECRET) diff --git a/tests_e2e/pipeline/scripts/execute_tests.sh b/tests_e2e/pipeline/scripts/execute_tests.sh index d2d2f874c4..37716ec493 100755 --- a/tests_e2e/pipeline/scripts/execute_tests.sh +++ b/tests_e2e/pipeline/scripts/execute_tests.sh @@ -72,13 +72,17 @@ fi # IP_ADDRESS=$(curl -4 ifconfig.io/ip) +# certificate location in the container +AZURE_CLIENT_CERTIFICATE_PATH="/home/waagent/app/cert.pem" + docker run --rm \ --volume "$BUILD_SOURCESDIRECTORY:/home/waagent/WALinuxAgent" \ --volume "$AGENT_TEMPDIRECTORY"/ssh:/home/waagent/.ssh \ + --volume "$AGENT_TEMPDIRECTORY"/app:/home/waagent/app \ --volume 
"$LOGS_DIRECTORY":/home/waagent/logs \ --env AZURE_CLIENT_ID \ - --env AZURE_CLIENT_SECRET \ --env AZURE_TENANT_ID \ + --env AZURE_CLIENT_CERTIFICATE_PATH=$AZURE_CLIENT_CERTIFICATE_PATH \ waagenttests.azurecr.io/waagenttests \ bash --login -c \ "lisa \ From eb503d4879f0b0a09fee1ac9866285464513ed71 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 7 May 2024 14:43:06 -0700 Subject: [PATCH 203/240] wait for rg creation in e2e tests (#3117) * wait for rg creation * update param * check for rg existance * input rg name --- tests_e2e/orchestrator/lib/agent_test_suite.py | 5 +++++ tests_e2e/tests/lib/resource_group_client.py | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite.py b/tests_e2e/orchestrator/lib/agent_test_suite.py index 799ea441bf..b29c16b3c6 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite.py @@ -46,6 +46,7 @@ import makepkg from azurelinuxagent.common.version import AGENT_VERSION +from tests_e2e.tests.lib.retry import retry_if_false from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient from tests_e2e.tests.lib.virtual_machine_scale_set_client import VirtualMachineScaleSetClient @@ -913,6 +914,10 @@ def _create_test_scale_set(self) -> None: self._lisa_log.info("Creating resource group %s", self._resource_group_name) resource_group = ResourceGroupClient(cloud=self._cloud, location=self._location, subscription=self._subscription_id, name=self._resource_group_name) resource_group.create() + exist = retry_if_false(resource_group.is_exists) + if not exist: + self._lisa_log.error("Failed to create resource group %s", self._resource_group_name) + raise Exception("Failed to create resource group: {0}".format(self._resource_group_name)) self._delete_scale_set = True self._lisa_log.info("Creating scale set %s", self._vmss_name) diff --git 
a/tests_e2e/tests/lib/resource_group_client.py b/tests_e2e/tests/lib/resource_group_client.py index 9ca07a2602..30f82ccec2 100644 --- a/tests_e2e/tests/lib/resource_group_client.py +++ b/tests_e2e/tests/lib/resource_group_client.py @@ -70,5 +70,11 @@ def delete(self) -> None: log.info("Deleting resource group %s (no wait)", self) self._resource_client.resource_groups.begin_delete(self.name) # Do not wait for the deletion to complete + def is_exists(self) -> bool: + """ + Checks if the resource group exists + """ + return self._resource_client.resource_groups.check_existence(self.name) + def __str__(self): return f"{self.name}" From 25475d46386e5e95cfb28363d97b948690de6607 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Fri, 10 May 2024 16:03:38 -0700 Subject: [PATCH 204/240] Reduce the frequency of firewall telemetry (#3124) (#3127) * Reduce the frequency of firewall telemetry * python 2: timespan.total_seconds() does not exist * fix unit test --------- Co-authored-by: narrieta (cherry picked from commit 5302651b6608d315f46085c19fbc3693e89724dc) --- azurelinuxagent/common/osutil/default.py | 18 +++++------ azurelinuxagent/common/utils/networkutil.py | 18 ++++++++--- azurelinuxagent/ga/env.py | 34 +++++++++++++++------ tests/common/osutil/test_default.py | 8 ++--- 4 files changed, 51 insertions(+), 27 deletions(-) diff --git a/azurelinuxagent/common/osutil/default.py b/azurelinuxagent/common/osutil/default.py index 3b9c504e76..0a0fd0e1cd 100644 --- a/azurelinuxagent/common/osutil/default.py +++ b/azurelinuxagent/common/osutil/default.py @@ -277,16 +277,16 @@ def remove_legacy_firewall_rule(self, dst_ip): def enable_firewall(self, dst_ip, uid): """ - It checks if every iptable rule exists and add them if not present. It returns a tuple(enable firewall success status, update rules flag) + It checks if every iptable rule exists and add them if not present. 
It returns a tuple(enable firewall success status, missing rules array) enable firewall success status: Returns True if every firewall rule exists otherwise False - update rules flag: Returns True if rules are updated otherwise False + missing rules: array with names of the missing rules ("ACCEPT DNS", "ACCEPT", "DROP") """ - # This is to send telemetry when iptable rules updated - is_firewall_rules_updated = False # If a previous attempt failed, do not retry global _enable_firewall # pylint: disable=W0603 if not _enable_firewall: - return False, is_firewall_rules_updated + return False, [] + + missing_rules = [] try: wait = self.get_firewall_will_wait() @@ -294,10 +294,10 @@ def enable_firewall(self, dst_ip, uid): # check every iptable rule and delete others if any rule is missing # and append every iptable rule to the end of the chain. try: - if not AddFirewallRules.verify_iptables_rules_exist(wait, dst_ip, uid): + missing_rules.extend(AddFirewallRules.get_missing_iptables_rules(wait, dst_ip, uid)) + if len(missing_rules) > 0: self.remove_firewall(dst_ip, uid, wait) AddFirewallRules.add_iptables_rules(wait, dst_ip, uid) - is_firewall_rules_updated = True except CommandError as e: if e.returncode == 2: self.remove_firewall(dst_ip, uid, wait) @@ -308,14 +308,14 @@ def enable_firewall(self, dst_ip, uid): logger.warn(ustr(error)) raise - return True, is_firewall_rules_updated + return True, missing_rules except Exception as e: _enable_firewall = False logger.info("Unable to establish firewall -- " "no further attempts will be made: " "{0}".format(ustr(e))) - return False, is_firewall_rules_updated + return False, missing_rules def get_firewall_list(self, wait=None): try: diff --git a/azurelinuxagent/common/utils/networkutil.py b/azurelinuxagent/common/utils/networkutil.py index 160f17514b..7f69801441 100644 --- a/azurelinuxagent/common/utils/networkutil.py +++ b/azurelinuxagent/common/utils/networkutil.py @@ -217,14 +217,22 @@ def __execute_check_command(cmd): return 
False @staticmethod - def verify_iptables_rules_exist(wait, dst_ip, uid): + def get_missing_iptables_rules(wait, dst_ip, uid): + missing = [] + check_cmd_tcp_rule = AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.CHECK_COMMAND, dst_ip, wait=wait) - check_cmd_accept_rule = AddFirewallRules.get_wire_root_accept_rule(AddFirewallRules.CHECK_COMMAND, dst_ip, uid, - wait=wait) + if not AddFirewallRules.__execute_check_command(check_cmd_tcp_rule): + missing.append("ACCEPT DNS") + + check_cmd_accept_rule = AddFirewallRules.get_wire_root_accept_rule(AddFirewallRules.CHECK_COMMAND, dst_ip, uid, wait=wait) + if not AddFirewallRules.__execute_check_command(check_cmd_accept_rule): + missing.append("ACCEPT") + check_cmd_drop_rule = AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, dst_ip, wait=wait) + if not AddFirewallRules.__execute_check_command(check_cmd_drop_rule): + missing.append("DROP") - return AddFirewallRules.__execute_check_command(check_cmd_tcp_rule) and AddFirewallRules.__execute_check_command(check_cmd_accept_rule) \ - and AddFirewallRules.__execute_check_command(check_cmd_drop_rule) + return missing @staticmethod def __execute_firewall_commands(dst_ip, uid, command=APPEND_COMMAND, firewalld_command="", wait=""): diff --git a/azurelinuxagent/ga/env.py b/azurelinuxagent/ga/env.py index f03bdb2063..3d5887e6f0 100644 --- a/azurelinuxagent/ga/env.py +++ b/azurelinuxagent/ga/env.py @@ -16,7 +16,7 @@ # # Requires Python 2.6+ and Openssl 1.0+ # - +import datetime import re import os import socket @@ -105,6 +105,9 @@ def __init__(self, osutil, protocol): self._protocol = protocol self._try_remove_legacy_firewall_rule = False self._is_first_setup = True + self._reset_count = 0 + self._report_after = datetime.datetime.min + self._report_period = None # None indicates "report immediately" def _operation(self): # If the rules ever change we must reset all rules and start over again. 
@@ -118,19 +121,32 @@ def _operation(self): self._osutil.remove_legacy_firewall_rule(dst_ip=self._protocol.get_endpoint()) self._try_remove_legacy_firewall_rule = True - firewall_state = self._get_firewall_state() - - success, is_firewall_rules_updated = self._osutil.enable_firewall(dst_ip=self._protocol.get_endpoint(), uid=os.getuid()) + success, missing_firewall_rules = self._osutil.enable_firewall(dst_ip=self._protocol.get_endpoint(), uid=os.getuid()) - if is_firewall_rules_updated: + if len(missing_firewall_rules) > 0: if self._is_first_setup: - msg = "Created Azure fabric firewall rules:\n{0}".format(self._get_firewall_state()) + msg = "Created firewall rules for the Azure Fabric:\n{0}".format(self._get_firewall_state()) logger.info(msg) add_event(op=WALAEventOperation.Firewall, message=msg) else: - msg = "Reset Azure fabric firewall rules.\nInitial state:\n{0}\nCurrent state:\n{1}".format(firewall_state, self._get_firewall_state()) - logger.info(msg) - add_event(op=WALAEventOperation.ResetFirewall, message=msg) + self._reset_count += 1 + # We report immediately (when period is None) the first 5 instances, then we switch the period to every few hours + if self._report_period is None: + msg = "Some firewall rules were missing: {0}. Re-created all the rules:\n{1}".format(missing_firewall_rules, self._get_firewall_state()) + if self._reset_count >= 5: + self._report_period = datetime.timedelta(hours=3) + self._reset_count = 0 + self._report_after = datetime.datetime.now() + self._report_period + elif datetime.datetime.now() >= self._report_after: + msg = "Some firewall rules were missing: {0}. This has happened {1} time(s) since the last report. 
Re-created all the rules:\n{2}".format( + missing_firewall_rules, self._reset_count, self._get_firewall_state()) + self._reset_count = 0 + self._report_after = datetime.datetime.now() + self._report_period + else: + msg = "" + if msg != "": + logger.info(msg) + add_event(op=WALAEventOperation.ResetFirewall, message=msg) add_periodic( logger.EVERY_HOUR, diff --git a/tests/common/osutil/test_default.py b/tests/common/osutil/test_default.py index 0814d112f1..7cb5501c1e 100644 --- a/tests/common/osutil/test_default.py +++ b/tests/common/osutil/test_default.py @@ -822,13 +822,13 @@ def test_enable_firewall_should_not_use_wait_when_iptables_does_not_support_it(s success, _ = osutil.DefaultOSUtil().enable_firewall(dst_ip=mock_iptables.destination, uid=mock_iptables.uid) self.assertTrue(success, "Enabling the firewall was not successful") - # Exactly 8 calls have to be made. - # First check rule, delete 4 rules, + # Exactly 10 calls have to be made. + # First check 3 rules, delete 4 rules, # and Append the IPTable 3 rules. 
- self.assertEqual(len(mock_iptables.command_calls), 8, + self.assertEqual(len(mock_iptables.command_calls), 10, "Incorrect number of calls to iptables: [{0}]".format(mock_iptables.command_calls)) for command in mock_iptables.command_calls: - self.assertNotIn("-w", command, "The -w option should have been used in {0}".format(command)) + self.assertNotIn("-w", command, "The -w option sh ould have been used in {0}".format(command)) self.assertTrue(osutil._enable_firewall, "The firewall should not have been disabled") From f47718e427f77fdcf1f524e95c8ae609721f4476 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Fri, 7 Jun 2024 11:20:39 -0700 Subject: [PATCH 205/240] suppress pylint warn contextmanager-generator-missing-cleanup (#3138) * suppress pylint warn * addressed comments --- .github/workflows/ci_pr.yml | 9 ++++----- tests/ga/test_multi_config_extension.py | 6 ++++-- tests/ga/test_update.py | 7 ++++--- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci_pr.yml b/.github/workflows/ci_pr.yml index f470b4428c..fd8d91a380 100644 --- a/.github/workflows/ci_pr.yml +++ b/.github/workflows/ci_pr.yml @@ -139,16 +139,15 @@ jobs: # * On 3.9 pylint crashes when parsing azurelinuxagent/daemon/main.py (see https://github.com/pylint-dev/pylint/issues/9473), so we ignore it. # * 'no-self-use' ("R0201: Method could be a function") was moved to an optional extension on 3.8 and is no longer used by default. It needs # to be suppressed for previous versions (3.0-3.7), though. - # + # * 'contextmanager-generator-missing-cleanup' are false positives if yield is used inside an if-else block for contextmanager generator functions. + # (https://pylint.readthedocs.io/en/latest/user_guide/messages/warning/contextmanager-generator-missing-cleanup.html). 
# This check is not implemented on versions (3.0-3.7), which instead report: Bad option value 'contextmanager-generator-missing-cleanup' (bad-option-value)
+ # In _setup_test_env() contextmanager, yield is used inside an if-else block and that's creating a false positive pylint warning + with self._setup_test_env() as (exthandlers_handler, protocol, no_of_extensions): # pylint: disable=contextmanager-generator-missing-cleanup yield exthandlers_handler, protocol, no_of_extensions, first_ext, second_ext, third_ext, dependent_sc_ext, independent_sc_ext def test_it_should_process_dependency_chain_extensions_properly(self): diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 58d58505bd..f06e64a902 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -1200,7 +1200,8 @@ def test_it_should_not_set_dns_tcp_iptable_if_drop_and_accept_available(self): @contextlib.contextmanager def _setup_test_for_ext_event_dirs_retention(self): try: - with _get_update_handler(test_data=DATA_FILE_MULTIPLE_EXT, autoupdate_enabled=False) as (update_handler, protocol): + # In _get_update_handler() contextmanager, yield is used inside an if-else block and that's creating a false positive pylint warning + with _get_update_handler(test_data=DATA_FILE_MULTIPLE_EXT, autoupdate_enabled=False) as (update_handler, protocol): # pylint: disable=contextmanager-generator-missing-cleanup with patch("azurelinuxagent.common.agent_supported_feature._ETPFeature.is_supported", True): update_handler.run(debug=True) expected_events_dirs = glob.glob(os.path.join(conf.get_ext_log_dir(), "*", EVENTS_DIRECTORY)) @@ -1483,8 +1484,8 @@ def __get_update_handler(self, iterations=1, test_data=None, reload_conf=None, autoupdate_frequency=0.001, hotfix_frequency=1.0, normal_frequency=2.0): test_data = DATA_FILE if test_data is None else test_data - - with _get_update_handler(iterations, test_data) as (update_handler, protocol): + # In _get_update_handler() contextmanager, yield is used inside an if-else block and that's creating a false positive pylint warning + with _get_update_handler(iterations, test_data) as (update_handler, protocol): # 
pylint: disable=contextmanager-generator-missing-cleanup protocol.aggregate_status = None From 81140ee669b39458a31cf12d1c802351bf12bfda Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Mon, 10 Jun 2024 11:14:05 -0700 Subject: [PATCH 206/240] Switching to SNI based authentication for aad app (#3137) * SNI auth * new env var * pylint --- tests_e2e/orchestrator/docker/Dockerfile | 2 +- tests_e2e/pipeline/scripts/execute_tests.sh | 3 +++ tests_e2e/tests/lib/network_security_rule.py | 8 ++++---- tests_e2e/tests/lib/update_arm_template.py | 10 +++++----- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/tests_e2e/orchestrator/docker/Dockerfile b/tests_e2e/orchestrator/docker/Dockerfile index 597e57418b..219c9b8694 100644 --- a/tests_e2e/orchestrator/docker/Dockerfile +++ b/tests_e2e/orchestrator/docker/Dockerfile @@ -67,7 +67,7 @@ RUN \ cd $HOME && \ git clone https://github.com/microsoft/lisa.git && \ cd lisa && \ - git checkout 2c16e32001fdefb9572dff61241451b648259dbf && \ + git checkout 95c09ff7d5b6e71d1642a628607ac9bb441c69f5 && \ \ python3 -m pip install --upgrade pip && \ python3 -m pip install --editable .[azure,libvirt] --config-settings editable_mode=compat && \ diff --git a/tests_e2e/pipeline/scripts/execute_tests.sh b/tests_e2e/pipeline/scripts/execute_tests.sh index 37716ec493..6c751d6a78 100755 --- a/tests_e2e/pipeline/scripts/execute_tests.sh +++ b/tests_e2e/pipeline/scripts/execute_tests.sh @@ -74,6 +74,8 @@ IP_ADDRESS=$(curl -4 ifconfig.io/ip) # certificate location in the container AZURE_CLIENT_CERTIFICATE_PATH="/home/waagent/app/cert.pem" +# Need to set this to True if we sue SNI based authentication for certificate +AZURE_CLIENT_SEND_CERTIFICATE_CHAIN="True" docker run --rm \ --volume "$BUILD_SOURCESDIRECTORY:/home/waagent/WALinuxAgent" \ @@ -83,6 +85,7 @@ docker run --rm \ --env AZURE_CLIENT_ID \ --env AZURE_TENANT_ID \ --env AZURE_CLIENT_CERTIFICATE_PATH=$AZURE_CLIENT_CERTIFICATE_PATH 
\ + --env AZURE_CLIENT_SEND_CERTIFICATE_CHAIN=$AZURE_CLIENT_SEND_CERTIFICATE_CHAIN \ waagenttests.azurecr.io/waagenttests \ bash --login -c \ "lisa \ diff --git a/tests_e2e/tests/lib/network_security_rule.py b/tests_e2e/tests/lib/network_security_rule.py index 8df51b2048..d2f67d19cb 100644 --- a/tests_e2e/tests/lib/network_security_rule.py +++ b/tests_e2e/tests/lib/network_security_rule.py @@ -17,7 +17,7 @@ import json -from typing import Any, Dict, List +from typing import Any, Dict from tests_e2e.tests.lib.update_arm_template import UpdateArmTemplate @@ -55,7 +55,7 @@ def add_security_rule(self, security_rule: Dict[str, Any]) -> None: self._get_network_security_group()["properties"]["securityRules"].append(security_rule) def _get_network_security_group(self) -> Dict[str, Any]: - resources: List[Dict[str, Any]] = self._template["resources"] + resources: Dict[str, Dict[str, Any]] = self._template["resources"] # # If the NSG already exists, just return it # @@ -76,14 +76,14 @@ def _get_network_security_group(self) -> Dict[str, Any]: "securityRules": [] }} }}""") - resources.append(network_security_group) + nsg_reference = "network_security_groups" + resources[nsg_reference] = network_security_group # # Add a dependency on the NSG to the virtual network # network_resource = UpdateArmTemplate.get_resource(resources, "Microsoft.Network/virtualNetworks") network_resource_dependencies = network_resource.get("dependsOn") - nsg_reference = f"[resourceId('Microsoft.Network/networkSecurityGroups', '{self._NETWORK_SECURITY_GROUP}')]" if network_resource_dependencies is None: network_resource["dependsOn"] = [nsg_reference] else: diff --git a/tests_e2e/tests/lib/update_arm_template.py b/tests_e2e/tests/lib/update_arm_template.py index 010178ab9c..2fc1b09805 100644 --- a/tests_e2e/tests/lib/update_arm_template.py +++ b/tests_e2e/tests/lib/update_arm_template.py @@ -16,7 +16,7 @@ # from abc import ABC, abstractmethod -from typing import Any, Dict, List +from typing import Any, 
Dict class UpdateArmTemplate(ABC): @@ -32,25 +32,25 @@ def update(self, template: Dict[str, Any], is_lisa_template: bool) -> None: """ @staticmethod - def get_resource(resources: List[Dict[str, Any]], type_name: str) -> Any: + def get_resource(resources: Dict[str, Dict[str, Any]], type_name: str) -> Any: """ Returns the first resource of the specified type in the given 'resources' list. Raises KeyError if no resource of the specified type is found. """ - for item in resources: + for item in resources.values(): if item["type"] == type_name: return item raise KeyError(f"Cannot find a resource of type {type_name} in the ARM template") @staticmethod - def get_resource_by_name(resources: List[Dict[str, Any]], resource_name: str, type_name: str) -> Any: + def get_resource_by_name(resources: Dict[str, Dict[str, Any]], resource_name: str, type_name: str) -> Any: """ Returns the first resource of the specified type and name in the given 'resources' list. Raises KeyError if no resource of the specified type and name is found. 
""" - for item in resources: + for item in resources.values(): if item["type"] == type_name and item["name"] == resource_name: return item raise KeyError(f"Cannot find a resource {resource_name} of type {type_name} in the ARM template") From 49d3ce3e85ae0773d4689f4c20088875b259e138 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Mon, 10 Jun 2024 13:20:55 -0700 Subject: [PATCH 207/240] new namespace (#3139) --- .../tests/agent_wait_for_cloud_init/add_cloud_init_script.py | 2 +- tests_e2e/tests/lib/update_arm_template.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests_e2e/tests/agent_wait_for_cloud_init/add_cloud_init_script.py b/tests_e2e/tests/agent_wait_for_cloud_init/add_cloud_init_script.py index 1fbc60adc4..14f2cdecac 100755 --- a/tests_e2e/tests/agent_wait_for_cloud_init/add_cloud_init_script.py +++ b/tests_e2e/tests/agent_wait_for_cloud_init/add_cloud_init_script.py @@ -55,7 +55,7 @@ def update(self, template: Dict[str, Any], is_lisa_template: bool) -> None: # encoded_script = base64.b64encode(AgentWaitForCloudInit.CloudInitScript.encode('utf-8')).decode('utf-8') - get_os_profile = self.get_lisa_function(template, 'getOSProfile') + get_os_profile = self.get_lisa_function(template, 'getOsProfile') output = self.get_function_output(get_os_profile) if output.get('customData') is not None: raise Exception(f"The getOSProfile function already has a 'customData'. Won't override it. 
Definition: {get_os_profile}") diff --git a/tests_e2e/tests/lib/update_arm_template.py b/tests_e2e/tests/lib/update_arm_template.py index 2fc1b09805..1cd4e1d72f 100644 --- a/tests_e2e/tests/lib/update_arm_template.py +++ b/tests_e2e/tests/lib/update_arm_template.py @@ -58,7 +58,8 @@ def get_resource_by_name(resources: Dict[str, Dict[str, Any]], resource_name: st @staticmethod def get_lisa_function(template: Dict[str, Any], function_name: str) -> Dict[str, Any]: """ - Looks for the given function name in the LISA namespace and returns its definition. Raises KeyError if the function is not found. + Looks for the given function name in the bicep namespace and returns its definition. Raises KeyError if the function is not found. + Note: LISA leverages the bicep language to define the ARM templates.Now namespace is changed to __bicep instead lisa """ # # NOTE: LISA's functions are in the "lisa" namespace, for example: @@ -96,7 +97,7 @@ def get_lisa_function(template: Dict[str, Any], function_name: str) -> Dict[str, name = namespace.get("namespace") if name is None: raise Exception(f'Cannot find "namespace" in the LISA template: {namespace}') - if name == "lisa": + if name == "__bicep": lisa_functions = namespace.get('members') if lisa_functions is None: raise Exception(f'Cannot find the members of the lisa namespace in the LISA template: {namespace}') From d655c291601683b8676cdb53f8b6a9593479f423 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Mon, 10 Jun 2024 15:44:00 -0700 Subject: [PATCH 208/240] support dict/list resources type for lisa template (#3140) * support dict/list for resources schema * addressed comment --- tests_e2e/tests/lib/network_security_rule.py | 12 +++++++++--- tests_e2e/tests/lib/update_arm_template.py | 16 ++++++++++------ 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/tests_e2e/tests/lib/network_security_rule.py b/tests_e2e/tests/lib/network_security_rule.py index 
d2f67d19cb..6fe8ee296e 100644 --- a/tests_e2e/tests/lib/network_security_rule.py +++ b/tests_e2e/tests/lib/network_security_rule.py @@ -55,7 +55,7 @@ def add_security_rule(self, security_rule: Dict[str, Any]) -> None: self._get_network_security_group()["properties"]["securityRules"].append(security_rule) def _get_network_security_group(self) -> Dict[str, Any]: - resources: Dict[str, Dict[str, Any]] = self._template["resources"] + resources: Any = self._template["resources"] # # If the NSG already exists, just return it # @@ -76,8 +76,14 @@ def _get_network_security_group(self) -> Dict[str, Any]: "securityRules": [] }} }}""") - nsg_reference = "network_security_groups" - resources[nsg_reference] = network_security_group + + # resources is a dictionary in LISA's ARM template, but a list in the template for scale sets + if isinstance(resources, dict): + nsg_reference = "network_security_groups" + resources[nsg_reference] = network_security_group + else: + nsg_reference = f"[resourceId('Microsoft.Network/networkSecurityGroups', '{self._NETWORK_SECURITY_GROUP}')]" + resources.append(network_security_group) # # Add a dependency on the NSG to the virtual network diff --git a/tests_e2e/tests/lib/update_arm_template.py b/tests_e2e/tests/lib/update_arm_template.py index 1cd4e1d72f..ef3dfd1d92 100644 --- a/tests_e2e/tests/lib/update_arm_template.py +++ b/tests_e2e/tests/lib/update_arm_template.py @@ -32,25 +32,29 @@ def update(self, template: Dict[str, Any], is_lisa_template: bool) -> None: """ @staticmethod - def get_resource(resources: Dict[str, Dict[str, Any]], type_name: str) -> Any: + def get_resource(resources: Any, type_name: str) -> Any: """ - Returns the first resource of the specified type in the given 'resources' list. + Returns the first resource of the specified type in the given 'resources' list/dict. Raises KeyError if no resource of the specified type is found. 
""" - for item in resources.values(): + if isinstance(resources, dict): + resources = resources.values() + for item in resources: if item["type"] == type_name: return item raise KeyError(f"Cannot find a resource of type {type_name} in the ARM template") @staticmethod - def get_resource_by_name(resources: Dict[str, Dict[str, Any]], resource_name: str, type_name: str) -> Any: + def get_resource_by_name(resources: Any, resource_name: str, type_name: str) -> Any: """ - Returns the first resource of the specified type and name in the given 'resources' list. + Returns the first resource of the specified type and name in the given 'resources' list/dict. Raises KeyError if no resource of the specified type and name is found. """ - for item in resources.values(): + if isinstance(resources, dict): + resources = resources.values() + for item in resources: if item["type"] == type_name and item["name"] == resource_name: return item raise KeyError(f"Cannot find a resource {resource_name} of type {type_name} in the ARM template") From cc6501d6ddc3fe3349b125a7cd4d05f6ffbf91a1 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Fri, 14 Jun 2024 18:10:46 -0700 Subject: [PATCH 209/240] eFix multi config (#16) (#3145) * Use runcommand api for runcommand multiconfig operations * remove rc * Fix comments * Remove comment * Fix rc * pylint * Add line --- .../lib/virtual_machine_runcommand_client.py | 130 ++++++++++++++++++ .../multi_config_ext/multi_config_ext.py | 21 +-- 2 files changed, 141 insertions(+), 10 deletions(-) create mode 100644 tests_e2e/tests/lib/virtual_machine_runcommand_client.py diff --git a/tests_e2e/tests/lib/virtual_machine_runcommand_client.py b/tests_e2e/tests/lib/virtual_machine_runcommand_client.py new file mode 100644 index 0000000000..7858c6fc90 --- /dev/null +++ b/tests_e2e/tests/lib/virtual_machine_runcommand_client.py @@ -0,0 +1,130 @@ +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# 
Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This module includes facilities to execute VM extension runcommand operations (enable, remove, etc). +# +import json +from typing import Any, Dict, Callable +from assertpy import soft_assertions, assert_that + +from azure.mgmt.compute import ComputeManagementClient +from azure.mgmt.compute.models import VirtualMachineRunCommand, VirtualMachineRunCommandScriptSource, VirtualMachineRunCommandInstanceView + +from tests_e2e.tests.lib.azure_sdk_client import AzureSdkClient +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.retry import execute_with_retry +from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient +from tests_e2e.tests.lib.vm_extension_identifier import VmExtensionIdentifier + + +class VirtualMachineRunCommandClient(AzureSdkClient): + """ + Client for operations virtual machine RunCommand extensions. 
+ """ + def __init__(self, vm: VirtualMachineClient, extension: VmExtensionIdentifier, resource_name: str = None): + super().__init__() + self._vm: VirtualMachineClient = vm + self._identifier = extension + self._resource_name = resource_name or extension.type + self._compute_client: ComputeManagementClient = AzureSdkClient.create_client(ComputeManagementClient, self._vm.cloud, self._vm.subscription) + + def get_instance_view(self) -> VirtualMachineRunCommandInstanceView: + """ + Retrieves the instance view of the run command extension + """ + log.info("Retrieving instance view for %s...", self._identifier) + + return execute_with_retry(lambda: self._compute_client.virtual_machine_run_commands.get_by_virtual_machine( + resource_group_name=self._vm.resource_group, + vm_name=self._vm.name, + run_command_name=self._resource_name, + expand="instanceView" + ).instance_view) + + def enable( + self, + settings: Dict[str, Any] = None, + timeout: int = AzureSdkClient._DEFAULT_TIMEOUT + ) -> None: + """ + Performs an enable operation on the run command extension. 
+ """ + run_command_parameters = VirtualMachineRunCommand( + location=self._vm.location, + source=VirtualMachineRunCommandScriptSource( + script=settings.get("source") if settings is not None else settings + ) + ) + + log.info("Enabling %s", self._identifier) + log.info("%s", run_command_parameters) + + result: VirtualMachineRunCommand = self._execute_async_operation( + lambda: self._compute_client.virtual_machine_run_commands.begin_create_or_update( + self._vm.resource_group, + self._vm.name, + self._resource_name, + run_command_parameters), + operation_name=f"Enable {self._identifier}", + timeout=timeout) + + log.info("Provisioning state: %s", result.provisioning_state) + + def delete(self, timeout: int = AzureSdkClient._DEFAULT_TIMEOUT) -> None: + """ + Performs a delete operation on the run command extension + """ + self._execute_async_operation( + lambda: self._compute_client.virtual_machine_run_commands.begin_delete( + self._vm.resource_group, + self._vm.name, + self._resource_name), + operation_name=f"Delete {self._identifier}", + timeout=timeout) + + def assert_instance_view( + self, + expected_status_code: str = "Succeeded", + expected_exit_code: int = 0, + expected_message: str = None, + assert_function: Callable[[VirtualMachineRunCommandInstanceView], None] = None + ) -> None: + """ + Asserts that the run command's instance view matches the given expected values. If 'expected_message' is + omitted, it is not validated. + + If 'assert_function' is provided, it is invoked passing as parameter the instance view. This function can be used to perform + additional validations. 
+ """ + instance_view = self.get_instance_view() + log.info("Instance view:\n%s", json.dumps(instance_view.serialize(), indent=4)) + + with soft_assertions(): + if expected_message is not None: + assert_that(expected_message in instance_view.output).described_as(f"{expected_message} should be in the InstanceView message ({instance_view.output})").is_true() + + assert_that(instance_view.execution_state).described_as("InstanceView execution state").is_equal_to(expected_status_code) + assert_that(instance_view.exit_code).described_as("InstanceView exit code").is_equal_to(expected_exit_code) + + if assert_function is not None: + assert_function(instance_view) + + log.info("The instance view matches the expected values") + + def __str__(self): + return f"{self._identifier}" diff --git a/tests_e2e/tests/multi_config_ext/multi_config_ext.py b/tests_e2e/tests/multi_config_ext/multi_config_ext.py index 4df75fd2be..d9315dea50 100644 --- a/tests_e2e/tests/multi_config_ext/multi_config_ext.py +++ b/tests_e2e/tests/multi_config_ext/multi_config_ext.py @@ -28,6 +28,8 @@ from azure.mgmt.compute.models import VirtualMachineInstanceView from tests_e2e.tests.lib.agent_test import AgentVmTest +from tests_e2e.tests.lib.azure_sdk_client import AzureSdkClient +from tests_e2e.tests.lib.virtual_machine_runcommand_client import VirtualMachineRunCommandClient from tests_e2e.tests.lib.vm_extension_identifier import VmExtensionIds from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient @@ -36,7 +38,7 @@ class MultiConfigExt(AgentVmTest): class TestCase: - def __init__(self, extension: VirtualMachineExtensionClient, get_settings: Callable[[str], Dict[str, str]]): + def __init__(self, extension: AzureSdkClient, get_settings: Callable[[str], Dict[str, str]]): self.extension = extension self.get_settings = get_settings self.test_guid: str = str(uuid.uuid4()) @@ -89,19 +91,18 @@ def run(self): # Create 3 different RCv2 extensions and 
a single config extension (CSE) and assign each a unique guid. Each # extension will have settings that echo its assigned guid. We will use this guid to verify the extension # statuses later. - mc_settings: Callable[[Any], Dict[str, Dict[str, str]]] = lambda s: { - "source": {"script": f"echo {s}"}} + mc_settings: Callable[[Any], Dict[str, str]] = lambda s: {"source": f"echo {s}"} sc_settings: Callable[[Any], Dict[str, str]] = lambda s: {'commandToExecute': f"echo {s}"} test_cases: Dict[str, MultiConfigExt.TestCase] = { "MCExt1": MultiConfigExt.TestCase( - VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.RunCommandHandler, + VirtualMachineRunCommandClient(self._context.vm, VmExtensionIds.RunCommandHandler, resource_name="MCExt1"), mc_settings), "MCExt2": MultiConfigExt.TestCase( - VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.RunCommandHandler, + VirtualMachineRunCommandClient(self._context.vm, VmExtensionIds.RunCommandHandler, resource_name="MCExt2"), mc_settings), "MCExt3": MultiConfigExt.TestCase( - VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.RunCommandHandler, + VirtualMachineRunCommandClient(self._context.vm, VmExtensionIds.RunCommandHandler, resource_name="MCExt3"), mc_settings), "CSE": MultiConfigExt.TestCase( VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.CustomScript), sc_settings) @@ -116,10 +117,10 @@ def run(self): # Update MCExt3 and CSE with new guids and add a new instance of RCv2 to the VM updated_test_cases: Dict[str, MultiConfigExt.TestCase] = { "MCExt3": MultiConfigExt.TestCase( - VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.RunCommandHandler, + VirtualMachineRunCommandClient(self._context.vm, VmExtensionIds.RunCommandHandler, resource_name="MCExt3"), mc_settings), "MCExt4": MultiConfigExt.TestCase( - VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.RunCommandHandler, + VirtualMachineRunCommandClient(self._context.vm, 
VmExtensionIds.RunCommandHandler, resource_name="MCExt4"), mc_settings), "CSE": MultiConfigExt.TestCase( VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.CustomScript), sc_settings) @@ -138,10 +139,10 @@ def run(self): log.info("Add only multi-config extensions to the VM...") mc_test_cases: Dict[str, MultiConfigExt.TestCase] = { "MCExt5": MultiConfigExt.TestCase( - VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.RunCommandHandler, + VirtualMachineRunCommandClient(self._context.vm, VmExtensionIds.RunCommandHandler, resource_name="MCExt5"), mc_settings), "MCExt6": MultiConfigExt.TestCase( - VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.RunCommandHandler, + VirtualMachineRunCommandClient(self._context.vm, VmExtensionIds.RunCommandHandler, resource_name="MCExt6"), mc_settings) } self.enable_and_assert_test_cases(cases_to_enable=mc_test_cases, cases_to_assert=mc_test_cases, From 610e12b3f160d587b06d078825c20e0a52744bff Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Sun, 16 Jun 2024 11:41:57 -0700 Subject: [PATCH 210/240] refactor cgroup controllers (#3135) * refactor cgroup controllers (#13) * Refactor Cgroup, CpuCgroup, MemoryCgroup to ControllerMetrics, CpuMetrics, MemoryMetrics * Create methods to get unit/process cgroup representation * Refactoring changes * Refactoring changes * Fix e2e test * Fix unintentional comment change * Remove unneeded comments * Clean up comments and make code more readable * Simplify get controller metrics * Clean up cgroupapi * Cleanup cgroup -> controllermetrics changes * Clean up cgroup configurator * Fix unit tests for agent.py * Fix cgroupapi tests * Fix cgroupconfigurator and tests * Rename controller metrics tests * Ignore pylint issues * Improve test coverage for cgroupapi * Rename cgroup to metrics * Update cgroup.procs to accurately represent file * Do not track metrics if controller is not mounted * We should set cpu quota before 
tracking cpu metrics * Pylint * address pr comments (#14) * Address Nag's comments * pyling * pylint * remove lambda (#15) --- azurelinuxagent/agent.py | 44 +- azurelinuxagent/ga/cgroupapi.py | 456 ++++++++++---- azurelinuxagent/ga/cgroupconfigurator.py | 163 ++--- azurelinuxagent/ga/cgroupstelemetry.py | 4 +- azurelinuxagent/ga/collect_logs.py | 2 +- .../ga/{cgroup.py => controllermetrics.py} | 22 +- azurelinuxagent/ga/extensionprocessutil.py | 18 +- azurelinuxagent/ga/monitor.py | 2 +- .../utils/test_extension_process_util.py | 42 +- tests/data/cgroups/cgroup.procs | 3 + tests/ga/test_cgroupapi.py | 577 ++++++++++++------ tests/ga/test_cgroupconfigurator.py | 16 +- tests/ga/test_cgroupstelemetry.py | 76 +-- tests/ga/test_collect_logs.py | 6 +- ...t_cgroups.py => test_controllermetrics.py} | 122 ++-- tests/ga/test_monitor.py | 10 +- tests/lib/mock_cgroup_environment.py | 7 +- tests/test_agent.py | 70 ++- tests_e2e/tests/lib/cgroup_helpers.py | 13 +- ...ups_process_check-unknown_process_check.py | 11 +- 20 files changed, 992 insertions(+), 672 deletions(-) rename azurelinuxagent/ga/{cgroup.py => controllermetrics.py} (96%) create mode 100644 tests/data/cgroups/cgroup.procs rename tests/ga/{test_cgroups.py => test_controllermetrics.py} (59%) diff --git a/azurelinuxagent/agent.py b/azurelinuxagent/agent.py index c0ebdbb42e..babd67e1ef 100644 --- a/azurelinuxagent/agent.py +++ b/azurelinuxagent/agent.py @@ -31,7 +31,7 @@ from azurelinuxagent.common.exception import CGroupsException from azurelinuxagent.ga import logcollector, cgroupconfigurator -from azurelinuxagent.ga.cgroup import AGENT_LOG_COLLECTOR, CpuCgroup, MemoryCgroup +from azurelinuxagent.ga.controllermetrics import AGENT_LOG_COLLECTOR, CpuMetrics from azurelinuxagent.ga.cgroupapi import get_cgroup_api, log_cgroup_warning, InvalidCgroupMountpointException import azurelinuxagent.common.conf as conf @@ -208,8 +208,7 @@ def collect_logs(self, is_full_mode): # Check the cgroups unit log_collector_monitor = None 
- cpu_cgroup_path = None - memory_cgroup_path = None + tracked_metrics = [] if CollectLogsHandler.is_enabled_monitor_cgroups_check(): try: cgroup_api = get_cgroup_api() @@ -220,40 +219,27 @@ def collect_logs(self, is_full_mode): log_cgroup_warning("Unable to determine which cgroup version to use: {0}".format(ustr(e)), send_event=True) sys.exit(logcollector.INVALID_CGROUPS_ERRCODE) - cpu_cgroup_path, memory_cgroup_path = cgroup_api.get_process_cgroup_paths("self") - cpu_slice_matches = False - memory_slice_matches = False - if cpu_cgroup_path is not None: - cpu_slice_matches = (cgroupconfigurator.LOGCOLLECTOR_SLICE in cpu_cgroup_path) - if memory_cgroup_path is not None: - memory_slice_matches = (cgroupconfigurator.LOGCOLLECTOR_SLICE in memory_cgroup_path) - - if not cpu_slice_matches or not memory_slice_matches: - log_cgroup_warning("The Log Collector process is not in the proper cgroups:", send_event=False) - if not cpu_slice_matches: - log_cgroup_warning("\tunexpected cpu slice: {0}".format(cpu_cgroup_path), send_event=False) - if not memory_slice_matches: - log_cgroup_warning("\tunexpected memory slice: {0}".format(memory_cgroup_path), send_event=False) + log_collector_cgroup = cgroup_api.get_process_cgroup(process_id="self", cgroup_name=AGENT_LOG_COLLECTOR) + tracked_metrics = log_collector_cgroup.get_controller_metrics() + if len(tracked_metrics) != len(log_collector_cgroup.get_supported_controllers()): + log_cgroup_warning("At least one required controller is missing. 
The following controllers are required for the log collector to run: {0}".format(log_collector_cgroup.get_supported_controllers())) sys.exit(logcollector.INVALID_CGROUPS_ERRCODE) - def initialize_cgroups_tracking(cpu_cgroup_path, memory_cgroup_path): - cpu_cgroup = CpuCgroup(AGENT_LOG_COLLECTOR, cpu_cgroup_path) - msg = "Started tracking cpu cgroup {0}".format(cpu_cgroup) - logger.info(msg) - cpu_cgroup.initialize_cpu_usage() - memory_cgroup = MemoryCgroup(AGENT_LOG_COLLECTOR, memory_cgroup_path) - msg = "Started tracking memory cgroup {0}".format(memory_cgroup) - logger.info(msg) - return [cpu_cgroup, memory_cgroup] + if not log_collector_cgroup.check_in_expected_slice(cgroupconfigurator.LOGCOLLECTOR_SLICE): + log_cgroup_warning("The Log Collector process is not in the proper cgroups", send_event=False) + sys.exit(logcollector.INVALID_CGROUPS_ERRCODE) try: log_collector = LogCollector(is_full_mode) - # Running log collector resource(CPU, Memory) monitoring only if agent starts the log collector. + # Running log collector resource monitoring only if agent starts the log collector. # If Log collector start by any other means, then it will not be monitored. if CollectLogsHandler.is_enabled_monitor_cgroups_check(): - tracked_cgroups = initialize_cgroups_tracking(cpu_cgroup_path, memory_cgroup_path) - log_collector_monitor = get_log_collector_monitor_handler(tracked_cgroups) + for metric in tracked_metrics: + if isinstance(metric, CpuMetrics): + metric.initialize_cpu_usage() + break + log_collector_monitor = get_log_collector_monitor_handler(tracked_metrics) log_collector_monitor.run() archive = log_collector.collect_logs_and_get_archive() logger.info("Log collection successfully completed. 
Archive can be found at {0} " diff --git a/azurelinuxagent/ga/cgroupapi.py b/azurelinuxagent/ga/cgroupapi.py index 3bce053502..3483527620 100644 --- a/azurelinuxagent/ga/cgroupapi.py +++ b/azurelinuxagent/ga/cgroupapi.py @@ -24,7 +24,7 @@ from azurelinuxagent.common import logger from azurelinuxagent.common.event import WALAEventOperation, add_event -from azurelinuxagent.ga.cgroup import CpuCgroup, MemoryCgroup +from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.common.conf import get_agent_pid_file_path from azurelinuxagent.common.exception import CGroupsException, ExtensionErrorCodes, ExtensionError, \ @@ -185,14 +185,14 @@ def get_cgroup_api(): if available_unified_controllers != "": raise CGroupsException("Detected hybrid cgroup mode, but there are controllers available to be enabled in unified hierarchy: {0}".format(available_unified_controllers)) - cgroup_api = SystemdCgroupApiv1() + cgroup_api_v1 = SystemdCgroupApiv1() # Previously the agent supported users mounting cgroup v1 controllers in locations other than the systemd - # default ('/sys/fs/cgroup'). The agent no longer supports this scenario. If either the cpu or memory - # controller is mounted in a location other than the systemd default, raise Exception. - if not cgroup_api.are_mountpoints_systemd_created(): - raise InvalidCgroupMountpointException("Expected cgroup controllers to be mounted at '{0}', but at least one is not. v1 mount points: \n{1}".format(CGROUP_FILE_SYSTEM_ROOT, json.dumps(cgroup_api.get_controller_root_paths()))) + # default ('/sys/fs/cgroup'). The agent no longer supports this scenario. If any agent supported controller is + # mounted in a location other than the systemd default, raise Exception. + if not cgroup_api_v1.are_mountpoints_systemd_created(): + raise InvalidCgroupMountpointException("Expected cgroup controllers to be mounted at '{0}', but at least one is not. 
v1 mount points: \n{1}".format(CGROUP_FILE_SYSTEM_ROOT, json.dumps(cgroup_api_v1.get_controller_mountpoints()))) log_cgroup_info("Using cgroup v1 for resource enforcement and monitoring") - return cgroup_api + return cgroup_api_v1 raise CGroupsException("{0} has an unexpected file type: {1}".format(CGROUP_FILE_SYSTEM_ROOT, root_hierarchy_mode)) @@ -202,7 +202,6 @@ class _SystemdCgroupApi(object): Cgroup interface via systemd. Contains common api implementations between cgroup v1 and v2. """ def __init__(self): - self._agent_unit_name = None self._systemd_run_commands = [] self._systemd_run_commands_lock = threading.RLock() @@ -213,55 +212,36 @@ def get_systemd_run_commands(self): with self._systemd_run_commands_lock: return self._systemd_run_commands[:] - def get_controller_root_paths(self): + def get_unit_cgroup(self, unit_name, cgroup_name): """ - Cgroup version specific. Returns a tuple with the root paths for the cpu and memory controllers; the values can - be None if the corresponding controller is not mounted or enabled at the root cgroup. + Cgroup version specific. Returns a representation of the unit cgroup. + + :param unit_name: The unit to return the cgroup of. + :param cgroup_name: A name to represent the cgroup. Used for logging/tracking purposes. """ raise NotImplementedError() - def get_unit_cgroup_paths(self, unit_name): - """ - Returns a tuple with the path of the cpu and memory cgroups for the given unit. - The values returned can be None if the controller is not mounted or enabled. 
+ def get_cgroup_from_relative_path(self, relative_path, cgroup_name): """ - # Ex: ControlGroup=/azure.slice/walinuxagent.service - # controlgroup_path[1:] = azure.slice/walinuxagent.service - controlgroup_path = systemd.get_unit_property(unit_name, "ControlGroup") - cpu_root_path, memory_root_path = self.get_controller_root_paths() - - cpu_cgroup_path = os.path.join(cpu_root_path, controlgroup_path[1:]) \ - if cpu_root_path is not None else None - - memory_cgroup_path = os.path.join(memory_root_path, controlgroup_path[1:]) \ - if memory_root_path is not None else None + Cgroup version specific. Returns a representation of the cgroup at the provided relative path. - return cpu_cgroup_path, memory_cgroup_path - - def get_process_cgroup_paths(self, process_id): - """ - Returns a tuple with the path of the cpu and memory cgroups for the given process. - The 'process_id' can be a numeric PID or the string "self" for the current process. - The values returned can be None if the controller is not mounted or enabled. + :param relative_path: The relative path to return the cgroup of. + :param cgroup_name: A name to represent the cgroup. Used for logging/tracking purposes. """ - cpu_cgroup_relative_path, memory_cgroup_relative_path = self.get_process_cgroup_relative_paths(process_id) - - cpu_root_path, memory_root_path = self.get_controller_root_paths() - - cpu_cgroup_path = os.path.join(cpu_root_path, cpu_cgroup_relative_path) \ - if cpu_root_path is not None and cpu_cgroup_relative_path is not None else None + raise NotImplementedError() - memory_cgroup_path = os.path.join(memory_root_path, memory_cgroup_relative_path) \ - if memory_root_path is not None and memory_cgroup_relative_path is not None else None + def get_process_cgroup(self, process_id, cgroup_name): + """ + Cgroup version specific. Returns a representation of the process' cgroup. 
- return cpu_cgroup_path, memory_cgroup_path + :param process_id: A numeric PID to return the cgroup of, or the string "self" to return the cgroup of the current process. + :param cgroup_name: A name to represent the cgroup. Used for logging/tracking purposes. + """ + raise NotImplementedError() - def get_process_cgroup_relative_paths(self, process_id): + def log_root_paths(self): """ - Cgroup version specific. Returns a tuple with the path of the cpu and memory cgroups for the given process - (relative to the root path of the corresponding controller). - The 'process_id' can be a numeric PID or the string "self" for the current process. - The values returned can be None if the controller is not mounted or enabled. + Cgroup version specific. Logs the root paths of the cgroup filesystem/controllers. """ raise NotImplementedError() @@ -279,11 +259,6 @@ def _is_systemd_failure(scope_name, stderr): unit_not_found = "Unit {0} not found.".format(scope_name) return unit_not_found in stderr or scope_name not in stderr - @staticmethod - def get_processes_in_cgroup(cgroup_path): - with open(os.path.join(cgroup_path, "cgroup.procs"), "r") as cgroup_procs: - return [int(pid) for pid in cgroup_procs.read().split()] - class SystemdCgroupApiv1(_SystemdCgroupApi): """ @@ -293,7 +268,8 @@ def __init__(self): super(SystemdCgroupApiv1, self).__init__() self._cgroup_mountpoints = self._get_controller_mountpoints() - def _get_controller_mountpoints(self): + @staticmethod + def _get_controller_mountpoints(): """ In v1, each controller is mounted at a different path. Use findmnt to get each path. @@ -304,7 +280,8 @@ def _get_controller_mountpoints(self): /sys/fs/cgroup/cpu,cpuacct cgroup cgroup rw,nosuid,nodev,noexec,relatime,cpu,cpuacct etc - Returns a dictionary of the controller-path mappings. + Returns a dictionary of the controller-path mappings. The dictionary only includes the controllers which are + supported by the agent. 
""" mount_points = {} for line in shellutil.run_command(['findmnt', '-t', 'cgroup', '--noheadings']).splitlines(): @@ -315,51 +292,91 @@ def _get_controller_mountpoints(self): if match is not None: path = match.group('path') controller = match.group('controller') - if controller is not None and path is not None: + if controller is not None and path is not None and controller in CgroupV1.get_supported_controllers(): mount_points[controller] = path return mount_points + def get_controller_mountpoints(self): + """ + Returns a dictionary of controller-mountpoint mappings. + """ + return self._cgroup_mountpoints + def are_mountpoints_systemd_created(self): """ - Systemd mounts each controller at '/sys/fs/cgroup/'. Returns True if both cpu and memory - mountpoints match this pattern, False otherwise. + Systemd mounts each controller at '/sys/fs/cgroup/'. Returns True if all mounted controllers which + are supported by the agent have mountpoints which match this pattern, False otherwise. The agent does not support cgroup usage if the default root systemd mountpoint (/sys/fs/cgroup) is not used. This method is used to check if any users are using non-systemd mountpoints. If they are, the agent drop-in files will be cleaned up in cgroupconfigurator. """ - cpu_mountpoint = self._cgroup_mountpoints.get('cpu,cpuacct') - memory_mountpoint = self._cgroup_mountpoints.get('memory') - if cpu_mountpoint is not None and cpu_mountpoint != os.path.join(CGROUP_FILE_SYSTEM_ROOT, 'cpu,cpuacct'): - return False - if memory_mountpoint is not None and memory_mountpoint != os.path.join(CGROUP_FILE_SYSTEM_ROOT, 'memory'): - return False + for controller, mount_point in self._cgroup_mountpoints.items(): + if mount_point != os.path.join(CGROUP_FILE_SYSTEM_ROOT, controller): + return False return True - def get_controller_root_paths(self): - # Return a tuple representing the mountpoints for cpu and memory. Either should be None if the corresponding - # controller is not mounted. 
- return self._cgroup_mountpoints.get('cpu,cpuacct'), self._cgroup_mountpoints.get('memory') - - def get_process_cgroup_relative_paths(self, process_id): - # The contents of the file are similar to - # # cat /proc/1218/cgroup - # 10:memory:/system.slice/walinuxagent.service - # 3:cpu,cpuacct:/system.slice/walinuxagent.service - # etc - cpu_path = None - memory_path = None + @staticmethod + def _get_process_relative_controller_paths(process_id): + """ + Returns the relative paths of the cgroup for the given process as a dict of controller-path mappings. The result + only includes controllers which are supported. + The contents of the /proc/{process_id}/cgroup file are similar to + # cat /proc/1218/cgroup + 10:memory:/system.slice/walinuxagent.service + 3:cpu,cpuacct:/system.slice/walinuxagent.service + etc + + :param process_id: A numeric PID to return the relative paths of, or the string "self" to return the relative paths of the current process. + """ + controller_relative_paths = {} for line in fileutil.read_file("/proc/{0}/cgroup".format(process_id)).splitlines(): - match = re.match(r'\d+:(?P(memory|.*cpuacct.*)):(?P.+)', line) + match = re.match(r'\d+:(?P.+):(?P.+)', line) if match is not None: controller = match.group('controller') path = match.group('path').lstrip('/') if match.group('path') != '/' else None - if controller == 'memory': - memory_path = path - else: - cpu_path = path + if path is not None and controller in CgroupV1.get_supported_controllers(): + controller_relative_paths[controller] = path + + return controller_relative_paths + + def get_unit_cgroup(self, unit_name, cgroup_name): + unit_cgroup_relative_path = systemd.get_unit_property(unit_name, "ControlGroup") + unit_controller_paths = {} - return cpu_path, memory_path + for controller, mountpoint in self._cgroup_mountpoints.items(): + unit_controller_paths[controller] = os.path.join(mountpoint, unit_cgroup_relative_path[1:]) + + return CgroupV1(cgroup_name=cgroup_name, 
controller_mountpoints=self._cgroup_mountpoints, + controller_paths=unit_controller_paths) + + def get_cgroup_from_relative_path(self, relative_path, cgroup_name): + controller_paths = {} + for controller, mountpoint in self._cgroup_mountpoints.items(): + controller_paths[controller] = os.path.join(mountpoint, relative_path) + + return CgroupV1(cgroup_name=cgroup_name, controller_mountpoints=self._cgroup_mountpoints, + controller_paths=controller_paths) + + def get_process_cgroup(self, process_id, cgroup_name): + relative_controller_paths = self._get_process_relative_controller_paths(process_id) + process_controller_paths = {} + + for controller, mountpoint in self._cgroup_mountpoints.items(): + relative_controller_path = relative_controller_paths.get(controller) + if relative_controller_path is not None: + process_controller_paths[controller] = os.path.join(mountpoint, relative_controller_path) + + return CgroupV1(cgroup_name=cgroup_name, controller_mountpoints=self._cgroup_mountpoints, + controller_paths=process_controller_paths) + + def log_root_paths(self): + for controller in CgroupV1.get_supported_controllers(): + mount_point = self._cgroup_mountpoints.get(controller) + if mount_point is None: + log_cgroup_info("The {0} controller is not mounted".format(controller), send_event=False) + else: + log_cgroup_info("The {0} controller is mounted at {1}".format(controller, mount_point), send_event=False) def start_extension_command(self, extension_name, command, cmd_name, timeout, shell, cwd, env, stdout, stderr, error_code=ExtensionErrorCodes.PluginUnknownFailure): @@ -385,25 +402,14 @@ def start_extension_command(self, extension_name, command, cmd_name, timeout, sh log_cgroup_info("Started extension in unit '{0}'".format(scope_name), send_event=False) - cpu_cgroup = None + cpu_metrics = None try: cgroup_relative_path = os.path.join('azure.slice/azure-vmextensions.slice', extension_slice_name) - - cpu_cgroup_mountpoint, memory_cgroup_mountpoint = 
self.get_controller_root_paths() - - if cpu_cgroup_mountpoint is None: - log_cgroup_info("The CPU controller is not mounted; will not track resource usage", send_event=False) - else: - cpu_cgroup_path = os.path.join(cpu_cgroup_mountpoint, cgroup_relative_path) - cpu_cgroup = CpuCgroup(extension_name, cpu_cgroup_path) - CGroupsTelemetry.track_cgroup(cpu_cgroup) - - if memory_cgroup_mountpoint is None: - log_cgroup_info("The Memory controller is not mounted; will not track resource usage", send_event=False) - else: - memory_cgroup_path = os.path.join(memory_cgroup_mountpoint, cgroup_relative_path) - memory_cgroup = MemoryCgroup(extension_name, memory_cgroup_path) - CGroupsTelemetry.track_cgroup(memory_cgroup) + cgroup = self.get_cgroup_from_relative_path(cgroup_relative_path, extension_name) + for metrics in cgroup.get_controller_metrics(): + if isinstance(metrics, CpuMetrics): + cpu_metrics = metrics + CGroupsTelemetry.track_cgroup(metrics) except IOError as e: if e.errno == 2: # 'No such file or directory' @@ -415,7 +421,7 @@ def start_extension_command(self, extension_name, command, cmd_name, timeout, sh # Wait for process completion or timeout try: return handle_process_completion(process=process, command=command, timeout=timeout, stdout=stdout, - stderr=stderr, error_code=error_code, cpu_cgroup=cpu_cgroup) + stderr=stderr, error_code=error_code, cpu_metrics=cpu_metrics) except ExtensionError as e: # The extension didn't terminate successfully. Determine whether it was due to systemd errors or # extension errors. 
@@ -448,7 +454,7 @@ class SystemdCgroupApiv2(_SystemdCgroupApi): def __init__(self): super(SystemdCgroupApiv2, self).__init__() self._root_cgroup_path = self._get_root_cgroup_path() - self._controllers_enabled_at_root = self._get_controllers_enabled_at_root(self._root_cgroup_path) if self._root_cgroup_path is not None else [] + self._controllers_enabled_at_root = self._get_controllers_enabled_at_root(self._root_cgroup_path) if self._root_cgroup_path != "" else [] @staticmethod def _get_root_cgroup_path(): @@ -459,7 +465,7 @@ def _get_root_cgroup_path(): $ findmnt -t cgroup2 --noheadings /sys/fs/cgroup cgroup2 cgroup2 rw,nosuid,nodev,noexec,relatime,nsdelegate,memory_recursiveprot - Returns None if the root cgroup cannot be determined from the output above. + Returns empty string if the root cgroup cannot be determined from the output above. """ # for line in shellutil.run_command(['findmnt', '-t', 'cgroup2', '--noheadings']).splitlines(): @@ -470,7 +476,13 @@ def _get_root_cgroup_path(): root_cgroup_path = match.group('path') if root_cgroup_path is not None: return root_cgroup_path - return None + return "" + + def get_root_cgroup_path(self): + """ + Returns the unified cgroup mountpoint. + """ + return self._root_cgroup_path @staticmethod def _get_controllers_enabled_at_root(root_cgroup_path): @@ -478,47 +490,229 @@ def _get_controllers_enabled_at_root(root_cgroup_path): Returns a list of the controllers enabled at the root cgroup. The cgroup.subtree_control file at the root shows a space separated list of the controllers which are enabled to control resource distribution from the root cgroup to its children. If a controller is listed here, then that controller is available to enable in children - cgroups. + cgroups. Returns only the enabled controllers which are supported by the agent. 
$ cat /sys/fs/cgroup/cgroup.subtree_control cpuset cpu io memory hugetlb pids rdma misc """ - controllers_enabled_at_root = [] enabled_controllers_file = os.path.join(root_cgroup_path, 'cgroup.subtree_control') if os.path.exists(enabled_controllers_file): controllers_enabled_at_root = fileutil.read_file(enabled_controllers_file).rstrip().split() - return controllers_enabled_at_root - - def get_controller_root_paths(self): - # Return a tuple representing the root cgroups for cpu and memory. Either should be None if the corresponding - # controller is not enabled at the root. This check is necessary because all non-root "cgroup.subtree_control" - # files can only contain controllers which are enabled in the parent's "cgroup.subtree_control" file. - - root_cpu_path = None - root_memory_path = None - if self._root_cgroup_path is not None: - if 'cpu' in self._controllers_enabled_at_root: - root_cpu_path = self._root_cgroup_path - if 'memory' in self._controllers_enabled_at_root: - root_memory_path = self._root_cgroup_path - - return root_cpu_path, root_memory_path - - def get_process_cgroup_relative_paths(self, process_id): - # The contents of the file are similar to - # # cat /proc/1218/cgroup - # 0::/azure.slice/walinuxagent.service - cpu_path = None - memory_path = None + return list(set(controllers_enabled_at_root) & set(CgroupV2.get_supported_controllers())) + return [] + + @staticmethod + def _get_process_relative_cgroup_path(process_id): + """ + Returns the relative path of the cgroup for the given process. + The contents of the /proc/{process_id}/cgroup file are similar to + # cat /proc/1218/cgroup + 0::/azure.slice/walinuxagent.service + + :param process_id: A numeric PID to return the relative path of, or the string "self" to return the relative path of the current process. 
+ """ + relative_path = "" for line in fileutil.read_file("/proc/{0}/cgroup".format(process_id)).splitlines(): match = re.match(r'0::(?P\S+)', line) if match is not None: - path = match.group('path').lstrip('/') if match.group('path') != '/' else None - memory_path = path - cpu_path = path + relative_path = match.group('path').lstrip('/') if match.group('path') != '/' else "" + + return relative_path + + def get_unit_cgroup(self, unit_name, cgroup_name): + unit_cgroup_relative_path = systemd.get_unit_property(unit_name, "ControlGroup") + unit_cgroup_path = "" + + if self._root_cgroup_path != "": + unit_cgroup_path = os.path.join(self._root_cgroup_path, unit_cgroup_relative_path[1:]) - return cpu_path, memory_path + return CgroupV2(cgroup_name=cgroup_name, root_cgroup_path=self._root_cgroup_path, cgroup_path=unit_cgroup_path, enabled_controllers=self._controllers_enabled_at_root) + + def get_cgroup_from_relative_path(self, relative_path, cgroup_name): + cgroup_path = "" + if self._root_cgroup_path != "": + cgroup_path = os.path.join(self._root_cgroup_path, relative_path) + + return CgroupV2(cgroup_name=cgroup_name, root_cgroup_path=self._root_cgroup_path, cgroup_path=cgroup_path, enabled_controllers=self._controllers_enabled_at_root) + + def get_process_cgroup(self, process_id, cgroup_name): + relative_path = self._get_process_relative_cgroup_path(process_id) + cgroup_path = "" + + if self._root_cgroup_path != "": + cgroup_path = os.path.join(self._root_cgroup_path, relative_path) + + return CgroupV2(cgroup_name=cgroup_name, root_cgroup_path=self._root_cgroup_path, cgroup_path=cgroup_path, enabled_controllers=self._controllers_enabled_at_root) + + def log_root_paths(self): + log_cgroup_info("The root cgroup path is {0}".format(self._root_cgroup_path), send_event=False) + for controller in CgroupV2.get_supported_controllers(): + if controller in self._controllers_enabled_at_root: + log_cgroup_info("The {0} controller is enabled at the root cgroup".format(controller), 
send_event=False) + else: + log_cgroup_info("The {0} controller is not enabled at the root cgroup".format(controller), send_event=False) def start_extension_command(self, extension_name, command, cmd_name, timeout, shell, cwd, env, stdout, stderr, error_code=ExtensionErrorCodes.PluginUnknownFailure): raise NotImplementedError() + + +class Cgroup(object): + MEMORY_CONTROLLER = "memory" + + def __init__(self, cgroup_name): + self._cgroup_name = cgroup_name + + @staticmethod + def get_supported_controllers(): + """ + Cgroup version specific. Returns a list of the controllers which the agent supports. + """ + raise NotImplementedError() + + def check_in_expected_slice(self, expected_slice): + """ + Cgroup version specific. Returns True if the cgroup is in the expected slice, False otherwise. + + :param expected_slice: The slice the cgroup is expected to be in. + """ + raise NotImplementedError() + + def get_controller_metrics(self, expected_relative_path=None): + """ + Cgroup version specific. Returns a list of the metrics for the agent supported controllers which are + mounted/enabled for the cgroup. + + :param expected_relative_path: The expected relative path of the cgroup. If provided, only metrics for controllers at this expected path will be returned. + """ + raise NotImplementedError() + + def get_processes(self): + """ + Cgroup version specific. Returns a list of all the process ids in the cgroup. + """ + raise NotImplementedError() + + +class CgroupV1(Cgroup): + CPU_CONTROLLER = "cpu,cpuacct" + + def __init__(self, cgroup_name, controller_mountpoints, controller_paths): + """ + :param cgroup_name: The name of the cgroup. Used for logging/tracking purposes. + :param controller_mountpoints: A dictionary of controller-mountpoint mappings for each agent supported controller which is mounted. + :param controller_paths: A dictionary of controller-path mappings for each agent supported controller which is mounted. 
The path represents the absolute path of the controller. + """ + super(CgroupV1, self).__init__(cgroup_name=cgroup_name) + self._controller_mountpoints = controller_mountpoints + self._controller_paths = controller_paths + + @staticmethod + def get_supported_controllers(): + return [CgroupV1.CPU_CONTROLLER, CgroupV1.MEMORY_CONTROLLER] + + def check_in_expected_slice(self, expected_slice): + in_expected_slice = True + for controller, path in self._controller_paths.items(): + if expected_slice not in path: + log_cgroup_warning("The {0} controller for the {1} cgroup is not mounted in the expected slice. Expected slice: {2}. Actual controller path: {3}".format(controller, self._cgroup_name, expected_slice, path), send_event=False) + in_expected_slice = False + + return in_expected_slice + + def get_controller_metrics(self, expected_relative_path=None): + metrics = [] + + for controller in self.get_supported_controllers(): + controller_metrics = None + controller_path = self._controller_paths.get(controller) + controller_mountpoint = self._controller_mountpoints.get(controller) + + if controller_mountpoint is None: + log_cgroup_warning("{0} controller is not mounted; will not track metrics".format(controller), send_event=False) + continue + + if controller_path is None: + log_cgroup_warning("{0} is not mounted for the {1} cgroup; will not track metrics".format(controller, self._cgroup_name), send_event=False) + continue + + if expected_relative_path is not None: + expected_path = os.path.join(controller_mountpoint, expected_relative_path) + if controller_path != expected_path: + log_cgroup_warning("The {0} controller is not mounted at the expected path for the {1} cgroup; will not track metrics. 
Actual cgroup path:[{2}] Expected:[{3}]".format(controller, self._cgroup_name, controller_path, expected_path), send_event=False) + continue + + if controller == self.CPU_CONTROLLER: + controller_metrics = CpuMetrics(self._cgroup_name, controller_path) + elif controller == self.MEMORY_CONTROLLER: + controller_metrics = MemoryMetrics(self._cgroup_name, controller_path) + + if controller_metrics is not None: + msg = "{0} metrics for cgroup: {1}".format(controller, controller_metrics) + log_cgroup_info(msg, send_event=False) + metrics.append(controller_metrics) + + return metrics + + def get_controller_procs_path(self, controller): + controller_path = self._controller_paths.get(controller) + if controller_path is not None and controller_path != "": + return os.path.join(controller_path, "cgroup.procs") + return "" + + def get_processes(self): + pids = set() + for controller in self._controller_paths.keys(): + procs_path = self.get_controller_procs_path(controller) + if os.path.exists(procs_path): + with open(procs_path, "r") as cgroup_procs: + for pid in cgroup_procs.read().split(): + pids.add(int(pid)) + return list(pids) + + +class CgroupV2(Cgroup): + CPU_CONTROLLER = "cpu" + + def __init__(self, cgroup_name, root_cgroup_path, cgroup_path, enabled_controllers): + """ + :param cgroup_name: The name of the cgroup. Used for logging/tracking purposes. + :param root_cgroup_path: A string representing the root cgroup path. String can be empty. + :param cgroup_path: A string representing the absolute cgroup path. String can be empty. + :param enabled_controllers: A list of strings representing the agent supported controllers enabled at the root cgroup. 
+ """ + super(CgroupV2, self).__init__(cgroup_name) + self._root_cgroup_path = root_cgroup_path + self._cgroup_path = cgroup_path + self._enabled_controllers = enabled_controllers + + @staticmethod + def get_supported_controllers(): + return [CgroupV2.CPU_CONTROLLER, CgroupV2.MEMORY_CONTROLLER] + + def check_in_expected_slice(self, expected_slice): + if expected_slice not in self._cgroup_path: + log_cgroup_warning("The {0} cgroup is not in the expected slice. Expected slice: {1}. Actual cgroup path: {2}".format(self._cgroup_name, expected_slice, self._cgroup_path), send_event=False) + return False + + return True + + def get_controller_metrics(self, expected_relative_path=None): + # TODO - Implement controller metrics for cgroup v2 + raise NotImplementedError() + + def get_procs_path(self): + if self._cgroup_path != "": + return os.path.join(self._cgroup_path, "cgroup.procs") + return "" + + def get_processes(self): + pids = set() + procs_path = self.get_procs_path() + if os.path.exists(procs_path): + with open(procs_path, "r") as cgroup_procs: + for pid in cgroup_procs.read().split(): + pids.add(int(pid)) + return list(pids) + + diff --git a/azurelinuxagent/ga/cgroupconfigurator.py b/azurelinuxagent/ga/cgroupconfigurator.py index 72d5329f92..a36b9dae10 100644 --- a/azurelinuxagent/ga/cgroupconfigurator.py +++ b/azurelinuxagent/ga/cgroupconfigurator.py @@ -23,7 +23,7 @@ from azurelinuxagent.common import conf from azurelinuxagent.common import logger -from azurelinuxagent.ga.cgroup import CpuCgroup, AGENT_NAME_TELEMETRY, MetricsCounter, MemoryCgroup +from azurelinuxagent.ga.controllermetrics import CpuMetrics, AGENT_NAME_TELEMETRY, MetricsCounter, MemoryMetrics from azurelinuxagent.ga.cgroupapi import SystemdRunError, EXTENSION_SLICE_PREFIX, CGroupUtil, SystemdCgroupApiv2, \ log_cgroup_info, log_cgroup_warning, get_cgroup_api, InvalidCgroupMountpointException from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry @@ -130,9 +130,8 @@ def __init__(self): 
self._agent_cgroups_enabled = False self._extensions_cgroups_enabled = False self._cgroups_api = None - self._agent_cpu_cgroup_path = None - self._agent_memory_cgroup_path = None - self._agent_memory_cgroup = None + self._agent_cgroup = None + self._agent_memory_metrics = None self._check_cgroups_lock = threading.RLock() # Protect the check_cgroups which is called from Monitor thread and main loop. def initialize(self): @@ -189,28 +188,30 @@ def initialize(self): self.__setup_azure_slice() - cpu_controller_root, memory_controller_root = self.__get_cgroup_controller_roots() - self._agent_cpu_cgroup_path, self._agent_memory_cgroup_path = self.__get_agent_cgroup_paths(agent_slice, - cpu_controller_root, - memory_controller_root) + # Log mount points/root paths for cgroup controllers + self._cgroups_api.log_root_paths() + + # Get agent cgroup + self._agent_cgroup = self._cgroups_api.get_process_cgroup(process_id="self", cgroup_name=AGENT_NAME_TELEMETRY) if conf.get_cgroup_disable_on_process_check_failure() and self._check_fails_if_processes_found_in_agent_cgroup_before_enable(agent_slice): reason = "Found unexpected processes in the agent cgroup before agent enable cgroups." 
self.disable(reason, DisableCgroups.ALL) return - if self._agent_cpu_cgroup_path is not None or self._agent_memory_cgroup_path is not None: + # Get metrics to track + metrics = self._agent_cgroup.get_controller_metrics(expected_relative_path=os.path.join(agent_slice, systemd.get_agent_unit_name())) + if len(metrics) > 0: self.enable() - if self._agent_cpu_cgroup_path is not None: - log_cgroup_info("Agent CPU cgroup: {0}".format(self._agent_cpu_cgroup_path)) - self.__set_cpu_quota(conf.get_agent_cpu_quota()) - CGroupsTelemetry.track_cgroup(CpuCgroup(AGENT_NAME_TELEMETRY, self._agent_cpu_cgroup_path)) - - if self._agent_memory_cgroup_path is not None: - log_cgroup_info("Agent Memory cgroup: {0}".format(self._agent_memory_cgroup_path)) - self._agent_memory_cgroup = MemoryCgroup(AGENT_NAME_TELEMETRY, self._agent_memory_cgroup_path) - CGroupsTelemetry.track_cgroup(self._agent_memory_cgroup) + for metric in metrics: + for prop in metric.get_unit_properties(): + log_cgroup_info('{0}: {1}'.format(prop, systemd.get_unit_property(systemd.get_agent_unit_name(), prop))) + if isinstance(metric, CpuMetrics): + self.__set_cpu_quota(conf.get_agent_cpu_quota()) + elif isinstance(metric, MemoryMetrics): + self._agent_memory_metrics = metric + CGroupsTelemetry.track_cgroup(metric) except Exception as exception: log_cgroup_warning("Error initializing cgroups: {0}".format(ustr(exception))) @@ -229,21 +230,6 @@ def __check_no_legacy_cgroups(self): return False return True - def __get_cgroup_controller_roots(self): - cpu_controller_root, memory_controller_root = self._cgroups_api.get_controller_root_paths() - - if cpu_controller_root is not None: - log_cgroup_info("The CPU cgroup controller root path is {0}".format(cpu_controller_root), send_event=False) - else: - log_cgroup_warning("The CPU cgroup controller is not mounted or enabled") - - if memory_controller_root is not None: - log_cgroup_info("The memory cgroup controller root path is {0}".format(memory_controller_root), 
send_event=False) - else: - log_cgroup_warning("The memory cgroup controller is not mounted or enabled") - - return cpu_controller_root, memory_controller_root - @staticmethod def __setup_azure_slice(): """ @@ -416,47 +402,6 @@ def is_extension_resource_limits_setup_completed(self, extension_name, cpu_quota return True return False - def __get_agent_cgroup_paths(self, agent_slice, cpu_controller_root, memory_controller_root): - agent_unit_name = systemd.get_agent_unit_name() - - expected_relative_path = os.path.join(agent_slice, agent_unit_name) - cpu_cgroup_relative_path, memory_cgroup_relative_path = self._cgroups_api.get_process_cgroup_relative_paths( - "self") - - if cpu_cgroup_relative_path is None: - log_cgroup_warning("The agent's process is not within a CPU cgroup") - else: - if cpu_cgroup_relative_path == expected_relative_path: - log_cgroup_info('CPUAccounting: {0}'.format(systemd.get_unit_property(agent_unit_name, "CPUAccounting"))) - log_cgroup_info('CPUQuota: {0}'.format(systemd.get_unit_property(agent_unit_name, "CPUQuotaPerSecUSec"))) - else: - log_cgroup_warning( - "The Agent is not in the expected CPU cgroup; will not enable monitoring. Cgroup:[{0}] Expected:[{1}]".format(cpu_cgroup_relative_path, expected_relative_path)) - cpu_cgroup_relative_path = None # Set the path to None to prevent monitoring - - if memory_cgroup_relative_path is None: - log_cgroup_warning("The agent's process is not within a memory cgroup") - else: - if memory_cgroup_relative_path == expected_relative_path: - memory_accounting = systemd.get_unit_property(agent_unit_name, "MemoryAccounting") - log_cgroup_info('MemoryAccounting: {0}'.format(memory_accounting)) - else: - log_cgroup_warning( - "The Agent is not in the expected memory cgroup; will not enable monitoring. 
CGroup:[{0}] Expected:[{1}]".format(memory_cgroup_relative_path, expected_relative_path)) - memory_cgroup_relative_path = None # Set the path to None to prevent monitoring - - if cpu_controller_root is not None and cpu_cgroup_relative_path is not None: - agent_cpu_cgroup_path = os.path.join(cpu_controller_root, cpu_cgroup_relative_path) - else: - agent_cpu_cgroup_path = None - - if memory_controller_root is not None and memory_cgroup_relative_path is not None: - agent_memory_cgroup_path = os.path.join(memory_controller_root, memory_cgroup_relative_path) - else: - agent_memory_cgroup_path = None - - return agent_cpu_cgroup_path, agent_memory_cgroup_path - def supported(self): return self._cgroups_supported @@ -496,7 +441,11 @@ def disable(self, reason, disable_cgroups): elif disable_cgroups == DisableCgroups.AGENT: # disable agent self._agent_cgroups_enabled = False self.__reset_agent_cpu_quota() - CGroupsTelemetry.stop_tracking(CpuCgroup(AGENT_NAME_TELEMETRY, self._agent_cpu_cgroup_path)) + agent_metrics = self._agent_cgroup.get_controller_metrics() + for metric in agent_metrics: + if isinstance(metric, CpuMetrics): + CGroupsTelemetry.stop_tracking(metric) + break log_cgroup_warning("Disabling resource usage monitoring. Reason: {0}".format(reason), op=WALAEventOperation.CGroupsDisabled) @@ -612,11 +561,7 @@ def _check_processes_in_agent_cgroup(self): """ unexpected = [] agent_cgroup_proc_names = [] - # Now we call _check_processes_in_agent_cgroup before we enable the cgroups or any one of the controller is not mounted, agent cgroup paths can be None. - # so we need to check both. 
- cgroup_path = self._agent_cpu_cgroup_path if self._agent_cpu_cgroup_path is not None else self._agent_memory_cgroup_path - if cgroup_path is None: - return + try: daemon = os.getppid() extension_handler = os.getpid() @@ -624,12 +569,12 @@ def _check_processes_in_agent_cgroup(self): agent_commands.update(shellutil.get_running_commands()) systemd_run_commands = set() systemd_run_commands.update(self._cgroups_api.get_systemd_run_commands()) - agent_cgroup = self._cgroups_api.get_processes_in_cgroup(cgroup_path) + agent_cgroup_processes = self._agent_cgroup.get_processes() # get the running commands again in case new commands started or completed while we were fetching the processes in the cgroup; agent_commands.update(shellutil.get_running_commands()) systemd_run_commands.update(self._cgroups_api.get_systemd_run_commands()) - for process in agent_cgroup: + for process in agent_cgroup_processes: agent_cgroup_proc_names.append(self.__format_process(process)) # Note that the agent uses systemd-run to start extensions; systemd-run belongs to the agent cgroup, though the extensions don't.
if process in (daemon, extension_handler) or process in systemd_run_commands: @@ -753,8 +698,8 @@ def _check_agent_throttled_time(cgroup_metrics): raise CGroupsException("The agent has been throttled for {0} seconds".format(metric.value)) def check_agent_memory_usage(self): - if self.enabled() and self._agent_memory_cgroup: - metrics = self._agent_memory_cgroup.get_tracked_metrics() + if self.enabled() and self._agent_memory_metrics is not None: + metrics = self._agent_memory_metrics.get_tracked_metrics() current_usage = 0 for metric in metrics: if metric.counter == MetricsCounter.TOTAL_MEM_USAGE: @@ -780,59 +725,37 @@ def _get_parent(pid): return 0 def start_tracking_unit_cgroups(self, unit_name): - """ - TODO: Start tracking Memory Cgroups - """ try: - cpu_cgroup_path, memory_cgroup_path = self._cgroups_api.get_unit_cgroup_paths(unit_name) - - if cpu_cgroup_path is None: - log_cgroup_info("The CPU controller is not mounted or enabled; will not track resource usage", send_event=False) - else: - CGroupsTelemetry.track_cgroup(CpuCgroup(unit_name, cpu_cgroup_path)) + cgroup = self._cgroups_api.get_unit_cgroup(unit_name, unit_name) + metrics = cgroup.get_controller_metrics() - if memory_cgroup_path is None: - log_cgroup_info("The Memory controller is not mounted or enabled; will not track resource usage", send_event=False) - else: - CGroupsTelemetry.track_cgroup(MemoryCgroup(unit_name, memory_cgroup_path)) + for metric in metrics: + CGroupsTelemetry.track_cgroup(metric) except Exception as exception: log_cgroup_info("Failed to start tracking resource usage for the extension: {0}".format(ustr(exception)), send_event=False) def stop_tracking_unit_cgroups(self, unit_name): - """ - TODO: remove Memory cgroups from tracked list. 
- """ try: - cpu_cgroup_path, memory_cgroup_path = self._cgroups_api.get_unit_cgroup_paths(unit_name) - - if cpu_cgroup_path is not None: - CGroupsTelemetry.stop_tracking(CpuCgroup(unit_name, cpu_cgroup_path)) + cgroup = self._cgroups_api.get_unit_cgroup(unit_name, unit_name) + metrics = cgroup.get_controller_metrics() - if memory_cgroup_path is not None: - CGroupsTelemetry.stop_tracking(MemoryCgroup(unit_name, memory_cgroup_path)) + for metric in metrics: + CGroupsTelemetry.stop_tracking(metric) except Exception as exception: log_cgroup_info("Failed to stop tracking resource usage for the extension service: {0}".format(ustr(exception)), send_event=False) def stop_tracking_extension_cgroups(self, extension_name): - """ - TODO: remove extension Memory cgroups from tracked list - """ try: extension_slice_name = CGroupUtil.get_extension_slice_name(extension_name) - cgroup_relative_path = os.path.join(_AZURE_VMEXTENSIONS_SLICE, - extension_slice_name) - - cpu_root_path, memory_root_path = self._cgroups_api.get_controller_root_paths() - cpu_cgroup_path = os.path.join(cpu_root_path, cgroup_relative_path) - memory_cgroup_path = os.path.join(memory_root_path, cgroup_relative_path) + cgroup_relative_path = os.path.join(_AZURE_VMEXTENSIONS_SLICE, extension_slice_name) - if cpu_cgroup_path is not None: - CGroupsTelemetry.stop_tracking(CpuCgroup(extension_name, cpu_cgroup_path)) - - if memory_cgroup_path is not None: - CGroupsTelemetry.stop_tracking(MemoryCgroup(extension_name, memory_cgroup_path)) + cgroup = self._cgroups_api.get_cgroup_from_relative_path(relative_path=cgroup_relative_path, + cgroup_name=extension_name) + metrics = cgroup.get_controller_metrics() + for metric in metrics: + CGroupsTelemetry.stop_tracking(metric) except Exception as exception: log_cgroup_info("Failed to stop tracking resource usage for the extension service: {0}".format(ustr(exception)), send_event=False) diff --git a/azurelinuxagent/ga/cgroupstelemetry.py 
b/azurelinuxagent/ga/cgroupstelemetry.py index 5943b45ade..e8efad0382 100644 --- a/azurelinuxagent/ga/cgroupstelemetry.py +++ b/azurelinuxagent/ga/cgroupstelemetry.py @@ -17,7 +17,7 @@ import threading from azurelinuxagent.common import logger -from azurelinuxagent.ga.cgroup import CpuCgroup +from azurelinuxagent.ga.controllermetrics import CpuMetrics from azurelinuxagent.common.future import ustr @@ -41,7 +41,7 @@ def track_cgroup(cgroup): """ Adds the given item to the dictionary of tracked cgroups """ - if isinstance(cgroup, CpuCgroup): + if isinstance(cgroup, CpuMetrics): # set the current cpu usage cgroup.initialize_cpu_usage() diff --git a/azurelinuxagent/ga/collect_logs.py b/azurelinuxagent/ga/collect_logs.py index d82933e963..d8ea3dba3d 100644 --- a/azurelinuxagent/ga/collect_logs.py +++ b/azurelinuxagent/ga/collect_logs.py @@ -25,7 +25,7 @@ import azurelinuxagent.common.conf as conf from azurelinuxagent.common import logger -from azurelinuxagent.ga.cgroup import MetricsCounter +from azurelinuxagent.ga.controllermetrics import MetricsCounter from azurelinuxagent.common.event import elapsed_milliseconds, add_event, WALAEventOperation, report_metric from azurelinuxagent.common.future import ustr from azurelinuxagent.ga.interfaces import ThreadHandlerInterface diff --git a/azurelinuxagent/ga/cgroup.py b/azurelinuxagent/ga/controllermetrics.py similarity index 96% rename from azurelinuxagent/ga/cgroup.py rename to azurelinuxagent/ga/controllermetrics.py index b2bf32fbc1..3aaeab3193 100644 --- a/azurelinuxagent/ga/cgroup.py +++ b/azurelinuxagent/ga/controllermetrics.py @@ -88,7 +88,7 @@ class MetricsCounter(object): re_user_system_times = re.compile(r'user (\d+)\nsystem (\d+)\n') -class CGroup(object): +class ControllerMetrics(object): def __init__(self, name, cgroup_path): """ Initialize _data collection for the Memory controller @@ -169,10 +169,16 @@ def get_tracked_metrics(self, **_): """ raise NotImplementedError() + def get_unit_properties(self): + """ + 
Returns a list of the unit properties to collect for the controller. + """ + raise NotImplementedError() -class CpuCgroup(CGroup): + +class CpuMetrics(ControllerMetrics): def __init__(self, name, cgroup_path): - super(CpuCgroup, self).__init__(name, cgroup_path) + super(CpuMetrics, self).__init__(name, cgroup_path) self._osutil = get_osutil() self._previous_cgroup_cpu = None @@ -306,10 +312,13 @@ def get_tracked_metrics(self, **kwargs): return tracked + def get_unit_properties(self): + return ["CPUAccounting", "CPUQuotaPerSecUSec"] + -class MemoryCgroup(CGroup): +class MemoryMetrics(ControllerMetrics): def __init__(self, name, cgroup_path): - super(MemoryCgroup, self).__init__(name, cgroup_path) + super(MemoryMetrics, self).__init__(name, cgroup_path) self._counter_not_found_error_count = 0 @@ -390,3 +399,6 @@ def get_tracked_metrics(self, **_): MetricValue(MetricsCategory.MEMORY_CATEGORY, MetricsCounter.SWAP_MEM_USAGE, self.name, self.try_swap_memory_usage(), _REPORT_EVERY_HOUR) ] + + def get_unit_properties(self): + return ["MemoryAccounting"] diff --git a/azurelinuxagent/ga/extensionprocessutil.py b/azurelinuxagent/ga/extensionprocessutil.py index d2b37551ba..9061fd3fff 100644 --- a/azurelinuxagent/ga/extensionprocessutil.py +++ b/azurelinuxagent/ga/extensionprocessutil.py @@ -31,7 +31,7 @@ TELEMETRY_MESSAGE_MAX_LEN = 3200 -def wait_for_process_completion_or_timeout(process, timeout, cpu_cgroup): """ Utility function that waits for the process to complete within the given time frame. This function will terminate the process if when the given time frame elapses.
@@ -47,7 +47,7 @@ def wait_for_process_completion_or_timeout(process, timeout, cpu_cgroup): throttled_time = 0 if timeout == 0: - throttled_time = get_cpu_throttled_time(cpu_cgroup) + throttled_time = get_cpu_throttled_time(cpu_metrics) os.killpg(os.getpgid(process.pid), signal.SIGKILL) else: # process completed or forked; sleep 1 sec to give the child process (if any) a chance to start @@ -57,7 +57,7 @@ def wait_for_process_completion_or_timeout(process, timeout, cpu_cgroup): return timeout == 0, return_code, throttled_time -def handle_process_completion(process, command, timeout, stdout, stderr, error_code, cpu_cgroup=None): +def handle_process_completion(process, command, timeout, stdout, stderr, error_code, cpu_metrics=None): """ Utility function that waits for process completion and retrieves its output (stdout and stderr) if it completed before the timeout period. Otherwise, the process will get killed and an ExtensionError will be raised. @@ -68,15 +68,15 @@ def handle_process_completion(process, command, timeout, stdout, stderr, error_c :param stdout: Must be a file since we seek on it when parsing the subprocess output :param stderr: Must be a file since we seek on it when parsing the subprocess outputs :param error_code: The error code to set if we raise an ExtensionError - :param cpu_cgroup: Reference the cpu cgroup name and path + :param cpu_metrics: References the cpu metrics for the cgroup :return: """ # Wait for process completion or timeout - timed_out, return_code, throttled_time = wait_for_process_completion_or_timeout(process, timeout, cpu_cgroup) + timed_out, return_code, throttled_time = wait_for_process_completion_or_timeout(process, timeout, cpu_metrics) process_output = read_output(stdout, stderr) if timed_out: - if cpu_cgroup is not None: # Report CPUThrottledTime when timeout happens + if cpu_metrics is not None: # Report CPUThrottledTime when timeout happens raise ExtensionError("Timeout({0});CPUThrottledTime({1}secs): 
{2}\n{3}".format(timeout, throttled_time, command, process_output), code=ExtensionErrorCodes.PluginHandlerScriptTimedout) @@ -211,14 +211,14 @@ def to_s(captured_stdout, stdout_offset, captured_stderr, stderr_offset): return to_s(stdout, -1*max_len_each, stderr, -1*max_len_each) -def get_cpu_throttled_time(cpu_cgroup): +def get_cpu_throttled_time(cpu_metrics): """ return the throttled time for the given cgroup. """ throttled_time = 0 - if cpu_cgroup is not None: + if cpu_metrics is not None: try: - throttled_time = cpu_cgroup.get_cpu_throttled_time(read_previous_throttled_time=False) + throttled_time = cpu_metrics.get_cpu_throttled_time(read_previous_throttled_time=False) except Exception as e: logger.warn("Failed to get cpu throttled time for the extension: {0}", ustr(e)) diff --git a/azurelinuxagent/ga/monitor.py b/azurelinuxagent/ga/monitor.py index a5ff29aa01..f34192be72 100644 --- a/azurelinuxagent/ga/monitor.py +++ b/azurelinuxagent/ga/monitor.py @@ -22,7 +22,7 @@ import azurelinuxagent.common.conf as conf import azurelinuxagent.common.logger as logger import azurelinuxagent.common.utils.networkutil as networkutil -from azurelinuxagent.ga.cgroup import MetricValue, MetricsCategory, MetricsCounter +from azurelinuxagent.ga.controllermetrics import MetricValue, MetricsCategory, MetricsCounter from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.common.errorstate import ErrorState diff --git a/tests/common/utils/test_extension_process_util.py b/tests/common/utils/test_extension_process_util.py index 316bad6a37..7771de4fd2 100644 --- a/tests/common/utils/test_extension_process_util.py +++ b/tests/common/utils/test_extension_process_util.py @@ -19,7 +19,7 @@ import subprocess import tempfile -from azurelinuxagent.ga.cgroup import CpuCgroup +from azurelinuxagent.ga.controllermetrics import CpuMetrics from azurelinuxagent.common.exception import ExtensionError, 
ExtensionErrorCodes from azurelinuxagent.common.future import ustr from azurelinuxagent.ga.extensionprocessutil import format_stdout_stderr, read_output, \ @@ -52,7 +52,7 @@ def test_wait_for_process_completion_or_timeout_should_terminate_cleanly(self): stdout=subprocess.PIPE, stderr=subprocess.PIPE) - timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=5, cpu_cgroup=None) + timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=5, cpu_metrics=None) self.assertEqual(timed_out, False) self.assertEqual(ret, 0) @@ -70,7 +70,8 @@ def test_wait_for_process_completion_or_timeout_should_kill_process_on_timeout(s # We don't actually mock the kill, just wrap it so we can assert its call count with patch('azurelinuxagent.ga.extensionprocessutil.os.killpg', wraps=os.killpg) as patch_kill: with patch('time.sleep') as mock_sleep: - timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=timeout, cpu_cgroup=None) + timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=timeout, + cpu_metrics=None) # We're mocking sleep to avoid prolonging the test execution time, but we still want to make sure # we're "waiting" the correct amount of time before killing the process @@ -89,7 +90,7 @@ def test_handle_process_completion_should_return_nonzero_when_process_fails(self stdout=subprocess.PIPE, stderr=subprocess.PIPE) - timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=5, cpu_cgroup=None) + timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=5, cpu_metrics=None) self.assertEqual(timed_out, False) self.assertEqual(ret, 2) @@ -105,12 +106,8 @@ def test_handle_process_completion_should_return_process_output(self): stderr=stderr, preexec_fn=os.setsid) - process_output = handle_process_completion(process=process, - command=command, - timeout=5, - stdout=stdout, - stderr=stderr, - error_code=42) + process_output = 
handle_process_completion(process=process, command=command, timeout=5, stdout=stdout, + stderr=stderr, error_code=42) expected_output = "[stdout]\ndummy stdout\n\n\n[stderr]\ndummy stderr\n" self.assertEqual(process_output, expected_output) @@ -130,12 +127,8 @@ def test_handle_process_completion_should_raise_on_timeout(self): stderr=stderr, preexec_fn=os.setsid) - handle_process_completion(process=process, - command=command, - timeout=timeout, - stdout=stdout, - stderr=stderr, - error_code=42) + handle_process_completion(process=process, command=command, timeout=timeout, stdout=stdout, + stderr=stderr, error_code=42) # We're mocking sleep to avoid prolonging the test execution time, but we still want to make sure # we're "waiting" the correct amount of time before killing the process and raising an exception @@ -158,7 +151,7 @@ def test_handle_process_completion_should_log_throttled_time_on_timeout(self): test_file = os.path.join(self.tmp_dir, "cpu.stat") shutil.copyfile(os.path.join(data_dir, "cgroups", "cpu.stat_t0"), test_file) # throttled_time = 50 - cgroup = CpuCgroup("test", self.tmp_dir) + cgroup = CpuMetrics("test", self.tmp_dir) process = subprocess.Popen(command, # pylint: disable=subprocess-popen-preexec-fn shell=True, cwd=self.tmp_dir, @@ -167,13 +160,8 @@ def test_handle_process_completion_should_log_throttled_time_on_timeout(self): stderr=stderr, preexec_fn=os.setsid) - handle_process_completion(process=process, - command=command, - timeout=timeout, - stdout=stdout, - stderr=stderr, - error_code=42, - cpu_cgroup=cgroup) + handle_process_completion(process=process, command=command, timeout=timeout, stdout=stdout, + stderr=stderr, error_code=42, cpu_metrics=cgroup) # We're mocking sleep to avoid prolonging the test execution time, but we still want to make sure # we're "waiting" the correct amount of time before killing the process and raising an exception @@ -200,11 +188,7 @@ def test_handle_process_completion_should_raise_on_nonzero_exit_code(self): 
stderr=stderr, preexec_fn=os.setsid) - handle_process_completion(process=process, - command=command, - timeout=4, - stdout=stdout, - stderr=stderr, + handle_process_completion(process=process, command=command, timeout=4, stdout=stdout, stderr=stderr, error_code=error_code) self.assertEqual(context_manager.exception.code, error_code) diff --git a/tests/data/cgroups/cgroup.procs b/tests/data/cgroups/cgroup.procs new file mode 100644 index 0000000000..93c25c16df --- /dev/null +++ b/tests/data/cgroups/cgroup.procs @@ -0,0 +1,3 @@ +123 +234 +345 \ No newline at end of file diff --git a/tests/ga/test_cgroupapi.py b/tests/ga/test_cgroupapi.py index ec077c90a0..717adbb6f0 100644 --- a/tests/ga/test_cgroupapi.py +++ b/tests/ga/test_cgroupapi.py @@ -24,10 +24,11 @@ from azurelinuxagent.common.exception import CGroupsException from azurelinuxagent.ga.cgroupapi import SystemdCgroupApiv1, SystemdCgroupApiv2, CGroupUtil, get_cgroup_api, \ - InvalidCgroupMountpointException + InvalidCgroupMountpointException, CgroupV1, CgroupV2 from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.common.osutil import systemd from azurelinuxagent.common.utils import fileutil +from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics from tests.lib.mock_cgroup_environment import mock_cgroup_v1_environment, mock_cgroup_v2_environment, \ mock_cgroup_hybrid_environment from tests.lib.mock_environment import MockCommand @@ -85,7 +86,7 @@ def test_cgroups_should_be_supported_only_on_ubuntu16_centos7dot4_redhat7dot4_an class SystemdCgroupsApiTestCase(AgentTestCase): - def test_get_cgroup_api_raises_exception_when_systemd_mount_point_does_not_exist(self): + def test_get_cgroup_api_raises_exception_when_systemd_mountpoint_does_not_exist(self): with mock_cgroup_v1_environment(self.tmp_dir): # Mock os.path.exists to return False for the os.path.exists(CGROUP_FILE_SYSTEM_ROOT) check with patch("os.path.exists", return_value=False): @@ -151,106 +152,16 @@ def 
test_get_unit_property_should_return_the_value_of_the_given_property(self): class SystemdCgroupsApiv1TestCase(AgentTestCase): - def test_get_unit_cgroup_paths_should_return_the_cgroup_v1_mount_points(self): - with mock_cgroup_v1_environment(self.tmp_dir): - cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service") - self.assertIn(cpu, '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service', - "The mount point for the CPU controller is incorrect") - self.assertIn(memory, '/sys/fs/cgroup/memory/system.slice/extension.service', - "The mount point for the memory controller is incorrect") - - def test_get_unit_cgroup_path_should_return_None_if_either_cgroup_v1_controller_not_mounted(self): - with mock_cgroup_v1_environment(self.tmp_dir): - with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_controller_root_paths', return_value=('/sys/fs/cgroup/cpu,cpuacct', None)): - cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service") - self.assertIn(cpu, '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service', - "The mount point for the CPU controller is incorrect") - self.assertIsNone(memory, - "The mount point for the memory controller is None so unit cgroup should be None") - - with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_controller_root_paths', return_value=(None, '/sys/fs/cgroup/memory')): - cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service") - self.assertIsNone(cpu, "The mount point for the cpu controller is None so unit cgroup should be None") - self.assertIn(memory, '/sys/fs/cgroup/memory/system.slice/extension.service', - "The mount point for the memory controller is incorrect") - - def test_get_process_cgroup_paths_should_return_the_cgroup_v1_mount_points(self): - with mock_cgroup_v1_environment(self.tmp_dir): - cpu, memory = get_cgroup_api().get_process_cgroup_paths("self") - self.assertIn(cpu, '/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service', - "The mount point 
for the CPU controller is incorrect") - self.assertIn(memory, '/sys/fs/cgroup/memory/system.slice/walinuxagent.service', - "The mount point for the memory controller is incorrect") - - def test_get_process_cgroup_path_should_return_None_if_either_cgroup_v1_controller_not_mounted(self): - with mock_cgroup_v1_environment(self.tmp_dir): - with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_controller_root_paths', return_value=('/sys/fs/cgroup/cpu,cpuacct', None)): - cpu, memory = get_cgroup_api().get_process_cgroup_paths("self") - self.assertIn(cpu, '/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service', - "The mount point for the CPU controller is incorrect") - self.assertIsNone(memory, - "The mount point for the memory controller is None so unit cgroup should be None") - - with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_controller_root_paths', return_value=(None, '/sys/fs/cgroup/memory')): - cpu, memory = get_cgroup_api().get_process_cgroup_paths("self") - self.assertIsNone(cpu, "The mount point for the CPU controller is None so unit cgroup should be None") - self.assertIn(memory, '/sys/fs/cgroup/memory/system.slice/walinuxagent.service', - "The mount point for the memory controller is incorrect") - - def test_get_process_cgroup_v1_path_should_return_None_if_either_relative_path_is_None(self): - with mock_cgroup_v1_environment(self.tmp_dir): - with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup_relative_paths', return_value=('system.slice/walinuxagent.service', None)): - cpu, memory = get_cgroup_api().get_process_cgroup_paths("self") - self.assertIn(cpu, '/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service', - "The mount point for the CPU controller is incorrect") - self.assertIsNone(memory, - "The relative cgroup path for the memory controller is None so unit cgroup should be None") - - with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup_relative_paths', 
return_value=(None, 'system.slice/walinuxagent.service')): - cpu, memory = get_cgroup_api().get_process_cgroup_paths("self") - self.assertIsNone(cpu, "The relative cgroup path for the cpu controller is None so unit cgroup should be None") - self.assertIn(memory, '/sys/fs/cgroup/memory/system.slice/walinuxagent.service', - "The mount point for the memory controller is incorrect") - - def test_get_controller_root_paths_should_return_the_cgroup_v1_controller_mount_points(self): - with mock_cgroup_v1_environment(self.tmp_dir): - cpu, memory = get_cgroup_api().get_controller_root_paths() - self.assertEqual(cpu, '/sys/fs/cgroup/cpu,cpuacct', "The root cgroup for the CPU controller is incorrect") - self.assertEqual(memory, '/sys/fs/cgroup/memory', "The root cgroup for the memory controller is incorrect") - - def test_get_controller_root_paths_should_return_None_if_either_controller_not_mounted(self): - with mock_cgroup_v1_environment(self.tmp_dir): - with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'memory': '/sys/fs/cgroup/memory', 'io': '/sys/fs/cgroup/io'}): - cpu, memory = get_cgroup_api().get_controller_root_paths() - self.assertIsNone(cpu, "The CPU controller is mot mounted, so the cpu controller path should be None") - self.assertEqual(memory, '/sys/fs/cgroup/memory', "The root cgroup for the memory controller is incorrect") - - with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct', 'io': '/sys/fs/cgroup/io'}): - cpu, memory = get_cgroup_api().get_controller_root_paths() - self.assertIsNone(memory, "The memory controller is mot mounted, so the memory controller path should be None") - self.assertEqual(cpu, '/sys/fs/cgroup/cpu,cpuacct', "The root cgroup for the cpu controller is incorrect") - - def test_get_controller_mountpoints_should_return_all_controller_mount_points(self): + def 
test_get_controller_mountpoints_should_return_only_supported_controllers(self): with mock_cgroup_v1_environment(self.tmp_dir): cgroup_api = get_cgroup_api() # Expected value comes from findmnt output in the mocked environment self.assertEqual(cgroup_api._get_controller_mountpoints(), { - 'systemd': '/sys/fs/cgroup/systemd', - 'devices': '/sys/fs/cgroup/devices', - 'rdma': '/sys/fs/cgroup/rdma', - 'perf_event': '/sys/fs/cgroup/perf_event', - 'net_cls,net_prio': '/sys/fs/cgroup/net_cls,net_prio', - 'blkio': '/sys/fs/cgroup/blkio', - 'cpuset': '/sys/fs/cgroup/cpuset', - 'misc': '/sys/fs/cgroup/misc', 'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct', - 'memory': '/sys/fs/cgroup/memory', - 'freezer': '/sys/fs/cgroup/freezer', - 'hugetlb': '/sys/fs/cgroup/hugetlb', - 'pids': '/sys/fs/cgroup/pids', + 'memory': '/sys/fs/cgroup/memory' }, "The controller mountpoints are not correct") - def test_are_mountpoints_systemd_created_should_return_False_if_cpu_or_memory_are_not_systemd_mountpoints(self): + def test_are_mountpoints_systemd_created_should_return_False_if_mountpoints_are_not_systemd(self): with mock_cgroup_v1_environment(self.tmp_dir): with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/custom/mountpoint/path', 'memory': '/custom/mountpoint/path'}): self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created()) @@ -261,23 +172,123 @@ def test_are_mountpoints_systemd_created_should_return_False_if_cpu_or_memory_ar with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'memory': '/custom/mountpoint/path'}): self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created()) - def test_are_mountpoints_systemd_created_should_return_True_if_cpu_and_memory_are_systemd_mountpoints(self): + def test_are_mountpoints_systemd_created_should_return_True_if_mountpoints_are_systemd(self): with mock_cgroup_v1_environment(self.tmp_dir): - with 
patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup', 'memory': '/sys/fs/cgroup'}): - self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created()) + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct', 'memory': '/sys/fs/cgroup/memory'}): + self.assertTrue(SystemdCgroupApiv1().are_mountpoints_systemd_created()) # are_mountpoints_systemd_created should only check controllers which are mounted - with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup'}): - self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created()) + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct'}): + self.assertTrue(SystemdCgroupApiv1().are_mountpoints_systemd_created()) - with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'memory': '/sys/fs/cgroup'}): - self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created()) + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'memory': '/sys/fs/cgroup/memory'}): + self.assertTrue(SystemdCgroupApiv1().are_mountpoints_systemd_created()) + + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={}): + self.assertTrue(SystemdCgroupApiv1().are_mountpoints_systemd_created()) + + def test_get_relative_paths_for_process_should_return_the_cgroup_v1_relative_paths(self): + with mock_cgroup_v1_environment(self.tmp_dir): + relative_paths = get_cgroup_api()._get_process_relative_controller_paths('self') + self.assertEqual(len(relative_paths), 2) + self.assertEqual(relative_paths.get('cpu,cpuacct'), "system.slice/walinuxagent.service", 
"The relative path for the CPU cgroup is incorrect") + self.assertEqual(relative_paths.get('memory'), "system.slice/walinuxagent.service", "The relative memory for the memory cgroup is incorrect") + + def test_get_unit_cgroup_should_return_correct_paths_for_cgroup_v1(self): + with mock_cgroup_v1_environment(self.tmp_dir): + cgroup = get_cgroup_api().get_unit_cgroup(unit_name="extension.service", cgroup_name="extension") + self.assertIsInstance(cgroup, CgroupV1) + self.assertEqual(cgroup._cgroup_name, "extension") + self.assertEqual(cgroup._controller_mountpoints, {'cpu,cpuacct':'/sys/fs/cgroup/cpu,cpuacct', 'memory':'/sys/fs/cgroup/memory'}) + self.assertEqual(cgroup._controller_paths, {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service', 'memory': '/sys/fs/cgroup/memory/system.slice/extension.service'}) - def test_get_cpu_and_memory_cgroup_relative_paths_for_process_should_return_the_cgroup_v1_relative_paths(self): + def test_get_unit_cgroup_should_return_only_mounted_controllers_v1(self): with mock_cgroup_v1_environment(self.tmp_dir): - cpu, memory = get_cgroup_api().get_process_cgroup_relative_paths('self') - self.assertEqual(cpu, "system.slice/walinuxagent.service", "The relative path for the CPU cgroup is incorrect") - self.assertEqual(memory, "system.slice/walinuxagent.service", "The relative memory for the CPU cgroup is incorrect") + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct'}): + cgroup = get_cgroup_api().get_unit_cgroup(unit_name="extension.service", cgroup_name="extension") + self.assertIsInstance(cgroup, CgroupV1) + self.assertEqual(cgroup._cgroup_name, "extension") + self.assertEqual(cgroup._controller_mountpoints, {'cpu,cpuacct':'/sys/fs/cgroup/cpu,cpuacct'}) + self.assertEqual(cgroup._controller_paths, {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service'}) + + with 
patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={}): + cgroup = get_cgroup_api().get_unit_cgroup(unit_name="extension.service", cgroup_name="extension") + self.assertIsInstance(cgroup, CgroupV1) + self.assertEqual(cgroup._cgroup_name, "extension") + self.assertEqual(cgroup._controller_mountpoints, {}) + self.assertEqual(cgroup._controller_paths, {}) + + def test_get_cgroup_from_relative_path_should_return_the_correct_paths_for_cgroup_v1(self): + with mock_cgroup_v1_environment(self.tmp_dir): + cgroup = get_cgroup_api().get_cgroup_from_relative_path(relative_path="some/relative/path", cgroup_name="test_cgroup") + self.assertIsInstance(cgroup, CgroupV1) + self.assertEqual(cgroup._cgroup_name, "test_cgroup") + self.assertEqual(cgroup._controller_mountpoints, + {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct', 'memory': '/sys/fs/cgroup/memory'}) + self.assertEqual(cgroup._controller_paths, + {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct/some/relative/path', + 'memory': '/sys/fs/cgroup/memory/some/relative/path'}) + + def test_get_cgroup_from_relative_path_should_return_only_mounted_controllers_v1(self): + with mock_cgroup_v1_environment(self.tmp_dir): + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct'}): + cgroup = get_cgroup_api().get_cgroup_from_relative_path(relative_path="some/relative/path", cgroup_name="test_cgroup") + self.assertIsInstance(cgroup, CgroupV1) + self.assertEqual(cgroup._cgroup_name, "test_cgroup") + self.assertEqual(cgroup._controller_mountpoints, + {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct'}) + self.assertEqual(cgroup._controller_paths, + {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct/some/relative/path'}) + + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={}): + cgroup = get_cgroup_api().get_cgroup_from_relative_path(relative_path="some/relative/path", 
cgroup_name="test_cgroup") + self.assertIsInstance(cgroup, CgroupV1) + self.assertEqual(cgroup._cgroup_name, "test_cgroup") + self.assertEqual(cgroup._controller_mountpoints, {}) + self.assertEqual(cgroup._controller_paths, {}) + + def test_get_process_cgroup_should_return_the_correct_paths_for_cgroup_v1(self): + with mock_cgroup_v1_environment(self.tmp_dir): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + self.assertIsInstance(cgroup, CgroupV1) + self.assertEqual(cgroup._cgroup_name, "walinuxagent") + self.assertEqual(cgroup._controller_mountpoints, + {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct', 'memory': '/sys/fs/cgroup/memory'}) + self.assertEqual(cgroup._controller_paths, + {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service', + 'memory': '/sys/fs/cgroup/memory/system.slice/walinuxagent.service'}) + + def test_get_process_cgroup_should_return_only_mounted_controllers_v1(self): + with mock_cgroup_v1_environment(self.tmp_dir): + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct'}): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + self.assertIsInstance(cgroup, CgroupV1) + self.assertEqual(cgroup._cgroup_name, "walinuxagent") + self.assertEqual(cgroup._controller_mountpoints, {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct'}) + self.assertEqual(cgroup._controller_paths, {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service'}) + + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={}): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + self.assertIsInstance(cgroup, CgroupV1) + self.assertEqual(cgroup._cgroup_name, "walinuxagent") + self.assertEqual(cgroup._controller_mountpoints, {}) + self.assertEqual(cgroup._controller_paths, {}) + + 
def test_get_process_cgroup_should_return_only_mounted_process_controllers_v1(self): + with mock_cgroup_v1_environment(self.tmp_dir): + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_process_relative_controller_paths', return_value={'cpu,cpuacct': 'relative/path'}): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + self.assertIsInstance(cgroup, CgroupV1) + self.assertEqual(cgroup._cgroup_name, "walinuxagent") + self.assertEqual(cgroup._controller_mountpoints, {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct', 'memory':'/sys/fs/cgroup/memory'}) + self.assertEqual(cgroup._controller_paths, {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct/relative/path'}) + + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_process_relative_controller_paths', return_value={}): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + self.assertIsInstance(cgroup, CgroupV1) + self.assertEqual(cgroup._cgroup_name, "walinuxagent") + self.assertEqual(cgroup._controller_mountpoints, {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct', 'memory':'/sys/fs/cgroup/memory'}) + self.assertEqual(cgroup._controller_paths, {}) @patch('time.sleep', side_effect=lambda _: mock_sleep()) def test_start_extension_cgroups_v1_command_should_return_the_command_output(self, _): @@ -354,17 +365,6 @@ def test_start_extension_cgroups_v1_command_should_use_systemd_to_execute_the_co class SystemdCgroupsApiv2TestCase(AgentTestCase): - def test_get_controllers_enabled_at_root_should_return_list_of_enabled_controllers(self): - with mock_cgroup_v2_environment(self.tmp_dir): - cgroup_api = get_cgroup_api() - self.assertEqual(cgroup_api._get_controllers_enabled_at_root('/sys/fs/cgroup'), ['cpuset', 'cpu', 'io', 'memory', 'pids']) - - def test_get_controllers_enabled_at_root_should_return_empty_list_if_root_cgroup_path_is_None(self): - with mock_cgroup_v2_environment(self.tmp_dir): - with 
patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path', return_value=None): - cgroup_api = get_cgroup_api() - self.assertEqual(cgroup_api._controllers_enabled_at_root, []) - def test_get_root_cgroup_path_should_return_v2_cgroup_root(self): with mock_cgroup_v2_environment(self.tmp_dir): cgroup_api = get_cgroup_api() @@ -374,97 +374,113 @@ def test_get_root_cgroup_path_should_only_match_systemd_mountpoint(self): with mock_cgroup_v2_environment(self.tmp_dir) as env: # Mock an environment which has multiple v2 mountpoints env.add_command(MockCommand(r"^findmnt -t cgroup2 --noheadings$", -'''/custom/mountpoint/path1 cgroup2 cgroup2 rw,relatime -/sys/fs/cgroup cgroup2 cgroup2 rw,nosuid,nodev,noexec,relatime -/custom/mountpoint/path2 none cgroup2 rw,relatime -''')) + '''/custom/mountpoint/path1 cgroup2 cgroup2 rw,relatime + /sys/fs/cgroup cgroup2 cgroup2 rw,nosuid,nodev,noexec,relatime + /custom/mountpoint/path2 none cgroup2 rw,relatime + ''')) cgroup_api = get_cgroup_api() self.assertEqual(cgroup_api._get_root_cgroup_path(), '/sys/fs/cgroup') - def test_get_unit_cgroup_paths_should_return_the_cgroup_v2_cgroup_paths(self): + def test_get_controllers_enabled_at_root_should_return_list_of_agent_supported_and_enabled_controllers(self): with mock_cgroup_v2_environment(self.tmp_dir): - cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service") - self.assertEqual(cpu, '/sys/fs/cgroup/system.slice/extension.service', - "The cgroup path for the CPU controller is incorrect") - self.assertEqual(memory, '/sys/fs/cgroup/system.slice/extension.service', - "The cgroup path for the memory controller is incorrect") + cgroup_api = get_cgroup_api() + enabled_controllers = cgroup_api._get_controllers_enabled_at_root('/sys/fs/cgroup') + self.assertEqual(len(enabled_controllers), 2) + self.assertIn('cpu', enabled_controllers) + self.assertIn('memory', enabled_controllers) - def 
test_get_unit_cgroup_path_should_return_None_if_either_cgroup_v2_controller_not_enabled(self): + def test_get_controllers_enabled_at_root_should_return_empty_list_if_root_cgroup_path_is_empty(self): with mock_cgroup_v2_environment(self.tmp_dir): - with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.get_controller_root_paths', return_value=('/sys/fs/cgroup', None)): - cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service") - self.assertIn(cpu, '/sys/fs/cgroup/system.slice/extension.service', - "The cgroup path for the CPU controller is incorrect") - self.assertIsNone(memory, - "The cgroup path for the memory controller is None so unit cgroup should be None") - - with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.get_controller_root_paths', return_value=(None, '/sys/fs/cgroup')): - cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service") - self.assertIsNone(cpu, "The cgroup path for the cpu controller is None so unit cgroup should be None") - self.assertIn(memory, '/sys/fs/cgroup/system.slice/extension.service', - "The cgroup path for the memory controller is incorrect") - - def test_get_process_cgroup_paths_should_return_the_cgroup_v2_cgroup_paths(self): + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path', return_value=""): + cgroup_api = get_cgroup_api() + self.assertEqual(cgroup_api._controllers_enabled_at_root, []) + + def test_get_process_relative_cgroup_path_should_return_relative_path(self): with mock_cgroup_v2_environment(self.tmp_dir): - cpu, memory = get_cgroup_api().get_process_cgroup_paths("self") - self.assertIn(cpu, '/sys/fs/cgroup/system.slice/walinuxagent.service', - "The cgroup path for the CPU controller is incorrect") - self.assertIn(memory, '/sys/fs/cgroup/system.slice/walinuxagent.service', - "The cgroup path for the memory controller is incorrect") + cgroup_api = get_cgroup_api() + self.assertEqual(cgroup_api._get_process_relative_cgroup_path(process_id="self"), 
"system.slice/walinuxagent.service") - def test_get_process_cgroup_path_should_return_None_if_either_cgroup_v2_controller_not_enabled(self): + def test_get_unit_cgroup_should_return_correct_paths_for_cgroup_v2(self): with mock_cgroup_v2_environment(self.tmp_dir): - with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.get_controller_root_paths', return_value=('/sys/fs/cgroup', None)): - cpu, memory = get_cgroup_api().get_process_cgroup_paths("self") - self.assertIn(cpu, '/sys/fs/cgroup/system.slice/walinuxagent.service', - "The cgroup path for the CPU controller is incorrect") - self.assertIsNone(memory, - "The cgroup path for the memory controller is None so unit cgroup should be None") - - with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.get_controller_root_paths', return_value=(None, '/sys/fs/cgroup')): - cpu, memory = get_cgroup_api().get_process_cgroup_paths("self") - self.assertIsNone(cpu, "The cgroup path for the CPU controller is None so unit cgroup should be None") - self.assertIn(memory, '/sys/fs/cgroup/system.slice/walinuxagent.service', - "The cgroup path for the memory controller is incorrect") - - def test_get_process_cgroup_v2_path_should_return_None_if_relative_path_is_None(self): + cgroup = get_cgroup_api().get_unit_cgroup(unit_name="extension.service", cgroup_name="extension") + self.assertIsInstance(cgroup, CgroupV2) + self.assertEqual(cgroup._cgroup_name, "extension") + self.assertEqual(cgroup._root_cgroup_path, "/sys/fs/cgroup") + self.assertEqual(cgroup._cgroup_path, "/sys/fs/cgroup/system.slice/extension.service") + self.assertEqual(len(cgroup._enabled_controllers), 2) + self.assertIn('cpu', cgroup._enabled_controllers) + self.assertIn('memory', cgroup._enabled_controllers) + + def test_get_unit_cgroup_should_return_empty_paths_if_root_path_empty_v2(self): with mock_cgroup_v2_environment(self.tmp_dir): - with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.get_process_cgroup_relative_paths', return_value=(None, None)): 
- cpu, memory = get_cgroup_api().get_process_cgroup_paths("self") - self.assertIsNone(cpu, "The relative cgroup path for the cpu controller is None so unit cgroup should be None") - self.assertIsNone(memory, - "The relative cgroup path for the memory controller is None so unit cgroup should be None") - - def test_get_controller_root_paths_should_return_the_cgroup_v2_root_cgroup_path(self): + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path', return_value=""): + cgroup = get_cgroup_api().get_unit_cgroup(unit_name="extension.service", cgroup_name="extension") + self.assertIsInstance(cgroup, CgroupV2) + self.assertEqual(cgroup._cgroup_name, "extension") + self.assertEqual(cgroup._root_cgroup_path, "") + self.assertEqual(cgroup._cgroup_path, "") + self.assertEqual(len(cgroup._enabled_controllers), 0) + + def test_get_unit_cgroup_should_return_only_enabled_controllers_v2(self): with mock_cgroup_v2_environment(self.tmp_dir): - cpu, memory = get_cgroup_api().get_controller_root_paths() - self.assertEqual(cpu, '/sys/fs/cgroup', "The root cgroup for the CPU controller is incorrect") - self.assertEqual(memory, '/sys/fs/cgroup', "The root cgroup for the memory controller is incorrect") - - def test_get_controller_root_paths_should_return_None_if_root_cgroup_path_is_None(self): + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_controllers_enabled_at_root', return_value=['cpu']): + cgroup = get_cgroup_api().get_unit_cgroup(unit_name="extension.service", cgroup_name="extension") + self.assertIsInstance(cgroup, CgroupV2) + self.assertEqual(cgroup._cgroup_name, "extension") + self.assertEqual(cgroup._root_cgroup_path, "/sys/fs/cgroup") + self.assertEqual(cgroup._cgroup_path, "/sys/fs/cgroup/system.slice/extension.service") + self.assertEqual(len(cgroup._enabled_controllers), 1) + self.assertIn('cpu', cgroup._enabled_controllers) + + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_controllers_enabled_at_root', 
return_value=[]): + cgroup = get_cgroup_api().get_unit_cgroup(unit_name="extension.service", cgroup_name="extension") + self.assertIsInstance(cgroup, CgroupV2) + self.assertEqual(cgroup._cgroup_name, "extension") + self.assertEqual(cgroup._root_cgroup_path, "/sys/fs/cgroup") + self.assertEqual(cgroup._cgroup_path, "/sys/fs/cgroup/system.slice/extension.service") + self.assertEqual(len(cgroup._enabled_controllers), 0) + + def test_get_cgroup_from_relative_path_should_return_the_correct_paths_for_cgroup_v2(self): with mock_cgroup_v2_environment(self.tmp_dir): - with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path', return_value=None): - cpu, memory = get_cgroup_api().get_controller_root_paths() - self.assertIsNone(cpu, "The root cgroup path is None, so the CPU controller path should be None") - self.assertIsNone(memory, "The root cgroup path is None, so the memory controller path should be None") - - def test_get_controller_root_paths_should_return_None_if_either_controller_not_enabled(self): + cgroup = get_cgroup_api().get_cgroup_from_relative_path(relative_path="some/relative/path", cgroup_name="test_cgroup") + self.assertIsInstance(cgroup, CgroupV2) + self.assertEqual(cgroup._cgroup_name, "test_cgroup") + self.assertEqual(cgroup._root_cgroup_path, "/sys/fs/cgroup") + self.assertEqual(cgroup._cgroup_path, "/sys/fs/cgroup/some/relative/path") + self.assertEqual(len(cgroup._enabled_controllers), 2) + self.assertIn('cpu', cgroup._enabled_controllers) + self.assertIn('memory', cgroup._enabled_controllers) + + def test_get_cgroup_from_relative_path_should_return_empty_paths_if_root_path_empty_v2(self): with mock_cgroup_v2_environment(self.tmp_dir): - with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_controllers_enabled_at_root', return_value=['io', 'memory']): - cpu, memory = get_cgroup_api().get_controller_root_paths() - self.assertIsNone(cpu, "The CPU controller is not enabled, so the CPU controller path should be None") - 
self.assertEqual(memory, '/sys/fs/cgroup', "The root cgroup for the memory controller is incorrect") - - with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_controllers_enabled_at_root', return_value=['cpu', 'io']): - cpu, memory = get_cgroup_api().get_controller_root_paths() - self.assertEqual(cpu, '/sys/fs/cgroup', "The root cgroup for the CPU controller is incorrect") - self.assertIsNone(memory, "The memory controller is not enabled, so the memory controller path should be None") - - def test_get_cpu_and_memory_cgroup_relative_paths_for_process_should_return_the_cgroup_v2_relative_paths(self): + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path', return_value=""): + cgroup = get_cgroup_api().get_cgroup_from_relative_path(relative_path="some/relative/path", cgroup_name="test_cgroup") + self.assertIsInstance(cgroup, CgroupV2) + self.assertEqual(cgroup._cgroup_name, "test_cgroup") + self.assertEqual(cgroup._root_cgroup_path, "") + self.assertEqual(cgroup._cgroup_path, "") + self.assertEqual(len(cgroup._enabled_controllers), 0) + + def test_get_process_cgroup_should_return_the_correct_paths_for_cgroup_v2(self): + with mock_cgroup_v2_environment(self.tmp_dir): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + self.assertIsInstance(cgroup, CgroupV2) + self.assertEqual(cgroup._cgroup_name, "walinuxagent") + self.assertEqual(cgroup._root_cgroup_path, "/sys/fs/cgroup") + self.assertEqual(cgroup._cgroup_path, "/sys/fs/cgroup/system.slice/walinuxagent.service") + self.assertEqual(len(cgroup._enabled_controllers), 2) + self.assertIn('cpu', cgroup._enabled_controllers) + self.assertIn('memory', cgroup._enabled_controllers) + + def test_get_process_cgroup_should_return_empty_paths_if_root_path_empty_v2(self): with mock_cgroup_v2_environment(self.tmp_dir): - cpu, memory = get_cgroup_api().get_process_cgroup_relative_paths('self') - self.assertEqual(cpu, 
"system.slice/walinuxagent.service", "The relative path for the CPU cgroup is incorrect") - self.assertEqual(memory, "system.slice/walinuxagent.service", "The relative memory for the CPU cgroup is incorrect") + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path', return_value=""): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + self.assertIsInstance(cgroup, CgroupV2) + self.assertEqual(cgroup._cgroup_name, "walinuxagent") + self.assertEqual(cgroup._root_cgroup_path, "") + self.assertEqual(cgroup._cgroup_path, "") + self.assertEqual(len(cgroup._enabled_controllers), 0) class SystemdCgroupsApiMockedFileSystemTestCase(_MockedFileSystemTestCase): @@ -483,3 +499,176 @@ def test_cleanup_legacy_cgroups_should_remove_legacy_cgroups(self): self.assertEqual(legacy_cgroups, 2, "cleanup_legacy_cgroups() did not find all the expected cgroups") self.assertFalse(os.path.exists(legacy_cpu_cgroup), "cleanup_legacy_cgroups() did not remove the CPU legacy cgroup") self.assertFalse(os.path.exists(legacy_memory_cgroup), "cleanup_legacy_cgroups() did not remove the memory legacy cgroup") + + +class CgroupsApiv1TestCase(AgentTestCase): + def test_get_supported_controllers_returns_v1_controllers(self): + with mock_cgroup_v1_environment(self.tmp_dir): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + controllers = cgroup.get_supported_controllers() + self.assertEqual(len(controllers), 2) + self.assertIn('cpu,cpuacct', controllers) + self.assertIn('memory', controllers) + + def test_check_in_expected_slice_returns_True_if_all_paths_in_expected_slice(self): + with mock_cgroup_v1_environment(self.tmp_dir): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + self.assertTrue(cgroup.check_in_expected_slice(expected_slice='system.slice')) + + with 
patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_process_relative_controller_paths', return_value={'cpu,cpuacct': 'system.slice/walinuxagent.service'}): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + self.assertTrue(cgroup.check_in_expected_slice(expected_slice='system.slice')) + + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_process_relative_controller_paths', return_value={}): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + self.assertTrue(cgroup.check_in_expected_slice(expected_slice='system.slice')) + + def test_check_in_expected_slice_returns_False_if_any_paths_not_in_expected_slice(self): + with mock_cgroup_v1_environment(self.tmp_dir): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + self.assertFalse(cgroup.check_in_expected_slice(expected_slice='user.slice')) + + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_process_relative_controller_paths', return_value={'cpu,cpuacct': 'system.slice/walinuxagent.service', 'memory': 'user.slice/walinuxagent.service'}): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + self.assertFalse(cgroup.check_in_expected_slice(expected_slice='user.slice')) + + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_process_relative_controller_paths', return_value={'cpu,cpuacct': '', 'memory': ''}): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + self.assertFalse(cgroup.check_in_expected_slice(expected_slice='system.slice')) + + def test_get_controller_metrics_returns_all_supported_controllers_v1(self): + with mock_cgroup_v1_environment(self.tmp_dir): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + metrics = cgroup.get_controller_metrics() + self.assertEqual(len(metrics), 2) + 
self.assertIsInstance(metrics[0], CpuMetrics) + self.assertEqual(metrics[0].name, "walinuxagent") + self.assertEqual(metrics[0].path, "/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service") + self.assertIsInstance(metrics[1], MemoryMetrics) + self.assertEqual(metrics[1].name, "walinuxagent") + self.assertEqual(metrics[1].path, "/sys/fs/cgroup/memory/system.slice/walinuxagent.service") + + def test_get_controller_metrics_returns_only_mounted_controllers_v1(self): + with mock_cgroup_v1_environment(self.tmp_dir): + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct'}): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + metrics = cgroup.get_controller_metrics() + self.assertEqual(len(metrics), 1) + self.assertIsInstance(metrics[0], CpuMetrics) + self.assertEqual(metrics[0].name, "walinuxagent") + self.assertEqual(metrics[0].path, "/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service") + + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'memory': '/sys/fs/cgroup/memory'}): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + metrics = cgroup.get_controller_metrics() + self.assertEqual(len(metrics), 1) + self.assertIsInstance(metrics[0], MemoryMetrics) + self.assertEqual(metrics[0].name, "walinuxagent") + self.assertEqual(metrics[0].path, "/sys/fs/cgroup/memory/system.slice/walinuxagent.service") + + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={}): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + metrics = cgroup.get_controller_metrics() + self.assertEqual(len(metrics), 0) + + def test_get_controller_metrics_returns_only_controllers_at_expected_path_v1(self): + with mock_cgroup_v1_environment(self.tmp_dir): + with 
patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_process_relative_controller_paths', return_value={'cpu,cpuacct': 'system.slice/walinuxagent.service', 'memory': 'unexpected/path'}): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + metrics = cgroup.get_controller_metrics(expected_relative_path="system.slice/walinuxagent.service") + self.assertEqual(len(metrics), 1) + self.assertIsInstance(metrics[0], CpuMetrics) + self.assertEqual(metrics[0].name, "walinuxagent") + self.assertEqual(metrics[0].path, "/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service") + + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_process_relative_controller_paths', return_value={'cpu,cpuacct': 'unexpected/path', 'memory': 'unexpected/path'}): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + metrics = cgroup.get_controller_metrics(expected_relative_path="system.slice/walinuxagent.service") + self.assertEqual(len(metrics), 0) + + def test_get_procs_path_returns_correct_path_v1(self): + with mock_cgroup_v1_environment(self.tmp_dir): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + procs_path = cgroup.get_controller_procs_path(controller='cpu,cpuacct') + self.assertEqual(procs_path, "/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service/cgroup.procs") + + procs_path = cgroup.get_controller_procs_path(controller='memory') + self.assertEqual(procs_path, "/sys/fs/cgroup/memory/system.slice/walinuxagent.service/cgroup.procs") + + def test_get_processes_returns_processes_at_all_controller_paths_v1(self): + with mock_cgroup_v1_environment(self.tmp_dir): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + procs = cgroup.get_processes() + self.assertEqual(len(procs), 3) + self.assertIn(int(123), procs) + self.assertIn(int(234), procs) + self.assertIn(int(345), procs) + + def 
test_get_processes_returns_empty_list_if_no_controllers_mounted_v1(self): + with mock_cgroup_v1_environment(self.tmp_dir): + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={}): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + procs = cgroup.get_processes() + self.assertIsInstance(procs, list) + self.assertEqual(len(procs), 0) + + def test_get_processes_returns_empty_list_if_procs_path_empty_v1(self): + with mock_cgroup_v1_environment(self.tmp_dir): + with patch('azurelinuxagent.ga.cgroupapi.CgroupV1.get_controller_procs_path', return_value=""): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + procs = cgroup.get_processes() + self.assertIsInstance(procs, list) + self.assertEqual(len(procs), 0) + + +class CgroupsApiv2TestCase(AgentTestCase): + def test_get_supported_controllers_returns_v2_controllers(self): + with mock_cgroup_v2_environment(self.tmp_dir): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + controllers = cgroup.get_supported_controllers() + self.assertEqual(len(controllers), 2) + self.assertIn('cpu', controllers) + self.assertIn('memory', controllers) + + def test_check_in_expected_slice_returns_True_if_cgroup_path_in_expected_slice(self): + with mock_cgroup_v2_environment(self.tmp_dir): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + self.assertTrue(cgroup.check_in_expected_slice(expected_slice='system.slice')) + + def test_check_in_expected_slice_returns_False_if_cgroup_path_not_in_expected_slice(self): + with mock_cgroup_v2_environment(self.tmp_dir): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + self.assertFalse(cgroup.check_in_expected_slice(expected_slice='user.slice')) + + with 
patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_process_relative_cgroup_path', return_value=""): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + self.assertFalse(cgroup.check_in_expected_slice(expected_slice='system.slice')) + + def test_get_procs_path_returns_empty_if_root_cgroup_empty_v2(self): + with mock_cgroup_v2_environment(self.tmp_dir): + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path', return_value=""): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + procs_path = cgroup.get_procs_path() + self.assertEqual(procs_path, "") + + def test_get_procs_path_returns_correct_path_v2(self): + with mock_cgroup_v2_environment(self.tmp_dir): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + procs_path = cgroup.get_procs_path() + self.assertEqual(procs_path, "/sys/fs/cgroup/system.slice/walinuxagent.service/cgroup.procs") + + def test_get_processes_returns_processes_at_all_controller_paths_v2(self): + with mock_cgroup_v2_environment(self.tmp_dir): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + procs = cgroup.get_processes() + self.assertEqual(len(procs), 3) + self.assertIn(int(123), procs) + self.assertIn(int(234), procs) + self.assertIn(int(345), procs) + + def test_get_processes_returns_empty_list_if_root_cgroup_empty_v2(self): + with mock_cgroup_v2_environment(self.tmp_dir): + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path', return_value=""): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + procs = cgroup.get_processes() + self.assertEqual(len(procs), 0) diff --git a/tests/ga/test_cgroupconfigurator.py b/tests/ga/test_cgroupconfigurator.py index 5b4b0976e9..1d1465a47d 100644 --- a/tests/ga/test_cgroupconfigurator.py +++ 
b/tests/ga/test_cgroupconfigurator.py @@ -27,7 +27,7 @@ import threading from azurelinuxagent.common import conf -from azurelinuxagent.ga.cgroup import AGENT_NAME_TELEMETRY, MetricsCounter, MetricValue, MetricsCategory, CpuCgroup +from azurelinuxagent.ga.controllermetrics import AGENT_NAME_TELEMETRY, MetricsCounter, MetricValue, MetricsCategory, CpuMetrics from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator, DisableCgroups from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.common.event import WALAEventOperation @@ -272,7 +272,7 @@ def test_remove_extension_slice_should_remove_unit_files(self): CGroupsTelemetry._tracked['/sys/fs/cgroup/cpu,cpuacct/azure.slice/azure-vmextensions.slice/' \ 'azure-vmextensions-Microsoft.CPlat.Extension.slice'] = \ - CpuCgroup('Microsoft.CPlat.Extension', + CpuMetrics('Microsoft.CPlat.Extension', '/sys/fs/cgroup/cpu,cpuacct/azure.slice/azure-vmextensions.slice/azure-vmextensions-Microsoft.CPlat.Extension.slice') configurator.remove_extension_slice(extension_name="Microsoft.CPlat.Extension") @@ -369,10 +369,10 @@ def test_disable_should_reset_cpu_quota_for_all_cgroups(self): configurator.setup_extension_slice(extension_name=extension_name, cpu_quota=5) configurator.set_extension_services_cpu_memory_quota(service_list) CGroupsTelemetry._tracked['/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service'] = \ - CpuCgroup('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service') + CpuMetrics('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service') CGroupsTelemetry._tracked['/sys/fs/cgroup/cpu,cpuacct/azure.slice/azure-vmextensions.slice/' \ 'azure-vmextensions-Microsoft.CPlat.Extension.slice'] = \ - CpuCgroup('Microsoft.CPlat.Extension', + CpuMetrics('Microsoft.CPlat.Extension', '/sys/fs/cgroup/cpu,cpuacct/azure.slice/azure-vmextensions.slice/azure-vmextensions-Microsoft.CPlat.Extension.slice') configurator.disable("UNIT TEST", 
DisableCgroups.ALL) @@ -717,7 +717,7 @@ def test_it_should_stop_tracking_extension_services_cgroups(self): with self._get_cgroup_configurator() as configurator: with patch("os.path.exists") as mock_path: mock_path.return_value = True - CGroupsTelemetry.track_cgroup(CpuCgroup('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service')) + CGroupsTelemetry.track_cgroup(CpuMetrics('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service')) configurator.stop_tracking_extension_services_cgroups(service_list) tracked = CGroupsTelemetry._tracked @@ -776,7 +776,7 @@ def side_effect(path): with patch("os.path.exists") as mock_path: mock_path.side_effect = side_effect CGroupsTelemetry._tracked['/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service'] = \ - CpuCgroup('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service') + CpuMetrics('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service') configurator.stop_tracking_unit_cgroups("extension.service") tracked = CGroupsTelemetry._tracked @@ -911,7 +911,7 @@ def get_completed_process(): agent_processes = [os.getppid(), os.getpid()] + agent_command_processes + [start_extension.systemd_run_pid] other_processes = [1, get_completed_process()] + extension_processes - with patch("azurelinuxagent.ga.cgroupapi._SystemdCgroupApi.get_processes_in_cgroup", return_value=agent_processes + other_processes): + with patch("azurelinuxagent.ga.cgroupapi.CgroupV1.get_processes", return_value=agent_processes + other_processes): with self.assertRaises(CGroupsException) as context_manager: configurator._check_processes_in_agent_cgroup() @@ -1012,7 +1012,7 @@ def test_check_agent_memory_usage_should_raise_a_cgroups_exception_when_the_limi with self.assertRaises(AgentMemoryExceededException) as context_manager: with self._get_cgroup_configurator() as configurator: - with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_tracked_metrics") as 
tracked_metrics: + with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_tracked_metrics") as tracked_metrics: tracked_metrics.return_value = metrics configurator.check_agent_memory_usage() diff --git a/tests/ga/test_cgroupstelemetry.py b/tests/ga/test_cgroupstelemetry.py index 26fcecbf65..457b20e473 100644 --- a/tests/ga/test_cgroupstelemetry.py +++ b/tests/ga/test_cgroupstelemetry.py @@ -19,7 +19,7 @@ import random import time -from azurelinuxagent.ga.cgroup import CpuCgroup, MemoryCgroup +from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.common.utils import fileutil from tests.lib.tools import AgentTestCase, data_dir, patch @@ -105,10 +105,10 @@ def tearDown(self): @staticmethod def _track_new_extension_cgroups(num_extensions): for i in range(num_extensions): - dummy_cpu_cgroup = CpuCgroup("dummy_extension_{0}".format(i), "dummy_cpu_path_{0}".format(i)) + dummy_cpu_cgroup = CpuMetrics("dummy_extension_{0}".format(i), "dummy_cpu_path_{0}".format(i)) CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup) - dummy_memory_cgroup = MemoryCgroup("dummy_extension_{0}".format(i), "dummy_memory_path_{0}".format(i)) + dummy_memory_cgroup = MemoryMetrics("dummy_extension_{0}".format(i), "dummy_memory_path_{0}".format(i)) CGroupsTelemetry.track_cgroup(dummy_memory_cgroup) def _assert_cgroups_are_tracked(self, num_extensions): @@ -136,12 +136,12 @@ def test_telemetry_polling_with_active_cgroups(self, *args): # pylint: disable= self._track_new_extension_cgroups(num_extensions) - with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage") as patch_get_memory_max_usage: - with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage: - with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage: - with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.try_swap_memory_usage") as 
patch_try_swap_memory_usage: - with patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage") as patch_get_cpu_usage: - with patch("azurelinuxagent.ga.cgroup.CGroup.is_active") as patch_is_active: + with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage") as patch_get_memory_max_usage: + with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage") as patch_get_memory_usage: + with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage") as patch_get_memory_usage: + with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.try_swap_memory_usage") as patch_try_swap_memory_usage: + with patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage") as patch_get_cpu_usage: + with patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active") as patch_is_active: patch_is_active.return_value = True current_cpu = 30 @@ -163,10 +163,10 @@ def test_telemetry_polling_with_active_cgroups(self, *args): # pylint: disable= self.assertEqual(len(metrics), num_extensions * num_of_metrics_per_extn_expected) self._assert_polled_metrics_equal(metrics, current_cpu, current_memory, current_max_memory, current_swap_memory) - @patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage", side_effect=raise_ioerror) - @patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage", side_effect=raise_ioerror) - @patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage", side_effect=raise_ioerror) - @patch("azurelinuxagent.ga.cgroup.CGroup.is_active", return_value=False) + @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage", side_effect=raise_ioerror) + @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage", side_effect=raise_ioerror) + @patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage", side_effect=raise_ioerror) + @patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active", return_value=False) def 
test_telemetry_polling_with_inactive_cgroups(self, *_): num_extensions = 5 no_extensions_expected = 0 # pylint: disable=unused-variable @@ -182,10 +182,10 @@ def test_telemetry_polling_with_inactive_cgroups(self, *_): self.assertEqual(len(metrics), 0) - @patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage") - @patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage") - @patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage") - @patch("azurelinuxagent.ga.cgroup.CGroup.is_active") + @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage") + @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage") + @patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage") + @patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active") def test_telemetry_polling_with_changing_cgroups_state(self, patch_is_active, patch_get_cpu_usage, # pylint: disable=unused-argument patch_get_mem, patch_get_max_mem, *args): num_extensions = 5 @@ -274,11 +274,11 @@ def test_telemetry_polling_to_generate_transient_logs_index_error(self): CGroupsTelemetry.poll_all_tracked() self.assertEqual(expected_call_count, patch_periodic_warn.call_count) - @patch("azurelinuxagent.ga.cgroup.MemoryCgroup.try_swap_memory_usage") - @patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage") - @patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage") - @patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage") - @patch("azurelinuxagent.ga.cgroup.CGroup.is_active") + @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.try_swap_memory_usage") + @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage") + @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage") + @patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage") + @patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active") def test_telemetry_calculations(self, 
patch_is_active, patch_get_cpu_usage, patch_get_memory_usage, patch_get_memory_max_usage, patch_try_memory_swap_usage, *args): # pylint: disable=unused-argument num_polls = 10 @@ -321,13 +321,13 @@ def test_cgroup_is_tracked(self, *args): # pylint: disable=unused-argument self.assertFalse(CGroupsTelemetry.is_tracked("not_present_cpu_dummy_path")) self.assertFalse(CGroupsTelemetry.is_tracked("not_present_memory_dummy_path")) - @patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage", side_effect=raise_ioerror) + @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage", side_effect=raise_ioerror) def test_process_cgroup_metric_with_no_memory_cgroup_mounted(self, *args): # pylint: disable=unused-argument num_extensions = 5 self._track_new_extension_cgroups(num_extensions) - with patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage") as patch_get_cpu_usage: - with patch("azurelinuxagent.ga.cgroup.CGroup.is_active") as patch_is_active: + with patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage") as patch_get_cpu_usage: + with patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active") as patch_is_active: patch_is_active.return_value = True current_cpu = 30 @@ -341,16 +341,16 @@ def test_process_cgroup_metric_with_no_memory_cgroup_mounted(self, *args): # py self.assertEqual(len(metrics), num_extensions * 1) # Only CPU populated self._assert_polled_metrics_equal(metrics, current_cpu, 0, 0, 0) - @patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage", side_effect=raise_ioerror) + @patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage", side_effect=raise_ioerror) def test_process_cgroup_metric_with_no_cpu_cgroup_mounted(self, *args): # pylint: disable=unused-argument num_extensions = 5 self._track_new_extension_cgroups(num_extensions) - with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage") as patch_get_memory_max_usage: - with 
patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage: - with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.try_swap_memory_usage") as patch_try_swap_memory_usage: - with patch("azurelinuxagent.ga.cgroup.CGroup.is_active") as patch_is_active: + with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage") as patch_get_memory_max_usage: + with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage") as patch_get_memory_usage: + with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.try_swap_memory_usage") as patch_try_swap_memory_usage: + with patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active") as patch_is_active: patch_is_active.return_value = True current_memory = 209715200 @@ -367,14 +367,14 @@ def test_process_cgroup_metric_with_no_cpu_cgroup_mounted(self, *args): # pylin self.assertEqual(len(metrics), num_extensions * 3) self._assert_polled_metrics_equal(metrics, 0, current_memory, current_max_memory, current_swap_memory) - @patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage", side_effect=raise_ioerror) - @patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage", side_effect=raise_ioerror) - @patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage", side_effect=raise_ioerror) + @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage", side_effect=raise_ioerror) + @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage", side_effect=raise_ioerror) + @patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage", side_effect=raise_ioerror) def test_extension_telemetry_not_sent_for_empty_perf_metrics(self, *args): # pylint: disable=unused-argument num_extensions = 5 self._track_new_extension_cgroups(num_extensions) - with patch("azurelinuxagent.ga.cgroup.CGroup.is_active") as patch_is_active: + with patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active") as 
patch_is_active: patch_is_active.return_value = False poll_count = 1 @@ -383,9 +383,9 @@ def test_extension_telemetry_not_sent_for_empty_perf_metrics(self, *args): # py metrics = CGroupsTelemetry.poll_all_tracked() self.assertEqual(0, len(metrics)) - @patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage") - @patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_throttled_time") - @patch("azurelinuxagent.ga.cgroup.CGroup.is_active") + @patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage") + @patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_throttled_time") + @patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active") def test_cgroup_telemetry_should_not_report_cpu_negative_value(self, patch_is_active, path_get_throttled_time, patch_get_cpu_usage): num_polls = 5 @@ -396,7 +396,7 @@ def test_cgroup_telemetry_should_not_report_cpu_negative_value(self, patch_is_ac cpu_percent_values.append(-1) cpu_throttled_values = [random.randint(0, 60 * 60) for _ in range(num_polls)] - dummy_cpu_cgroup = CpuCgroup("dummy_extension_name", "dummy_cpu_path") + dummy_cpu_cgroup = CpuMetrics("dummy_extension_name", "dummy_cpu_path") CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup) self.assertEqual(1, len(CGroupsTelemetry._tracked)) diff --git a/tests/ga/test_collect_logs.py b/tests/ga/test_collect_logs.py index 4ac3f03fb4..2b8c4f412e 100644 --- a/tests/ga/test_collect_logs.py +++ b/tests/ga/test_collect_logs.py @@ -18,7 +18,7 @@ import os from azurelinuxagent.common import logger, conf -from azurelinuxagent.ga.cgroup import CpuCgroup, MemoryCgroup, MetricValue +from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics, MetricValue from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator from azurelinuxagent.common.logger import Logger from azurelinuxagent.common.protocol.util import ProtocolUtil @@ -197,8 +197,8 @@ def run_and_wait(): monitor_log_collector.join() cgroups = [ - CpuCgroup("test", "dummy_cpu_path"), - 
MemoryCgroup("test", "dummy_memory_path") + CpuMetrics("test", "dummy_cpu_path"), + MemoryMetrics("test", "dummy_memory_path") ] monitor_log_collector = get_log_collector_monitor_handler(cgroups) monitor_log_collector.run_and_wait = run_and_wait diff --git a/tests/ga/test_cgroups.py b/tests/ga/test_controllermetrics.py similarity index 59% rename from tests/ga/test_cgroups.py rename to tests/ga/test_controllermetrics.py index 0ffcfed1bd..cdd31395f1 100644 --- a/tests/ga/test_cgroups.py +++ b/tests/ga/test_controllermetrics.py @@ -22,7 +22,7 @@ import random import shutil -from azurelinuxagent.ga.cgroup import CpuCgroup, MemoryCgroup, MetricsCounter, CounterNotFound +from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics, MetricsCounter, CounterNotFound from azurelinuxagent.common.exception import CGroupsException from azurelinuxagent.common.osutil import get_osutil from azurelinuxagent.common.utils import fileutil @@ -36,35 +36,35 @@ def consume_cpu_time(): return waste -class TestCGroup(AgentTestCase): +class TestControllerMetrics(AgentTestCase): def test_is_active(self): - test_cgroup = CpuCgroup("test_extension", self.tmp_dir) - self.assertEqual(False, test_cgroup.is_active()) + test_metrics = CpuMetrics("test_extension", self.tmp_dir) + self.assertEqual(False, test_metrics.is_active()) with open(os.path.join(self.tmp_dir, "tasks"), mode="wb") as tasks: tasks.write(str(1000).encode()) - self.assertEqual(True, test_cgroup.is_active()) + self.assertEqual(True, test_metrics.is_active()) @patch("azurelinuxagent.common.logger.periodic_warn") def test_is_active_file_not_present(self, patch_periodic_warn): - test_cgroup = CpuCgroup("test_extension", self.tmp_dir) - self.assertEqual(False, test_cgroup.is_active()) + test_metrics = CpuMetrics("test_extension", self.tmp_dir) + self.assertEqual(False, test_metrics.is_active()) - test_cgroup = MemoryCgroup("test_extension", os.path.join(self.tmp_dir, "this_cgroup_does_not_exist")) - 
self.assertEqual(False, test_cgroup.is_active()) + test_metrics = MemoryMetrics("test_extension", os.path.join(self.tmp_dir, "this_cgroup_does_not_exist")) + self.assertEqual(False, test_metrics.is_active()) self.assertEqual(0, patch_periodic_warn.call_count) @patch("azurelinuxagent.common.logger.periodic_warn") def test_is_active_incorrect_file(self, patch_periodic_warn): open(os.path.join(self.tmp_dir, "tasks"), mode="wb").close() - test_cgroup = CpuCgroup("test_extension", os.path.join(self.tmp_dir, "tasks")) - self.assertEqual(False, test_cgroup.is_active()) + test_metrics = CpuMetrics("test_extension", os.path.join(self.tmp_dir, "tasks")) + self.assertEqual(False, test_metrics.is_active()) self.assertEqual(1, patch_periodic_warn.call_count) -class TestCpuCgroup(AgentTestCase): +class TestCpuMetrics(AgentTestCase): @classmethod def setUpClass(cls): AgentTestCase.setUpClass() @@ -96,147 +96,147 @@ def tearDownClass(cls): def setUp(self): AgentTestCase.setUp(self) - TestCpuCgroup.mock_read_file_map.clear() + TestCpuMetrics.mock_read_file_map.clear() def test_initialize_cpu_usage_should_set_current_cpu_usage(self): - cgroup = CpuCgroup("test", "/sys/fs/cgroup/cpu/system.slice/test") + metrics = CpuMetrics("test", "/sys/fs/cgroup/cpu/system.slice/test") - TestCpuCgroup.mock_read_file_map = { + TestCpuMetrics.mock_read_file_map = { "/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t0"), - os.path.join(cgroup.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t0") + os.path.join(metrics.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t0") } - cgroup.initialize_cpu_usage() + metrics.initialize_cpu_usage() - self.assertEqual(cgroup._current_cgroup_cpu, 63763) - self.assertEqual(cgroup._current_system_cpu, 5496872) + self.assertEqual(metrics._current_cgroup_cpu, 63763) + self.assertEqual(metrics._current_system_cpu, 5496872) def test_get_cpu_usage_should_return_the_cpu_usage_since_its_last_invocation(self): osutil = 
get_osutil() - cgroup = CpuCgroup("test", "/sys/fs/cgroup/cpu/system.slice/test") + metrics = CpuMetrics("test", "/sys/fs/cgroup/cpu/system.slice/test") - TestCpuCgroup.mock_read_file_map = { + TestCpuMetrics.mock_read_file_map = { "/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t0"), - os.path.join(cgroup.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t0") + os.path.join(metrics.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t0") } - cgroup.initialize_cpu_usage() + metrics.initialize_cpu_usage() - TestCpuCgroup.mock_read_file_map = { + TestCpuMetrics.mock_read_file_map = { "/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t1"), - os.path.join(cgroup.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t1") + os.path.join(metrics.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t1") } - cpu_usage = cgroup.get_cpu_usage() + cpu_usage = metrics.get_cpu_usage() self.assertEqual(cpu_usage, round(100.0 * 0.000307697876885 * osutil.get_processor_cores(), 3)) - TestCpuCgroup.mock_read_file_map = { + TestCpuMetrics.mock_read_file_map = { "/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t2"), - os.path.join(cgroup.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t2") + os.path.join(metrics.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t2") } - cpu_usage = cgroup.get_cpu_usage() + cpu_usage = metrics.get_cpu_usage() self.assertEqual(cpu_usage, round(100.0 * 0.000445181085968 * osutil.get_processor_cores(), 3)) def test_initialize_cpu_usage_should_set_the_cgroup_usage_to_0_when_the_cgroup_does_not_exist(self): - cgroup = CpuCgroup("test", "/sys/fs/cgroup/cpu/system.slice/test") + metrics = CpuMetrics("test", "/sys/fs/cgroup/cpu/system.slice/test") io_error_2 = IOError() io_error_2.errno = errno.ENOENT # "No such directory" - TestCpuCgroup.mock_read_file_map = { + TestCpuMetrics.mock_read_file_map = { "/proc/stat": 
os.path.join(data_dir, "cgroups", "proc_stat_t0"), - os.path.join(cgroup.path, "cpuacct.stat"): io_error_2 + os.path.join(metrics.path, "cpuacct.stat"): io_error_2 } - cgroup.initialize_cpu_usage() + metrics.initialize_cpu_usage() - self.assertEqual(cgroup._current_cgroup_cpu, 0) - self.assertEqual(cgroup._current_system_cpu, 5496872) # check the system usage just for test sanity + self.assertEqual(metrics._current_cgroup_cpu, 0) + self.assertEqual(metrics._current_system_cpu, 5496872) # check the system usage just for test sanity def test_initialize_cpu_usage_should_raise_an_exception_when_called_more_than_once(self): - cgroup = CpuCgroup("test", "/sys/fs/cgroup/cpu/system.slice/test") + metrics = CpuMetrics("test", "/sys/fs/cgroup/cpu/system.slice/test") - TestCpuCgroup.mock_read_file_map = { + TestCpuMetrics.mock_read_file_map = { "/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t0"), - os.path.join(cgroup.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t0") + os.path.join(metrics.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t0") } - cgroup.initialize_cpu_usage() + metrics.initialize_cpu_usage() with self.assertRaises(CGroupsException): - cgroup.initialize_cpu_usage() + metrics.initialize_cpu_usage() def test_get_cpu_usage_should_raise_an_exception_when_initialize_cpu_usage_has_not_been_invoked(self): - cgroup = CpuCgroup("test", "/sys/fs/cgroup/cpu/system.slice/test") + metrics = CpuMetrics("test", "/sys/fs/cgroup/cpu/system.slice/test") with self.assertRaises(CGroupsException): - cpu_usage = cgroup.get_cpu_usage() # pylint: disable=unused-variable + cpu_usage = metrics.get_cpu_usage() # pylint: disable=unused-variable def test_get_throttled_time_should_return_the_value_since_its_last_invocation(self): test_file = os.path.join(self.tmp_dir, "cpu.stat") shutil.copyfile(os.path.join(data_dir, "cgroups", "cpu.stat_t0"), test_file) # throttled_time = 50 - cgroup = CpuCgroup("test", self.tmp_dir) - 
cgroup.initialize_cpu_usage() + metrics = CpuMetrics("test", self.tmp_dir) + metrics.initialize_cpu_usage() shutil.copyfile(os.path.join(data_dir, "cgroups", "cpu.stat_t1"), test_file) # throttled_time = 2075541442327 - throttled_time = cgroup.get_cpu_throttled_time() + throttled_time = metrics.get_cpu_throttled_time() self.assertEqual(throttled_time, float(2075541442327 - 50) / 1E9, "The value of throttled_time is incorrect") def test_get_tracked_metrics_should_return_the_throttled_time(self): - cgroup = CpuCgroup("test", os.path.join(data_dir, "cgroups")) - cgroup.initialize_cpu_usage() + metrics = CpuMetrics("test", os.path.join(data_dir, "cgroups")) + metrics.initialize_cpu_usage() def find_throttled_time(metrics): return [m for m in metrics if m.counter == MetricsCounter.THROTTLED_TIME] - found = find_throttled_time(cgroup.get_tracked_metrics()) + found = find_throttled_time(metrics.get_tracked_metrics()) self.assertTrue(len(found) == 0, "get_tracked_metrics should not fetch the throttled time by default. Found: {0}".format(found)) - found = find_throttled_time(cgroup.get_tracked_metrics(track_throttled_time=True)) + found = find_throttled_time(metrics.get_tracked_metrics(track_throttled_time=True)) self.assertTrue(len(found) == 1, "get_tracked_metrics should have fetched the throttled time by default. 
Found: {0}".format(found)) -class TestMemoryCgroup(AgentTestCase): +class TestMemoryMetrics(AgentTestCase): def test_get_metrics(self): - test_mem_cg = MemoryCgroup("test_extension", os.path.join(data_dir, "cgroups", "memory_mount")) + test_mem_metrics = MemoryMetrics("test_extension", os.path.join(data_dir, "cgroups", "memory_mount")) - memory_usage = test_mem_cg.get_memory_usage() + memory_usage = test_mem_metrics.get_memory_usage() self.assertEqual(150000, memory_usage) - max_memory_usage = test_mem_cg.get_max_memory_usage() + max_memory_usage = test_mem_metrics.get_max_memory_usage() self.assertEqual(1000000, max_memory_usage) - swap_memory_usage = test_mem_cg.try_swap_memory_usage() + swap_memory_usage = test_mem_metrics.try_swap_memory_usage() self.assertEqual(20000, swap_memory_usage) def test_get_metrics_when_files_not_present(self): - test_mem_cg = MemoryCgroup("test_extension", os.path.join(data_dir, "cgroups")) + test_mem_metrics = MemoryMetrics("test_extension", os.path.join(data_dir, "cgroups")) with self.assertRaises(IOError) as e: - test_mem_cg.get_memory_usage() + test_mem_metrics.get_memory_usage() self.assertEqual(e.exception.errno, errno.ENOENT) with self.assertRaises(IOError) as e: - test_mem_cg.get_max_memory_usage() + test_mem_metrics.get_max_memory_usage() self.assertEqual(e.exception.errno, errno.ENOENT) with self.assertRaises(IOError) as e: - test_mem_cg.try_swap_memory_usage() + test_mem_metrics.try_swap_memory_usage() self.assertEqual(e.exception.errno, errno.ENOENT) def test_get_memory_usage_counters_not_found(self): - test_mem_cg = MemoryCgroup("test_extension", os.path.join(data_dir, "cgroups", "missing_memory_counters")) + test_mem_metrics = MemoryMetrics("test_extension", os.path.join(data_dir, "cgroups", "missing_memory_counters")) with self.assertRaises(CounterNotFound): - test_mem_cg.get_memory_usage() + test_mem_metrics.get_memory_usage() - swap_memory_usage = test_mem_cg.try_swap_memory_usage() + swap_memory_usage = 
test_mem_metrics.try_swap_memory_usage() self.assertEqual(0, swap_memory_usage) diff --git a/tests/ga/test_monitor.py b/tests/ga/test_monitor.py index 1dbec27c39..420645fe0e 100644 --- a/tests/ga/test_monitor.py +++ b/tests/ga/test_monitor.py @@ -21,7 +21,7 @@ import string from azurelinuxagent.common import event, logger -from azurelinuxagent.ga.cgroup import CpuCgroup, MemoryCgroup, MetricValue, _REPORT_EVERY_HOUR +from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics, MetricValue, _REPORT_EVERY_HOUR from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.common.event import EVENTS_DIRECTORY from azurelinuxagent.common.protocol.healthservice import HealthService @@ -222,7 +222,7 @@ def test_send_extension_metrics_telemetry_for_empty_cgroup(self, patch_poll_all_ self.assertEqual(0, patch_add_metric.call_count) @patch('azurelinuxagent.common.event.EventLogger.add_metric') - @patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage") + @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage") @patch('azurelinuxagent.common.logger.Logger.periodic_warn') def test_send_extension_metrics_telemetry_handling_memory_cgroup_exceptions_errno2(self, patch_periodic_warn, # pylint: disable=unused-argument patch_get_memory_usage, @@ -231,14 +231,14 @@ def test_send_extension_metrics_telemetry_handling_memory_cgroup_exceptions_errn ioerror.errno = 2 patch_get_memory_usage.side_effect = ioerror - CGroupsTelemetry._tracked["/test/path"] = MemoryCgroup("cgroup_name", "/test/path") + CGroupsTelemetry._tracked["/test/path"] = MemoryMetrics("_cgroup_name", "/test/path") PollResourceUsage().run() self.assertEqual(0, patch_periodic_warn.call_count) self.assertEqual(0, patch_add_metric.call_count) # No metrics should be sent. 
@patch('azurelinuxagent.common.event.EventLogger.add_metric') - @patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage") + @patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage") @patch('azurelinuxagent.common.logger.Logger.periodic_warn') def test_send_extension_metrics_telemetry_handling_cpu_cgroup_exceptions_errno2(self, patch_periodic_warn, # pylint: disable=unused-argument patch_cpu_usage, patch_add_metric, @@ -247,7 +247,7 @@ def test_send_extension_metrics_telemetry_handling_cpu_cgroup_exceptions_errno2( ioerror.errno = 2 patch_cpu_usage.side_effect = ioerror - CGroupsTelemetry._tracked["/test/path"] = CpuCgroup("cgroup_name", "/test/path") + CGroupsTelemetry._tracked["/test/path"] = CpuMetrics("_cgroup_name", "/test/path") PollResourceUsage().run() self.assertEqual(0, patch_periodic_warn.call_count) diff --git a/tests/lib/mock_cgroup_environment.py b/tests/lib/mock_cgroup_environment.py index d9f79cb6a1..a8f5fa9a3a 100644 --- a/tests/lib/mock_cgroup_environment.py +++ b/tests/lib/mock_cgroup_environment.py @@ -122,7 +122,9 @@ _MOCKED_FILES_V1 = [ ("/proc/self/cgroup", os.path.join(data_dir, 'cgroups', 'v1', 'proc_self_cgroup')), - (r"/proc/[0-9]+/cgroup", os.path.join(data_dir, 'cgroups', 'v1', 'proc_pid_cgroup')) + (r"/proc/[0-9]+/cgroup", os.path.join(data_dir, 'cgroups', 'v1', 'proc_pid_cgroup')), + (r"/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service/cgroup.procs", os.path.join(data_dir, 'cgroups', 'cgroup.procs')), + (r"/sys/fs/cgroup/memory/system.slice/walinuxagent.service/cgroup.procs", os.path.join(data_dir, 'cgroups', 'cgroup.procs')) ] _MOCKED_FILES_V2 = [ @@ -130,7 +132,8 @@ (r"/proc/[0-9]+/cgroup", os.path.join(data_dir, 'cgroups', 'v2', 'proc_pid_cgroup')), ("/sys/fs/cgroup/cgroup.subtree_control", os.path.join(data_dir, 'cgroups', 'v2', 'sys_fs_cgroup_cgroup.subtree_control')), ("/sys/fs/cgroup/azure.slice/cgroup.subtree_control", os.path.join(data_dir, 'cgroups', 'v2', 'sys_fs_cgroup_cgroup.subtree_control')), - 
("/sys/fs/cgroup/azure.slice/walinuxagent.service/cgroup.subtree_control", os.path.join(data_dir, 'cgroups', 'v2', 'sys_fs_cgroup_cgroup.subtree_control_empty')) + ("/sys/fs/cgroup/azure.slice/walinuxagent.service/cgroup.subtree_control", os.path.join(data_dir, 'cgroups', 'v2', 'sys_fs_cgroup_cgroup.subtree_control_empty')), + (r"/sys/fs/cgroup/system.slice/walinuxagent.service/cgroup.procs", os.path.join(data_dir, 'cgroups', 'cgroup.procs')) ] _MOCKED_FILES_HYBRID = [ diff --git a/tests/test_agent.py b/tests/test_agent.py index 4b643ca36f..df1a7ca131 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -24,8 +24,9 @@ from azurelinuxagent.common.exception import CGroupsException from azurelinuxagent.ga import logcollector, cgroupconfigurator from azurelinuxagent.common.utils import fileutil -from azurelinuxagent.ga.cgroupapi import get_cgroup_api, InvalidCgroupMountpointException +from azurelinuxagent.ga.cgroupapi import InvalidCgroupMountpointException, CgroupV1 from azurelinuxagent.ga.collect_logs import CollectLogsHandler +from azurelinuxagent.ga.controllermetrics import AGENT_LOG_COLLECTOR from tests.lib.mock_cgroup_environment import mock_cgroup_v1_environment from tests.lib.tools import AgentTestCase, data_dir, Mock, patch @@ -247,16 +248,24 @@ def test_calls_collect_logs_on_valid_cgroups_v1(self, mock_log_collector): CollectLogsHandler.enable_monitor_cgroups_check() mock_log_collector.run = Mock() - # Mock cgroup paths so process is in the log collector slice - def mock_cgroup_paths(*args, **kwargs): - if args and args[0] == "self": - relative_path = "{0}/{1}".format(cgroupconfigurator.LOGCOLLECTOR_SLICE, logcollector.CGROUPS_UNIT) - return (relative_path, relative_path) - return get_cgroup_api().get_process_cgroup_relative_paths(*args, **kwargs) + # Mock cgroup so process is in the log collector slice + def mock_cgroup(*args, **kwargs): # pylint: disable=W0613 + relative_path = "{0}/{1}".format(cgroupconfigurator.LOGCOLLECTOR_SLICE, 
logcollector.CGROUPS_UNIT) + return CgroupV1( + cgroup_name=AGENT_LOG_COLLECTOR, + controller_mountpoints={ + 'cpu,cpuacct':"/sys/fs/cgroup/cpu,cpuacct", + 'memory':"/sys/fs/cgroup/memory" + }, + controller_paths={ + 'cpu,cpuacct':"/sys/fs/cgroup/cpu,cpuacct/{0}".format(relative_path), + 'memory':"/sys/fs/cgroup/memory/{0}".format(relative_path) + } + ) with mock_cgroup_v1_environment(self.tmp_dir): - with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup_paths", - side_effect=mock_cgroup_paths): + with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup", + side_effect=mock_cgroup): agent = Agent(False, conf_file_path=os.path.join(data_dir, "test_waagent.conf")) agent.collect_logs(is_full_mode=True) @@ -296,17 +305,26 @@ def test_doesnt_call_collect_logs_on_invalid_cgroups_v1(self, mock_log_collector CollectLogsHandler.enable_monitor_cgroups_check() mock_log_collector.run = Mock() - # Mock cgroup paths so process is in incorrect slice - def mock_cgroup_paths(*args, **kwargs): - if args and args[0] == "self": - return ("NOT_THE_CORRECT_PATH", "NOT_THE_CORRECT_PATH") - return get_cgroup_api().get_process_cgroup_relative_paths(*args, **kwargs) + # Mock cgroup so process is in incorrect slice + def mock_cgroup(*args, **kwargs): # pylint: disable=W0613 + relative_path = "NOT_THE_CORRECT_PATH" + return CgroupV1( + cgroup_name=AGENT_LOG_COLLECTOR, + controller_mountpoints={ + 'cpu,cpuacct': "/sys/fs/cgroup/cpu,cpuacct", + 'memory': "/sys/fs/cgroup/memory" + }, + controller_paths={ + 'cpu,cpuacct': "/sys/fs/cgroup/cpu,cpuacct/{0}".format(relative_path), + 'memory': "/sys/fs/cgroup/memory/{0}".format(relative_path) + } + ) def raise_on_sys_exit(*args): raise RuntimeError(args[0] if args else "Exiting") with mock_cgroup_v1_environment(self.tmp_dir): - with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup_paths", side_effect=mock_cgroup_paths): + with 
patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup", side_effect=mock_cgroup): agent = Agent(False, conf_file_path=os.path.join(data_dir, "test_waagent.conf")) with patch("sys.exit", side_effect=raise_on_sys_exit) as mock_exit: @@ -346,19 +364,25 @@ def test_doesnt_call_collect_logs_if_either_controller_not_mounted(self, mock_lo CollectLogsHandler.enable_monitor_cgroups_check() mock_log_collector.run = Mock() - # Mock cgroup paths so process is in the log collector slice and cpu is not mounted - def mock_cgroup_paths(*args, **kwargs): - if args and args[0] == "self": - relative_path = "{0}/{1}".format(cgroupconfigurator.LOGCOLLECTOR_SLICE, logcollector.CGROUPS_UNIT) - return (None, relative_path) - return get_cgroup_api().get_process_cgroup_relative_paths(*args, **kwargs) + # Mock cgroup so process is in the log collector slice and cpu is not mounted + def mock_cgroup(*args, **kwargs): # pylint: disable=W0613 + relative_path = "{0}/{1}".format(cgroupconfigurator.LOGCOLLECTOR_SLICE, logcollector.CGROUPS_UNIT) + return CgroupV1( + cgroup_name=AGENT_LOG_COLLECTOR, + controller_mountpoints={ + 'memory': "/sys/fs/cgroup/memory" + }, + controller_paths={ + 'memory': "/sys/fs/cgroup/memory/{0}".format(relative_path) + } + ) def raise_on_sys_exit(*args): raise RuntimeError(args[0] if args else "Exiting") with mock_cgroup_v1_environment(self.tmp_dir): - with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup_paths", - side_effect=mock_cgroup_paths): + with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup", + side_effect=mock_cgroup): agent = Agent(False, conf_file_path=os.path.join(data_dir, "test_waagent.conf")) with patch("sys.exit", side_effect=raise_on_sys_exit) as mock_exit: diff --git a/tests_e2e/tests/lib/cgroup_helpers.py b/tests_e2e/tests/lib/cgroup_helpers.py index 1fe21c329a..c3bb468b02 100644 --- a/tests_e2e/tests/lib/cgroup_helpers.py +++ b/tests_e2e/tests/lib/cgroup_helpers.py @@ -7,7 +7,7 
@@ from azurelinuxagent.common.osutil import systemd from azurelinuxagent.common.utils import shellutil from azurelinuxagent.common.version import DISTRO_NAME, DISTRO_VERSION -from azurelinuxagent.ga.cgroupapi import get_cgroup_api +from azurelinuxagent.ga.cgroupapi import get_cgroup_api, SystemdCgroupApiv1 from tests_e2e.tests.lib.agent_log import AgentLog from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.retry import retry_if_false @@ -164,9 +164,14 @@ def check_log_message(message, after_timestamp=datetime.datetime.min): return False -def get_unit_cgroup_paths(unit_name): +def get_unit_cgroup_proc_path(unit_name, controller): """ - Returns the cgroup paths for the given unit + Returns the cgroup.procs path for the given unit and controller. """ cgroups_api = get_cgroup_api() - return cgroups_api.get_unit_cgroup_paths(unit_name) + unit_cgroup = cgroups_api.get_unit_cgroup(unit_name=unit_name, cgroup_name="test cgroup") + if isinstance(cgroups_api, SystemdCgroupApiv1): + return unit_cgroup.get_controller_procs_path(controller=controller) + else: + return unit_cgroup.get_procs_path() + diff --git a/tests_e2e/tests/scripts/agent_cgroups_process_check-unknown_process_check.py b/tests_e2e/tests/scripts/agent_cgroups_process_check-unknown_process_check.py index d1b3014a03..fff5746cce 100755 --- a/tests_e2e/tests/scripts/agent_cgroups_process_check-unknown_process_check.py +++ b/tests_e2e/tests/scripts/agent_cgroups_process_check-unknown_process_check.py @@ -18,14 +18,13 @@ # This script forces the process check by putting unknown process in the agent's cgroup -import os import subprocess import datetime from assertpy import fail from azurelinuxagent.common.utils import shellutil -from tests_e2e.tests.lib.cgroup_helpers import check_agent_quota_disabled, check_log_message, get_unit_cgroup_paths, AGENT_SERVICE_NAME +from tests_e2e.tests.lib.cgroup_helpers import check_agent_quota_disabled, check_log_message, get_unit_cgroup_proc_path, 
AGENT_SERVICE_NAME from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.retry import retry_if_false @@ -62,8 +61,8 @@ def disable_agent_cgroups_with_unknown_process(pid): Note: System may kick the added process out of the cgroups, keeps adding until agent detect that process """ - def unknown_process_found(cpu_cgroup): - cgroup_procs_path = os.path.join(cpu_cgroup, "cgroup.procs") + def unknown_process_found(): + cgroup_procs_path = get_unit_cgroup_proc_path(AGENT_SERVICE_NAME, 'cpu,cpuacct') log.info("Adding dummy process %s to cgroup.procs file %s", pid, cgroup_procs_path) try: with open(cgroup_procs_path, 'a') as f: @@ -81,9 +80,7 @@ def unknown_process_found(cpu_cgroup): pid)), attempts=3) return found and retry_if_false(check_agent_quota_disabled, attempts=3) - cpu_cgroup, _ = get_unit_cgroup_paths(AGENT_SERVICE_NAME) - - found: bool = retry_if_false(lambda: unknown_process_found(cpu_cgroup), attempts=3) + found: bool = retry_if_false(unknown_process_found, attempts=3) if not found: fail("The agent did not detect unknown process: {0}".format(pid)) From e871f222102d32e221daf1e4345a66c103a190bb Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Mon, 17 Jun 2024 16:48:44 -0700 Subject: [PATCH 211/240] updated PR template (#3144) --- .github/PULL_REQUEST_TEMPLATE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index fdcc07c954..9ac83e6c63 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -12,6 +12,7 @@ This will expedite the process of getting your pull request merged and avoid ext --- ### PR information +- [ ] Ensure development PR is based on the `develop` branch. - [ ] The title of the PR is clear and informative. - [ ] There are a small number of commits, each of which has an informative message. This means that previously merged commits do not appear in the history of the PR. 
For information on cleaning up the commits in your pull request, [see this page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md). - [ ] If applicable, the PR references the bug/issue that it fixes in the description. From bf45d50a1deeb48e3eeeeb0d6af6daa8afba12f7 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 18 Jun 2024 17:15:19 -0700 Subject: [PATCH 212/240] fixing custom image test run (#3147) --- tests_e2e/orchestrator/docker/Dockerfile | 2 +- tests_e2e/test_suites/agent_wait_for_cloud_init.yml | 2 +- .../agent_wait_for_cloud_init/add_cloud_init_script.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests_e2e/orchestrator/docker/Dockerfile b/tests_e2e/orchestrator/docker/Dockerfile index 219c9b8694..f71d6c02e1 100644 --- a/tests_e2e/orchestrator/docker/Dockerfile +++ b/tests_e2e/orchestrator/docker/Dockerfile @@ -67,7 +67,7 @@ RUN \ cd $HOME && \ git clone https://github.com/microsoft/lisa.git && \ cd lisa && \ - git checkout 95c09ff7d5b6e71d1642a628607ac9bb441c69f5 && \ + git checkout 0e37ed07304b74362cfb3d3c55ac932d3bdc660c && \ \ python3 -m pip install --upgrade pip && \ python3 -m pip install --editable .[azure,libvirt] --config-settings editable_mode=compat && \ diff --git a/tests_e2e/test_suites/agent_wait_for_cloud_init.yml b/tests_e2e/test_suites/agent_wait_for_cloud_init.yml index 727803811e..09c00aa7ee 100644 --- a/tests_e2e/test_suites/agent_wait_for_cloud_init.yml +++ b/tests_e2e/test_suites/agent_wait_for_cloud_init.yml @@ -2,7 +2,7 @@ # This test verifies that the Agent waits for cloud-init to complete before it starts processing extensions. # # NOTE: This test is not fully automated. It requires a custom image where the test Agent has been installed and Extensions.WaitForCloudInit is enabled in waagent.conf. 
-# To execute it manually, create a custom image and use the 'image' runbook parameter, for example: "-v: image:gallery/wait-cloud-init/1.0.1". +# To execute it manually, create a custom image and use the 'image' runbook parameter, for example: "-v: image:gallery/wait-cloud-init/1.0.2". # name: "AgentWaitForCloudInit" tests: diff --git a/tests_e2e/tests/agent_wait_for_cloud_init/add_cloud_init_script.py b/tests_e2e/tests/agent_wait_for_cloud_init/add_cloud_init_script.py index 14f2cdecac..0c1a6611b9 100755 --- a/tests_e2e/tests/agent_wait_for_cloud_init/add_cloud_init_script.py +++ b/tests_e2e/tests/agent_wait_for_cloud_init/add_cloud_init_script.py @@ -35,10 +35,10 @@ def update(self, template: Dict[str, Any], is_lisa_template: bool) -> None: # # cloud-init configuration needs to be added in the osProfile.customData property as a base64-encoded string. # - # LISA uses the getOSProfile function to generate the value for osProfile; add customData to its output, checking that we do not + # LISA uses the generateOsProfile function to generate the value for osProfile; add customData to its output, checking that we do not # override any existing value (the current LISA template does not have any). # - # "getOSProfile": { + # "generateOsProfile": { # "parameters": [ # ... # ], @@ -55,7 +55,7 @@ def update(self, template: Dict[str, Any], is_lisa_template: bool) -> None: # encoded_script = base64.b64encode(AgentWaitForCloudInit.CloudInitScript.encode('utf-8')).decode('utf-8') - get_os_profile = self.get_lisa_function(template, 'getOsProfile') + get_os_profile = self.get_lisa_function(template, 'generateOsProfile') output = self.get_function_output(get_os_profile) if output.get('customData') is not None: raise Exception(f"The getOSProfile function already has a 'customData'. Won't override it. 
Definition: {get_os_profile}") From 5b98f1d2e52230a6f8a9e21f8c3f67d4adc73e4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miriam=20Espa=C3=B1a=20Acebal?= Date: Thu, 20 Jun 2024 02:03:50 +0200 Subject: [PATCH 213/240] Avoiding mocked exception from being lost on test (#3149) If another exception arises (that's the case here when python 3.12 is used due to the changes in os.shutil.rmtree), the mocked exception is lost because it is incomplete (neither errno nor strerror are set: it goes to args). --- tests/ga/test_exthandlers_exthandlerinstance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ga/test_exthandlers_exthandlerinstance.py b/tests/ga/test_exthandlers_exthandlerinstance.py index 846bb89e92..5b98c9f41c 100644 --- a/tests/ga/test_exthandlers_exthandlerinstance.py +++ b/tests/ga/test_exthandlers_exthandlerinstance.py @@ -117,7 +117,7 @@ def test_rm_ext_handler_dir_should_report_an_event_if_an_error_occurs_while_dele def mock_remove(path, dir_fd=None): # pylint: disable=unused-argument if path.endswith("extension_file2"): - raise IOError("A mocked error") + raise IOError(999,"A mocked error","extension_file2") original_remove_api(path) with patch.object(shutil.os, remove_api_name, mock_remove): From 4dd6946eaab914c0a66e1426b3039ba67dc169b5 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Tue, 25 Jun 2024 16:52:57 -0700 Subject: [PATCH 214/240] Add more useful logging for agent unit properties (#3154) --- azurelinuxagent/ga/cgroupconfigurator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/ga/cgroupconfigurator.py b/azurelinuxagent/ga/cgroupconfigurator.py index a36b9dae10..f18c23e3c5 100644 --- a/azurelinuxagent/ga/cgroupconfigurator.py +++ b/azurelinuxagent/ga/cgroupconfigurator.py @@ -206,7 +206,7 @@ def initialize(self): for metric in metrics: for prop in metric.get_unit_properties(): - log_cgroup_info('{0}: {1}'.format(prop, 
systemd.get_unit_property(systemd.get_agent_unit_name(), prop))) + log_cgroup_info('Agent {0} unit property value: {1}'.format(prop, systemd.get_unit_property(systemd.get_agent_unit_name(), prop))) if isinstance(metric, CpuMetrics): self.__set_cpu_quota(conf.get_agent_cpu_quota()) elif isinstance(metric, MemoryMetrics): From 8f6f7fb6b8381b2716f340dc986920dcf68eb0ca Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 26 Jun 2024 13:53:52 -0700 Subject: [PATCH 215/240] Remove wireserver fallback for imds calls (#3152) * Remove wireserver fallback for imds calls * remove unused method * remove obsolete unit test * remove unused import --------- Co-authored-by: narrieta@microsoft --- azurelinuxagent/common/event.py | 2 +- azurelinuxagent/common/protocol/imds.py | 11 +- azurelinuxagent/ga/monitor.py | 6 +- azurelinuxagent/ga/update.py | 20 --- tests/common/protocol/test_imds.py | 155 +++++++++++------------- tests/ga/test_update.py | 6 +- tests/lib/tools.py | 16 --- 7 files changed, 82 insertions(+), 134 deletions(-) diff --git a/azurelinuxagent/common/event.py b/azurelinuxagent/common/event.py index 830dd6fc9a..7e2b10c991 100644 --- a/azurelinuxagent/common/event.py +++ b/azurelinuxagent/common/event.py @@ -429,7 +429,7 @@ def initialize_vminfo_common_parameters(self, protocol): logger.warn("Failed to get VM info from goal state; will be missing from telemetry: {0}", ustr(e)) try: - imds_client = get_imds_client(protocol.get_endpoint()) + imds_client = get_imds_client() imds_info = imds_client.get_compute() parameters[CommonTelemetryEventSchema.Location].value = imds_info.location parameters[CommonTelemetryEventSchema.SubscriptionId].value = imds_info.subscriptionId diff --git a/azurelinuxagent/common/protocol/imds.py b/azurelinuxagent/common/protocol/imds.py index 5b9e206a13..fba88e0eee 100644 --- a/azurelinuxagent/common/protocol/imds.py +++ b/azurelinuxagent/common/protocol/imds.py @@ -27,8 +27,8 @@ IMDS_INTERNAL_SERVER_ERROR = 3 -def 
get_imds_client(wireserver_endpoint): - return ImdsClient(wireserver_endpoint) +def get_imds_client(): + return ImdsClient() # A *slightly* future proof list of endorsed distros. @@ -256,7 +256,7 @@ def image_origin(self): class ImdsClient(object): - def __init__(self, wireserver_endpoint, version=APIVERSION): + def __init__(self, version=APIVERSION): self._api_version = version self._headers = { 'User-Agent': restutil.HTTP_USER_AGENT, @@ -268,7 +268,6 @@ def __init__(self, wireserver_endpoint, version=APIVERSION): } self._regex_ioerror = re.compile(r".*HTTP Failed. GET http://[^ ]+ -- IOError .*") self._regex_throttled = re.compile(r".*HTTP Retry. GET http://[^ ]+ -- Status Code 429 .*") - self._wireserver_endpoint = wireserver_endpoint def _get_metadata_url(self, endpoint, resource_path): return BASE_METADATA_URI.format(endpoint, resource_path, self._api_version) @@ -326,14 +325,12 @@ def get_metadata(self, resource_path, is_health): endpoint = IMDS_ENDPOINT status, resp = self._get_metadata_from_endpoint(endpoint, resource_path, headers) - if status == IMDS_CONNECTION_ERROR: - endpoint = self._wireserver_endpoint - status, resp = self._get_metadata_from_endpoint(endpoint, resource_path, headers) if status == IMDS_RESPONSE_SUCCESS: return MetadataResult(True, False, resp) elif status == IMDS_INTERNAL_SERVER_ERROR: return MetadataResult(False, True, resp) + # else it's a client-side error, e.g. IMDS_CONNECTION_ERROR return MetadataResult(False, False, resp) def get_compute(self): diff --git a/azurelinuxagent/ga/monitor.py b/azurelinuxagent/ga/monitor.py index f34192be72..bdf2603fa8 100644 --- a/azurelinuxagent/ga/monitor.py +++ b/azurelinuxagent/ga/monitor.py @@ -216,10 +216,10 @@ class SendImdsHeartbeat(PeriodicOperation): Periodic operation to report the IDMS's health. The signal is 'Healthy' when we have successfully called and validated a response in the last _IMDS_HEALTH_PERIOD. 
""" - def __init__(self, protocol_util, health_service): + def __init__(self, health_service): super(SendImdsHeartbeat, self).__init__(SendImdsHeartbeat._IMDS_HEARTBEAT_PERIOD) self.health_service = health_service - self.imds_client = get_imds_client(protocol_util.get_wireserver_endpoint()) + self.imds_client = get_imds_client() self.imds_error_state = ErrorState(min_timedelta=SendImdsHeartbeat._IMDS_HEALTH_PERIOD) _IMDS_HEARTBEAT_PERIOD = datetime.timedelta(minutes=1) @@ -298,7 +298,7 @@ def daemon(self): PollResourceUsage(), PollSystemWideResourceUsage(), SendHostPluginHeartbeat(protocol, health_service), - SendImdsHeartbeat(protocol_util, health_service) + SendImdsHeartbeat(health_service) ] report_network_configuration_changes = ReportNetworkConfigurationChanges() diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 845f096866..2c2b3c263e 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -31,7 +31,6 @@ from azurelinuxagent.common import conf from azurelinuxagent.common import logger -from azurelinuxagent.common.protocol.imds import get_imds_client from azurelinuxagent.common.utils import fileutil, textutil from azurelinuxagent.common.agent_supported_feature import get_supported_feature_by_name, SupportedFeatureNames, \ get_agent_supported_features_list_for_crp @@ -475,25 +474,6 @@ def _wait_for_cloud_init(self): add_event(op=WALAEventOperation.CloudInit, message=message, is_success=False, log_event=False) self._cloud_init_completed = True # Mark as completed even on error since we will proceed to execute extensions - def _get_vm_size(self, protocol): - """ - Including VMSize is meant to capture the architecture of the VM (i.e. arm64 VMs will - have arm64 included in their vmsize field and amd64 will have no architecture indicated). 
- """ - if self._vm_size is None: - - imds_client = get_imds_client(protocol.get_endpoint()) - - try: - imds_info = imds_client.get_compute() - self._vm_size = imds_info.vmSize - except Exception as e: - err_msg = "Attempts to retrieve VM size information from IMDS are failing: {0}".format(textutil.format_exception(e)) - logger.periodic_warn(logger.EVERY_SIX_HOURS, "[PERIODIC] {0}".format(err_msg)) - return "unknown" - - return self._vm_size - def _get_vm_arch(self): return platform.machine() diff --git a/tests/common/protocol/test_imds.py b/tests/common/protocol/test_imds.py index efc705ffab..9333a5f9a3 100644 --- a/tests/common/protocol/test_imds.py +++ b/tests/common/protocol/test_imds.py @@ -56,7 +56,7 @@ class TestImds(AgentTestCase): def test_get(self, mock_http_get): mock_http_get.return_value = get_mock_compute_response() - test_subject = imds.ImdsClient(restutil.KNOWN_WIRESERVER_IP) + test_subject = imds.ImdsClient() test_subject.get_compute() self.assertEqual(1, mock_http_get.call_count) @@ -71,21 +71,21 @@ def test_get(self, mock_http_get): def test_get_bad_request(self, mock_http_get): mock_http_get.return_value = MockHttpResponse(status=restutil.httpclient.BAD_REQUEST) - test_subject = imds.ImdsClient(restutil.KNOWN_WIRESERVER_IP) + test_subject = imds.ImdsClient() self.assertRaises(HttpError, test_subject.get_compute) @patch("azurelinuxagent.common.protocol.imds.restutil.http_get") def test_get_internal_service_error(self, mock_http_get): mock_http_get.return_value = MockHttpResponse(status=restutil.httpclient.INTERNAL_SERVER_ERROR) - test_subject = imds.ImdsClient(restutil.KNOWN_WIRESERVER_IP) + test_subject = imds.ImdsClient() self.assertRaises(HttpError, test_subject.get_compute) @patch("azurelinuxagent.common.protocol.imds.restutil.http_get") def test_get_empty_response(self, mock_http_get): mock_http_get.return_value = MockHttpResponse(status=httpclient.OK, body=''.encode('utf-8')) - test_subject = imds.ImdsClient(restutil.KNOWN_WIRESERVER_IP) + 
test_subject = imds.ImdsClient() self.assertRaises(ValueError, test_subject.get_compute) def test_deserialize_ComputeInfo(self): @@ -359,7 +359,7 @@ def _imds_response(f): return fh.read() def _assert_validation(self, http_status_code, http_response, expected_valid, expected_response): - test_subject = imds.ImdsClient(restutil.KNOWN_WIRESERVER_IP) + test_subject = imds.ImdsClient() with patch("azurelinuxagent.common.utils.restutil.http_get") as mock_http_get: mock_http_get.return_value = MockHttpResponse(status=http_status_code, reason='reason', @@ -386,99 +386,86 @@ def test_endpoint_fallback(self): # http GET calls and enforces a single GET call (fallback would cause 2) and # checks the url called. - test_subject = imds.ImdsClient("foo.bar") + test_subject = imds.ImdsClient() # ensure user-agent gets set correctly for is_health, expected_useragent in [(False, restutil.HTTP_USER_AGENT), (True, restutil.HTTP_USER_AGENT_HEALTH)]: # set a different resource path for health query to make debugging unit test easier resource_path = 'something/health' if is_health else 'something' - for has_primary_ioerror in (False, True): - # secondary endpoint unreachable - test_subject._http_get = Mock(side_effect=self._mock_http_get) - self._mock_imds_setup(primary_ioerror=has_primary_ioerror, secondary_ioerror=True) - result = test_subject.get_metadata(resource_path=resource_path, is_health=is_health) - self.assertFalse(result.success) if has_primary_ioerror else self.assertTrue(result.success) # pylint: disable=expression-not-assigned - self.assertFalse(result.service_error) - if has_primary_ioerror: - self.assertEqual('IMDS error in /metadata/{0}: Unable to connect to endpoint'.format(resource_path), result.response) - else: - self.assertEqual('Mock success response', result.response) - for _, kwargs in test_subject._http_get.call_args_list: - self.assertTrue('User-Agent' in kwargs['headers']) - self.assertEqual(expected_useragent, kwargs['headers']['User-Agent']) - 
self.assertEqual(2 if has_primary_ioerror else 1, test_subject._http_get.call_count) - - # IMDS success - test_subject._http_get = Mock(side_effect=self._mock_http_get) - self._mock_imds_setup(primary_ioerror=has_primary_ioerror) - result = test_subject.get_metadata(resource_path=resource_path, is_health=is_health) - self.assertTrue(result.success) - self.assertFalse(result.service_error) - self.assertEqual('Mock success response', result.response) - for _, kwargs in test_subject._http_get.call_args_list: - self.assertTrue('User-Agent' in kwargs['headers']) - self.assertEqual(expected_useragent, kwargs['headers']['User-Agent']) - self.assertEqual(2 if has_primary_ioerror else 1, test_subject._http_get.call_count) - - # IMDS throttled - test_subject._http_get = Mock(side_effect=self._mock_http_get) - self._mock_imds_setup(primary_ioerror=has_primary_ioerror, throttled=True) - result = test_subject.get_metadata(resource_path=resource_path, is_health=is_health) - self.assertFalse(result.success) - self.assertFalse(result.service_error) - self.assertEqual('IMDS error in /metadata/{0}: Throttled'.format(resource_path), result.response) - for _, kwargs in test_subject._http_get.call_args_list: - self.assertTrue('User-Agent' in kwargs['headers']) - self.assertEqual(expected_useragent, kwargs['headers']['User-Agent']) - self.assertEqual(2 if has_primary_ioerror else 1, test_subject._http_get.call_count) - - # IMDS gone error - test_subject._http_get = Mock(side_effect=self._mock_http_get) - self._mock_imds_setup(primary_ioerror=has_primary_ioerror, gone_error=True) - result = test_subject.get_metadata(resource_path=resource_path, is_health=is_health) - self.assertFalse(result.success) - self.assertTrue(result.service_error) - self.assertEqual('IMDS error in /metadata/{0}: HTTP Failed with Status Code 410: Gone'.format(resource_path), result.response) - for _, kwargs in test_subject._http_get.call_args_list: - self.assertTrue('User-Agent' in kwargs['headers']) - 
self.assertEqual(expected_useragent, kwargs['headers']['User-Agent']) - self.assertEqual(2 if has_primary_ioerror else 1, test_subject._http_get.call_count) - - # IMDS bad request - test_subject._http_get = Mock(side_effect=self._mock_http_get) - self._mock_imds_setup(primary_ioerror=has_primary_ioerror, bad_request=True) - result = test_subject.get_metadata(resource_path=resource_path, is_health=is_health) - self.assertFalse(result.success) - self.assertFalse(result.service_error) - self.assertEqual('IMDS error in /metadata/{0}: [HTTP Failed] [404: reason] Mock not found'.format(resource_path), result.response) - for _, kwargs in test_subject._http_get.call_args_list: - self.assertTrue('User-Agent' in kwargs['headers']) - self.assertEqual(expected_useragent, kwargs['headers']['User-Agent']) - self.assertEqual(2 if has_primary_ioerror else 1, test_subject._http_get.call_count) - - def _mock_imds_setup(self, primary_ioerror=False, secondary_ioerror=False, gone_error=False, throttled=False, bad_request=False): - self._mock_imds_expect_fallback = primary_ioerror # pylint: disable=attribute-defined-outside-init - self._mock_imds_primary_ioerror = primary_ioerror # pylint: disable=attribute-defined-outside-init - self._mock_imds_secondary_ioerror = secondary_ioerror # pylint: disable=attribute-defined-outside-init + # IMDS success + test_subject._http_get = Mock(side_effect=self._mock_http_get) + self._mock_imds_setup() + result = test_subject.get_metadata(resource_path=resource_path, is_health=is_health) + self.assertTrue(result.success) + self.assertFalse(result.service_error) + self.assertEqual('Mock success response', result.response) + for _, kwargs in test_subject._http_get.call_args_list: + self.assertTrue('User-Agent' in kwargs['headers']) + self.assertEqual(expected_useragent, kwargs['headers']['User-Agent']) + self.assertEqual(1, test_subject._http_get.call_count) + + # Connection error + test_subject._http_get = Mock(side_effect=self._mock_http_get) + 
self._mock_imds_setup(ioerror=True) + result = test_subject.get_metadata(resource_path=resource_path, is_health=is_health) + self.assertFalse(result.success) + self.assertFalse(result.service_error) + self.assertEqual('IMDS error in /metadata/{0}: Unable to connect to endpoint'.format(resource_path), result.response) + for _, kwargs in test_subject._http_get.call_args_list: + self.assertTrue('User-Agent' in kwargs['headers']) + self.assertEqual(expected_useragent, kwargs['headers']['User-Agent']) + self.assertEqual(1, test_subject._http_get.call_count) + + # IMDS throttled + test_subject._http_get = Mock(side_effect=self._mock_http_get) + self._mock_imds_setup(throttled=True) + result = test_subject.get_metadata(resource_path=resource_path, is_health=is_health) + self.assertFalse(result.success) + self.assertFalse(result.service_error) + self.assertEqual('IMDS error in /metadata/{0}: Throttled'.format(resource_path), result.response) + for _, kwargs in test_subject._http_get.call_args_list: + self.assertTrue('User-Agent' in kwargs['headers']) + self.assertEqual(expected_useragent, kwargs['headers']['User-Agent']) + self.assertEqual(1, test_subject._http_get.call_count) + + # IMDS gone error + test_subject._http_get = Mock(side_effect=self._mock_http_get) + self._mock_imds_setup(gone_error=True) + result = test_subject.get_metadata(resource_path=resource_path, is_health=is_health) + self.assertFalse(result.success) + self.assertTrue(result.service_error) + self.assertEqual('IMDS error in /metadata/{0}: HTTP Failed with Status Code 410: Gone'.format(resource_path), result.response) + for _, kwargs in test_subject._http_get.call_args_list: + self.assertTrue('User-Agent' in kwargs['headers']) + self.assertEqual(expected_useragent, kwargs['headers']['User-Agent']) + self.assertEqual(1, test_subject._http_get.call_count) + + # IMDS bad request + test_subject._http_get = Mock(side_effect=self._mock_http_get) + self._mock_imds_setup(bad_request=True) + result = 
test_subject.get_metadata(resource_path=resource_path, is_health=is_health) + self.assertFalse(result.success) + self.assertFalse(result.service_error) + self.assertEqual('IMDS error in /metadata/{0}: [HTTP Failed] [404: reason] Mock not found'.format(resource_path), result.response) + for _, kwargs in test_subject._http_get.call_args_list: + self.assertTrue('User-Agent' in kwargs['headers']) + self.assertEqual(expected_useragent, kwargs['headers']['User-Agent']) + self.assertEqual(1, test_subject._http_get.call_count) + + def _mock_imds_setup(self, ioerror=False, gone_error=False, throttled=False, bad_request=False): + self._mock_imds_ioerror = ioerror # pylint: disable=attribute-defined-outside-init self._mock_imds_gone_error = gone_error # pylint: disable=attribute-defined-outside-init self._mock_imds_throttled = throttled # pylint: disable=attribute-defined-outside-init self._mock_imds_bad_request = bad_request # pylint: disable=attribute-defined-outside-init def _mock_http_get(self, *_, **kwargs): - if "foo.bar" == kwargs['endpoint'] and not self._mock_imds_expect_fallback: - raise Exception("Unexpected endpoint called") - if self._mock_imds_primary_ioerror and "169.254.169.254" == kwargs['endpoint']: - raise HttpError("[HTTP Failed] GET http://{0}/metadata/{1} -- IOError timed out -- 6 attempts made" - .format(kwargs['endpoint'], kwargs['resource_path'])) - if self._mock_imds_secondary_ioerror and "foo.bar" == kwargs['endpoint']: - raise HttpError("[HTTP Failed] GET http://{0}/metadata/{1} -- IOError timed out -- 6 attempts made" - .format(kwargs['endpoint'], kwargs['resource_path'])) + if self._mock_imds_ioerror: + raise HttpError("[HTTP Failed] GET http://{0}/metadata/{1} -- IOError timed out -- 6 attempts made".format(kwargs['endpoint'], kwargs['resource_path'])) if self._mock_imds_gone_error: raise ResourceGoneError("Resource is gone") if self._mock_imds_throttled: - raise HttpError("[HTTP Retry] GET http://{0}/metadata/{1} -- Status Code 429 -- 25 
attempts made" - .format(kwargs['endpoint'], kwargs['resource_path'])) + raise HttpError("[HTTP Retry] GET http://{0}/metadata/{1} -- Status Code 429 -- 25 attempts made".format(kwargs['endpoint'], kwargs['resource_path'])) resp = MagicMock() resp.reason = 'reason' diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index f06e64a902..44a6d73243 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -52,7 +52,7 @@ from tests.lib.mock_update_handler import mock_update_handler from tests.lib.mock_wire_protocol import mock_wire_protocol, MockHttpResponse from tests.lib.wire_protocol_data import DATA_FILE, DATA_FILE_MULTIPLE_EXT, DATA_FILE_VM_SETTINGS -from tests.lib.tools import AgentTestCase, AgentTestCaseWithGetVmSizeMock, data_dir, DEFAULT, patch, load_bin_data, Mock, MagicMock, \ +from tests.lib.tools import AgentTestCase, data_dir, DEFAULT, patch, load_bin_data, Mock, MagicMock, \ clear_singleton_instances, is_python_version_26_or_34, skip_if_predicate_true from tests.lib import wire_protocol_data from tests.lib.http_request_predicates import HttpRequestPredicates @@ -119,7 +119,7 @@ def _get_update_handler(iterations=1, test_data=None, protocol=None, autoupdate_ yield update_handler, protocol -class UpdateTestCase(AgentTestCaseWithGetVmSizeMock): +class UpdateTestCase(AgentTestCase): _test_suite_tmp_dir = None _agent_zip_dir = None @@ -1928,7 +1928,7 @@ def reload_conf(url, protocol): @patch('azurelinuxagent.ga.update.get_collect_logs_handler') @patch('azurelinuxagent.ga.update.get_monitor_handler') @patch('azurelinuxagent.ga.update.get_env_handler') -class MonitorThreadTest(AgentTestCaseWithGetVmSizeMock): +class MonitorThreadTest(AgentTestCase): def setUp(self): super(MonitorThreadTest, self).setUp() self.event_patch = patch('azurelinuxagent.common.event.add_event') diff --git a/tests/lib/tools.py b/tests/lib/tools.py index dd0d961724..5ad4b97f85 100644 --- a/tests/lib/tools.py +++ b/tests/lib/tools.py @@ -447,22 +447,6 @@ def 
create_script(script_file, contents): os.chmod(script_file, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR) -class AgentTestCaseWithGetVmSizeMock(AgentTestCase): - - def setUp(self): - - self._get_vm_size_patch = patch('azurelinuxagent.ga.update.UpdateHandler._get_vm_size', return_value="unknown") - self._get_vm_size_patch.start() - - super(AgentTestCaseWithGetVmSizeMock, self).setUp() - - def tearDown(self): - - if self._get_vm_size_patch: - self._get_vm_size_patch.stop() - - super(AgentTestCaseWithGetVmSizeMock, self).tearDown() - def load_data(name): """Load test data""" path = os.path.join(data_dir, name) From b4291a3e17649fa877309b80d718bbd75e5ea269 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 26 Jun 2024 14:39:20 -0700 Subject: [PATCH 216/240] Remove unused import (#3155) Co-authored-by: narrieta@microsoft --- tests/lib/tools.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/lib/tools.py b/tests/lib/tools.py index 5ad4b97f85..fc1f72150d 100644 --- a/tests/lib/tools.py +++ b/tests/lib/tools.py @@ -42,9 +42,6 @@ try: from unittest.mock import Mock, patch, MagicMock, ANY, DEFAULT, call, PropertyMock # pylint: disable=unused-import - - # Import mock module for Python2 and Python3 - from bin.waagent2 import Agent # pylint: disable=unused-import except ImportError: from mock import Mock, patch, MagicMock, ANY, DEFAULT, call, PropertyMock From 15aa419f15af4867844d965d82805f2de22d6b80 Mon Sep 17 00:00:00 2001 From: Anam Ahmad Date: Wed, 3 Jul 2024 10:18:47 -0700 Subject: [PATCH 217/240] Expand support for backend ethernet (#3150) IBManager will continue to be used for a new ethernet-backend offering from AzureHPC. While the key name remains the same (IPoIB_data), the interfaces will be of the format ethXX. Removing the check that skips anything that isn't ibXX. 
We are not at the risk of proceeding for any other nics since the IPoIB_data will only have the backend RDMA ones, and despite reading from the system for the loop, we match it against the array parsed from the IPoIB_data KVP. IB interfaces have padded virtual macs, non-IB interfaces won't. Adding if-else to only do the padded-octet check for IB. Everything else will use the standard 6-octet pattern. --- azurelinuxagent/pa/rdma/rdma.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/azurelinuxagent/pa/rdma/rdma.py b/azurelinuxagent/pa/rdma/rdma.py index edd6f2b555..a6e7c3fe69 100644 --- a/azurelinuxagent/pa/rdma/rdma.py +++ b/azurelinuxagent/pa/rdma/rdma.py @@ -368,10 +368,6 @@ def update_iboip_interfaces(self, mac_ip_array): count = 0 for nic in nics: - # look for IBoIP interface of format ibXXX - if not re.match(r"ib\w+", nic): - continue - mac_addr = None with open(os.path.join(net_dir, nic, "address")) as address_file: mac_addr = address_file.read() @@ -382,7 +378,11 @@ def update_iboip_interfaces(self, mac_ip_array): mac_addr = mac_addr.upper() - match = re.match(r".+(\w\w):(\w\w):(\w\w):\w\w:\w\w:(\w\w):(\w\w):(\w\w)\n", mac_addr) + # if this is an IB interface, match IB-specific regex + if re.match(r"ib\w+", nic): + match = re.match(r".+(\w\w):(\w\w):(\w\w):\w\w:\w\w:(\w\w):(\w\w):(\w\w)\n", mac_addr) + else: + match = re.match(r"^(\w\w):(\w\w):(\w\w):(\w\w):(\w\w):(\w\w)$", mac_addr) if not match: logger.error("RDMA: failed to parse address for device {0} address {1}".format(nic, mac_addr)) continue From feb6edb664dac5630b19edc95eaab03be8adb2e0 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Tue, 16 Jul 2024 09:08:43 -0700 Subject: [PATCH 218/240] Allow use of node 16 (#3160) Co-authored-by: narrieta@microsoft --- .github/workflows/ci_pr.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci_pr.yml b/.github/workflows/ci_pr.yml index fd8d91a380..05d2744273 100644 --- 
a/.github/workflows/ci_pr.yml +++ b/.github/workflows/ci_pr.yml @@ -29,7 +29,8 @@ jobs: env: NOSEOPTS: "--verbose" - + ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true + steps: - uses: actions/checkout@v3 From a97c637ccc9495474497b92aa7f05f0aa312b95d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miriam=20Espa=C3=B1a=20Acebal?= Date: Tue, 16 Jul 2024 18:16:26 +0200 Subject: [PATCH 219/240] Fix Ubuntu version codename for 24.04 (#3159) 24.04 is noble, not focal Co-authored-by: Norberto Arrieta --- tests/common/osutil/test_factory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/common/osutil/test_factory.py b/tests/common/osutil/test_factory.py index 46bf6a8758..5bfb867d43 100644 --- a/tests/common/osutil/test_factory.py +++ b/tests/common/osutil/test_factory.py @@ -99,7 +99,7 @@ def test_get_osutil_it_should_return_ubuntu(self): self.assertEqual(ret.get_service_name(), "walinuxagent") ret = _get_osutil(distro_name="ubuntu", - distro_code_name="focal", + distro_code_name="noble", distro_version="24.04", distro_full_name="") self.assertTrue(isinstance(ret, Ubuntu18OSUtil)) From 23d4c5b1b8a4b82e7f3db8f78560721fb08c31c4 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Tue, 16 Jul 2024 16:32:26 -0700 Subject: [PATCH 220/240] Fix regex pattern for ext seq scenario (#3162) --- tests_e2e/tests/ext_sequencing/ext_sequencing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_e2e/tests/ext_sequencing/ext_sequencing.py b/tests_e2e/tests/ext_sequencing/ext_sequencing.py index b2b3b9a705..7bd9c93dfa 100644 --- a/tests_e2e/tests/ext_sequencing/ext_sequencing.py +++ b/tests_e2e/tests/ext_sequencing/ext_sequencing.py @@ -226,7 +226,7 @@ def run(self): # fail. We know an extension should fail if "failing" is in the case name. Otherwise, report the # failure. 
deployment_failure_pattern = r"[\s\S]*\"details\": [\s\S]* \"code\": \"(?P.*)\"[\s\S]* \"message\": \"(?P.*)\"[\s\S]*" - msg_pattern = r"Multiple VM extensions failed to be provisioned on the VM. Please see the VM extension instance view for other failures. The first extension failed due to the error: VM Extension '.*' is marked as failed since it depends upon the VM Extension 'CustomScript' which has failed." + msg_pattern = r"Multiple VM extensions failed to be provisioned on the VM.*VM Extension '.*' is marked as failed since it depends upon the VM Extension 'CustomScript' which has failed." deployment_failure_match = re.match(deployment_failure_pattern, str(e)) if "failing" not in case.__name__: fail("Extension template deployment unexpectedly failed: {0}".format(e)) From 93ee7264c97252ec58494798492a3c4610507e0d Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Fri, 2 Aug 2024 11:34:45 -0700 Subject: [PATCH 221/240] Update test certificate data (#3166) Co-authored-by: narrieta@microsoft --- tests/common/protocol/test_goal_state.py | 4 +- tests/common/protocol/test_wire.py | 6 +- .../ext_conf-agent_family_version.xml | 4 +- .../ext_conf-rsm_version_properties_false.xml | 4 +- tests/data/hostgaplugin/ext_conf.xml | 4 +- .../vm_settings-agent_family_version.json | 4 +- ...tings-difference_in_required_features.json | 4 +- .../hostgaplugin/vm_settings-out-of-sync.json | 2 +- ...gs-requested_version_properties_false.json | 4 +- tests/data/hostgaplugin/vm_settings.json | 6 +- tests/data/wire/certs-2.xml | 158 ++++++++--------- tests/data/wire/certs.xml | 158 ++++++++--------- tests/data/wire/certs_no_format_specified.xml | 160 +++++++++--------- tests/data/wire/ext_conf-no_gs_metadata.xml | 2 +- tests/data/wire/ext_conf.xml | 2 +- .../wire/ext_conf_additional_locations.xml | 2 +- tests/data/wire/ext_conf_autoupgrade.xml | 2 +- .../ext_conf_autoupgrade_internalversion.xml | 2 +- ..._conf_dependencies_with_empty_settings.xml | 2 +- 
.../wire/ext_conf_in_vm_artifacts_profile.xml | 2 +- ...ext_conf_in_vm_empty_artifacts_profile.xml | 2 +- tests/data/wire/ext_conf_in_vm_metadata.xml | 2 +- tests/data/wire/ext_conf_internalversion.xml | 2 +- .../ext_conf_invalid_and_valid_handlers.xml | 6 +- .../wire/ext_conf_invalid_vm_metadata.xml | 2 +- .../wire/ext_conf_multiple_extensions.xml | 8 +- tests/data/wire/ext_conf_no_public.xml | 2 +- .../data/wire/ext_conf_required_features.xml | 2 +- tests/data/wire/ext_conf_rsm_version.xml | 2 +- tests/data/wire/ext_conf_sequencing.xml | 4 +- .../wire/ext_conf_settings_case_mismatch.xml | 10 +- tests/data/wire/ext_conf_upgradeguid.xml | 2 +- ...t_conf_version_missing_in_agent_family.xml | 2 +- .../ext_conf_version_missing_in_manifest.xml | 2 +- .../wire/ext_conf_version_not_from_rsm.xml | 2 +- ...t_conf_vm_not_enabled_for_rsm_upgrades.xml | 2 +- ...multiple_depends_on_for_single_handler.xml | 6 +- ..._multiple_runtime_settings_same_plugin.xml | 4 +- ...onf_multiple_settings_for_same_handler.xml | 4 +- ..._conf_plugin_settings_version_mismatch.xml | 4 +- ..._and_multi_config_settings_same_plugin.xml | 4 +- tests/data/wire/trans_cert | 34 ++-- tests/data/wire/trans_prv | 52 +++--- tests/data/wire/trans_pub | 14 +- 44 files changed, 353 insertions(+), 353 deletions(-) diff --git a/tests/common/protocol/test_goal_state.py b/tests/common/protocol/test_goal_state.py index 5b4a2948af..5a63586e5d 100644 --- a/tests/common/protocol/test_goal_state.py +++ b/tests/common/protocol/test_goal_state.py @@ -401,7 +401,7 @@ def test_it_should_download_certs_on_a_new_fast_track_goal_state(self): with mock_wire_protocol(data_file) as protocol: goal_state = GoalState(protocol.client) - cert = "BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F" + cert = "F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9" crt_path = os.path.join(self.tmp_dir, cert + ".crt") prv_path = os.path.join(self.tmp_dir, cert + ".prv") @@ -426,7 +426,7 @@ def test_it_should_download_certs_on_a_new_fabric_goal_state(self): 
protocol.mock_wire_data.set_vm_settings_source(GoalStateSource.Fabric) goal_state = GoalState(protocol.client) - cert = "BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F" + cert = "F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9" crt_path = os.path.join(self.tmp_dir, cert + ".crt") prv_path = os.path.join(self.tmp_dir, cert + ".prv") diff --git a/tests/common/protocol/test_wire.py b/tests/common/protocol/test_wire.py index 9ce8339e94..8cf0c7d462 100644 --- a/tests/common/protocol/test_wire.py +++ b/tests/common/protocol/test_wire.py @@ -95,11 +95,11 @@ def _test_getters(self, test_data, certsMustBePresent, __, MockCryptUtil, _): protocol.get_goal_state().fetch_extension_manifest(ext_handler.name, ext_handler.manifest_uris) crt1 = os.path.join(self.tmp_dir, - '38B85D88F03D1A8E1C671EB169274C09BC4D4703.crt') + '8979F1AC8C4215827BF3B5A403E6137B504D02A4.crt') crt2 = os.path.join(self.tmp_dir, - 'BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F.crt') + 'F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9.crt') prv2 = os.path.join(self.tmp_dir, - 'BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F.prv') + 'F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9.prv') if certsMustBePresent: self.assertTrue(os.path.isfile(crt1)) self.assertTrue(os.path.isfile(crt2)) diff --git a/tests/data/hostgaplugin/ext_conf-agent_family_version.xml b/tests/data/hostgaplugin/ext_conf-agent_family_version.xml index 5c9e0028fe..a277db3d7c 100644 --- a/tests/data/hostgaplugin/ext_conf-agent_family_version.xml +++ b/tests/data/hostgaplugin/ext_conf-agent_family_version.xml @@ -64,7 +64,7 @@ "runtimeSettings": [ { "handlerSettings": { - "protectedSettingsCertThumbprint": "BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F", + "protectedSettingsCertThumbprint": "F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9", "protectedSettings": 
"MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", "publicSettings": {"GCS_AUTO_CONFIG":true} } @@ -77,7 +77,7 @@ "runtimeSettings": [ { "handlerSettings": { - "protectedSettingsCertThumbprint": "BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F", + "protectedSettingsCertThumbprint": "F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9", "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", "publicSettings": {"enableGenevaUpload":true} } diff --git a/tests/data/hostgaplugin/ext_conf-rsm_version_properties_false.xml b/tests/data/hostgaplugin/ext_conf-rsm_version_properties_false.xml index e1f1d6ba8c..6590c562d5 100644 --- a/tests/data/hostgaplugin/ext_conf-rsm_version_properties_false.xml +++ b/tests/data/hostgaplugin/ext_conf-rsm_version_properties_false.xml @@ -64,7 +64,7 @@ "runtimeSettings": [ { "handlerSettings": { - "protectedSettingsCertThumbprint": "BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F", + 
"protectedSettingsCertThumbprint": "F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9", "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", "publicSettings": {"GCS_AUTO_CONFIG":true} } @@ -77,7 +77,7 @@ "runtimeSettings": [ { "handlerSettings": { - "protectedSettingsCertThumbprint": "BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F", + "protectedSettingsCertThumbprint": "F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9", "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", "publicSettings": {"enableGenevaUpload":true} } diff --git a/tests/data/hostgaplugin/ext_conf.xml b/tests/data/hostgaplugin/ext_conf.xml index 8ede27f8a0..0e3dec4c83 100644 --- a/tests/data/hostgaplugin/ext_conf.xml +++ b/tests/data/hostgaplugin/ext_conf.xml @@ -58,7 +58,7 @@ "runtimeSettings": [ { "handlerSettings": { - "protectedSettingsCertThumbprint": "BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F", + 
"protectedSettingsCertThumbprint": "F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9", "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/Microsoft.Azure.Monitor.AzureMonitorLinuxAgent==", "publicSettings": {"GCS_AUTO_CONFIG":true} } @@ -71,7 +71,7 @@ "runtimeSettings": [ { "handlerSettings": { - "protectedSettingsCertThumbprint": "BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F", + "protectedSettingsCertThumbprint": "F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9", "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/Microsoft.Azure.Security.Monitoring.AzureSecurityLinuxAgent==", "publicSettings": {"enableGenevaUpload":true} } diff --git a/tests/data/hostgaplugin/vm_settings-agent_family_version.json b/tests/data/hostgaplugin/vm_settings-agent_family_version.json index 734cc8147b..99d435e51a 100644 --- a/tests/data/hostgaplugin/vm_settings-agent_family_version.json +++ b/tests/data/hostgaplugin/vm_settings-agent_family_version.json @@ -60,7 +60,7 @@ "settingsSeqNo": 0, "settings": [ { - "protectedSettingsCertThumbprint": "BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F", + "protectedSettingsCertThumbprint": "F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9", "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", "publicSettings": "{\"GCS_AUTO_CONFIG\":true}" } @@ -78,7 +78,7 @@ "settingsSeqNo": 0, "settings": [ { - "protectedSettingsCertThumbprint": 
"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F", + "protectedSettingsCertThumbprint": "F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9", "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", "publicSettings": "{\"enableGenevaUpload\":true}" } diff --git a/tests/data/hostgaplugin/vm_settings-difference_in_required_features.json b/tests/data/hostgaplugin/vm_settings-difference_in_required_features.json index 71cdbf5c55..f36524e280 100644 --- a/tests/data/hostgaplugin/vm_settings-difference_in_required_features.json +++ b/tests/data/hostgaplugin/vm_settings-difference_in_required_features.json @@ -56,7 +56,7 @@ "settingsSeqNo": 0, "settings": [ { - "protectedSettingsCertThumbprint": "BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F", + "protectedSettingsCertThumbprint": "F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9", "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", "publicSettings": 
"{\"GCS_AUTO_CONFIG\":true}" } @@ -76,7 +76,7 @@ "settingsSeqNo": 0, "settings": [ { - "protectedSettingsCertThumbprint": "BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F", + "protectedSettingsCertThumbprint": "F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9", "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", "publicSettings": "{\"enableGenevaUpload\":true}" } diff --git a/tests/data/hostgaplugin/vm_settings-out-of-sync.json b/tests/data/hostgaplugin/vm_settings-out-of-sync.json index 0d4806af9d..d971bcaa8a 100644 --- a/tests/data/hostgaplugin/vm_settings-out-of-sync.json +++ b/tests/data/hostgaplugin/vm_settings-out-of-sync.json @@ -56,7 +56,7 @@ "settingsSeqNo": 0, "settings": [ { - "protectedSettingsCertThumbprint": "BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F", + "protectedSettingsCertThumbprint": "F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9", "protectedSettings": 
"MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", "publicSettings": "{\"GCS_AUTO_CONFIG\":true}" } diff --git a/tests/data/hostgaplugin/vm_settings-requested_version_properties_false.json b/tests/data/hostgaplugin/vm_settings-requested_version_properties_false.json index 3a6eb8b1a5..d902d94719 100644 --- a/tests/data/hostgaplugin/vm_settings-requested_version_properties_false.json +++ b/tests/data/hostgaplugin/vm_settings-requested_version_properties_false.json @@ -60,7 +60,7 @@ "settingsSeqNo": 0, "settings": [ { - "protectedSettingsCertThumbprint": "BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F", + "protectedSettingsCertThumbprint": "F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9", "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", "publicSettings": "{\"GCS_AUTO_CONFIG\":true}" } @@ -78,7 +78,7 @@ "settingsSeqNo": 0, "settings": [ { - "protectedSettingsCertThumbprint": 
"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F", + "protectedSettingsCertThumbprint": "F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9", "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==", "publicSettings": "{\"enableGenevaUpload\":true}" } diff --git a/tests/data/hostgaplugin/vm_settings.json b/tests/data/hostgaplugin/vm_settings.json index 1f6d44debc..dffac88966 100644 --- a/tests/data/hostgaplugin/vm_settings.json +++ b/tests/data/hostgaplugin/vm_settings.json @@ -56,7 +56,7 @@ "settingsSeqNo": 0, "settings": [ { - "protectedSettingsCertThumbprint": "BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F", + "protectedSettingsCertThumbprint": "F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9", "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/Microsoft.Azure.Monitor.AzureMonitorLinuxAgent==", "publicSettings": "{\"GCS_AUTO_CONFIG\":true}" } @@ -76,7 +76,7 @@ "settingsSeqNo": 0, "settings": [ { - "protectedSettingsCertThumbprint": "BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F", + "protectedSettingsCertThumbprint": "F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9", "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/Microsoft.Azure.Security.Monitoring.AzureSecurityLinuxAgent==", "publicSettings": "{\"enableGenevaUpload\":true}" } @@ -192,7 +192,7 @@ "isMultiConfig": false, "settings": [ { - "protectedSettingsCertThumbprint": "BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F", + 
"protectedSettingsCertThumbprint": "F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9", "protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpddesZQewdDBgegkxNzA1BgoJkgergres/Microsoft.OSTCExtensions.VMAccessForLinux==" } ] diff --git a/tests/data/wire/certs-2.xml b/tests/data/wire/certs-2.xml index 66a231ee87..e58e0aeacd 100644 --- a/tests/data/wire/certs-2.xml +++ b/tests/data/wire/certs-2.xml @@ -1,85 +1,85 @@ 2012-11-30 - 5 + 1 Pkcs7BlobWithPfxContents - MIIOgwYJKoZIhvcNAQcDoIIOdDCCDnACAQIxggEwMIIBLAIBAoAUiF8ZYMs9mMa8 -QOEMxDaIhGza+0IwDQYJKoZIhvcNAQEBBQAEggEAQW7GyeRVEhHSU1/dzV0IndH0 -rDQk+27MvlsWTcpNcgGFtfRYxu5bzmp0+DoimX3pRBlSFOpMJ34jpg4xs78EsSWH -FRhCf3EGuEUBHo6yR8FhXDTuS7kZ0UmquiCI2/r8j8gbaGBNeP8IRizcAYrPMA5S -E8l1uCrw7DHuLscbVni/7UglGaTfFS3BqS5jYbiRt2Qh3p+JPUfm51IG3WCIw/WS -2QHebmHxvMFmAp8AiBWSQJizQBEJ1lIfhhBMN4A7NadMWAe6T2DRclvdrQhJX32k -amOiogbW4HJsL6Hphn7Frrw3CENOdWMAvgQBvZ3EjAXgsJuhBA1VIrwofzlDljCC -DTUGCSqGSIb3DQEHATAUBggqhkiG9w0DBwQIxcvw9qx4y0qAgg0QrINXpC23BWT2 -Fb9N8YS3Be9eO3fF8KNdM6qGf0kKR16l/PWyP2L+pZxCcCPk83d070qPdnJK9qpJ -6S1hI80Y0oQnY9VBFrdfkc8fGZHXqm5jNS9G32v/AxYpJJC/qrAQnWuOdLtOZaGL -94GEh3XRagvz1wifv8SRI8B1MzxrpCimeMxHkL3zvJFg9FjLGdrak868feqhr6Nb -pqH9zL7bMq8YP788qTRELUnL72aDzGAM7HEj7V4yu2uD3i3Ryz3bqWaj9IF38Sa0 -6rACBkiNfZBPgExoMUm2GNVyx8hTis2XKRgz4NLh29bBkKrArK9sYDncE9ocwrrX -AQ99yn03Xv6TH8bRp0cSj4jzBXc5RFsUQG/LxzJVMjvnkDbwNE41DtFiYz5QVcv1 -cMpTH16YfzSL34a479eNq/4+JAs/zcb2wjBskJipMUU4hNx5fhthvfKwDOQbLTqN -HcP23iPQIhjdUXf6gpu5RGu4JZ0dAMHMHFKvNL6TNejwx/H6KAPp6rCRsYi6QhAb -42SXdZmhAyQsFpGD9U5ieJApqeCHfj9Xhld61GqLJA9+WLVhDPADjqHoAVvrOkKH -OtPegId/lWnCB7p551klAjiEA2/DKxFBIAEhqZpiLl+juZfMXovkdmGxMP4gvNNF -gbS2k5A0IJ8q51gZcH1F56smdAmi5kvhPnFdy/9gqeI/F11F1SkbPVLImP0mmrFi -zQD5JGfEu1psUYvhpOdaYDkmAK5qU5xHSljqZFz5hXNt4ebvSlurHAhunJb2ln3g -AJUHwtZnVBrtYMB0w6fdwYqMxXi4vLeqUiHtIQtbOq32zlSryNPQqG9H0iP9l/G1 -t7oUfr9woI/B0kduaY9jd5Qtkqs1DoyfNMSaPNohUK/CWOTD51qOadzSvK0hJ+At -033PFfv9ilaX6GmzHdEVEanrn9a+BoBCnGnuysHk/8gdswj9OzeCemyIFJD7iObN 
-rNex3SCf3ucnAejJOA0awaLx88O1XTteUjcFn26EUji6DRK+8JJiN2lXSyQokNeY -ox6Z4hFQDmw/Q0k/iJqe9/Dq4zA0l3Krkpra0DZoWh5kzYUA0g5+Yg6GmRNRa8YG -tuuD6qK1SBEzmCYff6ivjgsXV5+vFBSjEpx2dPEaKdYxtHMOjkttuTi1mr+19dVf -hSltbzfISbV9HafX76dhwZJ0QwsUx+aOW6OrnK8zoQc5AFOXpe9BrrOuEX01qrM0 -KX5tS8Zx5HqDLievjir194oi3r+nAiG14kYlGmOTHshu7keGCgJmzJ0iVG/i+TnV -ZSLyd8OqV1F6MET1ijgR3OPL3kt81Zy9lATWk/DgKbGBkkKAnXO2HUw9U34JFyEy -vEc81qeHci8sT5QKSFHiP3r8EcK8rT5k9CHpnbFmg7VWSMVD0/wRB/C4BiIw357a -xyJ/q1NNvOZVAyYzIzf9TjwREtyeHEo5kS6hyWSn7fbFf3sNGO2I30veWOvE6kFA -HMtF3NplOrTYcM7fAK5zJCBK20oU645TxI8GsICMog7IFidFMdRn4MaXpwAjEZO4 -44m2M+4XyeRCAZhp1Fu4mDiHGqgd44mKtwvLACVF4ygWZnACDpI17X88wMnwL4uU -vgehLZdAE89gvukSCsET1inVBnn/hVenCRbbZ++IGv2XoYvRfeezfOoNUcJXyawQ -JFqN0CRB5pliuCesTO2urn4HSwGGoeBd507pGWZmOAjbNjGswlJJXF0NFnNW/zWw -UFYy+BI9axuhWTSnCXbNbngdNQKHznKe1Lwit6AI3U9jS33pM3W+pwUAQegVdtpG -XT01YgiMCBX+b8B/xcWTww0JbeUwKXudzKsPhQmaA0lubAo04JACMfON8jSZCeRV -TyIzgacxGU6YbEKH4PhYTGl9srcWIT9iGSYD53V7Kyvjumd0Y3Qc3JLnuWZT6Oe3 -uJ4xz9jJtoaTDvPJQNK3igscjZnWZSP8XMJo1/f7vbvD57pPt1Hqdirp1EBQNshk -iX9CUh4fuGFFeHf6MtGxPofbXmvA2GYcFsOez4/2eOTEmo6H3P4Hrya97XHS0dmD -zFSAjzAlacTrn1uuxtxFTikdOwvdmQJJEfyYWCB1lqWOZi97+7nzqyXMLvMgmwug -ZF/xHFMhFTR8Wn7puuwf36JpPQiM4oQ/Lp66zkS4UlKrVsmSXIXudLMg8SQ5WqK8 -DjevEZwsHHaMtfDsnCAhAdRc2jCpyHKKnmhCDdkcdJJEymWKILUJI5PJ3XtiMHnR -Sa35OOICS0lTq4VwhUdkGwGjRoY1GsriPHd6LOt1aom14yJros1h7ta604hSCn4k -zj9p7wY9gfgkXWXNfmarrZ9NNwlHxzgSva+jbJcLmE4GMX5OFHHGlRj/9S1xC2Wf -MY9orzlooGM74NtmRi4qNkFj3dQCde8XRR4wh2IvPUCsr4j+XaoCoc3R5Rn/yNJK -zIkccJ2K14u9X/A0BLXHn5Gnd0tBYcVOqP6dQlW9UWdJC/Xooh7+CVU5cZIxuF/s -Vvg+Xwiv3XqekJRu3cMllJDp5rwe5EWZSmnoAiGKjouKAIszlevaRiD/wT6Zra3c -Wn/1U/sGop6zRscHR7pgI99NSogzpVGThUs+ez7otDBIdDbLpMjktahgWoi1Vqhc -fNZXjA6ob4zTWY/16Ys0YWxHO+MtyWTMP1dnsqePDfYXGUHe8yGxylbcjfrsVYta -4H6eYR86eU3eXB+MpS/iA4jBq4QYWR9QUkd6FDfmRGgWlMXhisPv6Pfnj384NzEV -Emeg7tW8wzWR64EON9iGeGYYa2BBl2FVaayMEoUhthhFcDM1r3/Mox5xF0qnlys4 -goWkMzqbzA2t97bC0KDGzkcHT4wMeiJBLDZ7S2J2nDAEhcTLY0P2zvOB4879pEWx 
-Bd15AyG1DvNssA5ooaDzKi/Li6NgDuMJ8W7+tmsBwDvwuf2N3koqBeXfKhR4rTqu -Wg1k9fX3+8DzDf0EjtDZJdfWZAynONi1PhZGbNbaMKsQ+6TflkCACInRdOADR5GM -rL7JtrgF1a9n0HD9vk2WGZqKI71tfS8zODkOZDD8aAusD2DOSmVZl48HX/t4i4Wc -3dgi/gkCMrfK3wOujb8tL4zjnlVkM7kzKk0MgHuA1w81zFjeMFvigHes4IWhQVcz -ek3l4bGifI2kzU7bGIi5e/019ppJzGsVcrOE/3z4GS0DJVk6fy7MEMIFx0LhJPlL -T+9HMH85sSYb97PTiMWpfBvNw3FSC7QQT9FC3L8d/XtMY3NvZoc7Fz7cSGaj7NXG -1OgVnAzMunPa3QaduoxMF9346s+4a+FrpRxL/3bb4skojjmmLqP4dsbD1uz0fP9y -xSifnTnrtjumYWMVi+pEb5kR0sTHl0XS7qKRi3SEfv28uh72KdvcufonIA5rnEb5 -+yqAZiqW2OxVsRoVLVODPswP4VIDiun2kCnfkQygPzxlZUeDZur0mmZ3vwC81C1Q -dZcjlukZcqUaxybUloUilqfNeby+2Uig0krLh2+AM4EqR63LeZ/tk+zCitHeRBW0 -wl3Bd7ShBFg6kN5tCJlHf/G6suIJVr+A9BXfwekO9+//CutKakCwmJTUiNWbQbtN -q3aNCnomyD3WjvUbitVO0CWYjZrmMLIsPtzyLQydpT7tjXpHgvwm5GYWdUGnNs4y -NbA262sUl7Ku/GDw1CnFYXbxl+qxbucLtCdSIFR2xUq3rEO1MXlD/txdTxn6ANax -hi9oBg8tHzuGYJFiCDCvbVVTHgWUSnm/EqfclpJzGmxt8g7vbaohW7NMmMQrLBFP -G6qBypgvotx1iJWaHVLNNiXvyqQwTtelNPAUweRoNawBp/5KTwwy/tHeF0gsVQ7y -mFX4umub9YT34Lpe7qUPKNxXzFcUgAf1SA6vyZ20UI7p42S2OT2PrahJ+uO6LQVD -+REhtN0oyS3G6HzAmKkBgw7LcV3XmAr39iSR7mdmoHSJuI9bjveAPhniK+N6uuln -xf17Qnw5NWfr9MXcLli7zqwMglU/1bNirkwVqf/ogi/zQ3JYCo6tFGf/rnGQAORJ -hvOq2SEYXnizPPIH7VrpE16+jUXwgpiQ8TDyeLPmpZVuhXTXiCaJO5lIwmLQqkmg -JqNiT9V44sksNFTGNKgZo5O9rEqfqX4dLjfv6pGJL+MFXD9if4f1JQiXJfhcRcDh -Ff9B6HukgbJ1H96eLUUNj8sL1+WPOqawkS4wg7tVaERE8CW7mqk15dCysn9shSut -I+7JU7+dZsxpj0ownrxuPAFuT8ZlcBPrFzPUwTlW1G0CbuEco8ijfy5IfbyGCn5s -K/0bOfAuNVGoOpLZ1dMki2bGdBwQOQlkLKhAxYcCVQ0/urr1Ab+VXU9kBsIU8ssN -GogKngYpuUV0PHmpzmobielOHLjNqA2v9vQSV3Ed48wRy5OCwLX1+vYmYlggMDGt -wfl+7QbXYf+k5WnELf3IqYvh8ZWexa0= + MIIOgwYJKoZIhvcNAQcDoIIOdDCCDnACAQIxggEwMIIBLAIBAoAU08PI+CBUqOd4 +Nbte7MLw2qCYn1UwDQYJKoZIhvcNAQEBBQAEggEAU1y8uuEyQMXa7eGlK/PB5F5+ +ZEYBHRpBpSKlyIpTJhN+emNLtuPRlIJ0L0zlfkvjMmnoApXujUb91tnHVQu2tUV4 +9Ws3goQjqIb6baQmxf8pctsL56vHts763Wl+AwiFLc7twoq/4FmmqwvFzxHE+c2o +IyxxYY72ZNorN5sux0b+ghEeZHkdds6uR/DHtht+zCy/JP63Phf53dAoUoO4p9Ym 
+WJhe2Mccv9t/yrtneVEIw/p1GqUPSY+tiGMNMxNvXlUrtdoaUzyzzXmqVbMXb6PB +bWFtkkRJBCMYA8Ozh4La6y8Y1jgFj6vCkoxX3s9GVQbpeyon7leanAiHwArgejCC +DTUGCSqGSIb3DQEHATAUBggqhkiG9w0DBwQIY87YJhlLuuSAgg0QnoSp+Z+aYRAI +uNaSDIyvQ/1/xYMW6TCqp19yiOGRu5bzDNX0tKN5cCLIvRX5FZmLbLbApziZlMsV +wrHCmVBnN8XYCdZsK+Wy39ORULAfurkjem6arn/NFnfN9DLiSEYwKbSC4VNegfkT +lJlgnSVUs7Z6v86YUEuwBnmvyCDbIit3PbfKJzaCr9DSPXKwBFRZqTTsFWovBGaA +cQvbuqxbbkm4cNYmwmT84TXhjYDuTfP8KEPgdBD1F8cqB+e6OuQSG3N+tBHKi7DH +Gc+30IimJVcrwbPCNDlteHHTLxaeDM4g3eoyj7B6J+/kAMLdoWuH9kwdr75Dd5OJ +SGY7utJ+v4A92SKc7G01tQnHZYOxn+JFKWQ4y/CR2lTtYfhh8pd9jSSHsg0jGtKs +Zte/mpfrOHTpXd3K7F1/UiXTRNWbfy/7pBWPdqgSaOAuVH180VAHCDnaOtvf2w7L +tJN74gesbcwPgQiAiD9um1eOqOMObu3gqXdeIkMksbhrTSOzLuO8c3t0R+8lL6QE +2K54t7PMDQ8ScmktNMWG9heBbZmAlkLZ2VK+jfGpbVEGRSWKRkpBMQOqLGh7iRkv +EPtr44/F5cWwXVN6ofCg25aGwLrAaD9hlprGNByjGezjrFxj4NSDyKYmjhfF4+RA +CfEN/j19OadJgY8ByH+L190VOOc3Xcf0aiFJPqV+MmTm0QcOmaOIPFfwRHjWiuS1 +K5kzX15uDgIZED2NWvJtwyuJ+p8xcWtmdE0nGxhOHV+3ZZu8WZ9Qv7LU2eSJQ5De +5uzb5sDzVZI8zfQ6LX2nF7ilntzxzODcv5Eoor8NQAU5xPKvb66aRa5BV5xzCl8A +/FY61ztGpCD4DfPHFpCldcHKCPk1qzu/7kL3LQ49DV5GcVwzzanHQaINWo5xhUu1 +XaUcWe7LVOPYvqCrSF8v3dB56RHF1MJMxCNdZo1oVup3FjIU3N4ZUl5qX5Ixetp2 +ftUZHsw3r+cotronsrne8R4gl3PejIc6rVmz7cpnPY6l1T70QEEtnxcHgqIFZeCB +n3IHOBOlaS3DbtOVzclySUF3z1+Gtk8Entc1ksNX2MwknFUM2AjQWuvjVDm/ZKaY +kPtbr52IDKURYzDecuBeTuZCq7ztaOqdc0D+sLFn4Z8CBzl0OdOrDU25h/wir/r7 +DiCGFAGuPIVtsaO0C/aLCM0IJlDW9Lj9YMXy5jZ4ziRT6CmarmjO+BLBL9yHK7pR +rCEJoYRZUyw6nAZNW3EkftxMWJNe00SkJyccMPLgQA6ORnuHC3wo4EBH62vBA4vq +JszIKm8xselXbAQoyeRtXBVvFEV7gz/3US43K2HoHi+Z9N60LRw7V+aihz+nKTnC +lioA+owDvgsJmVwuERse8ZaUwXigfKyCUnrbEAYFeSIQyvKs0TG6pAGm2ZjqFJw/ +L0HLPQVUf6HLZY7HD/xCz21X3mL28VZ82Fr/luOqIk187M4CnyudmZX64tS/o+TR +n9lSJhV4H6y5WCCTSnyGnjcLSm5lMg9H+4vwRB95qfKS9B8ZLSesBbk/VUwCw1fw +IeR2S1S9PUO+J0lUKGWWrBjDNKIkR5vVLXyazO+BFz6HIq3U0Df9Gya3kng4BfZK +a3X9ALP1PEdfFeRyH7T83NN20686Q1uSzkKIKmKYp5YRuUsZdrGSSIbgO5UlWayF +YWQPIrpTy+v2lP9la9YLPdSWG0a/pMA5BFzovHgSJ733yowmw7sqn2wsZyiMTTOy 
+lbF7im1hbB3bfzow6SA8IE7O5XiAIyIk35HNJswPMkJWQzzuwNGKIla3f+HfPaRO +7weJPIEeQr7jUdgiQLl9A9/kHdp8jMy2jwrys6LY9rwEMAodpaN/yXYF9oOFvBsC +75az848gx7OTB/OcBKFNkeKkdWYo3GYP0DwzTcV3sV+bIllKGzGhuN7KOyn7XLSN +ZG2kEm/+s05DdxpagcGyAWKT6myDjuMo/lAll/A3bnmwrP/I5YO0bLn2cmEq6dGx +AcWC5eELHoKo9hv6pjU9BszkHIgMq2B6Oe35xnAi36RlarOU8D4+xop3IqN2Jy65 +eec15LopFUrCcVgSddf7h+qS0jQGiEPuUNZAuZBA0ZVmHzDtkHJqdSpSAXTvykVC +GIPbCWce/0X9UxxrciJ7foXebz7A9b1dkEMI0UCNBkiO5kGVJBBxGcHOtYvzWc9+ +oRhN68tOksmNFiNIuxTRG1iariPQrDocbsEy+yDDmSxJPZ4wNjPofjZ1XXaXkjs4 +Q79ptA8JLwzHv7dRCsV+r3GUllIn5TOb9adbIowmZG+nSWq3vE1AoHgymwYo064p +ZlcrtsZRfo9SeqMf3aAOgQtYDpCi2QhCipQYe0IFYWdShdQzxqXyCObm7zey6PnI +4LZ2J56Z8QXPloo8LfsmcqILWEMOxCc66k5+QFb/MKDV/lYtWZzTES/TFhRdNydw +yCdizmdTWo2wfk9YU/pcwRZUAzhk+/JQJA0tef6kyUv+ozQue4JVw8UBRoWJRrXf +mO4kGeEpoVu8Hlk3XVeEQTEMP8gre2t1WSQhgRuUPWHvsVMjRfn4K8rk4MxU94Op +XselOgz+E0n3XpwHh9gcv43t+qd5YBpE3uAI11hUJpZqsjAo8AiAXppzXZQ9Xx66 +duz3UZLobVZL5CwFuCiaE3b3rx5Qlt9SKNQA8aG6e6N1hwHzl69zT1BN2ZIvrSuL +ihtQ4E7D6KlEWhPV2c12tMgiDs1CTbOyY5uX8Q+dMilp1Y/5iC6LwzAjJ8IvhtqY +NniVsVocO9uyRe5cYPLM/F/4rcnnmoIeTbPeGiI91vGnLH+wrgZ/HSntN7C5nG6s +oay685GW620S0Ac71IcRZajNTM7Rfc9JpCNzwb2WnZw4LKyybfXcHSStk4aqw8P+ +oRsOLgRLO4m9CYnsJBcVX7oF+/IUWyPfL/4sAIUIF+7mXP+Z18paTmbZRIrvjwcA ++QhctZXYVSeUQE4RtLu7pKxTYlZZesZqVhEXj733RMwgYuQecqCMTcF6StpEsKPs +BUZDXZZrCl9kUMMB7m5bsnBGB3o/QbyS/hkNwI8pVmQHNIVKdKOcxH0cCRouKUH1 +MzYxuZfVS1dvgkhVhPeySy1AZ2A/oBFFz2PWxzftKwaZ5KwDx4VI8x3yYaMuXmvK +cyIWS+2s+Ky/ofOOAJPYiv2aaKtLnOjo+78oLyAm7NVNaQ31JFVPAxCbmEnIu4Ai +GngAH4hmVp/f2/pfGq/OI/HFFeAwwsxUKWOsLu+Di7QcT81PrkHVFadmLXxA9iyc +UmT5Oqg0h4V5PWwaGVfgDMFs7VO0dThZ+cjXLGWvC2bTWpvxJVsgq+J/MCIZsiSJ +eECBhDvvsKCmigM9+qQ7iPjLWP2DL+CvbLXWLVuaj+rjwpoAx+2ALfWP0aRsetBk +3vbKm4Pm92401TyGmV8HJfpgMrjbScrmsdv+10ljj3eigaUGGzS0UImJIXEerbia +3m31u8IaYF0fFsONHa0+0RuEhFVhtgx3ojI9wN6OM4sxIgDMY+Iyrny/Dn4qlVJo +bmW2hahljpIgT0x9KwZgflyM7VVckRIk+SzJDmqqYdEVk6CnxpKcVJgaD3z/Q4ez +0doYtQeeK7W4EWNJACosqMCFKnFZlOyMELE0gyhdeCgM1xXOU4nxzzUJXFAKukSi 
+6RQANERsNoXnkfYd6Pt39k4IaBkJ3/lmBVdONqoPDjwDJT887kyFo9GfxgOZ+ZAS +KlVD9YiDSXkgq4/KGq8zNb0jZiZjd02uzzYVvLfKx/TGhVy5WEnf2IeC0gLZ3wNI +jo0894/Ss0uXbbl5HoOhLdOQbYuZ5QB5S6W6TbcM5Mrt9S0rkJY7xYxnlmXTQ3A7 +q+wfi5IIAIYuRd1uwZ/msCF6L2UM6y0+So5P0X8YVY4tT1Oq8AxjJVLVMZVBPq7b +nQwChfVf5HOEfNehO52UwRA1C6IGH9/2T6lPrJOuZp7oxUE0CtVYNDbqcj9lbb7A +cEcQjQzgYnH3xmj1ZjBpyQ9zL5o0g7ZTwAq8zA1LhMBjrgSlYd2s3947Ii4xBaof +CCA8OVDeqHTqVxFQQk5rrHCDPOSHLCXAqqArXb5yl90Vk1wU7BnPe6iwScCcPbWd +rkw8twZYLNp7sCDTZ5es77Zzs431R1sc8pL/SOwbv9o30cQfbW9FZAhboyI3o/ug +RdKYlB72y8wN8ijh/UENo3W89MzHtbZ1XYMCauYn9zDUGci4Bnziqfpd/dV+CUeC +Fs/DP5f2OkiinHRmf060xj7HN7Q3SWziFbMRVO85/e7jjUcNQyBqikHXBl3V2hpM +hRPsObhPAoLVxz8fBVMYfxR1E7wTpv5KWzvWSPh4QUX+gRpCYL/h/WJ6qUqjeXMP +1u6vM7uX9+OjNkEAql9L9cPmm1GIam8yBoRsP/Om0VFKDZUvhTo1QC1Q3finiSm4 +89s7tlobx0KafcD+yNKpSFtq/XUIv3Q= \ No newline at end of file diff --git a/tests/data/wire/certs.xml b/tests/data/wire/certs.xml index 5908de7938..2db71ca055 100644 --- a/tests/data/wire/certs.xml +++ b/tests/data/wire/certs.xml @@ -1,85 +1,85 @@ 2012-11-30 - 3 + 1 Pkcs7BlobWithPfxContents - MIIOgwYJKoZIhvcNAQcDoIIOdDCCDnACAQIxggEwMIIBLAIBAoAUZcG9X+5aK8VZ -FY8eJV9j+RImq58wDQYJKoZIhvcNAQEBBQAEggEAn/hOytP/StyRuXHcqFq6x+Za -7gHfO8prXWdZW4e28NLt/x5ZOBHDDZ6buwwdXEZME0+RoiJvLqP2RNhZkEO8bkna -pS76xLZE4NXyfxkeEs1vJYis0WJdt/56uCzBuud2SBLuMWoAWgF5alokN0uFpVgm -CKCos+xv6Pisolc6geM8xQTYe6sLf5Z23LWftWfJqzuo/29glCCre7R80OLeZe5w -pN6XztbYz06nhVByC35To8Lm0akWAAKU7sfqM1Nty4P0rwUJPKXo42uN1GKYbDbF -x8piCAd+rs+q4Alu3qK/YaTPpMb2ECRMH6CYB8Klf/CbuWykkfS8zrsnpXT1kzCC -DTUGCSqGSIb3DQEHATAUBggqhkiG9w0DBwQInjJWFaJcZz2Agg0QX6NlJUH17o20 -90gfjWV01mPmzLKx71JT+hyzKr5vHywDSRI/mdb3RqA59ZrIKeyWr0HXEOuABlul -nxjc/Rfk1tiLQwh0iqlOjlMtRsxS6yDA0WNwK2Y9gaXcdgDm5Vioai18l4Pd0qzK -fsof5a/jEJyunW1CZK19QciwfQ2pS8QbRYgeLRZRft2I+kv6cWXlGS6YrMqKQC8t -QMxnXR4AuzVllPLbbIgtM3l9oS+6jl7jKyKogeroJ9FNLjoMBJLldRLGPRhkCmdJ -Z1m+s/BAVUH08qgj2kmHzucdULLjlRcmma9m/h91TcQCXHAavf7S+U9QwIyGRh83 
-t4Y7EqbQ93mOgjajFzILSL7AT/irgJpDu6CJqMu3EMNDA0mjxn5Cdvj40sufL/g3 -UyBwqosmIwAPzNDmhPtTKvHaHfGY/k8WhoIYfAA5Lhq1z22/RODZOY0Ch2XyxQM4 -s35eppe6IhnwyMv6HfrCrqE/o/16OrvvbaFQTeTlMvU0P7MIR4pVW6tRq4NEa5Wx -JcvGutuMuzH1VMcqcKdc7wYyOqDOGU43kcV8PiALTIcqrhD8NDrKks1jSkyqQw2h -sJQckNaQIcCXkUQecQa2UGe0l4HDSJ5gAETSenjyLBKFHf3hxiWBpw446/bODgdk -0+oyreZqMpRz3vn9LC+Yt7SuVbTzRdx7nlKIiNvo8+btOuVm44evchLFHAq3Ni8D -c+tP/wss3K4Xdp+t5SvEY/nLIu11Lw44HDVMYTuNz3Ya9psL70ZLaLZM8NromnEl -CUMRNTPoOC7/KDRh2E9d6c1V4CC43wAsRhksGJnSYoiSVAhaVgLqVFQTsqNHxmcg -3Y9AEBVzm3fZg6+DxAYu+amb+r8lk0Pp+N1t6rVbKXhkbAAxg0UDO3pY8Xcz0Y3g -Qdd5rnHh1rJrehku7zTHvQaXEddUWmCoUGIXJ+bt4VOhErL6s5/j8GSG0xmfxgSE -jnGj4Jwd0Vv19uZjsBDQ54R88GcA9YX8r48gr9JAwplrQ50m9KX6GwQhDRYKN/Dh -zOt9DCUkqMqdi5T4v2qNTfkL7iXBMhsSkeYUQ/tFLyv4QQyli5uTUZ5FNXohOVAx -TNyV9+gcV5WiBR0Aje6rwPW3oTkrPnVfZCdBwt/mZjPNMO5Se7D/lWE33yYu7bJ+ -gaxRNynhEOB7RaOePzDjn7LExahFmTFV0sgQxwQ2BYsfI22cdkAf6qOxdK/kqiQm -lgzRpDjyPIFhaCCHnXyJdSqcHmDrCjcg2P6AVCDJGdFOBvupeJ7Kg7WV5EY7G6AU -ng16tyumJSMWSzSks9M0Ikop6xhq3cV+Q0OArJoreQ6eonezXjM9Y865xjF80nJL -V4lcRxdXfoKpXJwzc++pgkY9t55J0+cEyBvIXfKud1/HHOhewhoy5ATyi9LLM91n -iW1DaQXlvHZgE7GFMSCVLxy6ZopBbm9tF0NQDFi8zUtGulD3Gkoc/Bp+DWb2vsX4 -S8W9vByNvIz/SWOGNbEs2irTRXccMAL7JHJ+74bwZZi5DRrqyQWHCn/3Ls2YPI6z -lnfl15EE4G7g3+nrvP2lZFBXjsdG/U3HYi+tAyHkRN3oXvgnt9N76PoY8dlsNf6c -RuNqgk31uO1sX/8du3Jxz87MlzWiG3kbAHMvbcoCgy/dW4JQcM3Sqg5PmF8i9wD1 -ZuqZ7zHpWILIWd13TM3UDolQZzl+GXEX62dPPL1vBtxHhDgQicdaWFXa6DX3dVwt -DToWaAqrAPIrgxvNk5FHNCTEVTQkmCIL5JoinZSk7BAl8b085CPM6F7OjB5CR4Ts -V+6UaTUZqk+z+raL+HJNW2ds1r7+t8Po5CydMBS4M/pE7b/laUnbRu7rO8cqKucn -n+eYimib/0YuqZj9u2RXso4kzdOyIxGSGHkmSzYuoNRx80r+jHtcBBTqXk37t0FY -X5O7QItCE+uwV1Sa12yg2dgJ6vKRPCEVyMoYUBwNbKEcw1pjG9Em7HwjOZK0UrO1 -yKRz6kxffVKN9Naf7lOnXooVuedY/jcaZ2zCZtASlOe8iiQK5prM4sbMixMp9ovL -tTxy9E9kgvaI/mkzarloKPQGsk0WzuH+i39M3DOXrMf5HwfE+A55u1gnrHsxQlxp -z5acwN42+4ln6axs4aweMGAhyEtBW8TdsNomwuPk+tpqZXHI2pqS4/aVOk8R8VE7 -IqtBx2QBMINT79PDPOn3K6v9HEt9fUHJ2TWJvKRKfsu5lECJPJSJA8OQ7zzw6zQt 
-NXw8UhZRmNW0+eI5dykg+XsII7+njYa33EJ1Sy1Ni8ZT/izKfrKCwEm44KVAyUG5 -qUjghPPMNQY3D0qOl54DRfGVOxbHztUooblW+DnlLlpOy/+/B+H9Dscxosdx2/Mo -RftJOMlLqK7AYIYAlw1zvqZo0pf7rCcLSLt+6FrPtNZe6ULFUacZ3RqyTZovsZi5 -Ucda3bLdOHX6tKL21bRfN7L0/BjF6BJETpG3p+rBYOyCwO6HvdenpMm6cT02nrfP -QJtImjeW1ov6Pw02zNlIZAXFir78Z6AcMhV2iKEJxc1RMFBcXmylNXJmGlKYB3lJ -jWo6qumLewTz5vzRu0vZCmOf+bKmuyVxckPbrzP+4OHKhpm95Kp6sUn2pvh0S8H3 -w1pjfZ9+sIaVgMspfRPgoWTyZ0flFvAX6DHWYVejMebwfAqZaa+UAJJ6jWQbMNzo -ZtOhzCjV+2ZBYHvSiY7dtfaLwQJeMWEKIw32kEYv/Ts33n7dD/pAzZu0WCyfoqsQ -MEXhbZYSCQTJ8/gqvdlurWOJL091z6Uw810YVt+wMqsBo5lnMsS3GqkzgM2PVzuV -taddovr5CrWfAjQaFG8wcETiKEQFWS9JctKo0F+gwLwkVyc4fBSkjVmIliw1jXGu -Enf2mBei+n8EaRB2nNa/CBVGQM24WEeMNq+TqaMvnEonvMtCIEpuJAO/NzJ1pxw0 -9S+LKq3lFoIQoON5glsjV82WseAbFXmynBmSbyUY/mZQpjuNSnwLfpz4630x5vuV -VNglsZ8lW9XtSPh6GkMj+lLOCqJ5aZ4UEXDSYW7IaH4sPuQ4eAAUsKx/XlbmaOad -hgK+3gHYi98fiGGQjt9OqKzQRxVFnHtoSwbMp/gjAWqjDCFdo7RkCqFjfB1DsSj0 -TrjZU1lVMrmdEhtUNjqfRpWN82f55fxZdrHEPUQIrOywdbRiNbONwm4AfSE8ViPz -+SltYpQfF6g+tfZMwsoPSevLjdcmb1k3n8/lsEL99wpMT3NbibaXCjeJCZbAYK05 -rUw5bFTVAuv6i3Bax3rx5DqyQANS3S8TBVYrdXf9x7RpQ8oeb4oo+qn293bP4n5m -nW/D/yvsAJYcm3lD7oW7D369nV/mwKPpNC4B9q6N1FiUndvdFSbyzfNfSF9LV0RU -A/4Qm05HtE3PAUFYfwwP8MDg0HdltMn83VfqrEi/d76xlcxfoIh2RQQgqxCIS6KE -AExIY/hPYDVxApznI39xNOp7IqdPEX3i7Cv7aHeFAwbhXYMNnkfFJJTkHRdcRiJ/ -RE1QPlC7ijH+IF02PE/seYg4GWrkeW3jvi+IKQ9BPBoYIx0P+7wHXf4ZGtZMourd -N4fdwzFCDMFkS7wQC/GOqZltzF/gz1fWEGXRTH3Lqx0iKyiiLs2trQhFOzNw3B7E -WxCIUjRMAAJ6vvUdvoFlMw8WfBkzCVple4yrCqIw6fJEq8v0q8EQ7qKDTfyPnFBt -CtQZuTozfdPDnVHGmGPQKUODH/6Vwl+9/l7HDvV8/D/HKDnP581ix1a3bdokNtSK -7rBfovpzYltYGpVxsC6MZByYEpvIh5nHQouLR4L3Je2wB3F9nBGjNhBvGDQlxcne -AAgywpOpQfvfsnYRWt2vlQzwhHUgWhJmGMhGMmn4oKc5su87G7yzFEnq/yIUMOm/ -X0Zof/Qm92KCJS7YkLzP1GDO9XPMe+ZHeHVNXhVNCRxGNbHCHB9+g9v090sLLmal -jpgrDks19uHv0yYiMqBdpstzxClRWxgHwrZO6jtbr5jeJuLVUxV0uuX76oeomUj2 -mAwoD5cB1U8W9Ew+cMjp5v6gg0LTk90HftjhrZmMA0Ll6TqFWjxge+jsswOY1SZi -peuQGIHFcuQ7SEcyIbqju3bmeEGZwTz51yo8x2WqpCwB1a4UTngWJgDCySAI58fM 
-eRL6r478CAZjk+fu9ZA85B7tFczl3lj0B4QHxkX370ZeCHy39qw8vMYIcPk3ytI0 -vmj5UCSeQDHHDcwo54wi83IFEWUFh18gP4ty5Tfvs6qv7qd455UQZTAO7lwpdBlp -MJGlMqBHjDLGyY80p+O4vdlQBZ1uMH+48u91mokUP8p+tVVKh7bAw/HPG+SQsuNR -DXF+gTm/hRuY7IYe3C7Myzc8bDTtFw6Es9BLAqzFFAMjzDVz7wY1rnZQq4mmLcKg -AAMJaqItipKAroYIntXXJ3U8fsUt03M= + MIIOgwYJKoZIhvcNAQcDoIIOdDCCDnACAQIxggEwMIIBLAIBAoAU08PI+CBUqOd4 +Nbte7MLw2qCYn1UwDQYJKoZIhvcNAQEBBQAEggEASTTfHNyY+9hdXd+Eqtqk+yPb +RA7rRXWR8tQAJsdy3zAlu8WHymq945fnsf0bAW4mODIPYhhevmdo5VaI54AzAWhk +EfJvtRQlZZEMGZVKgUSwP4AG6cFaSnJuAYbi27nffM45PgD26O2WjOhnmM7minEC +31/wUoxjxVOxIc8x+Ngo+TquyBeaK1iXcchwIUnbM0xRYMfccOAEhe/iytKFPzdg +DJbDk+KbVGaUuUfhF+o4mMyJNezMUFxWkePcUgP12li57GTJSIyi8OQaFUu1qh0L +KzQ2sYl8U0WmWQBhXqvuug47WI/6XrRDpKslIV1aV4XxD1Or6H3nf0fULjQZajCC +DTUGCSqGSIb3DQEHATAUBggqhkiG9w0DBwQI+4Ch/cEogOSAgg0QvlelG9yDK2GE +XX1wn8Xw0wCt+zIceXs8C6QuRSmZLEkZVv8Y+duMwi2A0tcg63HOmY2AfIPvTTt8 +eto3YwIklrfF20jBvCg/pT3kfm6TICWmMNd5XesTq8UNmkqzJQQ84L3Kbs/ix2pG +9RaeXkrg0VO7FBDVH8b+jIT9IVDAEXgBQVefcCImVZ9L2hQWNABFrFXAQSTKjfFJ +IEOfXUhTiH434V1RKJczhFiH5SNZ0kbaRjmaQkXqbXQ5kKoq8VNkmFc6vPCclTmq +QJFfIUTepljWW/HuVkUycNYQQkblmWNF9FEwSx++x3Tz1FLR3UlzOkJCqr+tS3jv +WFnI16VlOHaaHA++YKhW1PUujJcEdZaXBE0FC6JZF7IOAOjSdLSmRL9yU95erfgZ +hRo2FB8EWVZitIG+DPU9vU59chGpqXYzZU4/aTpedGeWSZ9GFXRqwb6htmajjTWu +l5fIME3hWt7kcejpuXCTDcdG4YcbngZu4hcepMrUhm9g2BdmIDb1YiB7290PMop8 +4nNo97tSBvhzk300cg6+pfxy1iAv3++g/ggOI+Y/gFmgN88mmBMWm0+mocJ0SZGY +3+8K/8pDpJpfAAXSjayl7T2UXUdJe8fpOtetiHUr2zIbZXlM4IQw+0UMAVjTiaRT +BIDGoPEcpCcxqPlSTTEie166uzzPXG9skVgennjN6YopwMC/WPaFRJu/eTlQOqlB +EqvK9TKJG8u2yp00J04MGYXluY4l/o3/KLpT0mCOeOJm3KerfwQ/jU2oHHmvIATN +XYy32ULqx/CjL+N3ax0Nu+UrgMQPcVhrTN/7lnZpFLYwXetGzH/4jdNfIfTc4yGn +0GlVT6cVgJyV8wyYpbqCxHtCW83II8vXLjTfeIffHBoJU0fMMPWEIxRuMQSksm0H +F1u/rfGVSXnueshbJUD3pnvTiLPuWcOexSxP+B8BCNfi21jX5Ha+U9RKrKbHc4h9 +PkiWxU6ZEqCBkdP9ssKnmMKMsrC7sZRoYziHNeqlZp/GFQmkI+DeFlqSPn3Lv9Or +HF3bZokZCf0RGEkZDPrigaiEoL7PH/TtVZF8miL4JCLB0FVB08vWeeP5zjQT4H6J 
+jSC2pw+5bA2UWGshgsKKAJJihYcOuybtzglh7nqmSSZcszz3GyuDhdR8KDrYwChU +Hn13+rSWAbbqtxSyPc5fd22Q4Do2aD6PVdRadHjG0qeE7Dq46YHT3Z9KF0nQTLk8 +uYq8hL5+jQEgTnUB0yJTKdEcg05TyrMfNHWuM1pru0bqpf25vpwP5t+Sd/vgWJNc +XtRLWrMdYBuSG9zOyLaH7bj0rcMhN3ULisKej9IT/xHOWSXXZjNoe1P3q9fvtMbg +ZXAale/xJ6rXq6mLvZXivJfQJkPbSV7fByPPKO6TMnHbNEgLOGO3XtHEwC24JKup +C0ohq03QqQHEisS9Mk5LvWmSchXR3/7vCtJFyOemQom7nCy8cx4Y1JGmZ4SGSaEs +QZs7GC7Ftb/X82LRuknvS19ApOVFEs4/8t+LviD3x7Z9quVv+fZvydhzNKGRR6kQ +fYZwK7rqqkvuFKgXqNbzlrtlUqOUPXJgdO7QHOtU8z+k2NzBWfOp6j+Ef8rc3GDU +HSVZZ/Lz0RWedxRC1zoZJSol7ckMxIGIpDhtb9xgDmaGKILWOR9k+wG6+7ywQ2LE +PB3myDOclvKUDyb/DqwRS9ch9yyYSmz8WXTgdSeyOjp8QT2JQuuOOhoooHuKSxAk ++7v/Fh5bNGtjHByuzMYSdLcWsLX+UohpDoc1heVgUA3R6EuIOJTA0nC653YmqIBp +R5rsT+esub/EndweZTacmc2nDJxTKdZgMvdwhnsOZZBGsOaD7MXAS5vCsze+PQmY +4+VqqWPASaclV6CygN4qSxmww6mVgmAgWVmJqfa6vOyb3zhx68TkNEp9rxJFcJSJ +NiTTvWe0nF+o2/a1HZ8rZFdf65KsqGSiqu/6HoUuFzWLxRCqSjB9RkfSqrDVAVim +pwL46zGRsqZV+5xrRQlxINNUbg/D11zcp1zdhQvhDrpBoLMjK7AaxA5msPYFy6Gm +KMRAG2kyi802W5CPZWkbiEoUA8vkiICuxN+Pdh146zk9Ngl4PC3YpNCMtXK11ifd +hYxmWqEuQ2AcdVTckosaWrFMn5MqEcR0aAXZbnjIMgTZ6SMYJBZMWjzJhe/UQjTo +vICK7KAH82chpW2hG2I67z7e1Nv930RyL6JbYI8mSqgccPBzOBUhpHvKDM59z8Nc +eStEYDdOcMz8P+c/H3Bh4WsyMWMOwWvjyy6GX5Bpl5z94tWFRn6W4FK5iDqp+HHm +v5W1+hlFBxXtuzBcSQntcj8LoExJ2mK6BhZkaeAESMqPvNeNFmhEVUGq0/+c7T4I +L+1YkQPcm/nIpwW/ITmkGmi5n5VsvbJFDbQe+h9LI2aqvWtzA0YT5Ed77Glbdbgq +qB8EyXdr1BsBb7s7bbXm4Wf8UJkCZESg8iQExkUk8HqMJRxjctjma0DyyKVi4j8Q ++BA1EYBEX37641S+ZR9fYmQeuULGkf3d+w/ttgvm6YDZivsZYWkTscX+lUtoHhWN +5EOAfllI0/DaGX15mGONMV8YA1PoCNEX3yKJ5tVGkxxUPK+Op7ZHvJmtb1fPMRRY +z+evQ+NTXTZZzdr3Kfs4yYbuXG4e1odm2v/zBKG7JF3yWPMtXZZiMks/BkaXTq1P +LrB0VxGcMsLeQ5HbbWJtchyCWyy63CNNbfYNohjxru52DjaAQlDKQT9pOiSmGJzb +7+hNnKYnOfo6Du2ljz7C9C4mxnRJsRA2O9Cw66J5XPy1W+2+RmvP72jXwoFWYzPq +jxNs2wxOYQjEDpXBTmCbW58F5cTbSTk3D15iCtYtf31tpuPpHEnz+2OvrX0WhygN +esZJnln2Tu2ut1pVhAuJDLZTj24Y4MP0nmDINuLDAkFji0CwjACvW7M9SbIOLLYU ++5JHHjB7wqaTXWFzpt/ZKXMXlwCzWjo3pDERbrpYbwS3GHqmtcyIZK4EA7Ulka5Y 
+7rLPWS5eKcjX3tp2FyX5pD52TpuUMPAk6vyefX+NznP7opvJpusHbkschojFVRDA +zHIpIGeWjYcWLk5YTPagzH8o+4ci1OEk+OMc8i6PxkQDeBw1RiCAFfBnKPCSEtFk +KJlw7fspk3/chA6mmvOHjkrQmUhUuDxAVGCVxl0K5LU3Y2IQxKGtCJk5YO4XD2e7 +5b0Ub+wy4Bb0l+z8HjuqEypFXDpQTd80NbhStZBgf2cB01elsqmKD9sT9wpFGKbC +VaatDLsLx4XrBG6ueoFKBgFL6l7afEPct8wuSoUrX5MAGlge5xzQYAD5spLlEa9G +Dt2KiPCsZcqWiaHiw5vk849FXUcfFfGl+0rEKhzcfUn3zkL1mGfqZ8Nf7qjMXdMy +dbUUQYMZXtMtK3fnYBnavgaUcu0bZ7Av+GVTQvDxfpzSeMW8lK7Ko6mINFQVC8dx +TEKWX+eApFUnTb11vNNxwxdOB2l5N+kfNLnVMhuYd7l8IHQxMMQTcf8hYu0owry6 +JkIdkhnF1kXVC2YWxo4VrDPwzkBWZE28ygBNhWgKCRhZnnbDEWPuqGP/IaLN4vww +1lqkZltqZDddXvOTXN/tZmkkQHt2uP264vqJB2BkGzxOll5UDQ8V3gXwheuUGxYc +gVL4ZJSKfHnUp6oRafIBnQs5RBvqdj2wewzT8AyPWImRG6fkYvsub8qIFqG6mu4Y +ixAQ9oTgg/KOXYNsfYuLGswu/aNnAqMEjfMerSx7dDu7teETkWb+IQJtodOdE/LI +yO/puds1M+V2H0TD36zXRyvEnpfm5BTURkxM8dI6meR37/JGtObtjg+Gzjpu6HGm +sIYyhG8bvV0Vkuip4bEgBB6T39dt/DeElHABthUmzFZe/QC8j7IJjyCz40JWDJSo +8wPtOoLnLeX0ynD8x8A5NsQk3W9fgEtv0WG6Uahs7P8GEZ5Uh9GPvWQpAkjKv7OZ +XVHJdTBMJICbB1Bzr8Nl0qPfQrhFzTNBMjBEwyaBpzRiV1hdTB2YPJPbjQQtQGkO +vT/EsAEWwSqDrQrDCfGRl7mhjdAsVFMjERdJE3/2TctY8VnLaRzUTSGkpCKxl+V4 +CLrBi96N80pxer5eKYtt5gtLFw0gZeeeqb2VDj6ChVnUjJ9r0TXzyy8ztwpB8X5Y +mZUDASD1acdZZOiEp69WA6juQR0EGKQT5phh+k0HbziW+bXMM+7YwiRJzwX4obnd +wgF+wyHht3Rzaptv5JSZMkc1RGSFIdWUwEp+3Ik6DGywiTcVkU65TQ7CsQJjmmkL +AChG7tUBI4KmolT9D0rj3A90//wl3ACkCFq94m0BZOFiimUXFjqux135P5i37XRJ +/8wgWZ0nzmXdFyTkEJEessAMbCkMiDHwaT7Lbs+S0qFeobh4DD3tkONnqSNa7md4 +945Z9MJiapzD3P33TvKhyQ0wHe5W0z4= \ No newline at end of file diff --git a/tests/data/wire/certs_no_format_specified.xml b/tests/data/wire/certs_no_format_specified.xml index 4ab91a8597..14a9f6525e 100644 --- a/tests/data/wire/certs_no_format_specified.xml +++ b/tests/data/wire/certs_no_format_specified.xml @@ -1,85 +1,85 @@ 2012-11-30 - 12 + 1 - MIIOgwYJKoZIhvcNAQcDoIIOdDCCDnACAQIxggEwMIIBLAIBAoAUZcG9X+5aK8VZ -FY8eJV9j+RImq58wDQYJKoZIhvcNAQEBBQAEggEAn/hOytP/StyRuXHcqFq6x+Za -7gHfO8prXWdZW4e28NLt/x5ZOBHDDZ6buwwdXEZME0+RoiJvLqP2RNhZkEO8bkna 
-pS76xLZE4NXyfxkeEs1vJYis0WJdt/56uCzBuud2SBLuMWoAWgF5alokN0uFpVgm -CKCos+xv6Pisolc6geM8xQTYe6sLf5Z23LWftWfJqzuo/29glCCre7R80OLeZe5w -pN6XztbYz06nhVByC35To8Lm0akWAAKU7sfqM1Nty4P0rwUJPKXo42uN1GKYbDbF -x8piCAd+rs+q4Alu3qK/YaTPpMb2ECRMH6CYB8Klf/CbuWykkfS8zrsnpXT1kzCC -DTUGCSqGSIb3DQEHATAUBggqhkiG9w0DBwQInjJWFaJcZz2Agg0QX6NlJUH17o20 -90gfjWV01mPmzLKx71JT+hyzKr5vHywDSRI/mdb3RqA59ZrIKeyWr0HXEOuABlul -nxjc/Rfk1tiLQwh0iqlOjlMtRsxS6yDA0WNwK2Y9gaXcdgDm5Vioai18l4Pd0qzK -fsof5a/jEJyunW1CZK19QciwfQ2pS8QbRYgeLRZRft2I+kv6cWXlGS6YrMqKQC8t -QMxnXR4AuzVllPLbbIgtM3l9oS+6jl7jKyKogeroJ9FNLjoMBJLldRLGPRhkCmdJ -Z1m+s/BAVUH08qgj2kmHzucdULLjlRcmma9m/h91TcQCXHAavf7S+U9QwIyGRh83 -t4Y7EqbQ93mOgjajFzILSL7AT/irgJpDu6CJqMu3EMNDA0mjxn5Cdvj40sufL/g3 -UyBwqosmIwAPzNDmhPtTKvHaHfGY/k8WhoIYfAA5Lhq1z22/RODZOY0Ch2XyxQM4 -s35eppe6IhnwyMv6HfrCrqE/o/16OrvvbaFQTeTlMvU0P7MIR4pVW6tRq4NEa5Wx -JcvGutuMuzH1VMcqcKdc7wYyOqDOGU43kcV8PiALTIcqrhD8NDrKks1jSkyqQw2h -sJQckNaQIcCXkUQecQa2UGe0l4HDSJ5gAETSenjyLBKFHf3hxiWBpw446/bODgdk -0+oyreZqMpRz3vn9LC+Yt7SuVbTzRdx7nlKIiNvo8+btOuVm44evchLFHAq3Ni8D -c+tP/wss3K4Xdp+t5SvEY/nLIu11Lw44HDVMYTuNz3Ya9psL70ZLaLZM8NromnEl -CUMRNTPoOC7/KDRh2E9d6c1V4CC43wAsRhksGJnSYoiSVAhaVgLqVFQTsqNHxmcg -3Y9AEBVzm3fZg6+DxAYu+amb+r8lk0Pp+N1t6rVbKXhkbAAxg0UDO3pY8Xcz0Y3g -Qdd5rnHh1rJrehku7zTHvQaXEddUWmCoUGIXJ+bt4VOhErL6s5/j8GSG0xmfxgSE -jnGj4Jwd0Vv19uZjsBDQ54R88GcA9YX8r48gr9JAwplrQ50m9KX6GwQhDRYKN/Dh -zOt9DCUkqMqdi5T4v2qNTfkL7iXBMhsSkeYUQ/tFLyv4QQyli5uTUZ5FNXohOVAx -TNyV9+gcV5WiBR0Aje6rwPW3oTkrPnVfZCdBwt/mZjPNMO5Se7D/lWE33yYu7bJ+ -gaxRNynhEOB7RaOePzDjn7LExahFmTFV0sgQxwQ2BYsfI22cdkAf6qOxdK/kqiQm -lgzRpDjyPIFhaCCHnXyJdSqcHmDrCjcg2P6AVCDJGdFOBvupeJ7Kg7WV5EY7G6AU -ng16tyumJSMWSzSks9M0Ikop6xhq3cV+Q0OArJoreQ6eonezXjM9Y865xjF80nJL -V4lcRxdXfoKpXJwzc++pgkY9t55J0+cEyBvIXfKud1/HHOhewhoy5ATyi9LLM91n -iW1DaQXlvHZgE7GFMSCVLxy6ZopBbm9tF0NQDFi8zUtGulD3Gkoc/Bp+DWb2vsX4 -S8W9vByNvIz/SWOGNbEs2irTRXccMAL7JHJ+74bwZZi5DRrqyQWHCn/3Ls2YPI6z -lnfl15EE4G7g3+nrvP2lZFBXjsdG/U3HYi+tAyHkRN3oXvgnt9N76PoY8dlsNf6c 
-RuNqgk31uO1sX/8du3Jxz87MlzWiG3kbAHMvbcoCgy/dW4JQcM3Sqg5PmF8i9wD1 -ZuqZ7zHpWILIWd13TM3UDolQZzl+GXEX62dPPL1vBtxHhDgQicdaWFXa6DX3dVwt -DToWaAqrAPIrgxvNk5FHNCTEVTQkmCIL5JoinZSk7BAl8b085CPM6F7OjB5CR4Ts -V+6UaTUZqk+z+raL+HJNW2ds1r7+t8Po5CydMBS4M/pE7b/laUnbRu7rO8cqKucn -n+eYimib/0YuqZj9u2RXso4kzdOyIxGSGHkmSzYuoNRx80r+jHtcBBTqXk37t0FY -X5O7QItCE+uwV1Sa12yg2dgJ6vKRPCEVyMoYUBwNbKEcw1pjG9Em7HwjOZK0UrO1 -yKRz6kxffVKN9Naf7lOnXooVuedY/jcaZ2zCZtASlOe8iiQK5prM4sbMixMp9ovL -tTxy9E9kgvaI/mkzarloKPQGsk0WzuH+i39M3DOXrMf5HwfE+A55u1gnrHsxQlxp -z5acwN42+4ln6axs4aweMGAhyEtBW8TdsNomwuPk+tpqZXHI2pqS4/aVOk8R8VE7 -IqtBx2QBMINT79PDPOn3K6v9HEt9fUHJ2TWJvKRKfsu5lECJPJSJA8OQ7zzw6zQt -NXw8UhZRmNW0+eI5dykg+XsII7+njYa33EJ1Sy1Ni8ZT/izKfrKCwEm44KVAyUG5 -qUjghPPMNQY3D0qOl54DRfGVOxbHztUooblW+DnlLlpOy/+/B+H9Dscxosdx2/Mo -RftJOMlLqK7AYIYAlw1zvqZo0pf7rCcLSLt+6FrPtNZe6ULFUacZ3RqyTZovsZi5 -Ucda3bLdOHX6tKL21bRfN7L0/BjF6BJETpG3p+rBYOyCwO6HvdenpMm6cT02nrfP -QJtImjeW1ov6Pw02zNlIZAXFir78Z6AcMhV2iKEJxc1RMFBcXmylNXJmGlKYB3lJ -jWo6qumLewTz5vzRu0vZCmOf+bKmuyVxckPbrzP+4OHKhpm95Kp6sUn2pvh0S8H3 -w1pjfZ9+sIaVgMspfRPgoWTyZ0flFvAX6DHWYVejMebwfAqZaa+UAJJ6jWQbMNzo -ZtOhzCjV+2ZBYHvSiY7dtfaLwQJeMWEKIw32kEYv/Ts33n7dD/pAzZu0WCyfoqsQ -MEXhbZYSCQTJ8/gqvdlurWOJL091z6Uw810YVt+wMqsBo5lnMsS3GqkzgM2PVzuV -taddovr5CrWfAjQaFG8wcETiKEQFWS9JctKo0F+gwLwkVyc4fBSkjVmIliw1jXGu -Enf2mBei+n8EaRB2nNa/CBVGQM24WEeMNq+TqaMvnEonvMtCIEpuJAO/NzJ1pxw0 -9S+LKq3lFoIQoON5glsjV82WseAbFXmynBmSbyUY/mZQpjuNSnwLfpz4630x5vuV -VNglsZ8lW9XtSPh6GkMj+lLOCqJ5aZ4UEXDSYW7IaH4sPuQ4eAAUsKx/XlbmaOad -hgK+3gHYi98fiGGQjt9OqKzQRxVFnHtoSwbMp/gjAWqjDCFdo7RkCqFjfB1DsSj0 -TrjZU1lVMrmdEhtUNjqfRpWN82f55fxZdrHEPUQIrOywdbRiNbONwm4AfSE8ViPz -+SltYpQfF6g+tfZMwsoPSevLjdcmb1k3n8/lsEL99wpMT3NbibaXCjeJCZbAYK05 -rUw5bFTVAuv6i3Bax3rx5DqyQANS3S8TBVYrdXf9x7RpQ8oeb4oo+qn293bP4n5m -nW/D/yvsAJYcm3lD7oW7D369nV/mwKPpNC4B9q6N1FiUndvdFSbyzfNfSF9LV0RU -A/4Qm05HtE3PAUFYfwwP8MDg0HdltMn83VfqrEi/d76xlcxfoIh2RQQgqxCIS6KE -AExIY/hPYDVxApznI39xNOp7IqdPEX3i7Cv7aHeFAwbhXYMNnkfFJJTkHRdcRiJ/ 
-RE1QPlC7ijH+IF02PE/seYg4GWrkeW3jvi+IKQ9BPBoYIx0P+7wHXf4ZGtZMourd -N4fdwzFCDMFkS7wQC/GOqZltzF/gz1fWEGXRTH3Lqx0iKyiiLs2trQhFOzNw3B7E -WxCIUjRMAAJ6vvUdvoFlMw8WfBkzCVple4yrCqIw6fJEq8v0q8EQ7qKDTfyPnFBt -CtQZuTozfdPDnVHGmGPQKUODH/6Vwl+9/l7HDvV8/D/HKDnP581ix1a3bdokNtSK -7rBfovpzYltYGpVxsC6MZByYEpvIh5nHQouLR4L3Je2wB3F9nBGjNhBvGDQlxcne -AAgywpOpQfvfsnYRWt2vlQzwhHUgWhJmGMhGMmn4oKc5su87G7yzFEnq/yIUMOm/ -X0Zof/Qm92KCJS7YkLzP1GDO9XPMe+ZHeHVNXhVNCRxGNbHCHB9+g9v090sLLmal -jpgrDks19uHv0yYiMqBdpstzxClRWxgHwrZO6jtbr5jeJuLVUxV0uuX76oeomUj2 -mAwoD5cB1U8W9Ew+cMjp5v6gg0LTk90HftjhrZmMA0Ll6TqFWjxge+jsswOY1SZi -peuQGIHFcuQ7SEcyIbqju3bmeEGZwTz51yo8x2WqpCwB1a4UTngWJgDCySAI58fM -eRL6r478CAZjk+fu9ZA85B7tFczl3lj0B4QHxkX370ZeCHy39qw8vMYIcPk3ytI0 -vmj5UCSeQDHHDcwo54wi83IFEWUFh18gP4ty5Tfvs6qv7qd455UQZTAO7lwpdBlp -MJGlMqBHjDLGyY80p+O4vdlQBZ1uMH+48u91mokUP8p+tVVKh7bAw/HPG+SQsuNR -DXF+gTm/hRuY7IYe3C7Myzc8bDTtFw6Es9BLAqzFFAMjzDVz7wY1rnZQq4mmLcKg -AAMJaqItipKAroYIntXXJ3U8fsUt03M= + MIIOgwYJKoZIhvcNAQcDoIIOdDCCDnACAQIxggEwMIIBLAIBAoAU08PI+CBUqOd4 +Nbte7MLw2qCYn1UwDQYJKoZIhvcNAQEBBQAEggEASTTfHNyY+9hdXd+Eqtqk+yPb +RA7rRXWR8tQAJsdy3zAlu8WHymq945fnsf0bAW4mODIPYhhevmdo5VaI54AzAWhk +EfJvtRQlZZEMGZVKgUSwP4AG6cFaSnJuAYbi27nffM45PgD26O2WjOhnmM7minEC +31/wUoxjxVOxIc8x+Ngo+TquyBeaK1iXcchwIUnbM0xRYMfccOAEhe/iytKFPzdg +DJbDk+KbVGaUuUfhF+o4mMyJNezMUFxWkePcUgP12li57GTJSIyi8OQaFUu1qh0L +KzQ2sYl8U0WmWQBhXqvuug47WI/6XrRDpKslIV1aV4XxD1Or6H3nf0fULjQZajCC +DTUGCSqGSIb3DQEHATAUBggqhkiG9w0DBwQI+4Ch/cEogOSAgg0QvlelG9yDK2GE +XX1wn8Xw0wCt+zIceXs8C6QuRSmZLEkZVv8Y+duMwi2A0tcg63HOmY2AfIPvTTt8 +eto3YwIklrfF20jBvCg/pT3kfm6TICWmMNd5XesTq8UNmkqzJQQ84L3Kbs/ix2pG +9RaeXkrg0VO7FBDVH8b+jIT9IVDAEXgBQVefcCImVZ9L2hQWNABFrFXAQSTKjfFJ +IEOfXUhTiH434V1RKJczhFiH5SNZ0kbaRjmaQkXqbXQ5kKoq8VNkmFc6vPCclTmq +QJFfIUTepljWW/HuVkUycNYQQkblmWNF9FEwSx++x3Tz1FLR3UlzOkJCqr+tS3jv +WFnI16VlOHaaHA++YKhW1PUujJcEdZaXBE0FC6JZF7IOAOjSdLSmRL9yU95erfgZ +hRo2FB8EWVZitIG+DPU9vU59chGpqXYzZU4/aTpedGeWSZ9GFXRqwb6htmajjTWu 
+l5fIME3hWt7kcejpuXCTDcdG4YcbngZu4hcepMrUhm9g2BdmIDb1YiB7290PMop8 +4nNo97tSBvhzk300cg6+pfxy1iAv3++g/ggOI+Y/gFmgN88mmBMWm0+mocJ0SZGY +3+8K/8pDpJpfAAXSjayl7T2UXUdJe8fpOtetiHUr2zIbZXlM4IQw+0UMAVjTiaRT +BIDGoPEcpCcxqPlSTTEie166uzzPXG9skVgennjN6YopwMC/WPaFRJu/eTlQOqlB +EqvK9TKJG8u2yp00J04MGYXluY4l/o3/KLpT0mCOeOJm3KerfwQ/jU2oHHmvIATN +XYy32ULqx/CjL+N3ax0Nu+UrgMQPcVhrTN/7lnZpFLYwXetGzH/4jdNfIfTc4yGn +0GlVT6cVgJyV8wyYpbqCxHtCW83II8vXLjTfeIffHBoJU0fMMPWEIxRuMQSksm0H +F1u/rfGVSXnueshbJUD3pnvTiLPuWcOexSxP+B8BCNfi21jX5Ha+U9RKrKbHc4h9 +PkiWxU6ZEqCBkdP9ssKnmMKMsrC7sZRoYziHNeqlZp/GFQmkI+DeFlqSPn3Lv9Or +HF3bZokZCf0RGEkZDPrigaiEoL7PH/TtVZF8miL4JCLB0FVB08vWeeP5zjQT4H6J +jSC2pw+5bA2UWGshgsKKAJJihYcOuybtzglh7nqmSSZcszz3GyuDhdR8KDrYwChU +Hn13+rSWAbbqtxSyPc5fd22Q4Do2aD6PVdRadHjG0qeE7Dq46YHT3Z9KF0nQTLk8 +uYq8hL5+jQEgTnUB0yJTKdEcg05TyrMfNHWuM1pru0bqpf25vpwP5t+Sd/vgWJNc +XtRLWrMdYBuSG9zOyLaH7bj0rcMhN3ULisKej9IT/xHOWSXXZjNoe1P3q9fvtMbg +ZXAale/xJ6rXq6mLvZXivJfQJkPbSV7fByPPKO6TMnHbNEgLOGO3XtHEwC24JKup +C0ohq03QqQHEisS9Mk5LvWmSchXR3/7vCtJFyOemQom7nCy8cx4Y1JGmZ4SGSaEs +QZs7GC7Ftb/X82LRuknvS19ApOVFEs4/8t+LviD3x7Z9quVv+fZvydhzNKGRR6kQ +fYZwK7rqqkvuFKgXqNbzlrtlUqOUPXJgdO7QHOtU8z+k2NzBWfOp6j+Ef8rc3GDU +HSVZZ/Lz0RWedxRC1zoZJSol7ckMxIGIpDhtb9xgDmaGKILWOR9k+wG6+7ywQ2LE +PB3myDOclvKUDyb/DqwRS9ch9yyYSmz8WXTgdSeyOjp8QT2JQuuOOhoooHuKSxAk ++7v/Fh5bNGtjHByuzMYSdLcWsLX+UohpDoc1heVgUA3R6EuIOJTA0nC653YmqIBp +R5rsT+esub/EndweZTacmc2nDJxTKdZgMvdwhnsOZZBGsOaD7MXAS5vCsze+PQmY +4+VqqWPASaclV6CygN4qSxmww6mVgmAgWVmJqfa6vOyb3zhx68TkNEp9rxJFcJSJ +NiTTvWe0nF+o2/a1HZ8rZFdf65KsqGSiqu/6HoUuFzWLxRCqSjB9RkfSqrDVAVim +pwL46zGRsqZV+5xrRQlxINNUbg/D11zcp1zdhQvhDrpBoLMjK7AaxA5msPYFy6Gm +KMRAG2kyi802W5CPZWkbiEoUA8vkiICuxN+Pdh146zk9Ngl4PC3YpNCMtXK11ifd +hYxmWqEuQ2AcdVTckosaWrFMn5MqEcR0aAXZbnjIMgTZ6SMYJBZMWjzJhe/UQjTo +vICK7KAH82chpW2hG2I67z7e1Nv930RyL6JbYI8mSqgccPBzOBUhpHvKDM59z8Nc +eStEYDdOcMz8P+c/H3Bh4WsyMWMOwWvjyy6GX5Bpl5z94tWFRn6W4FK5iDqp+HHm +v5W1+hlFBxXtuzBcSQntcj8LoExJ2mK6BhZkaeAESMqPvNeNFmhEVUGq0/+c7T4I 
+L+1YkQPcm/nIpwW/ITmkGmi5n5VsvbJFDbQe+h9LI2aqvWtzA0YT5Ed77Glbdbgq +qB8EyXdr1BsBb7s7bbXm4Wf8UJkCZESg8iQExkUk8HqMJRxjctjma0DyyKVi4j8Q ++BA1EYBEX37641S+ZR9fYmQeuULGkf3d+w/ttgvm6YDZivsZYWkTscX+lUtoHhWN +5EOAfllI0/DaGX15mGONMV8YA1PoCNEX3yKJ5tVGkxxUPK+Op7ZHvJmtb1fPMRRY +z+evQ+NTXTZZzdr3Kfs4yYbuXG4e1odm2v/zBKG7JF3yWPMtXZZiMks/BkaXTq1P +LrB0VxGcMsLeQ5HbbWJtchyCWyy63CNNbfYNohjxru52DjaAQlDKQT9pOiSmGJzb +7+hNnKYnOfo6Du2ljz7C9C4mxnRJsRA2O9Cw66J5XPy1W+2+RmvP72jXwoFWYzPq +jxNs2wxOYQjEDpXBTmCbW58F5cTbSTk3D15iCtYtf31tpuPpHEnz+2OvrX0WhygN +esZJnln2Tu2ut1pVhAuJDLZTj24Y4MP0nmDINuLDAkFji0CwjACvW7M9SbIOLLYU ++5JHHjB7wqaTXWFzpt/ZKXMXlwCzWjo3pDERbrpYbwS3GHqmtcyIZK4EA7Ulka5Y +7rLPWS5eKcjX3tp2FyX5pD52TpuUMPAk6vyefX+NznP7opvJpusHbkschojFVRDA +zHIpIGeWjYcWLk5YTPagzH8o+4ci1OEk+OMc8i6PxkQDeBw1RiCAFfBnKPCSEtFk +KJlw7fspk3/chA6mmvOHjkrQmUhUuDxAVGCVxl0K5LU3Y2IQxKGtCJk5YO4XD2e7 +5b0Ub+wy4Bb0l+z8HjuqEypFXDpQTd80NbhStZBgf2cB01elsqmKD9sT9wpFGKbC +VaatDLsLx4XrBG6ueoFKBgFL6l7afEPct8wuSoUrX5MAGlge5xzQYAD5spLlEa9G +Dt2KiPCsZcqWiaHiw5vk849FXUcfFfGl+0rEKhzcfUn3zkL1mGfqZ8Nf7qjMXdMy +dbUUQYMZXtMtK3fnYBnavgaUcu0bZ7Av+GVTQvDxfpzSeMW8lK7Ko6mINFQVC8dx +TEKWX+eApFUnTb11vNNxwxdOB2l5N+kfNLnVMhuYd7l8IHQxMMQTcf8hYu0owry6 +JkIdkhnF1kXVC2YWxo4VrDPwzkBWZE28ygBNhWgKCRhZnnbDEWPuqGP/IaLN4vww +1lqkZltqZDddXvOTXN/tZmkkQHt2uP264vqJB2BkGzxOll5UDQ8V3gXwheuUGxYc +gVL4ZJSKfHnUp6oRafIBnQs5RBvqdj2wewzT8AyPWImRG6fkYvsub8qIFqG6mu4Y +ixAQ9oTgg/KOXYNsfYuLGswu/aNnAqMEjfMerSx7dDu7teETkWb+IQJtodOdE/LI +yO/puds1M+V2H0TD36zXRyvEnpfm5BTURkxM8dI6meR37/JGtObtjg+Gzjpu6HGm +sIYyhG8bvV0Vkuip4bEgBB6T39dt/DeElHABthUmzFZe/QC8j7IJjyCz40JWDJSo +8wPtOoLnLeX0ynD8x8A5NsQk3W9fgEtv0WG6Uahs7P8GEZ5Uh9GPvWQpAkjKv7OZ +XVHJdTBMJICbB1Bzr8Nl0qPfQrhFzTNBMjBEwyaBpzRiV1hdTB2YPJPbjQQtQGkO +vT/EsAEWwSqDrQrDCfGRl7mhjdAsVFMjERdJE3/2TctY8VnLaRzUTSGkpCKxl+V4 +CLrBi96N80pxer5eKYtt5gtLFw0gZeeeqb2VDj6ChVnUjJ9r0TXzyy8ztwpB8X5Y +mZUDASD1acdZZOiEp69WA6juQR0EGKQT5phh+k0HbziW+bXMM+7YwiRJzwX4obnd +wgF+wyHht3Rzaptv5JSZMkc1RGSFIdWUwEp+3Ik6DGywiTcVkU65TQ7CsQJjmmkL 
+AChG7tUBI4KmolT9D0rj3A90//wl3ACkCFq94m0BZOFiimUXFjqux135P5i37XRJ +/8wgWZ0nzmXdFyTkEJEessAMbCkMiDHwaT7Lbs+S0qFeobh4DD3tkONnqSNa7md4 +945Z9MJiapzD3P33TvKhyQ0wHe5W0z4= - + \ No newline at end of file diff --git a/tests/data/wire/ext_conf-no_gs_metadata.xml b/tests/data/wire/ext_conf-no_gs_metadata.xml index 605e484254..ef5d3a1647 100644 --- a/tests/data/wire/ext_conf-no_gs_metadata.xml +++ b/tests/data/wire/ext_conf-no_gs_metadata.xml @@ -19,7 +19,7 @@ - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/ext_conf.xml b/tests/data/wire/ext_conf.xml index 54d785159f..099ebacf30 100644 --- a/tests/data/wire/ext_conf.xml +++ b/tests/data/wire/ext_conf.xml @@ -19,7 +19,7 @@ - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/ext_conf_additional_locations.xml b/tests/data/wire/ext_conf_additional_locations.xml index 8f5e746b06..20c7fb873d 100644 --- a/tests/data/wire/ext_conf_additional_locations.xml +++ 
b/tests/data/wire/ext_conf_additional_locations.xml @@ -24,7 +24,7 @@ - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/ext_conf_autoupgrade.xml b/tests/data/wire/ext_conf_autoupgrade.xml index 77a201ad9c..74acf0af7e 100644 --- a/tests/data/wire/ext_conf_autoupgrade.xml +++ b/tests/data/wire/ext_conf_autoupgrade.xml @@ -21,7 +21,7 @@ - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/ext_conf_autoupgrade_internalversion.xml b/tests/data/wire/ext_conf_autoupgrade_internalversion.xml index 44cad87819..afa27c6797 100644 --- a/tests/data/wire/ext_conf_autoupgrade_internalversion.xml +++ b/tests/data/wire/ext_conf_autoupgrade_internalversion.xml @@ -21,7 +21,7 @@ - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + 
{"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/ext_conf_dependencies_with_empty_settings.xml b/tests/data/wire/ext_conf_dependencies_with_empty_settings.xml index b26395ec23..f705c2f1b0 100644 --- a/tests/data/wire/ext_conf_dependencies_with_empty_settings.xml +++ b/tests/data/wire/ext_conf_dependencies_with_empty_settings.xml @@ -25,7 +25,7 @@ - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/ext_conf_in_vm_artifacts_profile.xml b/tests/data/wire/ext_conf_in_vm_artifacts_profile.xml index a1af74f784..9575139a62 100644 --- a/tests/data/wire/ext_conf_in_vm_artifacts_profile.xml +++ b/tests/data/wire/ext_conf_in_vm_artifacts_profile.xml @@ -20,7 +20,7 @@ - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} 
https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/ext_conf_in_vm_empty_artifacts_profile.xml b/tests/data/wire/ext_conf_in_vm_empty_artifacts_profile.xml index cd5bb3d3e9..a0c87cfb6d 100644 --- a/tests/data/wire/ext_conf_in_vm_empty_artifacts_profile.xml +++ b/tests/data/wire/ext_conf_in_vm_empty_artifacts_profile.xml @@ -20,7 +20,7 @@ - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/ext_conf_in_vm_metadata.xml b/tests/data/wire/ext_conf_in_vm_metadata.xml index 9a4f89cb81..ff5e92ae25 100644 --- a/tests/data/wire/ext_conf_in_vm_metadata.xml +++ b/tests/data/wire/ext_conf_in_vm_metadata.xml @@ -21,7 +21,7 @@ - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/ext_conf_internalversion.xml b/tests/data/wire/ext_conf_internalversion.xml index 44cad87819..afa27c6797 100644 --- 
a/tests/data/wire/ext_conf_internalversion.xml +++ b/tests/data/wire/ext_conf_internalversion.xml @@ -21,7 +21,7 @@ - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/ext_conf_invalid_and_valid_handlers.xml b/tests/data/wire/ext_conf_invalid_and_valid_handlers.xml index f9c95d694a..bede284e7e 100644 --- a/tests/data/wire/ext_conf_invalid_and_valid_handlers.xml +++ b/tests/data/wire/ext_conf_invalid_and_valid_handlers.xml @@ -22,11 +22,11 @@ - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + 
{"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/ext_conf_invalid_vm_metadata.xml b/tests/data/wire/ext_conf_invalid_vm_metadata.xml index 7c766220e5..4eb35e87ae 100644 --- a/tests/data/wire/ext_conf_invalid_vm_metadata.xml +++ b/tests/data/wire/ext_conf_invalid_vm_metadata.xml @@ -21,7 +21,7 @@ - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/ext_conf_multiple_extensions.xml b/tests/data/wire/ext_conf_multiple_extensions.xml index 5845a179f5..bde568bd1b 100644 --- a/tests/data/wire/ext_conf_multiple_extensions.xml +++ b/tests/data/wire/ext_conf_multiple_extensions.xml @@ -25,22 +25,22 @@ - 
{"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIIB4AYJKoZIhvcNAQcDoIIB0TCCAc0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEH3vWjYIrceWQigVQwoS8z0wDQYJKoZIhvcNAQEBBQAEggEANYey5W0qDqC6RHZlVnpLp2dWrMr1Rt5TCFkOjq1jU4y2y1FPtsTTKq9Z5pdGb/IHQo9VcT+OFglO3bChMbqc1vgmk4wkTQkgJVD3C8Rq4nv3uvQIux+g8zsa1MPKT5fTwG/dcrBp9xqySJLexUiuJljmNJgorGc0KtLwjnad4HTSKudDSo5DGskSDLxxLZYx0VVtQvgekOOwT/0C0pN4+JS/766jdUAnHR3oOuD5Dx7/c6EhFSoiYXMA0bUzH7VZeF8j/rkP1xscLQRrCScCNV2Ox424Y4RBbcbP/p69lDxGURcIKLKrIUhQdC8CfUMkQUEmFDLcOtxutCTFBZYMJzBbBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECCuc0a4Gl8PAgDgcHekee/CivSTCXntJiCrltUDob8cX4YtIS6lq3H08Ar+2tKkpg5e3bOkdAo3q2GfIrGDm4MtVWw==","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIIB4AYJKoZIhvcNAQcDoIIB0TCCAc0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEH3vWjYIrceWQigVQwoS8z0wDQYJKoZIhvcNAQEBBQAEggEANYey5W0qDqC6RHZlVnpLp2dWrMr1Rt5TCFkOjq1jU4y2y1FPtsTTKq9Z5pdGb/IHQo9VcT+OFglO3bChMbqc1vgmk4wkTQkgJVD3C8Rq4nv3uvQIux+g8zsa1MPKT5fTwG/dcrBp9xqySJLexUiuJljmNJgorGc0KtLwjnad4HTSKudDSo5DGskSDLxxLZYx0VVtQvgekOOwT/0C0pN4+JS/766jdUAnHR3oOuD5Dx7/c6EhFSoiYXMA0bUzH7VZeF8j/rkP1xscLQRrCScCNV2Ox424Y4RBbcbP/p69lDxGURcIKLKrIUhQdC8CfUMkQUEmFDLcOtxutCTFBZYMJzBbBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECCuc0a4Gl8PAgDgcHekee/CivSTCXntJiCrltUDob8cX4YtIS6lq3H08Ar+2tKkpg5e3bOkdAo3q2GfIrGDm4MtVWw==","publicSettings":{"foo":"bar"}}}]} - 
{"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIIBwAYJKoZIhvcNAQcDoIIBsTCCAa0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEH3vWjYIrceWQigVQwoS8z0wDQYJKoZIhvcNAQEBBQAEggEABILhQPoMx3NEbd/sS0xAAE4rJXwzJSE0bWr4OaKpcGS4ePtaNW8XWm+psYR9CBlXuGCuDVlFEdPmO2Ai8NX8TvT7RVYYc6yVQKpNQqO6Q9g9O52XXX4tBSFSCfoTzd1kbGC1c2wbXDyeROGCjraWuGHd4C9s9gytpgAlYicZjOqV3deo30F4vXZ+ZhCNpMkOvSXcsNpzTzQ/mskwNubN8MPkg/jEAzTHRpiJl3tjGtTqm00GHMqFF8/31jnoLQeQnWSmY+FBpiTUhPzyjufIcoZ+ueGXZiJ77xyH2Rghh5wvQM8oTVy2dwFQGeqjHOVgdgRNi/HgfZhcdltaQ8kjYDA7BgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECHPM0ZKBn+aWgBiVPT7zlkJA8eGuH7bNMTQCtGoJezToa24=","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIIBwAYJKoZIhvcNAQcDoIIBsTCCAa0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEH3vWjYIrceWQigVQwoS8z0wDQYJKoZIhvcNAQEBBQAEggEABILhQPoMx3NEbd/sS0xAAE4rJXwzJSE0bWr4OaKpcGS4ePtaNW8XWm+psYR9CBlXuGCuDVlFEdPmO2Ai8NX8TvT7RVYYc6yVQKpNQqO6Q9g9O52XXX4tBSFSCfoTzd1kbGC1c2wbXDyeROGCjraWuGHd4C9s9gytpgAlYicZjOqV3deo30F4vXZ+ZhCNpMkOvSXcsNpzTzQ/mskwNubN8MPkg/jEAzTHRpiJl3tjGtTqm00GHMqFF8/31jnoLQeQnWSmY+FBpiTUhPzyjufIcoZ+ueGXZiJ77xyH2Rghh5wvQM8oTVy2dwFQGeqjHOVgdgRNi/HgfZhcdltaQ8kjYDA7BgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECHPM0ZKBn+aWgBiVPT7zlkJA8eGuH7bNMTQCtGoJezToa24=","publicSettings":{"foo":"bar"}}}]} - 
{"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIIB4AYJKoZIhvcNAQcDoIIB0TCCAc0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEH3vWjYIrceWQigVQwoS8z0wDQYJKoZIhvcNAQEBBQAEggEAGSKUDRN64DIB7FS7yKXa07OXaFPhmdNnNDOAOD3/WVFb9fQ2bztV46waq7iRO+lpz7LSerRzIe6Kod9zCfK7ryukRomVHIfTIBwPjQ+Otn8ZD2aVcrxR0EI95x/SGyiESJRQnOMbpoVSWSu2KJUCPfycQ4ODbaazDc61k0JCmmRy12rQ4ttyWKhYwpwI2OYFHGr39N/YYq6H8skHj5ve1605i4P9XpfEyIwF5BbX59tDOAFFQtX7jzQcz//LtaHHjwLmysmD9OG5XyvfbBICwSYJfMX9Jh1aahLwcjL8Bd0vYyGL1ItMQF5KfDwog4+HLcRGx+S02Yngm3/YKS9DmzBbBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECFGLNfK0bO5OgDgH90bRzqfgKK6EEh52XJfHz9G/ZL1mqP/ueWqo95PtEFo1gvI7z25V/pT0tBGibXgRhQXLFmwVTA==","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIIB4AYJKoZIhvcNAQcDoIIB0TCCAc0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEH3vWjYIrceWQigVQwoS8z0wDQYJKoZIhvcNAQEBBQAEggEAGSKUDRN64DIB7FS7yKXa07OXaFPhmdNnNDOAOD3/WVFb9fQ2bztV46waq7iRO+lpz7LSerRzIe6Kod9zCfK7ryukRomVHIfTIBwPjQ+Otn8ZD2aVcrxR0EI95x/SGyiESJRQnOMbpoVSWSu2KJUCPfycQ4ODbaazDc61k0JCmmRy12rQ4ttyWKhYwpwI2OYFHGr39N/YYq6H8skHj5ve1605i4P9XpfEyIwF5BbX59tDOAFFQtX7jzQcz//LtaHHjwLmysmD9OG5XyvfbBICwSYJfMX9Jh1aahLwcjL8Bd0vYyGL1ItMQF5KfDwog4+HLcRGx+S02Yngm3/YKS9DmzBbBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECFGLNfK0bO5OgDgH90bRzqfgKK6EEh52XJfHz9G/ZL1mqP/ueWqo95PtEFo1gvI7z25V/pT0tBGibXgRhQXLFmwVTA==","publicSettings":{"foo":"bar"}}}]} - 
{"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIIEzAYJKoZIhvcNAQcDoIIEvTCCBLkCAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEH3vWjYIrceWQigVQwoS8z0wDQYJKoZIhvcNAQEBBQAEggEAFqLDBFGeuglluYmZb0Zw+ZlMiMIws9/LgmurVSRUTU/nSleIc9vOLcukfMeCpMativzHe23iDFy6p3XDkViNcuzqbhlPq5LQsXXg+xaUrrg8Xy+q7KUQdxzPdNBdpgkUh6yE2EFbqVLQ/7x+TkkSsw35uPT0nEqSj3yYFGH7X/NJ49fKU+ZvFDp/N+o54UbE6ZdxlHFtz6NJFxx5w4z5adQ8DgnUyS0bJ2denolknODfSW2D2alm00SXlI88CAjeHgEDkoLCduwkrDkSFAODcAiEHHX8oYCnfanatpjm7ZgSutS9y7+XUnGWxDYoujHDI9bbV0WpyDcx/DIrlZ+WcTCCA0UGCSqGSIb3DQEHATAUBggqhkiG9w0DBwQIrL18Lbp1qU6AggMgGklvozqr8HqYP+DwkvxdwHSpo+23QFxh70os+NJRtVgBv5NjPEziXo3FpXHMPvt0kp0IwXbwyy5vwnjCTA2sQOYgj77X6RmwF6+1gt2DIHDN1Q6jWzdcXZVHykSiF3gshbebRKO0hydfCaCyYL36HOZ8ugyCctOon5EflrnoOYDDHRbsr30DAxZCAwGOGZEeoU2+U+YdhuMvplnMryD1f6b8FQ7jXihe/zczAibX5/22NxhsVgALdsV5h6hwuTbspDt3V15/VU8ak7a4xxdBfXOX0HcQI86oqsFr7S7zIveoQHsW+wzlyMjwi6DRPFpz2wFkv5ivgFEvtCzDQP4aCqGI8VdqzR7aUDnuqiSCe/cbmv5mSmTYlDPTR03WS0IvgyeoNAzqCbYQe44AUBEZb/yT8Z3XxwW0GzcPMZQ0XjpcZiaKAueN9V8nJgNCEDPTJqpSjy+tEHmSgxn70+E57F0vzPvdQ3vOEeRj8zlBblHd4uVrhxdBMUuQ73JEQEha5rz0qcUy04Wmjld1rBuX6pdOqrArAYzTLJbIuLqDjlnYFsHLs9QBGvIEb9VFOlAm5JW8npBbIRHXqPfwZWs60+uNksTtsN3MxBxUWJPOByb4xRNx+nRpTOvfKKFlgq1ReK5bGSTCB7x0Ft3+T42LOQDrBPyxxtGzWs+aq05qFgI4n0h8X82wxJflK+kUdwvvG/ZY5MM+/le2zOrUeyzvxXsHoRetgg+DOk7v+v7VsuT1KuvTXvgzxoOFF3/T2pNPpE3h6bbP2BUqZ2yzPNziGFslywDLZ8W3OUZoQejGqobRePdgUoBi5q2um/sPnq81kOJ/qhIOVq581ZD4IQWLot8eK8vX0G/y7y71YelRR51cUfgR5WvZZf6LvYw+GpwOtSViugl9QxGCviSLgHTJSSEm0ijtbzKhwP4vEyydNDrz8+WYB8DNIV7K2Pc8JyxAM03FYX30CaaJ40pbEUuVQVEnkAD2E//29/ZzgNTf/LBMzMEP5j7wlL+QQpmPAtL/FlBrOJ4nDEqsOOhWzI1MN51xRZuv3e2RqzVPiSmrKtk=","publicSettings":{"foo":"bar"}}}]} + 
{"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIIEzAYJKoZIhvcNAQcDoIIEvTCCBLkCAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEH3vWjYIrceWQigVQwoS8z0wDQYJKoZIhvcNAQEBBQAEggEAFqLDBFGeuglluYmZb0Zw+ZlMiMIws9/LgmurVSRUTU/nSleIc9vOLcukfMeCpMativzHe23iDFy6p3XDkViNcuzqbhlPq5LQsXXg+xaUrrg8Xy+q7KUQdxzPdNBdpgkUh6yE2EFbqVLQ/7x+TkkSsw35uPT0nEqSj3yYFGH7X/NJ49fKU+ZvFDp/N+o54UbE6ZdxlHFtz6NJFxx5w4z5adQ8DgnUyS0bJ2denolknODfSW2D2alm00SXlI88CAjeHgEDkoLCduwkrDkSFAODcAiEHHX8oYCnfanatpjm7ZgSutS9y7+XUnGWxDYoujHDI9bbV0WpyDcx/DIrlZ+WcTCCA0UGCSqGSIb3DQEHATAUBggqhkiG9w0DBwQIrL18Lbp1qU6AggMgGklvozqr8HqYP+DwkvxdwHSpo+23QFxh70os+NJRtVgBv5NjPEziXo3FpXHMPvt0kp0IwXbwyy5vwnjCTA2sQOYgj77X6RmwF6+1gt2DIHDN1Q6jWzdcXZVHykSiF3gshbebRKO0hydfCaCyYL36HOZ8ugyCctOon5EflrnoOYDDHRbsr30DAxZCAwGOGZEeoU2+U+YdhuMvplnMryD1f6b8FQ7jXihe/zczAibX5/22NxhsVgALdsV5h6hwuTbspDt3V15/VU8ak7a4xxdBfXOX0HcQI86oqsFr7S7zIveoQHsW+wzlyMjwi6DRPFpz2wFkv5ivgFEvtCzDQP4aCqGI8VdqzR7aUDnuqiSCe/cbmv5mSmTYlDPTR03WS0IvgyeoNAzqCbYQe44AUBEZb/yT8Z3XxwW0GzcPMZQ0XjpcZiaKAueN9V8nJgNCEDPTJqpSjy+tEHmSgxn70+E57F0vzPvdQ3vOEeRj8zlBblHd4uVrhxdBMUuQ73JEQEha5rz0qcUy04Wmjld1rBuX6pdOqrArAYzTLJbIuLqDjlnYFsHLs9QBGvIEb9VFOlAm5JW8npBbIRHXqPfwZWs60+uNksTtsN3MxBxUWJPOByb4xRNx+nRpTOvfKKFlgq1ReK5bGSTCB7x0Ft3+T42LOQDrBPyxxtGzWs+aq05qFgI4n0h8X82wxJflK+kUdwvvG/ZY5MM+/le2zOrUeyzvxXsHoRetgg+DOk7v+v7VsuT1KuvTXvgzxoOFF3/T2pNPpE3h6bbP2BUqZ2yzPNziGFslywDLZ8W3OUZoQejGqobRePdgUoBi5q2um/sPnq81kOJ/qhIOVq581ZD4IQWLot8eK8vX0G/y7y71YelRR51cUfgR5WvZZf6LvYw+GpwOtSViugl9QxGCviSLgHTJSSEm0ijtbzKhwP4vEyydNDrz8+WYB8DNIV7K2Pc8JyxAM03FYX30CaaJ40pbEUuVQVEnkAD2E//29/ZzgNTf/LBMzMEP5j7wlL+QQpmPAtL/FlBrOJ4nDEqsOOhWzI1MN51xRZuv3e2RqzVPiSmrKtk=","publicSettings":{"foo":"bar"}}}]} diff --git a/tests/data/wire/ext_conf_no_public.xml b/tests/data/wire/ext_conf_no_public.xml index 63e7013cc0..5ee9635cca 100644 --- a/tests/data/wire/ext_conf_no_public.xml +++ 
b/tests/data/wire/ext_conf_no_public.xml @@ -39,7 +39,7 @@ - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK"}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK"}}]} diff --git a/tests/data/wire/ext_conf_required_features.xml b/tests/data/wire/ext_conf_required_features.xml index 798ba5c52d..2dedcdbab2 100644 --- a/tests/data/wire/ext_conf_required_features.xml +++ b/tests/data/wire/ext_conf_required_features.xml @@ -32,7 +32,7 @@ - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/ext_conf_rsm_version.xml b/tests/data/wire/ext_conf_rsm_version.xml index 806063541a..d76ac6453c 100644 --- a/tests/data/wire/ext_conf_rsm_version.xml +++ b/tests/data/wire/ext_conf_rsm_version.xml @@ -25,7 +25,7 @@ - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} diff --git a/tests/data/wire/ext_conf_sequencing.xml b/tests/data/wire/ext_conf_sequencing.xml index 3c9a2ddd79..99ffd402c3 100644 --- a/tests/data/wire/ext_conf_sequencing.xml +++ 
b/tests/data/wire/ext_conf_sequencing.xml @@ -23,12 +23,12 @@ - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/ext_conf_settings_case_mismatch.xml b/tests/data/wire/ext_conf_settings_case_mismatch.xml index 71286c5bf5..cb7c82d73b 100644 --- a/tests/data/wire/ext_conf_settings_case_mismatch.xml +++ b/tests/data/wire/ext_conf_settings_case_mismatch.xml @@ -25,27 +25,27 @@ - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + 
{"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} diff --git a/tests/data/wire/ext_conf_upgradeguid.xml b/tests/data/wire/ext_conf_upgradeguid.xml index 2ec7147bb8..7cd013b5b5 100644 --- a/tests/data/wire/ext_conf_upgradeguid.xml +++ b/tests/data/wire/ext_conf_upgradeguid.xml @@ -19,7 +19,7 @@ - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} 
https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/ext_conf_version_missing_in_agent_family.xml b/tests/data/wire/ext_conf_version_missing_in_agent_family.xml index 3f81ed1195..eee17a4ef1 100644 --- a/tests/data/wire/ext_conf_version_missing_in_agent_family.xml +++ b/tests/data/wire/ext_conf_version_missing_in_agent_family.xml @@ -23,7 +23,7 @@ - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} diff --git a/tests/data/wire/ext_conf_version_missing_in_manifest.xml b/tests/data/wire/ext_conf_version_missing_in_manifest.xml index c750d5d3a2..4d3ebd70ce 100644 --- a/tests/data/wire/ext_conf_version_missing_in_manifest.xml +++ b/tests/data/wire/ext_conf_version_missing_in_manifest.xml @@ -31,7 +31,7 @@ - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} diff --git a/tests/data/wire/ext_conf_version_not_from_rsm.xml b/tests/data/wire/ext_conf_version_not_from_rsm.xml index 9da8f5da72..9636c80d43 100644 --- a/tests/data/wire/ext_conf_version_not_from_rsm.xml +++ b/tests/data/wire/ext_conf_version_not_from_rsm.xml @@ -25,7 +25,7 @@ - 
{"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} diff --git a/tests/data/wire/ext_conf_vm_not_enabled_for_rsm_upgrades.xml b/tests/data/wire/ext_conf_vm_not_enabled_for_rsm_upgrades.xml index 384723f461..e7017c4ce3 100644 --- a/tests/data/wire/ext_conf_vm_not_enabled_for_rsm_upgrades.xml +++ b/tests/data/wire/ext_conf_vm_not_enabled_for_rsm_upgrades.xml @@ -25,7 +25,7 @@ - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} diff --git a/tests/data/wire/invalid_config/ext_conf_multiple_depends_on_for_single_handler.xml b/tests/data/wire/invalid_config/ext_conf_multiple_depends_on_for_single_handler.xml index 8d76b732c9..a9aa7c49a5 100644 --- a/tests/data/wire/invalid_config/ext_conf_multiple_depends_on_for_single_handler.xml +++ b/tests/data/wire/invalid_config/ext_conf_multiple_depends_on_for_single_handler.xml @@ -28,16 +28,16 @@ - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]}
- {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/invalid_config/ext_conf_multiple_runtime_settings_same_plugin.xml b/tests/data/wire/invalid_config/ext_conf_multiple_runtime_settings_same_plugin.xml index 43e1e02819..4de9a4cebb 100644 --- a/tests/data/wire/invalid_config/ext_conf_multiple_runtime_settings_same_plugin.xml +++ b/tests/data/wire/invalid_config/ext_conf_multiple_runtime_settings_same_plugin.xml @@ -21,8 +21,8 @@ - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + 
{"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/invalid_config/ext_conf_multiple_settings_for_same_handler.xml b/tests/data/wire/invalid_config/ext_conf_multiple_settings_for_same_handler.xml index 7351c8bf56..a1cc86381b 100644 --- a/tests/data/wire/invalid_config/ext_conf_multiple_settings_for_same_handler.xml +++ b/tests/data/wire/invalid_config/ext_conf_multiple_settings_for_same_handler.xml @@ -21,10 +21,10 @@ - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/invalid_config/ext_conf_plugin_settings_version_mismatch.xml b/tests/data/wire/invalid_config/ext_conf_plugin_settings_version_mismatch.xml index dcf1014641..7220b59c72 100644 --- a/tests/data/wire/invalid_config/ext_conf_plugin_settings_version_mismatch.xml +++ 
b/tests/data/wire/invalid_config/ext_conf_plugin_settings_version_mismatch.xml @@ -19,10 +19,10 @@ - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/invalid_config/ext_conf_single_and_multi_config_settings_same_plugin.xml b/tests/data/wire/invalid_config/ext_conf_single_and_multi_config_settings_same_plugin.xml index 8a30ddbaf2..899d23398a 100644 --- a/tests/data/wire/invalid_config/ext_conf_single_and_multi_config_settings_same_plugin.xml +++ b/tests/data/wire/invalid_config/ext_conf_single_and_multi_config_settings_same_plugin.xml @@ -21,8 +21,8 @@ - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} - {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"BD447EF71C3ADDF7C837E84D630F3FAC22CCD22F","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + 
{"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} + {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"F6ABAA61098A301EBB8A571C3C7CF77F355F7FA9","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/trans_cert b/tests/data/wire/trans_cert index 35793e019f..c522a2f519 100644 --- a/tests/data/wire/trans_cert +++ b/tests/data/wire/trans_cert @@ -1,19 +1,19 @@ -----BEGIN CERTIFICATE----- -MIIDEzCCAfugAwIBAgIUDcHXiRT74wOkLZYnyoZibT9+2G8wDQYJKoZIhvcNAQEL -BQAwGTEXMBUGA1UEAwwOTGludXhUcmFuc3BvcnQwHhcNMjIwODEyMTgzMTM5WhcN -MjQwODExMTgzMTM5WjAZMRcwFQYDVQQDDA5MaW51eFRyYW5zcG9ydDCCASIwDQYJ -KoZIhvcNAQEBBQADggEPADCCAQoCggEBAK/XWh+Djc2WYoJ/8FkZd8OV3V47fID5 -WV8hSBz/i/hVUKHhCWTQfE4VcQBGYFyK8lMKIBV7t6Bq05TQGuB8148HSjIboDx3 -Ndd0C/+lYcBE1izMrHKZYhcy7lSlEUk+y5iye0cA5k/dlJhfwoxWolw0E2dMOjlY -qzkEGJdyS6+hFddo696HzD7OYhxh1r50aHPWqY8NnC51487loOtPs4LYA2bd3HSg -ECpOtKzyJW+GP0H2vBa7MrXrZOnD1K2j2xb8nTnYnpNtlmnZPj7VYFsLOlsq547X -nFiSptPWslbVogkUVkCZlAqkMcJ/OtH70ZVjLyjFd6j7J/Wy8MrA7pECAwEAAaNT -MFEwHQYDVR0OBBYEFGXBvV/uWivFWRWPHiVfY/kSJqufMB8GA1UdIwQYMBaAFGXB -vV/uWivFWRWPHiVfY/kSJqufMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEL -BQADggEBABjatix/q90u6X/Jar/UkKiL2zx36s4huPU9KG8F51g48cYRjrvpK4+H -K6avCGArl7h1gczaGS7LTOHFUU25eg/BBcKcXEO3aryQph2A167ip89UM55LxlnC -QVVV9HAnEw5qAoh0wlZ65fVN+SE8FdasYlbbbp7c4At/LZruSj+IIapZDwwJxcBk -YlSOa34v1Uay09+Hgu95dYQjI9txJW1ViRVlDpKbieGTzROI6s3uk+3rhxxlH2Zi -Z9UqNmPfH9UE1xgSk/wkMWW22h/x51qIRKAZ4EzmdHVXdT/BarIuHxtHH8hIPNSL -FjetCMVZNBej2HXL9cY5UVFYCG6JG0Q= +MIIDEzCCAfugAwIBAgIUToMqRt0z6FfqfiJhS1Hh+u2j3VEwDQYJKoZIhvcNAQEL +BQAwGTEXMBUGA1UEAwwOTGludXhUcmFuc3BvcnQwHhcNMjQwODAxMTYwOTU2WhcN 
+MjYwODAxMTYwOTU2WjAZMRcwFQYDVQQDDA5MaW51eFRyYW5zcG9ydDCCASIwDQYJ +KoZIhvcNAQEBBQADggEPADCCAQoCggEBAMs8jttzIHATj1BNs3r4cCOAMuVaM1b7 +Aw8D7Lz3rTxFieQCh1vLSFl1l9SQmO7rmh0OfEzIKK8jAU4wkLclgospKuYpB9ME +5QnXbLpXWYfW99V4safGvv9lGZztGKMd4ZT2it9QcpKEFFi6W7cjIyiUuyYMB0uI +IvA6s6tGs8LgN89Lx7HSTSR86QNPvRtTw0jlrr8nfM7EkaT9Q6xu6GjCp89wCx+h +IwcPtstSgfMo5P+3IO30L1wSM+CF1n+nD9M8E4wtcxhoWLuyAPhDsw5f7jKyHmRo +Nm9RxToM0ON67SmN2906i0NxzXWtuttww6KE/O6BEZKNlnp9ja3bnM8CAwEAAaNT +MFEwHQYDVR0OBBYEFNPDyPggVKjneDW7XuzC8NqgmJ9VMB8GA1UdIwQYMBaAFNPD +yPggVKjneDW7XuzC8NqgmJ9VMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEL +BQADggEBAFuVgcimwPxgpwKNvyUKMY9VFa6UVZs/ky6FEEaxrKVAl2GZF9MoSTO5 +vXMdWYHtSF+RWYxCz5pt7Bv97zuEXvbino/JvsLrE8f265Woe2CdDOPiBCHWBOlH ++wM71Hoh0TX7V2TSumona6e0cqUPT7fbNdaNZm8ZHoUscbbPmamERH9Z9zUXWPLk +mtjwz17bvRriAMrglA/Dm3xHiEYBJv3+4FnOqPGfg9vZH6xfmrRwrF1Moj5jEZz5 +cN2N+vO8HCEqGMBCpSlsWq1c2r3NwLH0J3b6EL7X4jcVvpykKg3WmOZGdataYDk9 +0IHy8VyGiX7g3EJOAbbf12FjgLAt4NM= -----END CERTIFICATE----- diff --git a/tests/data/wire/trans_prv b/tests/data/wire/trans_prv index 17bdb07c65..876b8351b2 100644 --- a/tests/data/wire/trans_prv +++ b/tests/data/wire/trans_prv @@ -1,28 +1,28 @@ -----BEGIN PRIVATE KEY----- -MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQCv11ofg43NlmKC -f/BZGXfDld1eO3yA+VlfIUgc/4v4VVCh4Qlk0HxOFXEARmBcivJTCiAVe7egatOU -0BrgfNePB0oyG6A8dzXXdAv/pWHARNYszKxymWIXMu5UpRFJPsuYsntHAOZP3ZSY -X8KMVqJcNBNnTDo5WKs5BBiXckuvoRXXaOveh8w+zmIcYda+dGhz1qmPDZwudePO -5aDrT7OC2ANm3dx0oBAqTrSs8iVvhj9B9rwWuzK162Tpw9Sto9sW/J052J6TbZZp -2T4+1WBbCzpbKueO15xYkqbT1rJW1aIJFFZAmZQKpDHCfzrR+9GVYy8oxXeo+yf1 -svDKwO6RAgMBAAECggEAEwBogsNKjY7Usll09Yvk/0OwmkA/YgiP+dG04z1SONGv -Vu7kfvpwlFeI0IjKXPW+3e5YLTojS7h/iLM8VEnpWVFmWSfXFvGi5ddqfIO4nnhR -1KGBeRjOGsesLYVw6sNYaPXQkImuWa8OIbEnatbp0KDn/9+i4xOL3StuJN97Ak1u -Giq4gwFbag4/QctBZ+5P0t77W+uzWcvEyNgK6rndfPWxqwmJSBFchY6O3s1l6NY8 -vSmyYhYRgFXEgX0nDumGfEXsF1Cj9tzYT2DUZc2f6+UCtXCD49qnoKawLhCrl5Uh -QGs82TR5FSn7zLW4MbFody6p8UDw6pYiWlPPR7fmgQKBgQDO3j5RCXf0276K56BA 
-rFpOOmivq3fxElRVCSRRRVPKHDYKQiPKnNXoa/pSl8a6CfjJaJzkNj+wTEFdQRGm -Ia123kR/1S21/zgGZNmbUGby+A4fKxBY101/JQweucRN7aw3XLKPXhOL1NPyKdWh -dARvjZvEl1qR6s07Y6jZgpkGqQKBgQDZmqVWvUgACdxkCYEzDf3Fc2G/8oL4VxWJ -HHr5zib+DDhTfKrgQyA9CZ97stZfrR7KYnsLJH8jnj/w/CNOI0G+41KroICRsnjT -5bm7/sT5uwLwu+FAQzITiehj7Te1lwsqtS8yOnXBTQ3hzaw9yhAsuhefx+WT2UCd -Y8Od13nhqQKBgQCR2LR8s71D/81F52nfTuRYNOvrtmtYpkCYt1pIhiU94EflUZ4k -UhCpzb7tjh5IuZEShtPePbUHWavX0HFd/G5s2OXYbnbM0oQwVdfpnXUHpgVmyhi7 -WghENN1nqDcTbha17X/ifkQvmLxZBk+chcw+zcrdfowXRkCtt2Sq/V1gCQKBgH/w -UK3C9AYxxgZ7IB9oZoAk6p/0cdSZPuwydotRDdPoU2WissTQMrAwbDhKWYg/PQ84 -/6b5elbywB1r4UYbrJgTB5Qo9e6zxB6xvpYtoJpDveLUVAd4eoTKXHwECPEXMVWW -2XzqqjlQmIzeZBqgJwplD2a+HNjkrvzanzS6b8qhAoGBAIun0EEc/Zc0ZxzgDPen -A9/7jV++QCrNsevxGH8yrhPP4UqTVSHGR9H+RAif7zTBTn0OwzSBz6hFbPmxum3m -cKabsKVN3poz3TBvfyhgjYosMWvCHpNhif09lyd/s2FezPGyK1Nyf5cKNEWjFGKw -+fCPJ/Ihp4iwacNU1Pu9m050 +MIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQDLPI7bcyBwE49Q +TbN6+HAjgDLlWjNW+wMPA+y89608RYnkAodby0hZdZfUkJju65odDnxMyCivIwFO +MJC3JYKLKSrmKQfTBOUJ12y6V1mH1vfVeLGnxr7/ZRmc7RijHeGU9orfUHKShBRY +ulu3IyMolLsmDAdLiCLwOrOrRrPC4DfPS8ex0k0kfOkDT70bU8NI5a6/J3zOxJGk +/UOsbuhowqfPcAsfoSMHD7bLUoHzKOT/tyDt9C9cEjPghdZ/pw/TPBOMLXMYaFi7 +sgD4Q7MOX+4ysh5kaDZvUcU6DNDjeu0pjdvdOotDcc11rbrbcMOihPzugRGSjZZ6 +fY2t25zPAgMBAAECggEAE9CAJxIW4AZwKwagUIVnPXbSv3ynU7weRLj/vD6zg5RO +CM5cTw1HLP2jg2RjnKuYt2uBn+TF3qldh7eBbHG6RAIL/iuS6TZpdCeuII7CmlVR +jVz6iR594Z2EPUH6bHDN3P2adYI84V8CMtJcfcLtuxehFWkHzwvjSCOY/8JhZUbV +ebXXc3zPdSu+WmeManXnzs4VgE6QnSNdyk67fvE1Qxi18s49XXWBPTg01hn+v2yJ +QVuv36UP2MgIRZJE/PI9NL6tqqiHmY5sCIJ41hQLRxd/mnRC8hdHrfNNhqHVlC9g +JoQQwn/dD12EZwyiQyJyGZOmFDrfv7G3d2QQVJ4OLQKBgQDrxf3nRK28CWaV2evS +J4MZjTWmZGiNzMiqEtfTgd0v3+rs73WYaNfQ79Iejj6KJfJq7vtdawqGW1bPNfgF +KJCdr3yxjpv5GsHF7fiE8ZWcQ6d6FTWNuayLOEbHnPemYTqg5pd1wsPgIBoE9Zqm +zo1iuGxmwHos2yQgif9vEU99wwKBgQDcq/+aDscOO1oimJjAbBl95I8bOtSxR0Ip +pv/iaB8+rrS18jiAygXuo34tq+L0HmoniMCuuVg4zhgAxzgnohTlsJpyGnzkdkmo 
+TTan76WkFAedmurzQSu96p5F9HOc0MgluQHtPhO5SsjWhUgXxAU0Zoe+JnTVq0X+ +//8z1s64BQKBgEbanl4U7p0WuiSIc+0ZALX6EMhrXlxW0WsC9KdUXJNZmHER2WYv +A8R/fca++p5rnvlxzkqZs3UDGAh3cIykTymEJlX5xHfNCbSgulHBhDOMxVTT8N8h +kG/aPrMYQfhXOdZG1feGy3ScURVydcJxSl4DjFgouc6nIKlCr2fCbQAfAoGAVpez +3EtSNzZ5HzxMLK3+rtUihufmEI7K2rdqj/iV0i4SQZeELp2YCFXlrJxXmb3ZoBvc +qHOYt+m/p4aFdZ/3nU5YvM/CFJCKRN3PxcSXdjRZ7LGe4se/F25an07Wk0GmWI8p +v2Ptr3c2Kl/ws0q7VB2rxKUokbP86pygE0KGqdUCgYAf8G1QLDZMq57XsNBpiITY +xmS/vnmu2jj/DaTAiJ/gPkUaemoJ4xqhuIko7KqaNOBYoOMrOadldygNtrH1c5YE +LKdPYQ9/bASF59DnBotKAv79n2svHFHNXkpZA+kIoH7QwhgKpwo3vNwcJcKRIBB9 +MjMnBzho1vIbdhoIHJ+Egw== -----END PRIVATE KEY----- diff --git a/tests/data/wire/trans_pub b/tests/data/wire/trans_pub index 330ff42712..b090a7817d 100644 --- a/tests/data/wire/trans_pub +++ b/tests/data/wire/trans_pub @@ -1,9 +1,9 @@ -----BEGIN PUBLIC KEY----- -MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAr9daH4ONzZZign/wWRl3 -w5XdXjt8gPlZXyFIHP+L+FVQoeEJZNB8ThVxAEZgXIryUwogFXu3oGrTlNAa4HzX -jwdKMhugPHc113QL/6VhwETWLMyscpliFzLuVKURST7LmLJ7RwDmT92UmF/CjFai -XDQTZ0w6OVirOQQYl3JLr6EV12jr3ofMPs5iHGHWvnRoc9apjw2cLnXjzuWg60+z -gtgDZt3cdKAQKk60rPIlb4Y/Qfa8Frsytetk6cPUraPbFvydOdiek22Wadk+PtVg -Wws6WyrnjtecWJKm09ayVtWiCRRWQJmUCqQxwn860fvRlWMvKMV3qPsn9bLwysDu -kQIDAQAB +MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAyzyO23MgcBOPUE2zevhw +I4Ay5VozVvsDDwPsvPetPEWJ5AKHW8tIWXWX1JCY7uuaHQ58TMgoryMBTjCQtyWC +iykq5ikH0wTlCddsuldZh9b31Xixp8a+/2UZnO0Yox3hlPaK31BykoQUWLpbtyMj +KJS7JgwHS4gi8Dqzq0azwuA3z0vHsdJNJHzpA0+9G1PDSOWuvyd8zsSRpP1DrG7o +aMKnz3ALH6EjBw+2y1KB8yjk/7cg7fQvXBIz4IXWf6cP0zwTjC1zGGhYu7IA+EOz +Dl/uMrIeZGg2b1HFOgzQ43rtKY3b3TqLQ3HNda2623DDooT87oERko2Wen2Nrduc +zwIDAQAB -----END PUBLIC KEY----- From 818ec099302ee758afcd7f59880be64a81cbfcd2 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Mon, 12 Aug 2024 13:30:28 -0700 Subject: [PATCH 222/240] Remove extension status only on extension delete (#3167) * Remove extension status only on extension delete * . * . 
--------- Co-authored-by: narrieta@microsoft --- azurelinuxagent/ga/exthandlers.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/azurelinuxagent/ga/exthandlers.py b/azurelinuxagent/ga/exthandlers.py index 3499b706c4..d23b1630ce 100644 --- a/azurelinuxagent/ga/exthandlers.py +++ b/azurelinuxagent/ga/exthandlers.py @@ -1416,9 +1416,17 @@ def disable(self, extension=None, ignore_error=False): self.report_event(name=self.get_extension_full_name(extension), message=msg, is_success=False, log_event=False) - # Clean extension state For Multi Config extensions on Disable + # + # In the case of multi-config handlers, we keep the state of each extension individually. + # Disable can be called when the extension is deleted (the extension state in the goal state is set to "disabled"), + # or as part of the Uninstall and Update sequences. When the extension is deleted, we need to remove its state, along + # with its status and settings files. Otherwise, we need to set the state to "disabled". + # if self.should_perform_multi_config_op(extension): - self.__remove_extension_state_files(extension) + if extension.state == ExtensionRequestedState.Disabled: + self.__remove_extension_state_files(extension) + else: + self.__set_extension_state(extension, ExtensionState.Disabled) # For Single config, dont check enabled_extensions because no extension state is maintained. # For MultiConfig, Set the handler state to Installed only when all extensions have been disabled From 1a0f1b5b17696815fdaa708a7cee86926ddb0ecd Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Thu, 15 Aug 2024 10:06:25 -0700 Subject: [PATCH 223/240] Add support for Azure Linux 3 (#3183) * . * Add Azure Linux 3 to test runs * . * . * . * . 
* Update setup.py --------- Co-authored-by: narrieta@microsoft --- azurelinuxagent/common/osutil/factory.py | 2 +- setup.py | 2 +- .../test_suites/agent_persist_firewall.yml | 2 ++ tests_e2e/test_suites/ext_sequencing.yml | 5 ++++- tests_e2e/test_suites/images.yml | 13 ++++++++++++- tests_e2e/tests/lib/agent_log.py | 18 ++++++++++++++++++ 6 files changed, 38 insertions(+), 4 deletions(-) diff --git a/azurelinuxagent/common/osutil/factory.py b/azurelinuxagent/common/osutil/factory.py index 58afd0af11..fd66fbb0e9 100644 --- a/azurelinuxagent/common/osutil/factory.py +++ b/azurelinuxagent/common/osutil/factory.py @@ -142,7 +142,7 @@ def _get_osutil(distro_name, distro_code_name, distro_version, distro_full_name) if distro_name == "iosxe": return IosxeOSUtil() - if distro_name == "mariner": + if distro_name in ["mariner", "azurelinux"]: return MarinerOSUtil() if distro_name == "nsbsd": diff --git a/setup.py b/setup.py index 2d51fae8c2..0bb053d4c2 100755 --- a/setup.py +++ b/setup.py @@ -147,7 +147,7 @@ def get_data_files(name, version, fullname): # pylint: disable=R0912 src=["config/clearlinux/waagent.conf"]) set_systemd_files(data_files, dest=systemd_dir_path, src=["init/clearlinux/waagent.service"]) - elif name == 'mariner': + elif name in ["mariner", "azurelinux"]: set_bin_files(data_files, dest=agent_bin_path) set_conf_files(data_files, dest="/etc", src=["config/mariner/waagent.conf"]) diff --git a/tests_e2e/test_suites/agent_persist_firewall.yml b/tests_e2e/test_suites/agent_persist_firewall.yml index f749046a23..ea877b9f0c 100644 --- a/tests_e2e/test_suites/agent_persist_firewall.yml +++ b/tests_e2e/test_suites/agent_persist_firewall.yml @@ -14,6 +14,8 @@ owns_vm: true # This vm cannot be shared with other tests because it modifies t # so skipping the test run on flatcar distro. 
# (2023-11-14T19:04:13.738695Z ERROR ExtHandler ExtHandler Unable to setup the persistent firewall rules: [Errno 30] Read-only file system: '/lib/systemd/system/waagent-network-setup.service) skip_on_images: + - "azure-linux_3" # TODO: the test in unstable on Azure Linux 3; skipping for now + - "azure-linux_3_arm64" # TODO: the test in unstable on Azure Linux 3; skipping for now - "debian_9" # TODO: Reboot is slow on debian_9. Need to investigate further. - "flatcar" - "flatcar_arm64" diff --git a/tests_e2e/test_suites/ext_sequencing.yml b/tests_e2e/test_suites/ext_sequencing.yml index 1976a85025..78c73dee11 100644 --- a/tests_e2e/test_suites/ext_sequencing.yml +++ b/tests_e2e/test_suites/ext_sequencing.yml @@ -7,4 +7,7 @@ tests: - "ext_sequencing/ext_sequencing.py" images: "endorsed" # This scenario is executed on instances of a scaleset created by the agent test suite. -executes_on_scale_set: true \ No newline at end of file +executes_on_scale_set: true +skip_on_images: # TODO: AzureMonitorLinuxAgent, used by this test, currently does not work on Azure Linux 3. Remove this once it is fixed. 
+ - "azure-linux_3" + - "azure-linux_3_arm64" diff --git a/tests_e2e/test_suites/images.yml b/tests_e2e/test_suites/images.yml index fb6cc7f1fc..976c987f92 100644 --- a/tests_e2e/test_suites/images.yml +++ b/tests_e2e/test_suites/images.yml @@ -17,8 +17,8 @@ image-sets: - "debian_11" - "flatcar" - "suse_12" - - "mariner_1" - "mariner_2" + - "azure-linux_3" - "suse_15" - "rhel_79" - "rhel_82" @@ -37,6 +37,7 @@ image-sets: - "debian_11_arm64" - "flatcar_arm64" - "mariner_2_arm64" + - "azure-linux_3_arm64" - "rhel_90_arm64" - "ubuntu_2204_arm64" @@ -87,6 +88,16 @@ images: urn: "almalinux almalinux 9-gen2 latest" locations: AzureChinaCloud: [] + azure-linux_3: + urn: "microsoftcblmariner azure-linux-3 azure-linux-3 latest" + locations: + AzureUSGovernment: [] + AzureChinaCloud: [] + azure-linux_3_arm64: + urn: "microsoftcblmariner azure-linux-3 azure-linux-3-arm64 latest" + locations: + AzureUSGovernment: [] + AzureChinaCloud: [] centos_610: "OpenLogic CentOS 6.10 latest" centos_75: "OpenLogic CentOS 7.5 latest" centos_79: "OpenLogic CentOS 7_9 latest" diff --git a/tests_e2e/tests/lib/agent_log.py b/tests_e2e/tests/lib/agent_log.py index 83f77b1ea2..9c02406f96 100644 --- a/tests_e2e/tests/lib/agent_log.py +++ b/tests_e2e/tests/lib/agent_log.py @@ -380,6 +380,24 @@ def get_errors(self) -> List[AgentLogRecord]: 'message': r"Unable to determine version of iptables: \[Errno 2\] No such file or directory: 'iptables'", 'if': lambda r: DISTRO_NAME == 'ubuntu' }, + # + # TODO: The Daemon has not been updated on Azure Linux 3; remove this message when it is. + # + # 2024-08-05T14:36:48.004865Z WARNING Daemon Daemon Unable to load distro implementation for azurelinux. Using default distro implementation instead. + # + { + 'message': r"Unable to load distro implementation for azurelinux. 
Using default distro implementation instead.", + 'if': lambda r: DISTRO_NAME == 'azurelinux' and r.prefix == 'Daemon' and r.level == 'WARNING' + }, + # + # TODO: The OMS extension does not support Azure Linux 3; remove this message when it does. + # + # 2024-08-12T17:40:48.375193Z ERROR ExtHandler ExtHandler Event: name=Microsoft.EnterpriseCloud.Monitoring.OmsAgentForLinux, op=Install, message=[ExtensionOperationError] Non-zero exit code: 51, /var/lib/waagent/Microsoft.EnterpriseCloud.Monitoring.OmsAgentForLinux-1.19.0/omsagent_shim.sh -install + # + { + 'message': r"name=Microsoft\.EnterpriseCloud\.Monitoring\.OmsAgentForLinux.+Non-zero exit code: 51", + 'if': lambda r: DISTRO_NAME == 'azurelinux' and DISTRO_VERSION == '3.0' + }, ] def is_error(r: AgentLogRecord) -> bool: From 4412778a7017f87d5ab763bbf9c83777c312c484 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Fri, 23 Aug 2024 14:00:26 -0700 Subject: [PATCH 224/240] Use self-update for initial update (#3184) * use self-update for initial update * addressing comments * cleanup files * state files * remove comment --- azurelinuxagent/ga/agent_update_handler.py | 108 +++++++++++++----- azurelinuxagent/pa/deprovision/default.py | 3 +- tests/ga/test_agent_update_handler.py | 43 ++++++- tests/ga/test_update.py | 10 +- .../orchestrator/lib/agent_test_loader.py | 35 +++++- .../lib/agent_test_suite_combinator.py | 30 ++--- .../test_suites/agent_wait_for_cloud_init.yml | 3 +- .../test_suites/initial_agent_update.yml | 13 +++ .../initial_agent_update.py | 82 +++++++++++++ ...agent_update-self_update_latest_version.py | 13 ++- ...gent_update-agent_update_check_from_log.py | 62 ++++++++++ 11 files changed, 341 insertions(+), 61 deletions(-) create mode 100644 tests_e2e/test_suites/initial_agent_update.yml create mode 100644 tests_e2e/tests/initial_agent_update/initial_agent_update.py create mode 100755 
tests_e2e/tests/scripts/initial_agent_update-agent_update_check_from_log.py diff --git a/azurelinuxagent/ga/agent_update_handler.py b/azurelinuxagent/ga/agent_update_handler.py index 8caec10873..7eb77c8df5 100644 --- a/azurelinuxagent/ga/agent_update_handler.py +++ b/azurelinuxagent/ga/agent_update_handler.py @@ -33,20 +33,32 @@ def get_agent_update_handler(protocol): return AgentUpdateHandler(protocol) +RSM_UPDATE_STATE_FILE = "waagent_rsm_update" +INITIAL_UPDATE_STATE_FILE = "waagent_initial_update" + + class AgentUpdateHandler(object): """ This class handles two type of agent updates. Handler initializes the updater to SelfUpdateVersionUpdater and switch to appropriate updater based on below conditions: - RSM update: This is the update requested by RSM. The contract between CRP and agent is we get following properties in the goal state: + RSM update: This update requested by RSM and contract between CRP and agent is we get following properties in the goal state: version: it will have what version to update isVersionFromRSM: True if the version is from RSM deployment. isVMEnabledForRSMUpgrades: True if the VM is enabled for RSM upgrades. - if vm enabled for RSM upgrades, we use RSM update path. But if requested update is not by rsm deployment + if vm enabled for RSM upgrades, we use RSM update path. But if requested update is not by rsm deployment( if isVersionFromRSM:False) we ignore the update. - Self update: We fallback to this if above is condition not met. This update to the largest version available in the manifest + Self update: We fallback to this if above condition not met. This update to the largest version available in the manifest. + Also, we use self-update for initial update due to [1][2] Note: Self-update don't support downgrade. - Handler keeps the rsm state of last update is with RSM or not on every new goal state. 
Once handler decides which updater to use, then - does following steps: + [1] New vms that are enrolled into RSM, they get isVMEnabledForRSMUpgrades as True and isVersionFromRSM as False in first goal state. As per RSM update flow mentioned above, + we don't apply the update if isVersionFromRSM is false. Consequently, new vms remain on pre-installed agent until RSM drives a new version update. In the meantime, agent may process the extensions with the baked version. + This can potentially lead to issues due to incompatibility. + [2] If current version is N, and we are deploying N+1. We find an issue on N+1 and remove N+1 from PIR. If CRP created the initial goal state for a new vm + before the delete, the version in the goal state would be N+1; If the agent starts processing the goal state after the deleting, it won't find N+1 and update will fail and + the vm will use baked version. + + Handler updates the state if current update mode is changed from last update mode(RSM or Self-Update) on new goal state. Once handler decides which updater to use, then + updater does following steps: 1. Retrieve the agent version from the goal state. 2. Check if we allowed to update for that version. 3. Log the update message. @@ -63,8 +75,8 @@ def __init__(self, protocol): self._daemon_version = self._get_daemon_version_for_update() self._last_attempted_update_error_msg = "" - # restore the state of rsm update. Default to self-update if last update is not with RSM. - if not self._get_is_last_update_with_rsm(): + # Restore the state of rsm update. Default to self-update if last update is not with RSM or if agent doing initial update + if not self._get_is_last_update_with_rsm() or self._is_initial_update(): self._updater = SelfUpdateVersionUpdater(self._gs_id) else: self._updater = RSMVersionUpdater(self._gs_id, self._daemon_version) @@ -78,14 +90,39 @@ def _get_daemon_version_for_update(): # use the min version as 2.2.53 as we started setting the daemon version starting 2.2.53. 
return FlexibleVersion("2.2.53") + @staticmethod + def _get_initial_update_state_file(): + """ + This file keeps if initial update is attempted or not + """ + return os.path.join(conf.get_lib_dir(), INITIAL_UPDATE_STATE_FILE) + + def _save_initial_update_state_file(self): + """ + Save the file if agent attempted initial update + """ + try: + with open(self._get_initial_update_state_file(), "w"): + pass + except Exception as e: + msg = "Error creating the initial update state file ({0}): {1}".format(self._get_initial_update_state_file(), ustr(e)) + logger.warn(msg) + add_event(op=WALAEventOperation.AgentUpgrade, message=msg, log_event=False) + + def _is_initial_update(self): + """ + Returns True if state file doesn't exit as presence of file consider as initial update already attempted + """ + return not os.path.exists(self._get_initial_update_state_file()) + @staticmethod def _get_rsm_update_state_file(): """ This file keeps if last attempted update is rsm or not. """ - return os.path.join(conf.get_lib_dir(), "rsm_update.json") + return os.path.join(conf.get_lib_dir(), RSM_UPDATE_STATE_FILE) - def _save_rsm_update_state(self): + def _save_rsm_update_state_file(self): """ Save the rsm state empty file when we switch to RSM """ @@ -93,9 +130,11 @@ def _save_rsm_update_state(self): with open(self._get_rsm_update_state_file(), "w"): pass except Exception as e: - logger.warn("Error creating the RSM state ({0}): {1}", self._get_rsm_update_state_file(), ustr(e)) + msg = "Error creating the RSM state file ({0}): {1}".format(self._get_rsm_update_state_file(), ustr(e)) + logger.warn(msg) + add_event(op=WALAEventOperation.AgentUpgrade, message=msg, log_event=False) - def _remove_rsm_update_state(self): + def _remove_rsm_update_state_file(self): """ Remove the rsm state file when we switch to self-update """ @@ -103,7 +142,9 @@ def _remove_rsm_update_state(self): if os.path.exists(self._get_rsm_update_state_file()): os.remove(self._get_rsm_update_state_file()) except Exception 
as e: - logger.warn("Error removing the RSM state ({0}): {1}", self._get_rsm_update_state_file(), ustr(e)) + msg = "Error removing the RSM state file ({0}): {1}".format(self._get_rsm_update_state_file(), ustr(e)) + logger.warn(msg) + add_event(op=WALAEventOperation.AgentUpgrade, message=msg, log_event=False) def _get_is_last_update_with_rsm(self): """ @@ -152,25 +193,29 @@ def run(self, goal_state, ext_gs_updated): agent_family = self._get_agent_family_manifest(goal_state) - # Updater will return True or False if we need to switch the updater - # If self-updater receives RSM update enabled, it will switch to RSM updater - # If RSM updater receives RSM update disabled, it will switch to self-update - # No change in updater if GS not updated - is_rsm_update_enabled = self._updater.is_rsm_update_enabled(agent_family, ext_gs_updated) + # Always agent uses self-update for initial update regardless vm enrolled into RSM or not + # So ignoring the check for updater switch for the initial goal state/update + if not self._is_initial_update(): - if not is_rsm_update_enabled and isinstance(self._updater, RSMVersionUpdater): - msg = "VM not enabled for RSM updates, switching to self-update mode" - logger.info(msg) - add_event(op=WALAEventOperation.AgentUpgrade, message=msg, log_event=False) - self._updater = SelfUpdateVersionUpdater(self._gs_id) - self._remove_rsm_update_state() + # Updater will return True or False if we need to switch the updater + # If self-updater receives RSM update enabled, it will switch to RSM updater + # If RSM updater receives RSM update disabled, it will switch to self-update + # No change in updater if GS not updated + is_rsm_update_enabled = self._updater.is_rsm_update_enabled(agent_family, ext_gs_updated) - if is_rsm_update_enabled and isinstance(self._updater, SelfUpdateVersionUpdater): - msg = "VM enabled for RSM updates, switching to RSM update mode" - logger.info(msg) - add_event(op=WALAEventOperation.AgentUpgrade, message=msg, 
log_event=False) - self._updater = RSMVersionUpdater(self._gs_id, self._daemon_version) - self._save_rsm_update_state() + if not is_rsm_update_enabled and isinstance(self._updater, RSMVersionUpdater): + msg = "VM not enabled for RSM updates, switching to self-update mode" + logger.info(msg) + add_event(op=WALAEventOperation.AgentUpgrade, message=msg, log_event=False) + self._updater = SelfUpdateVersionUpdater(self._gs_id) + self._remove_rsm_update_state_file() + + if is_rsm_update_enabled and isinstance(self._updater, SelfUpdateVersionUpdater): + msg = "VM enabled for RSM updates, switching to RSM update mode" + logger.info(msg) + add_event(op=WALAEventOperation.AgentUpgrade, message=msg, log_event=False) + self._updater = RSMVersionUpdater(self._gs_id, self._daemon_version) + self._save_rsm_update_state_file() # If updater is changed in previous step, we allow update as it consider as first attempt. If not, it checks below condition # RSM checks new goal state; self-update checks manifest download interval @@ -218,6 +263,11 @@ def run(self, goal_state, ext_gs_updated): add_event(op=WALAEventOperation.AgentUpgrade, is_success=False, message=error_msg, log_event=False) self._last_attempted_update_error_msg = error_msg + # save initial update state when agent is doing first update + finally: + if self._is_initial_update(): + self._save_initial_update_state_file() + def get_vmagent_update_status(self): """ This function gets the VMAgent update status as per the last attempted update. 
diff --git a/azurelinuxagent/pa/deprovision/default.py b/azurelinuxagent/pa/deprovision/default.py index 35b4ae82ed..d96adbfec5 100644 --- a/azurelinuxagent/pa/deprovision/default.py +++ b/azurelinuxagent/pa/deprovision/default.py @@ -162,7 +162,8 @@ def del_lib_dir_files(self, warnings, actions): # pylint: disable=W0613 'published_hostname', 'fast_track.json', 'initial_goal_state', - 'rsm_update.json' + 'waagent_rsm_update', + 'waagent_initial_update' ] known_files_glob = [ 'Extensions.*.xml', diff --git a/tests/ga/test_agent_update_handler.py b/tests/ga/test_agent_update_handler.py index c6e41469f3..ed372d502c 100644 --- a/tests/ga/test_agent_update_handler.py +++ b/tests/ga/test_agent_update_handler.py @@ -10,7 +10,8 @@ from azurelinuxagent.common.protocol.util import ProtocolUtil from azurelinuxagent.common.version import CURRENT_VERSION, AGENT_NAME -from azurelinuxagent.ga.agent_update_handler import get_agent_update_handler +from azurelinuxagent.ga.agent_update_handler import get_agent_update_handler, INITIAL_UPDATE_STATE_FILE, \ + RSM_UPDATE_STATE_FILE from azurelinuxagent.ga.guestagent import GuestAgent from tests.ga.test_update import UpdateTestCase from tests.lib.http_request_predicates import HttpRequestPredicates @@ -28,7 +29,7 @@ def setUp(self): clear_singleton_instances(ProtocolUtil) @contextlib.contextmanager - def _get_agent_update_handler(self, test_data=None, autoupdate_frequency=0.001, autoupdate_enabled=True, protocol_get_error=False, mock_get_header=None, mock_put_header=None): + def _get_agent_update_handler(self, test_data=None, autoupdate_frequency=0.001, autoupdate_enabled=True, initial_update_attempted=True, protocol_get_error=False, mock_get_header=None, mock_put_header=None): # Default to DATA_FILE of test_data parameter raises the pylint warning # W0102: Dangerous default value DATA_FILE (builtins.dict) as argument (dangerous-default-value) test_data = DATA_FILE if test_data is None else test_data @@ -57,6 +58,9 @@ def put_handler(url, 
*args, **_): protocol.set_http_handlers(http_get_handler=http_get_handler, http_put_handler=http_put_handler) + if initial_update_attempted: + open(os.path.join(conf.get_lib_dir(), INITIAL_UPDATE_STATE_FILE), "a").close() + with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=autoupdate_enabled): with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=autoupdate_frequency): with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"): @@ -452,7 +456,7 @@ def test_it_should_save_rsm_state_of_the_most_recent_goal_state(self): with self.assertRaises(AgentUpgradeExitException): agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) - state_file = os.path.join(conf.get_lib_dir(), "rsm_update.json") + state_file = os.path.join(conf.get_lib_dir(), RSM_UPDATE_STATE_FILE) self.assertTrue(os.path.exists(state_file), "The rsm state file was not saved (can't find {0})".format(state_file)) # check if state gets updated if most recent goal state has different values @@ -535,3 +539,36 @@ def http_get_handler(uri, *_, **__): self.assertEqual(1, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if "Downloaded agent package: WALinuxAgent-9.9.9.10 is missing agent handler manifest file" in kwarg['message'] and kwarg[ 'op'] == WALAEventOperation.AgentUpgrade]), "Agent update should fail") + + def test_it_should_use_self_update_for_first_update_always(self): + self.prepare_agents(count=1) + + # mock the goal state as vm enrolled into RSM + data_file = DATA_FILE.copy() + data_file['ext_conf'] = "wire/ext_conf_rsm_version.xml" + with self._get_agent_update_handler(test_data=data_file, initial_update_attempted=False) as (agent_update_handler, mock_telemetry): + with self.assertRaises(AgentUpgradeExitException) as context: + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) + # Verifying agent used self-update for initial update + 
self._assert_update_discovered_from_agent_manifest(mock_telemetry, version="99999.0.0.0") + self._assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"]) + self._assert_agent_exit_process_telemetry_emitted(ustr(context.exception.reason)) + + state_file = os.path.join(conf.get_lib_dir(), INITIAL_UPDATE_STATE_FILE) + self.assertTrue(os.path.exists(state_file), + "The first update state file was not saved (can't find {0})".format(state_file)) + + def test_it_should_honor_any_update_type_after_first_update(self): + self.prepare_agents(count=1) + + data_file = DATA_FILE.copy() + data_file['ext_conf'] = "wire/ext_conf_rsm_version.xml" + # mocking initial update attempt as true + with self._get_agent_update_handler(test_data=data_file, initial_update_attempted=True) as (agent_update_handler, mock_telemetry): + with self.assertRaises(AgentUpgradeExitException) as context: + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) + + # Verifying agent honored RSM update + self._assert_agent_rsm_version_in_goal_state(mock_telemetry, version="9.9.9.10") + self._assert_agent_directories_exist_and_others_dont_exist(versions=["9.9.9.10", str(CURRENT_VERSION)]) + self._assert_agent_exit_process_telemetry_emitted(ustr(context.exception.reason)) diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index 44a6d73243..ef1a69053a 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -20,6 +20,8 @@ from datetime import datetime, timedelta from threading import current_thread + +from azurelinuxagent.ga.agent_update_handler import INITIAL_UPDATE_STATE_FILE from azurelinuxagent.ga.guestagent import GuestAgent, GuestAgentError, \ AGENT_ERROR_FILE from tests.common.osutil.test_default import TestOSUtil @@ -1282,6 +1284,9 @@ def update_goal_state_and_run_handler(autoupdate_enabled=True): protocol.set_http_handlers(http_get_handler=get_handler, http_put_handler=put_handler) + # mocking first agent 
update attempted + open(os.path.join(conf.get_lib_dir(), INITIAL_UPDATE_STATE_FILE), "a").close() + # Case 1: rsm version missing in GS when vm opt-in for rsm upgrades; report missing rsm version error protocol.mock_wire_data.set_extension_config("wire/ext_conf_version_missing_in_agent_family.xml") update_goal_state_and_run_handler() @@ -1481,7 +1486,10 @@ def create_conf_mocks(self, autoupdate_frequency, hotfix_frequency, normal_frequ @contextlib.contextmanager def __get_update_handler(self, iterations=1, test_data=None, - reload_conf=None, autoupdate_frequency=0.001, hotfix_frequency=1.0, normal_frequency=2.0): + reload_conf=None, autoupdate_frequency=0.001, hotfix_frequency=1.0, normal_frequency=2.0, initial_update_attempted=True): + + if initial_update_attempted: + open(os.path.join(conf.get_lib_dir(), INITIAL_UPDATE_STATE_FILE), "a").close() test_data = DATA_FILE if test_data is None else test_data # In _get_update_handler() contextmanager, yield is used inside an if-else block and that's creating a false positive pylint warning diff --git a/tests_e2e/orchestrator/lib/agent_test_loader.py b/tests_e2e/orchestrator/lib/agent_test_loader.py index f952f1160b..ba54f0b592 100644 --- a/tests_e2e/orchestrator/lib/agent_test_loader.py +++ b/tests_e2e/orchestrator/lib/agent_test_loader.py @@ -83,6 +83,28 @@ class VmImageInfo(object): def __str__(self): return self.urn +class CustomImage(object): + + # Images from a gallery are given as "//". 
+ _IMAGE_FROM_GALLERY = re.compile(r"(?P[^/]+)/(?P[^/]+)/(?P[^/]+)") + + @staticmethod + def _is_image_from_gallery(image: str) -> bool: + """ + Verifies if image is from shared gallery + """ + return CustomImage._IMAGE_FROM_GALLERY.match(image) is not None + + @staticmethod + def _get_name_of_image_from_gallery(image: str) -> str: + """ + Get image name from shared gallery + """ + match = CustomImage._IMAGE_FROM_GALLERY.match(image) + if match is None: + raise Exception(f"Invalid image from gallery: {image}") + return match.group('image') + class AgentTestLoader(object): """ @@ -134,6 +156,7 @@ def _validate(self): """ Performs some basic validations on the data loaded from the YAML description files """ + def _parse_image(image: str) -> str: """ Parses a reference to an image or image set and returns the name of the image or image set @@ -147,8 +170,11 @@ def _parse_image(image: str) -> str: # Validate that the images the suite must run on are in images.yml for image in suite.images: image = _parse_image(image) + # skip validation if suite image from gallery image + if CustomImage._is_image_from_gallery(image): + continue if image not in self.images: - raise Exception(f"Invalid image reference in test suite {suite.name}: Can't find {image} in images.yml") + raise Exception(f"Invalid image reference in test suite {suite.name}: Can't find {image} in images.yml or image from a shared gallery") # If the suite specifies a cloud and it's location, validate that location string is start with and then validate that the images it uses are available in that location for suite_location in suite.locations: @@ -158,6 +184,9 @@ def _parse_image(image: str) -> str: continue for suite_image in suite.images: suite_image = _parse_image(suite_image) + # skip validation if suite image from gallery image + if CustomImage._is_image_from_gallery(suite_image): + continue for image in self.images[suite_image]: # If the image has a location restriction, validate that it is available on 
the location the suite must run on if image.locations: @@ -208,8 +237,8 @@ def _load_test_suite(description_file: Path) -> TestSuiteInfo: rest of the tests in the suite will not be executed). By default, a failure on a test does not stop execution of the test suite. * images - A string, or a list of strings, specifying the images on which the test suite must be executed. Each value - can be the name of a single image (e.g."ubuntu_2004"), or the name of an image set (e.g. "endorsed"). The - names for images and image sets are defined in WALinuxAgent/tests_e2e/tests_suites/images.yml. + can be the name of a single image (e.g."ubuntu_2004"), or the name of an image set (e.g. "endorsed") or shared gallery image(e.g. "gallery/wait-cloud-init/1.0.2"). + The names for images and image sets are defined in WALinuxAgent/tests_e2e/tests_suites/images.yml. * locations - [Optional; string or list of strings] If given, the test suite must be executed on that cloud location(e.g. "AzureCloud:eastus2euap"). If not specified, or set to an empty string, the test suite will be executed in the default location. This is useful for test suites that exercise a feature that is enabled only in certain regions. 
diff --git a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py index 1450398c8d..22b1afd9b4 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py @@ -22,7 +22,7 @@ from lisa.messages import TestStatus, TestResultMessage # pylint: disable=E0401 from lisa.util import field_metadata # pylint: disable=E0401 -from tests_e2e.orchestrator.lib.agent_test_loader import AgentTestLoader, VmImageInfo, TestSuiteInfo +from tests_e2e.orchestrator.lib.agent_test_loader import AgentTestLoader, VmImageInfo, TestSuiteInfo, CustomImage from tests_e2e.tests.lib.logging import set_thread_name from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient from tests_e2e.tests.lib.virtual_machine_scale_set_client import VirtualMachineScaleSetClient @@ -171,10 +171,10 @@ def create_environment_list(self, test_suites: List[str]) -> List[Dict[str, Any] vhd = image.urn image_name = urllib.parse.urlparse(vhd).path.split('/')[-1] # take the last fragment of the URL's path (e.g. 
"RHEL_8_Standard-8.3.202006170423.vhd") shared_gallery = "" - elif self._is_image_from_gallery(image.urn): + elif CustomImage._is_image_from_gallery(image.urn): marketplace_image = "" vhd = "" - image_name = self._get_name_of_image_from_gallery(image.urn) + image_name = CustomImage._get_name_of_image_from_gallery(image.urn) shared_gallery = image.urn else: marketplace_image = image.urn @@ -472,7 +472,15 @@ def _get_test_suite_images(suite: TestSuiteInfo, loader: AgentTestLoader) -> Lis for image in suite.images: match = AgentTestLoader.RANDOM_IMAGES_RE.match(image) if match is None: - image_list = loader.images[image] + # Added this condition for galley image as they don't have definition in images.yml + if CustomImage._is_image_from_gallery(image): + i = VmImageInfo() + i.urn = image + i.locations = [] + i.vm_sizes = [] + image_list = [i] + else: + image_list = loader.images[image] else: count = match.group('count') if count is None: @@ -566,20 +574,6 @@ def _is_vhd(vhd: str) -> bool: parsed = urllib.parse.urlparse(vhd) return parsed.scheme == 'https' and parsed.netloc != "" and parsed.path != "" - # Images from a gallery are given as "//". 
- _IMAGE_FROM_GALLERY = re.compile(r"(?P[^/]+)/(?P[^/]+)/(?P[^/]+)") - - @staticmethod - def _is_image_from_gallery(image: str) -> bool: - return AgentTestSuitesCombinator._IMAGE_FROM_GALLERY.match(image) is not None - - @staticmethod - def _get_name_of_image_from_gallery(image: str) -> bool: - match = AgentTestSuitesCombinator._IMAGE_FROM_GALLERY.match(image) - if match is None: - raise Exception(f"Invalid image from gallery: {image}") - return match.group('image') - @staticmethod def _report_test_result( suite_name: str, diff --git a/tests_e2e/test_suites/agent_wait_for_cloud_init.yml b/tests_e2e/test_suites/agent_wait_for_cloud_init.yml index 09c00aa7ee..154e183499 100644 --- a/tests_e2e/test_suites/agent_wait_for_cloud_init.yml +++ b/tests_e2e/test_suites/agent_wait_for_cloud_init.yml @@ -9,5 +9,4 @@ tests: - "agent_wait_for_cloud_init/agent_wait_for_cloud_init.py" template: "agent_wait_for_cloud_init/add_cloud_init_script.py" install_test_agent: false -# Dummy image, since the parameter is required. The actual image needs to be passed as a parameter to the runbook. -images: "ubuntu_2204" +images: "gallery/wait-cloud-init/1.0.2" diff --git a/tests_e2e/test_suites/initial_agent_update.yml b/tests_e2e/test_suites/initial_agent_update.yml new file mode 100644 index 0000000000..6dc039d625 --- /dev/null +++ b/tests_e2e/test_suites/initial_agent_update.yml @@ -0,0 +1,13 @@ +# +# This test verifies that the Agent does initial update on very first goal state before it starts processing extensions for new vms that are enrolled into RSM. +# +# NOTE: This test_suite is not fully automated. It requires a custom image where custom pre-installed Agent has been installed with version 2.8.9.9. Creation of custom images is not automated currently. 
+# But daily run is automated and test suite will pass shared gallery custom image reference in images list +# +# +name: "InitialAgentUpdate" +tests: + - "initial_agent_update/initial_agent_update.py" +install_test_agent: false +images: "gallery/initial-agent-update/1.0.0" +locations: "AzureCloud:eastus2euap" diff --git a/tests_e2e/tests/initial_agent_update/initial_agent_update.py b/tests_e2e/tests/initial_agent_update/initial_agent_update.py new file mode 100644 index 0000000000..455dcd3eee --- /dev/null +++ b/tests_e2e/tests/initial_agent_update/initial_agent_update.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from assertpy import fail + +from tests_e2e.tests.lib.agent_test import AgentVmTest +from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext +from tests_e2e.tests.lib.logging import log +from tests_e2e.tests.lib.retry import retry_if_false + + +class InitialAgentUpdate(AgentVmTest): + """ + This test verifies that the Agent does initial update on very first goal state before it starts processing extensions for new vms that are enrolled into RSM + """ + def __init__(self, context: AgentVmTestContext): + super().__init__(context) + self._ssh_client = self._context.create_ssh_client() + self._test_version = "2.8.9.9" + + def run(self): + + log.info("Testing initial agent update for new vms that are enrolled into RSM") + + log.info("Retrieving latest version from goal state to verify initial agent update") + latest_version: str = self._ssh_client.run_command("agent_update-self_update_latest_version.py --family_type Prod", + use_sudo=True).rstrip() + log.info("Latest Version: %s", latest_version) + self._verify_agent_updated_to_latest_version(latest_version) + self._verify_agent_updated_before_processing_goal_state(latest_version) + + def _verify_agent_updated_to_latest_version(self, latest_version: str) -> None: + """ + Verifies the agent updated to latest version from custom image test version. 
+ """ + log.info("Verifying agent updated to latest version: {0} from custom image test version: {1}".format(latest_version, self._test_version)) + self._verify_guest_agent_update(latest_version) + + def _verify_guest_agent_update(self, latest_version: str) -> None: + """ + Verify current agent version running on latest version + """ + + def _check_agent_version(latest_version: str) -> bool: + waagent_version: str = self._ssh_client.run_command("waagent-version", use_sudo=True) + expected_version = f"Goal state agent: {latest_version}" + if expected_version in waagent_version: + return True + else: + return False + + log.info("Running waagent --version and checking Goal state agent version") + success: bool = retry_if_false(lambda: _check_agent_version(latest_version), delay=60) + waagent_version: str = self._ssh_client.run_command("waagent-version", use_sudo=True) + if not success: + fail("Guest agent didn't update to latest version {0} but found \n {1}".format( + latest_version, waagent_version)) + log.info( + f"Successfully verified agent updated to latest version. 
Current agent version running:\n {waagent_version}") + + def _verify_agent_updated_before_processing_goal_state(self, latest_version) -> None: + log.info("Checking agent log if agent does initial update with self-update before processing goal state") + + output = self._ssh_client.run_command( + "initial_agent_update-agent_update_check_from_log.py --current_version {0} --latest_version {1}".format(self._test_version, latest_version)) + log.info(output) diff --git a/tests_e2e/tests/scripts/agent_update-self_update_latest_version.py b/tests_e2e/tests/scripts/agent_update-self_update_latest_version.py index 4be0f0dc3d..004011deca 100755 --- a/tests_e2e/tests/scripts/agent_update-self_update_latest_version.py +++ b/tests_e2e/tests/scripts/agent_update-self_update_latest_version.py @@ -19,20 +19,22 @@ # returns the agent latest version published # +import argparse + from azurelinuxagent.common.protocol.goal_state import GoalStateProperties from azurelinuxagent.common.protocol.util import get_protocol_util from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from tests_e2e.tests.lib.retry import retry -def get_agent_family_manifest(goal_state): +def get_agent_family_manifest(goal_state, family_type): """ - Get the agent_family from last GS for Test Family + Get the agent_family from last GS for given Family """ agent_families = goal_state.extensions_goal_state.agent_families agent_family_manifests = [] for m in agent_families: - if m.name == 'Test': + if m.name == family_type: if len(m.uris) > 0: agent_family_manifests.append(m) return agent_family_manifests[0] @@ -53,11 +55,14 @@ def get_largest_version(agent_manifest): def main(): try: + parser = argparse.ArgumentParser() + parser.add_argument('--family_type', dest="family_type", default="Test") + args = parser.parse_args() protocol = get_protocol_util().get_protocol(init_goal_state=False) retry(lambda: protocol.client.reset_goal_state( goal_state_properties=GoalStateProperties.ExtensionsGoalState)) 
goal_state = protocol.client.get_goal_state() - agent_family = get_agent_family_manifest(goal_state) + agent_family = get_agent_family_manifest(goal_state, args.family_type) agent_manifest = goal_state.fetch_agent_manifest(agent_family.name, agent_family.uris) largest_version = get_largest_version(agent_manifest) print(str(largest_version)) diff --git a/tests_e2e/tests/scripts/initial_agent_update-agent_update_check_from_log.py b/tests_e2e/tests/scripts/initial_agent_update-agent_update_check_from_log.py new file mode 100755 index 0000000000..3ae62fb30f --- /dev/null +++ b/tests_e2e/tests/scripts/initial_agent_update-agent_update_check_from_log.py @@ -0,0 +1,62 @@ +#!/usr/bin/env pypy3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Checks that the initial agent update happens with self-update before processing goal state from the agent log + +import argparse +import datetime +import re + +from assertpy import fail + +from tests_e2e.tests.lib.agent_log import AgentLog +from tests_e2e.tests.lib.logging import log + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--current_version", dest='current_version', required=True) + parser.add_argument("--latest_version", dest='latest_version', required=True) + args = parser.parse_args() + + agentlog = AgentLog() + patterns = { + "goal_state": "ProcessExtensionsGoalState started", + "self_update": f"Self-update is ready to upgrade the new agent: {args.latest_version} now before processing the goal state", + "exit_process": f"Current Agent {args.current_version} completed all update checks, exiting current process to upgrade to the new Agent version {args.latest_version}" + } + first_occurrence_times = {"goal_state": datetime.time.min, "self_update": datetime.time.min, "exit_process": datetime.time.min} + + for record in agentlog.read(): + for key, pattern in patterns.items(): + # Skip if we already found the first occurrence of the pattern + if first_occurrence_times[key] != datetime.time.min: + continue + if re.search(pattern, record.message, flags=re.DOTALL): + log.info(f"Found data: {record} in agent log") + first_occurrence_times[key] = record.when + break + + if first_occurrence_times["self_update"] < first_occurrence_times["goal_state"] and first_occurrence_times["exit_process"] < first_occurrence_times["goal_state"]: + log.info("Verified initial agent update happened before processing goal state") + else: + fail(f"Agent initial update didn't happen before processing goal state and first_occurrence_times for patterns: {patterns} are: {first_occurrence_times}") + + +if __name__ == '__main__': + main() From 67cd6e181bae8161b6b5b91a3602369da2db8cb6 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam 
<84482346+nagworld9@users.noreply.github.com> Date: Fri, 23 Aug 2024 14:27:23 -0700 Subject: [PATCH 225/240] send updatemode in heartbeat and don't send RSM supported feature flag if versioning disabled in agent (#3189) * rsm changes (cherry picked from commit d73cef55cb284dc6e361fa45cb649d4ffee75f7e) * addressed comment (cherry picked from commit 1ab912235be9e2bc810232b51bc04a6c3decbe2f) * updated comment * addressed comments * added semicolon --- .../common/agent_supported_feature.py | 5 ++-- azurelinuxagent/ga/agent_update_handler.py | 19 +++++++++++++++ azurelinuxagent/ga/update.py | 22 +++++++++--------- tests/ga/test_agent_update_handler.py | 23 +++++++++++++++++++ tests/ga/test_update.py | 6 ++--- 5 files changed, 59 insertions(+), 16 deletions(-) diff --git a/azurelinuxagent/common/agent_supported_feature.py b/azurelinuxagent/common/agent_supported_feature.py index 694c636391..f22a72ea67 100644 --- a/azurelinuxagent/common/agent_supported_feature.py +++ b/azurelinuxagent/common/agent_supported_feature.py @@ -77,14 +77,15 @@ def __init__(self): class _GAVersioningGovernanceFeature(AgentSupportedFeature): """ CRP would drive the RSM update if agent reports that it does support RSM upgrades with this flag otherwise CRP fallback to largest version. - Agent doesn't report supported feature flag if auto update is disabled or old version of agent running that doesn't understand GA versioning. + Agent doesn't report supported feature flag if auto update is disabled or old version of agent running that doesn't understand GA versioning + or if explicitly support for versioning is disabled in agent Note: Especially Windows need this flag to report to CRP that GA doesn't support the updates. So linux adopted same flag to have a common solution. 
""" __NAME = SupportedFeatureNames.GAVersioningGovernance __VERSION = "1.0" - __SUPPORTED = conf.get_auto_update_to_latest_version() + __SUPPORTED = conf.get_auto_update_to_latest_version() and conf.get_enable_ga_versioning() def __init__(self): super(_GAVersioningGovernanceFeature, self).__init__(name=self.__NAME, diff --git a/azurelinuxagent/ga/agent_update_handler.py b/azurelinuxagent/ga/agent_update_handler.py index 7eb77c8df5..ee6a44f9f7 100644 --- a/azurelinuxagent/ga/agent_update_handler.py +++ b/azurelinuxagent/ga/agent_update_handler.py @@ -29,6 +29,14 @@ from azurelinuxagent.ga.self_update_version_updater import SelfUpdateVersionUpdater +class UpdateMode(object): + """ + Enum for Update modes + """ + RSM = "RSM" + SelfUpdate = "SelfUpdate" + + def get_agent_update_handler(protocol): return AgentUpdateHandler(protocol) @@ -179,6 +187,15 @@ def _get_agent_family_manifest(self, goal_state): family, self._gs_id)) return agent_family_manifests[0] + def get_current_update_mode(self): + """ + Returns current update mode whether RSM or Self-Update + """ + if isinstance(self._updater, RSMVersionUpdater): + return UpdateMode.RSM + else: + return UpdateMode.SelfUpdate + def run(self, goal_state, ext_gs_updated): try: @@ -188,6 +205,8 @@ def run(self, goal_state, ext_gs_updated): # Update the state only on new goal state if ext_gs_updated: + # Reset the last reported update state on new goal state before we attempt update otherwise we keep reporting the last update error if any + self._last_attempted_update_error_msg = "" self._gs_id = goal_state.extensions_goal_state.id self._updater.sync_new_gs_id(self._gs_id) diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py index 2c2b3c263e..9579fd1445 100644 --- a/azurelinuxagent/ga/update.py +++ b/azurelinuxagent/ga/update.py @@ -394,7 +394,7 @@ def run(self, debug=False): self._check_daemon_running(debug) self._check_threads_running(all_thread_handlers) self._process_goal_state(exthandlers_handler, 
remote_access_handler, agent_update_handler) - self._send_heartbeat_telemetry(protocol) + self._send_heartbeat_telemetry(protocol, agent_update_handler) self._check_agent_memory_usage() time.sleep(self._goal_state_period) @@ -1016,27 +1016,27 @@ def _write_pid_file(self): return pid_files, pid_file - def _send_heartbeat_telemetry(self, protocol): + def _send_heartbeat_telemetry(self, protocol, agent_update_handler): if self._last_telemetry_heartbeat is None: self._last_telemetry_heartbeat = datetime.utcnow() - UpdateHandler.TELEMETRY_HEARTBEAT_PERIOD if datetime.utcnow() >= (self._last_telemetry_heartbeat + UpdateHandler.TELEMETRY_HEARTBEAT_PERIOD): dropped_packets = self.osutil.get_firewall_dropped_packets(protocol.get_endpoint()) - auto_update_enabled = 1 if conf.get_autoupdate_enabled() else 0 + auto_update_enabled = 1 if conf.get_auto_update_to_latest_version() else 0 + update_mode = agent_update_handler.get_current_update_mode() - telemetry_msg = "{0};{1};{2};{3};{4}".format(self._heartbeat_counter, self._heartbeat_id, dropped_packets, - self._heartbeat_update_goal_state_error_count, - auto_update_enabled) - debug_log_msg = "[DEBUG HeartbeatCounter: {0};HeartbeatId: {1};DroppedPackets: {2};" \ - "UpdateGSErrors: {3};AutoUpdate: {4}]".format(self._heartbeat_counter, + # Note: When we add new values to the heartbeat message, please add a semicolon at the end of the value. 
+ # This helps to parse the message easily in kusto queries with regex + heartbeat_msg = "HeartbeatCounter: {0};HeartbeatId: {1};DroppedPackets: {2};" \ + "UpdateGSErrors: {3};AutoUpdate: {4};UpdateMode: {5};".format(self._heartbeat_counter, self._heartbeat_id, dropped_packets, self._heartbeat_update_goal_state_error_count, - auto_update_enabled) + auto_update_enabled, update_mode) # Write Heartbeat events/logs add_event(name=AGENT_NAME, version=CURRENT_VERSION, op=WALAEventOperation.HeartBeat, is_success=True, - message=telemetry_msg, log_event=False) - logger.info(u"[HEARTBEAT] Agent {0} is running as the goal state agent {1}", CURRENT_AGENT, debug_log_msg) + message=heartbeat_msg, log_event=False) + logger.info(u"[HEARTBEAT] Agent {0} is running as the goal state agent [DEBUG {1}]", CURRENT_AGENT, heartbeat_msg) # Update/Reset the counters self._heartbeat_counter += 1 diff --git a/tests/ga/test_agent_update_handler.py b/tests/ga/test_agent_update_handler.py index ed372d502c..c2d01a424a 100644 --- a/tests/ga/test_agent_update_handler.py +++ b/tests/ga/test_agent_update_handler.py @@ -411,6 +411,29 @@ def test_it_should_report_update_status_with_error_on_download_fail(self): self.assertEqual("9.9.9.10", vm_agent_update_status.expected_version) self.assertIn("Failed to download agent package from all URIs", vm_agent_update_status.message) + def test_it_should_not_report_error_status_if_new_rsm_version_is_same_as_current_after_last_update_attempt_failed(self): + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_rsm_version.xml" + + with self._get_agent_update_handler(test_data=data_file, protocol_get_error=True) as (agent_update_handler, _): + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) + vm_agent_update_status = agent_update_handler.get_vmagent_update_status() + self.assertEqual(VMAgentUpdateStatuses.Error, vm_agent_update_status.status) + self.assertEqual(1, vm_agent_update_status.code) + 
self.assertEqual("9.9.9.10", vm_agent_update_status.expected_version) + self.assertIn("Failed to download agent package from all URIs", vm_agent_update_status.message) + + # Send same version GS after last update attempt failed + agent_update_handler._protocol.mock_wire_data.set_version_in_agent_family( + str(CURRENT_VERSION)) + agent_update_handler._protocol.mock_wire_data.set_incarnation(2) + agent_update_handler._protocol.client.update_goal_state() + agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) + vm_agent_update_status = agent_update_handler.get_vmagent_update_status() + self.assertEqual(VMAgentUpdateStatuses.Success, vm_agent_update_status.status) + self.assertEqual(0, vm_agent_update_status.code) + self.assertEqual(str(CURRENT_VERSION), vm_agent_update_status.expected_version) + def test_it_should_report_update_status_with_missing_rsm_version_error(self): data_file = DATA_FILE.copy() data_file['ext_conf'] = "wire/ext_conf_version_missing_in_agent_family.xml" diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py index ef1a69053a..cf6908559d 100644 --- a/tests/ga/test_update.py +++ b/tests/ga/test_update.py @@ -2449,11 +2449,11 @@ def test_telemetry_heartbeat_creates_event(self, patch_add_event, patch_info, *_ with mock_wire_protocol(wire_protocol_data.DATA_FILE) as mock_protocol: update_handler = get_update_handler() - + agent_update_handler = Mock() update_handler.last_telemetry_heartbeat = datetime.utcnow() - timedelta(hours=1) - update_handler._send_heartbeat_telemetry(mock_protocol) + update_handler._send_heartbeat_telemetry(mock_protocol, agent_update_handler) self.assertEqual(1, patch_add_event.call_count) - self.assertTrue(any(call_args[0] == "[HEARTBEAT] Agent {0} is running as the goal state agent {1}" + self.assertTrue(any(call_args[0] == "[HEARTBEAT] Agent {0} is running as the goal state agent [DEBUG {1}]" for call_args in patch_info.call_args), "The heartbeat was not written to the agent's log") From 
fc7644afae1466fecd394887231f4935bb27470e Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Mon, 26 Aug 2024 10:45:00 -0700 Subject: [PATCH 226/240] Disable multi-config test in AzureCloud (#3192) Co-authored-by: narrieta@microsoft --- tests_e2e/test_suites/multi_config_ext.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests_e2e/test_suites/multi_config_ext.yml b/tests_e2e/test_suites/multi_config_ext.yml index 24bdaa7366..1856a4d061 100644 --- a/tests_e2e/test_suites/multi_config_ext.yml +++ b/tests_e2e/test_suites/multi_config_ext.yml @@ -7,3 +7,6 @@ tests: - "multi_config_ext/multi_config_ext.py" images: - "endorsed" +# TODO: This test has been failing due to issues in the RC2 extension on AzureCloud. Re-enable once the extension has been fixed. +skip_on_clouds: + - "AzureCloud" From 0681f148f2aaa5025321e7f200188b7eac1d238a Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Mon, 26 Aug 2024 14:23:24 -0700 Subject: [PATCH 227/240] Add cgroupv2 support for log collector (#3188) * Lc v2 implementation branch (#18) * memory experimentation changes * Initial changes * obvious issues * Fix e2e test * First round of unit test fixes * Fix existing unit tests * Remove unneeded cpu files * Get memory usage should return tuple * Fix log for tracking cgroup * Add unit tests * Add unit tests * Address pylint comments * Clean up code * clean up code * Fix unit tests (#19) * Fix unit tests * Fix unit tests * Revisions (#20) * Respond to comments * Test failures * Fix type issue * Revisions * Additional revisions (#21) * Revisions * Remove unit test for sending telem * final fixes * add config flag * Fix e2e tests --- azurelinuxagent/agent.py | 37 +- azurelinuxagent/common/conf.py | 23 +- azurelinuxagent/ga/cgroupapi.py | 113 +++-- azurelinuxagent/ga/cgroupconfigurator.py | 80 ++-- azurelinuxagent/ga/cgroupcontroller.py | 175 ++++++++ azurelinuxagent/ga/cgroupstelemetry.py | 32 +- azurelinuxagent/ga/collect_logs.py | 114 
+++-- azurelinuxagent/ga/controllermetrics.py | 404 ------------------ azurelinuxagent/ga/cpucontroller.py | 293 +++++++++++++ azurelinuxagent/ga/extensionprocessutil.py | 18 +- azurelinuxagent/ga/logcollector.py | 15 +- azurelinuxagent/ga/memorycontroller.py | 220 ++++++++++ azurelinuxagent/ga/monitor.py | 2 +- .../utils/test_extension_process_util.py | 14 +- tests/data/cgroups/cpu_mount/cpuacct.stat | 2 - tests/data/cgroups/{ => v1}/cpu.stat | 0 tests/data/cgroups/{ => v1}/cpu.stat_t0 | 0 tests/data/cgroups/{ => v1}/cpu.stat_t1 | 0 tests/data/cgroups/{ => v1}/cpuacct.stat | 0 tests/data/cgroups/{ => v1}/cpuacct.stat_t0 | 0 tests/data/cgroups/{ => v1}/cpuacct.stat_t1 | 0 tests/data/cgroups/{ => v1}/cpuacct.stat_t2 | 0 .../memory.max_usage_in_bytes | 0 .../cgroups/{memory_mount => v1}/memory.stat | 0 .../memory.stat => v1/memory.stat_missing} | 0 tests/data/cgroups/{ => v1}/proc_stat_t0 | 0 tests/data/cgroups/{ => v1}/proc_stat_t1 | 0 tests/data/cgroups/{ => v1}/proc_stat_t2 | 0 tests/data/cgroups/v2/cpu.stat | 9 + tests/data/cgroups/v2/cpu.stat_t0 | 9 + tests/data/cgroups/v2/cpu.stat_t1 | 9 + tests/data/cgroups/v2/cpu.stat_t2 | 9 + tests/data/cgroups/v2/memory.events | 6 + tests/data/cgroups/v2/memory.events_missing | 5 + tests/data/cgroups/v2/memory.peak | 1 + tests/data/cgroups/v2/memory.stat | 53 +++ tests/data/cgroups/v2/memory.stat_missing | 51 +++ tests/data/cgroups/v2/memory.swap.current | 1 + tests/data/cgroups/v2/proc_uptime_t0 | 1 + tests/data/cgroups/v2/proc_uptime_t1 | 1 + tests/data/cgroups/v2/proc_uptime_t2 | 1 + tests/ga/test_cgroupapi.py | 120 ++++-- tests/ga/test_cgroupconfigurator.py | 23 +- tests/ga/test_cgroupcontroller.py | 55 +++ tests/ga/test_cgroupstelemetry.py | 223 +++++----- tests/ga/test_collect_logs.py | 302 ++++++++++--- tests/ga/test_controllermetrics.py | 242 ----------- tests/ga/test_cpucontroller.py | 313 ++++++++++++++ tests/ga/test_logcollector.py | 67 ++- tests/ga/test_memorycontroller.py | 124 ++++++ tests/ga/test_monitor.py | 
16 +- tests/test_agent.py | 108 ++++- tests_e2e/orchestrator/runbook.yml | 1 + tests_e2e/test_suites/images.yml | 13 + tests_e2e/test_suites/log_collector.yml | 8 + .../tests/log_collector/log_collector.py | 82 ++++ .../agent_cpu_quota-check_agent_cpu_quota.py | 2 +- 57 files changed, 2354 insertions(+), 1043 deletions(-) create mode 100644 azurelinuxagent/ga/cgroupcontroller.py delete mode 100644 azurelinuxagent/ga/controllermetrics.py create mode 100644 azurelinuxagent/ga/cpucontroller.py create mode 100644 azurelinuxagent/ga/memorycontroller.py delete mode 100644 tests/data/cgroups/cpu_mount/cpuacct.stat rename tests/data/cgroups/{ => v1}/cpu.stat (100%) rename tests/data/cgroups/{ => v1}/cpu.stat_t0 (100%) rename tests/data/cgroups/{ => v1}/cpu.stat_t1 (100%) rename tests/data/cgroups/{ => v1}/cpuacct.stat (100%) rename tests/data/cgroups/{ => v1}/cpuacct.stat_t0 (100%) rename tests/data/cgroups/{ => v1}/cpuacct.stat_t1 (100%) rename tests/data/cgroups/{ => v1}/cpuacct.stat_t2 (100%) rename tests/data/cgroups/{memory_mount => v1}/memory.max_usage_in_bytes (100%) rename tests/data/cgroups/{memory_mount => v1}/memory.stat (100%) rename tests/data/cgroups/{missing_memory_counters/memory.stat => v1/memory.stat_missing} (100%) rename tests/data/cgroups/{ => v1}/proc_stat_t0 (100%) rename tests/data/cgroups/{ => v1}/proc_stat_t1 (100%) rename tests/data/cgroups/{ => v1}/proc_stat_t2 (100%) create mode 100644 tests/data/cgroups/v2/cpu.stat create mode 100644 tests/data/cgroups/v2/cpu.stat_t0 create mode 100644 tests/data/cgroups/v2/cpu.stat_t1 create mode 100644 tests/data/cgroups/v2/cpu.stat_t2 create mode 100644 tests/data/cgroups/v2/memory.events create mode 100644 tests/data/cgroups/v2/memory.events_missing create mode 100644 tests/data/cgroups/v2/memory.peak create mode 100644 tests/data/cgroups/v2/memory.stat create mode 100644 tests/data/cgroups/v2/memory.stat_missing create mode 100644 tests/data/cgroups/v2/memory.swap.current create mode 100644 
tests/data/cgroups/v2/proc_uptime_t0 create mode 100644 tests/data/cgroups/v2/proc_uptime_t1 create mode 100644 tests/data/cgroups/v2/proc_uptime_t2 create mode 100644 tests/ga/test_cgroupcontroller.py delete mode 100644 tests/ga/test_controllermetrics.py create mode 100644 tests/ga/test_cpucontroller.py create mode 100644 tests/ga/test_memorycontroller.py create mode 100644 tests_e2e/test_suites/log_collector.yml create mode 100755 tests_e2e/tests/log_collector/log_collector.py diff --git a/azurelinuxagent/agent.py b/azurelinuxagent/agent.py index babd67e1ef..bfb795c6b9 100644 --- a/azurelinuxagent/agent.py +++ b/azurelinuxagent/agent.py @@ -23,6 +23,7 @@ from __future__ import print_function +import json import os import re import subprocess @@ -31,7 +32,8 @@ from azurelinuxagent.common.exception import CGroupsException from azurelinuxagent.ga import logcollector, cgroupconfigurator -from azurelinuxagent.ga.controllermetrics import AGENT_LOG_COLLECTOR, CpuMetrics +from azurelinuxagent.ga.cgroupcontroller import AGENT_LOG_COLLECTOR +from azurelinuxagent.ga.cpucontroller import _CpuController from azurelinuxagent.ga.cgroupapi import get_cgroup_api, log_cgroup_warning, InvalidCgroupMountpointException import azurelinuxagent.common.conf as conf @@ -208,7 +210,7 @@ def collect_logs(self, is_full_mode): # Check the cgroups unit log_collector_monitor = None - tracked_metrics = [] + tracked_controllers = [] if CollectLogsHandler.is_enabled_monitor_cgroups_check(): try: cgroup_api = get_cgroup_api() @@ -220,10 +222,10 @@ def collect_logs(self, is_full_mode): sys.exit(logcollector.INVALID_CGROUPS_ERRCODE) log_collector_cgroup = cgroup_api.get_process_cgroup(process_id="self", cgroup_name=AGENT_LOG_COLLECTOR) - tracked_metrics = log_collector_cgroup.get_controller_metrics() + tracked_controllers = log_collector_cgroup.get_controllers() - if len(tracked_metrics) != len(log_collector_cgroup.get_supported_controllers()): - log_cgroup_warning("At least one required controller 
is missing. The following controllers are required for the log collector to run: {0}".format(log_collector_cgroup.get_supported_controllers())) + if len(tracked_controllers) != len(log_collector_cgroup.get_supported_controller_names()): + log_cgroup_warning("At least one required controller is missing. The following controllers are required for the log collector to run: {0}".format(log_collector_cgroup.get_supported_controller_names())) sys.exit(logcollector.INVALID_CGROUPS_ERRCODE) if not log_collector_cgroup.check_in_expected_slice(cgroupconfigurator.LOGCOLLECTOR_SLICE): @@ -235,15 +237,30 @@ def collect_logs(self, is_full_mode): # Running log collector resource monitoring only if agent starts the log collector. # If Log collector start by any other means, then it will not be monitored. if CollectLogsHandler.is_enabled_monitor_cgroups_check(): - for metric in tracked_metrics: - if isinstance(metric, CpuMetrics): - metric.initialize_cpu_usage() + for controller in tracked_controllers: + if isinstance(controller, _CpuController): + controller.initialize_cpu_usage() break - log_collector_monitor = get_log_collector_monitor_handler(tracked_metrics) + log_collector_monitor = get_log_collector_monitor_handler(tracked_controllers) log_collector_monitor.run() - archive = log_collector.collect_logs_and_get_archive() + + archive, total_uncompressed_size = log_collector.collect_logs_and_get_archive() logger.info("Log collection successfully completed. 
Archive can be found at {0} " "and detailed log output can be found at {1}".format(archive, OUTPUT_RESULTS_FILE_PATH)) + + if log_collector_monitor is not None: + log_collector_monitor.stop() + try: + metrics_summary = log_collector_monitor.get_max_recorded_metrics() + metrics_summary['Total Uncompressed File Size (B)'] = total_uncompressed_size + msg = json.dumps(metrics_summary) + logger.info(msg) + event.add_event(op=event.WALAEventOperation.LogCollection, message=msg, log_event=False) + except Exception as e: + msg = "An error occurred while reporting log collector resource usage summary: {0}".format(ustr(e)) + logger.warn(msg) + event.add_event(op=event.WALAEventOperation.LogCollection, is_success=False, message=msg, log_event=False) + except Exception as e: logger.error("Log collection completed unsuccessfully. Error: {0}".format(ustr(e))) logger.info("Detailed log output can be found at {0}".format(OUTPUT_RESULTS_FILE_PATH)) diff --git a/azurelinuxagent/common/conf.py b/azurelinuxagent/common/conf.py index 85a7bc2f73..b5eec73ce2 100644 --- a/azurelinuxagent/common/conf.py +++ b/azurelinuxagent/common/conf.py @@ -146,7 +146,8 @@ def load_conf_from_file(conf_file_path, conf=__conf__): "Debug.CgroupDisableOnQuotaCheckFailure": True, "Debug.EnableAgentMemoryUsageCheck": False, "Debug.EnableFastTrack": True, - "Debug.EnableGAVersioning": True + "Debug.EnableGAVersioning": True, + "Debug.EnableCgroupV2ResourceLimiting": False } @@ -200,7 +201,8 @@ def load_conf_from_file(conf_file_path, conf=__conf__): "Debug.EtpCollectionPeriod": 300, "Debug.AutoUpdateHotfixFrequency": 14400, "Debug.AutoUpdateNormalFrequency": 86400, - "Debug.FirewallRulesLogPeriod": 86400 + "Debug.FirewallRulesLogPeriod": 86400, + "Debug.LogCollectorInitialDelay": 5 * 60 } @@ -680,3 +682,20 @@ def get_firewall_rules_log_period(conf=__conf__): NOTE: This option is experimental and may be removed in later versions of the Agent. 
""" return conf.get_int("Debug.FirewallRulesLogPeriod", 86400) + + +def get_enable_cgroup_v2_resource_limiting(conf=__conf__): + """ + If True, the agent will enable resource monitoring and enforcement for the log collector on machines using cgroup v2. + NOTE: This option is experimental and may be removed in later versions of the Agent. + """ + return conf.get_switch("Debug.EnableCgroupV2ResourceLimiting", False) + + +def get_log_collector_initial_delay(conf=__conf__): + """ + Determine the initial delay at service start before the first periodic log collection. + + NOTE: This option is experimental and may be removed in later versions of the Agent. + """ + return conf.get_int("Debug.LogCollectorInitialDelay", 5 * 60) diff --git a/azurelinuxagent/ga/cgroupapi.py b/azurelinuxagent/ga/cgroupapi.py index 3483527620..72b41ec773 100644 --- a/azurelinuxagent/ga/cgroupapi.py +++ b/azurelinuxagent/ga/cgroupapi.py @@ -24,8 +24,9 @@ from azurelinuxagent.common import logger from azurelinuxagent.common.event import WALAEventOperation, add_event -from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry +from azurelinuxagent.ga.cpucontroller import _CpuController, CpuControllerV1, CpuControllerV2 +from azurelinuxagent.ga.memorycontroller import MemoryControllerV1, MemoryControllerV2 from azurelinuxagent.common.conf import get_agent_pid_file_path from azurelinuxagent.common.exception import CGroupsException, ExtensionErrorCodes, ExtensionError, \ ExtensionOperationError @@ -292,7 +293,7 @@ def _get_controller_mountpoints(): if match is not None: path = match.group('path') controller = match.group('controller') - if controller is not None and path is not None and controller in CgroupV1.get_supported_controllers(): + if controller is not None and path is not None and controller in CgroupV1.get_supported_controller_names(): mount_points[controller] = path return mount_points @@ -335,7 +336,7 @@ def 
_get_process_relative_controller_paths(process_id): if match is not None: controller = match.group('controller') path = match.group('path').lstrip('/') if match.group('path') != '/' else None - if path is not None and controller in CgroupV1.get_supported_controllers(): + if path is not None and controller in CgroupV1.get_supported_controller_names(): conroller_relative_paths[controller] = path return conroller_relative_paths @@ -371,7 +372,7 @@ def get_process_cgroup(self, process_id, cgroup_name): controller_paths=process_controller_paths) def log_root_paths(self): - for controller in CgroupV1.get_supported_controllers(): + for controller in CgroupV1.get_supported_controller_names(): mount_point = self._cgroup_mountpoints.get(controller) if mount_point is None: log_cgroup_info("The {0} controller is not mounted".format(controller), send_event=False) @@ -402,14 +403,14 @@ def start_extension_command(self, extension_name, command, cmd_name, timeout, sh log_cgroup_info("Started extension in unit '{0}'".format(scope_name), send_event=False) - cpu_metrics = None + cpu_controller = None try: cgroup_relative_path = os.path.join('azure.slice/azure-vmextensions.slice', extension_slice_name) cgroup = self.get_cgroup_from_relative_path(cgroup_relative_path, extension_name) - for metrics in cgroup.get_controller_metrics(): - if isinstance(metrics, CpuMetrics): - cpu_metrics = metrics - CGroupsTelemetry.track_cgroup(metrics) + for controller in cgroup.get_controllers(): + if isinstance(controller, _CpuController): + cpu_controller = controller + CGroupsTelemetry.track_cgroup_controller(controller) except IOError as e: if e.errno == 2: # 'No such file or directory' @@ -421,7 +422,7 @@ def start_extension_command(self, extension_name, command, cmd_name, timeout, sh # Wait for process completion or timeout try: return handle_process_completion(process=process, command=command, timeout=timeout, stdout=stdout, - stderr=stderr, error_code=error_code, cpu_metrics=cpu_metrics) + 
stderr=stderr, error_code=error_code, cpu_controller=cpu_controller) except ExtensionError as e: # The extension didn't terminate successfully. Determine whether it was due to systemd errors or # extension errors. @@ -498,7 +499,7 @@ def _get_controllers_enabled_at_root(root_cgroup_path): enabled_controllers_file = os.path.join(root_cgroup_path, 'cgroup.subtree_control') if os.path.exists(enabled_controllers_file): controllers_enabled_at_root = fileutil.read_file(enabled_controllers_file).rstrip().split() - return list(set(controllers_enabled_at_root) & set(CgroupV2.get_supported_controllers())) + return list(set(controllers_enabled_at_root) & set(CgroupV2.get_supported_controller_names())) return [] @staticmethod @@ -546,7 +547,7 @@ def get_process_cgroup(self, process_id, cgroup_name): def log_root_paths(self): log_cgroup_info("The root cgroup path is {0}".format(self._root_cgroup_path), send_event=False) - for controller in CgroupV2.get_supported_controllers(): + for controller in CgroupV2.get_supported_controller_names(): if controller in self._controllers_enabled_at_root: log_cgroup_info("The {0} controller is enabled at the root cgroup".format(controller), send_event=False) else: @@ -564,9 +565,9 @@ def __init__(self, cgroup_name): self._cgroup_name = cgroup_name @staticmethod - def get_supported_controllers(): + def get_supported_controller_names(): """ - Cgroup version specific. Returns a list of the controllers which the agent supports. + Cgroup version specific. Returns a list of the controllers which the agent supports as strings. """ raise NotImplementedError() @@ -578,12 +579,12 @@ def check_in_expected_slice(self, expected_slice): """ raise NotImplementedError() - def get_controller_metrics(self, expected_relative_path=None): + def get_controllers(self, expected_relative_path=None): """ - Cgroup version specific. Returns a list of the metrics for the agent supported controllers which are - mounted/enabled for the cgroup. + Cgroup version specific. 
Returns a list of the agent supported controllers which are mounted/enabled for the cgroup. - :param expected_relative_path: The expected relative path of the cgroup. If provided, only metrics for controllers at this expected path will be returned. + :param expected_relative_path: The expected relative path of the cgroup. If provided, only controllers mounted + at this expected path will be returned. """ raise NotImplementedError() @@ -608,7 +609,7 @@ def __init__(self, cgroup_name, controller_mountpoints, controller_paths): self._controller_paths = controller_paths @staticmethod - def get_supported_controllers(): + def get_supported_controller_names(): return [CgroupV1.CPU_CONTROLLER, CgroupV1.MEMORY_CONTROLLER] def check_in_expected_slice(self, expected_slice): @@ -620,39 +621,39 @@ def check_in_expected_slice(self, expected_slice): return in_expected_slice - def get_controller_metrics(self, expected_relative_path=None): - metrics = [] + def get_controllers(self, expected_relative_path=None): + controllers = [] - for controller in self.get_supported_controllers(): - controller_metrics = None - controller_path = self._controller_paths.get(controller) - controller_mountpoint = self._controller_mountpoints.get(controller) + for supported_controller_name in self.get_supported_controller_names(): + controller = None + controller_path = self._controller_paths.get(supported_controller_name) + controller_mountpoint = self._controller_mountpoints.get(supported_controller_name) if controller_mountpoint is None: - log_cgroup_warning("{0} controller is not mounted; will not track metrics".format(controller), send_event=False) + log_cgroup_warning("{0} controller is not mounted; will not track".format(supported_controller_name), send_event=False) continue if controller_path is None: - log_cgroup_warning("{0} is not mounted for the {1} cgroup; will not track metrics".format(controller, self._cgroup_name), send_event=False) + log_cgroup_warning("{0} is not mounted for the {1} 
cgroup; will not track".format(supported_controller_name, self._cgroup_name), send_event=False) continue if expected_relative_path is not None: expected_path = os.path.join(controller_mountpoint, expected_relative_path) if controller_path != expected_path: - log_cgroup_warning("The {0} controller is not mounted at the expected path for the {1} cgroup; will not track metrics. Actual cgroup path:[{2}] Expected:[{3}]".format(controller, self._cgroup_name, controller_path, expected_path), send_event=False) + log_cgroup_warning("The {0} controller is not mounted at the expected path for the {1} cgroup; will not track. Actual cgroup path:[{2}] Expected:[{3}]".format(supported_controller_name, self._cgroup_name, controller_path, expected_path), send_event=False) continue - if controller == self.CPU_CONTROLLER: - controller_metrics = CpuMetrics(self._cgroup_name, controller_path) - elif controller == self.MEMORY_CONTROLLER: - controller_metrics = MemoryMetrics(self._cgroup_name, controller_path) + if supported_controller_name == self.CPU_CONTROLLER: + controller = CpuControllerV1(self._cgroup_name, controller_path) + elif supported_controller_name == self.MEMORY_CONTROLLER: + controller = MemoryControllerV1(self._cgroup_name, controller_path) - if controller_metrics is not None: - msg = "{0} metrics for cgroup: {1}".format(controller, controller_metrics) + if controller is not None: + msg = "{0} controller for cgroup: {1}".format(supported_controller_name, controller) log_cgroup_info(msg, send_event=False) - metrics.append(controller_metrics) + controllers.append(controller) - return metrics + return controllers def get_controller_procs_path(self, controller): controller_path = self._controller_paths.get(controller) @@ -687,7 +688,7 @@ def __init__(self, cgroup_name, root_cgroup_path, cgroup_path, enabled_controlle self._enabled_controllers = enabled_controllers @staticmethod - def get_supported_controllers(): + def get_supported_controller_names(): return 
[CgroupV2.CPU_CONTROLLER, CgroupV2.MEMORY_CONTROLLER] def check_in_expected_slice(self, expected_slice): @@ -697,9 +698,41 @@ def check_in_expected_slice(self, expected_slice): return True - def get_controller_metrics(self, expected_relative_path=None): - # TODO - Implement controller metrics for cgroup v2 - raise NotImplementedError() + def get_controllers(self, expected_relative_path=None): + controllers = [] + + for supported_controller_name in self.get_supported_controller_names(): + controller = None + + if supported_controller_name not in self._enabled_controllers: + log_cgroup_warning("{0} controller is not enabled; will not track".format(supported_controller_name), + send_event=False) + continue + + if self._cgroup_path == "": + log_cgroup_warning("Cgroup path for {0} cannot be determined; will not track".format(self._cgroup_name), + send_event=False) + continue + + if expected_relative_path is not None: + expected_path = os.path.join(self._root_cgroup_path, expected_relative_path) + if self._cgroup_path != expected_path: + log_cgroup_warning( + "The {0} cgroup is not mounted at the expected path; will not track. 
Actual cgroup path:[{1}] Expected:[{2}]".format( + self._cgroup_name, self._cgroup_path, expected_path), send_event=False) + continue + + if supported_controller_name == self.CPU_CONTROLLER: + controller = CpuControllerV2(self._cgroup_name, self._cgroup_path) + elif supported_controller_name == self.MEMORY_CONTROLLER: + controller = MemoryControllerV2(self._cgroup_name, self._cgroup_path) + + if controller is not None: + msg = "{0} controller for cgroup: {1}".format(supported_controller_name, controller) + log_cgroup_info(msg, send_event=False) + controllers.append(controller) + + return controllers def get_procs_path(self): if self._cgroup_path != "": diff --git a/azurelinuxagent/ga/cgroupconfigurator.py b/azurelinuxagent/ga/cgroupconfigurator.py index f18c23e3c5..2a75344b42 100644 --- a/azurelinuxagent/ga/cgroupconfigurator.py +++ b/azurelinuxagent/ga/cgroupconfigurator.py @@ -23,10 +23,12 @@ from azurelinuxagent.common import conf from azurelinuxagent.common import logger -from azurelinuxagent.ga.controllermetrics import CpuMetrics, AGENT_NAME_TELEMETRY, MetricsCounter, MemoryMetrics +from azurelinuxagent.ga.cgroupcontroller import AGENT_NAME_TELEMETRY, MetricsCounter from azurelinuxagent.ga.cgroupapi import SystemdRunError, EXTENSION_SLICE_PREFIX, CGroupUtil, SystemdCgroupApiv2, \ log_cgroup_info, log_cgroup_warning, get_cgroup_api, InvalidCgroupMountpointException from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry +from azurelinuxagent.ga.cpucontroller import _CpuController +from azurelinuxagent.ga.memorycontroller import _MemoryController from azurelinuxagent.common.exception import ExtensionErrorCodes, CGroupsException, AgentMemoryExceededException from azurelinuxagent.common.future import ustr from azurelinuxagent.common.osutil import systemd @@ -76,8 +78,11 @@ CPUQuota={cpu_quota} MemoryAccounting=yes """ -_LOGCOLLECTOR_CPU_QUOTA = "5%" -LOGCOLLECTOR_MEMORY_LIMIT = 30 * 1024 ** 2 # 30Mb +LOGCOLLECTOR_CPU_QUOTA_FOR_V1_AND_V2 = "5%" 
+LOGCOLLECTOR_MEMORY_THROTTLE_LIMIT_FOR_V2 = "170M" +LOGCOLLECTOR_MAX_THROTTLED_EVENTS_FOR_V2 = 10 +LOGCOLLECTOR_ANON_MEMORY_LIMIT_FOR_V1_AND_V2 = 25 * 1024 ** 2 # 25Mb +LOGCOLLECTOR_CACHE_MEMORY_LIMIT_FOR_V1_AND_V2 = 155 * 1024 ** 2 # 155Mb _AGENT_DROP_IN_FILE_SLICE = "10-Slice.conf" _AGENT_DROP_IN_FILE_SLICE_CONTENTS = """ @@ -176,6 +181,11 @@ def initialize(self): log_cgroup_warning("Unable to determine which cgroup version to use: {0}".format(ustr(e)), send_event=True) return + # Setup the slices before v2 check. Cgroup v2 usage is disabled for agent and extensions, but can be + # enabled for log collector in waagent.conf. The log collector slice should be created in case v2 + # usage is enabled for log collector. + self.__setup_azure_slice() + if self.using_cgroup_v2(): log_cgroup_info("Agent and extensions resource monitoring is not currently supported on cgroup v2") return @@ -186,8 +196,6 @@ def initialize(self): log_cgroup_warning("The agent is within an unexpected slice: {0}".format(agent_slice)) return - self.__setup_azure_slice() - # Log mount points/root paths for cgroup controllers self._cgroups_api.log_root_paths() @@ -199,19 +207,19 @@ def initialize(self): self.disable(reason, DisableCgroups.ALL) return - # Get metrics to track - metrics = self._agent_cgroup.get_controller_metrics(expected_relative_path=os.path.join(agent_slice, systemd.get_agent_unit_name())) - if len(metrics) > 0: + # Get controllers to track + agent_controllers = self._agent_cgroup.get_controllers(expected_relative_path=os.path.join(agent_slice, systemd.get_agent_unit_name())) + if len(agent_controllers) > 0: self.enable() - for metric in metrics: - for prop in metric.get_unit_properties(): + for controller in agent_controllers: + for prop in controller.get_unit_properties(): log_cgroup_info('Agent {0} unit property value: {1}'.format(prop, systemd.get_unit_property(systemd.get_agent_unit_name(), prop))) - if isinstance(metric, CpuMetrics): + if isinstance(controller, 
_CpuController): self.__set_cpu_quota(conf.get_agent_cpu_quota()) - elif isinstance(metric, MemoryMetrics): - self._agent_memory_metrics = metric - CGroupsTelemetry.track_cgroup(metric) + elif isinstance(controller, _MemoryController): + self._agent_memory_metrics = controller + CGroupsTelemetry.track_cgroup_controller(controller) except Exception as exception: log_cgroup_warning("Error initializing cgroups: {0}".format(ustr(exception))) @@ -279,7 +287,7 @@ def __setup_azure_slice(): files_to_create.append((vmextensions_slice, _VMEXTENSIONS_SLICE_CONTENTS)) # Update log collector slice contents - slice_contents = _LOGCOLLECTOR_SLICE_CONTENTS_FMT.format(cpu_quota=_LOGCOLLECTOR_CPU_QUOTA) + slice_contents = _LOGCOLLECTOR_SLICE_CONTENTS_FMT.format(cpu_quota=LOGCOLLECTOR_CPU_QUOTA_FOR_V1_AND_V2) files_to_create.append((logcollector_slice, slice_contents)) if fileutil.findre_in_file(agent_unit_file, r"Slice=") is not None: @@ -441,10 +449,10 @@ def disable(self, reason, disable_cgroups): elif disable_cgroups == DisableCgroups.AGENT: # disable agent self._agent_cgroups_enabled = False self.__reset_agent_cpu_quota() - agent_metrics = self._agent_cgroup.get_controller_metrics() - for metric in agent_metrics: - if isinstance(metric, CpuMetrics): - CGroupsTelemetry.stop_tracking(metric) + agent_controllers = self._agent_cgroup.get_controllers() + for controller in agent_controllers: + if isinstance(controller, _CpuController): + CGroupsTelemetry.stop_tracking(controller) break log_cgroup_warning("Disabling resource usage monitoring. Reason: {0}".format(reason), op=WALAEventOperation.CGroupsDisabled) @@ -603,6 +611,22 @@ def _check_processes_in_agent_cgroup(self): self._report_agent_cgroups_procs(agent_cgroup_proc_names, unexpected) raise CGroupsException("The agent's cgroup includes unexpected processes: {0}".format(unexpected)) + def get_logcollector_unit_properties(self): + """ + Returns the systemd unit properties for the log collector process. 
+ + Each property should be explicitly set (even if already included in the log collector slice) for the log + collector process to run in the transient scope directory with the expected accounting and limits. + """ + logcollector_properties = ["--property=CPUAccounting=yes", "--property=MemoryAccounting=yes", "--property=CPUQuota={0}".format(LOGCOLLECTOR_CPU_QUOTA_FOR_V1_AND_V2)] + if not self.using_cgroup_v2(): + return logcollector_properties + # Memory throttling limit is used when running log collector on v2 machines using the 'MemoryHigh' property. + # We do not use a systemd property to enforce memory on V1 because it invokes the OOM killer if the limit + # is exceeded. + logcollector_properties.append("--property=MemoryHigh={0}".format(LOGCOLLECTOR_MEMORY_THROTTLE_LIMIT_FOR_V2)) + return logcollector_properties + @staticmethod def _get_command(pid): try: @@ -727,10 +751,10 @@ def _get_parent(pid): def start_tracking_unit_cgroups(self, unit_name): try: cgroup = self._cgroups_api.get_unit_cgroup(unit_name, unit_name) - metrics = cgroup.get_controller_metrics() + controllers = cgroup.get_controllers() - for metric in metrics: - CGroupsTelemetry.track_cgroup(metric) + for controller in controllers: + CGroupsTelemetry.track_cgroup_controller(controller) except Exception as exception: log_cgroup_info("Failed to start tracking resource usage for the extension: {0}".format(ustr(exception)), send_event=False) @@ -738,10 +762,10 @@ def start_tracking_unit_cgroups(self, unit_name): def stop_tracking_unit_cgroups(self, unit_name): try: cgroup = self._cgroups_api.get_unit_cgroup(unit_name, unit_name) - metrics = cgroup.get_controller_metrics() + controllers = cgroup.get_controllers() - for metric in metrics: - CGroupsTelemetry.stop_tracking(metric) + for controller in controllers: + CGroupsTelemetry.stop_tracking(controller) except Exception as exception: log_cgroup_info("Failed to stop tracking resource usage for the extension service: {0}".format(ustr(exception)), 
send_event=False) @@ -753,9 +777,9 @@ def stop_tracking_extension_cgroups(self, extension_name): cgroup = self._cgroups_api.get_cgroup_from_relative_path(relative_path=cgroup_relative_path, cgroup_name=extension_name) - metrics = cgroup.get_controller_metrics() - for metric in metrics: - CGroupsTelemetry.stop_tracking(metric) + controllers = cgroup.get_controllers() + for controller in controllers: + CGroupsTelemetry.stop_tracking(controller) except Exception as exception: log_cgroup_info("Failed to stop tracking resource usage for the extension service: {0}".format(ustr(exception)), send_event=False) diff --git a/azurelinuxagent/ga/cgroupcontroller.py b/azurelinuxagent/ga/cgroupcontroller.py new file mode 100644 index 0000000000..a530553b21 --- /dev/null +++ b/azurelinuxagent/ga/cgroupcontroller.py @@ -0,0 +1,175 @@ +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Requires Python 2.6+ and Openssl 1.0+ + +import errno +import os +from datetime import timedelta + +from azurelinuxagent.common import logger, conf +from azurelinuxagent.common.exception import CGroupsException +from azurelinuxagent.common.future import ustr +from azurelinuxagent.common.utils import fileutil + +_REPORT_EVERY_HOUR = timedelta(hours=1) +_DEFAULT_REPORT_PERIOD = timedelta(seconds=conf.get_cgroup_check_period()) + +AGENT_NAME_TELEMETRY = "walinuxagent.service" # Name used for telemetry; it needs to be consistent even if the name of the service changes +AGENT_LOG_COLLECTOR = "azure-walinuxagent-logcollector" + + +class CounterNotFound(Exception): + pass + + +class MetricValue(object): + """ + Class for defining all the required metric fields to send telemetry. + """ + + def __init__(self, category, counter, instance, value, report_period=_DEFAULT_REPORT_PERIOD): + self._category = category + self._counter = counter + self._instance = instance + self._value = value + self._report_period = report_period + + @property + def category(self): + return self._category + + @property + def counter(self): + return self._counter + + @property + def instance(self): + return self._instance + + @property + def value(self): + return self._value + + @property + def report_period(self): + return self._report_period + + +class MetricsCategory(object): + MEMORY_CATEGORY = "Memory" + CPU_CATEGORY = "CPU" + + +class MetricsCounter(object): + PROCESSOR_PERCENT_TIME = "% Processor Time" + THROTTLED_TIME = "Throttled Time (s)" + TOTAL_MEM_USAGE = "Total Memory Usage (B)" + ANON_MEM_USAGE = "Anon Memory Usage (B)" + CACHE_MEM_USAGE = "Cache Memory Usage (B)" + MAX_MEM_USAGE = "Max Memory Usage (B)" + SWAP_MEM_USAGE = "Swap Memory Usage (B)" + MEM_THROTTLED = "Total Memory Throttled Events" + AVAILABLE_MEM = "Available Memory (MB)" + USED_MEM = "Used Memory (MB)" + + +class _CgroupController(object): + def __init__(self, name, cgroup_path): + """ + Initialize _data 
collection for the controller + :param: name: Name of the CGroup + :param: cgroup_path: Path of the controller + :return: + """ + self.name = name + self.path = cgroup_path + + def __str__(self): + return "{0} [{1}]".format(self.name, self.path) + + def _get_cgroup_file(self, file_name): + return os.path.join(self.path, file_name) + + def _get_file_contents(self, file_name): + """ + Retrieve the contents of file. + + :param str file_name: Name of file within that metric controller + :return: Entire contents of the file + :rtype: str + """ + parameter_file = self._get_cgroup_file(file_name) + + return fileutil.read_file(parameter_file) + + def _get_parameters(self, parameter_name, first_line_only=False): + """ + Retrieve the values of a parameter from a controller. + Returns a list of values in the file. + + :param first_line_only: return only the first line. + :param str parameter_name: Name of file within that metric controller + :return: The first line of the file, without line terminator + :rtype: [str] + """ + result = [] + try: + values = self._get_file_contents(parameter_name).splitlines() + result = values[0] if first_line_only else values + except IndexError: + parameter_filename = self._get_cgroup_file(parameter_name) + logger.error("File {0} is empty but should not be".format(parameter_filename)) + raise CGroupsException("File {0} is empty but should not be".format(parameter_filename)) + except Exception as e: + if isinstance(e, (IOError, OSError)) and e.errno == errno.ENOENT: # pylint: disable=E1101 + raise e + parameter_filename = self._get_cgroup_file(parameter_name) + raise CGroupsException("Exception while attempting to read {0}".format(parameter_filename), e) + return result + + def is_active(self): + """ + Returns True if any processes belong to the cgroup. In v1, cgroup.procs returns a list of the thread group IDs + belong to the cgroup. In v2, cgroup.procs returns a list of the process IDs belonging to the cgroup. 
+ """ + try: + procs = self._get_parameters("cgroup.procs") + if procs: + return len(procs) != 0 + except (IOError, OSError) as e: + if e.errno == errno.ENOENT: + # only suppressing file not found exceptions. + pass + else: + logger.periodic_warn(logger.EVERY_HALF_HOUR, + 'Could not get list of procs from "cgroup.procs" file in the cgroup: {0}.' + ' Internal error: {1}'.format(self.path, ustr(e))) + except CGroupsException as e: + logger.periodic_warn(logger.EVERY_HALF_HOUR, + 'Could not get list of procs from "cgroup.procs" file in the cgroup: {0}.' + ' Internal error: {1}'.format(self.path, ustr(e))) + return False + + def get_tracked_metrics(self, **_): + """ + Retrieves the current value of the metrics tracked for this controller/cgroup and returns them as an array. + """ + raise NotImplementedError() + + def get_unit_properties(self): + """ + Returns a list of the unit properties to collect for the controller. + """ + raise NotImplementedError() diff --git a/azurelinuxagent/ga/cgroupstelemetry.py b/azurelinuxagent/ga/cgroupstelemetry.py index e8efad0382..412f75f4f0 100644 --- a/azurelinuxagent/ga/cgroupstelemetry.py +++ b/azurelinuxagent/ga/cgroupstelemetry.py @@ -17,7 +17,7 @@ import threading from azurelinuxagent.common import logger -from azurelinuxagent.ga.controllermetrics import CpuMetrics +from azurelinuxagent.ga.cpucontroller import _CpuController from azurelinuxagent.common.future import ustr @@ -37,18 +37,18 @@ def get_track_throttled_time(): return CGroupsTelemetry._track_throttled_time @staticmethod - def track_cgroup(cgroup): + def track_cgroup_controller(cgroup_controller): """ - Adds the given item to the dictionary of tracked cgroups + Adds the given item to the dictionary of tracked cgroup controllers """ - if isinstance(cgroup, CpuMetrics): + if isinstance(cgroup_controller, _CpuController): # set the current cpu usage - cgroup.initialize_cpu_usage() + cgroup_controller.initialize_cpu_usage() with CGroupsTelemetry._rlock: - if not 
CGroupsTelemetry.is_tracked(cgroup.path): - CGroupsTelemetry._tracked[cgroup.path] = cgroup - logger.info("Started tracking cgroup {0}", cgroup) + if not CGroupsTelemetry.is_tracked(cgroup_controller.path): + CGroupsTelemetry._tracked[cgroup_controller.path] = cgroup_controller + logger.info("Started tracking cgroup {0}", cgroup_controller) @staticmethod def is_tracked(path): @@ -75,11 +75,11 @@ def stop_tracking(cgroup): @staticmethod def poll_all_tracked(): metrics = [] - inactive_cgroups = [] + inactive_controllers = [] with CGroupsTelemetry._rlock: - for cgroup in CGroupsTelemetry._tracked.values(): + for controller in CGroupsTelemetry._tracked.values(): try: - metrics.extend(cgroup.get_tracked_metrics(track_throttled_time=CGroupsTelemetry._track_throttled_time)) + metrics.extend(controller.get_tracked_metrics(track_throttled_time=CGroupsTelemetry._track_throttled_time)) except Exception as e: # There can be scenarios when the CGroup has been deleted by the time we are fetching the values # from it. This would raise IOError with file entry not found (ERRNO: 2). We do not want to log @@ -87,11 +87,11 @@ def poll_all_tracked(): # exceptions which could occur, which is why we do a periodic log for all the other errors. if not isinstance(e, (IOError, OSError)) or e.errno != errno.ENOENT: # pylint: disable=E1101 logger.periodic_warn(logger.EVERY_HOUR, '[PERIODIC] Could not collect metrics for cgroup ' - '{0}. Error : {1}'.format(cgroup.name, ustr(e))) - if not cgroup.is_active(): - inactive_cgroups.append(cgroup) - for inactive_cgroup in inactive_cgroups: - CGroupsTelemetry.stop_tracking(inactive_cgroup) + '{0}. 
Error : {1}'.format(controller.name, ustr(e))) + if not controller.is_active(): + inactive_controllers.append(controller) + for inactive_controller in inactive_controllers: + CGroupsTelemetry.stop_tracking(inactive_controller) return metrics diff --git a/azurelinuxagent/ga/collect_logs.py b/azurelinuxagent/ga/collect_logs.py index d8ea3dba3d..488691a5aa 100644 --- a/azurelinuxagent/ga/collect_logs.py +++ b/azurelinuxagent/ga/collect_logs.py @@ -25,19 +25,17 @@ import azurelinuxagent.common.conf as conf from azurelinuxagent.common import logger -from azurelinuxagent.ga.controllermetrics import MetricsCounter -from azurelinuxagent.common.event import elapsed_milliseconds, add_event, WALAEventOperation, report_metric +from azurelinuxagent.ga.cgroupcontroller import MetricsCounter +from azurelinuxagent.common.event import elapsed_milliseconds, add_event, WALAEventOperation from azurelinuxagent.common.future import ustr from azurelinuxagent.ga.interfaces import ThreadHandlerInterface from azurelinuxagent.ga.logcollector import COMPRESSED_ARCHIVE_PATH, GRACEFUL_KILL_ERRCODE -from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator, LOGCOLLECTOR_MEMORY_LIMIT +from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator, LOGCOLLECTOR_ANON_MEMORY_LIMIT_FOR_V1_AND_V2, LOGCOLLECTOR_CACHE_MEMORY_LIMIT_FOR_V1_AND_V2, LOGCOLLECTOR_MAX_THROTTLED_EVENTS_FOR_V2 from azurelinuxagent.common.protocol.util import get_protocol_util from azurelinuxagent.common.utils import shellutil from azurelinuxagent.common.utils.shellutil import CommandError from azurelinuxagent.common.version import PY_VERSION_MAJOR, PY_VERSION_MINOR, AGENT_NAME, CURRENT_VERSION -_INITIAL_LOG_COLLECTION_DELAY = 5 * 60 # Five minutes of delay - def get_collect_logs_handler(): return CollectLogsHandler() @@ -46,18 +44,27 @@ def get_collect_logs_handler(): def is_log_collection_allowed(): # There are three conditions that need to be met in order to allow periodic log collection: # 1) It should be 
enabled in the configuration. - # 2) The system must be using cgroups to manage services. Needed for resource limiting of the log collection. + # 2) The system must be using cgroups to manage services - needed for resource limiting of the log collection. The + # agent currently fully supports resource limiting for v1, but only supports log collector resource limiting for v2 + # if enabled via configuration. + # This condition is True if either: + # a. cgroup usage in the agent is enabled; OR + # b. the machine is using cgroup v2 and v2 resource limiting is enabled in the configuration. # 3) The python version must be greater than 2.6 in order to support the ZipFile library used when collecting. conf_enabled = conf.get_collect_logs() cgroups_enabled = CGroupConfigurator.get_instance().enabled() + cgroup_v2_resource_limiting_enabled = CGroupConfigurator.get_instance().using_cgroup_v2() and conf.get_enable_cgroup_v2_resource_limiting() supported_python = PY_VERSION_MINOR >= 6 if PY_VERSION_MAJOR == 2 else PY_VERSION_MAJOR == 3 - is_allowed = conf_enabled and cgroups_enabled and supported_python + is_allowed = conf_enabled and (cgroups_enabled or cgroup_v2_resource_limiting_enabled) and supported_python msg = "Checking if log collection is allowed at this time [{0}]. All three conditions must be met: " \ - "configuration enabled [{1}], cgroups enabled [{2}], python supported: [{3}]".format(is_allowed, - conf_enabled, - cgroups_enabled, - supported_python) + "1. configuration enabled [{1}], " \ + "2. cgroups v1 enabled [{2}] OR cgroups v2 is in use and v2 resource limiting configuration enabled [{3}], " \ + "3. 
python supported: [{4}]".format(is_allowed, + conf_enabled, + cgroups_enabled, + cgroup_v2_resource_limiting_enabled, + supported_python) logger.info(msg) add_event( name=AGENT_NAME, @@ -144,7 +151,7 @@ def init_protocols(self): def daemon(self): # Delay the first collector on start up to give short lived VMs (that might be dead before the second # collection has a chance to run) an opportunity to do produce meaningful logs to collect. - time.sleep(_INITIAL_LOG_COLLECTION_DELAY) + time.sleep(conf.get_log_collector_initial_delay()) try: CollectLogsHandler.enable_monitor_cgroups_check() @@ -171,15 +178,13 @@ def collect_and_send_logs(self): def _collect_logs(self): logger.info("Starting log collection...") - # Invoke the command line tool in the agent to collect logs, with resource limits on CPU. - # Some distros like ubuntu20 by default cpu and memory accounting enabled. Thus create nested cgroups under the logcollector slice - # So disabling CPU and Memory accounting prevents from creating nested cgroups, so that all the counters will be present in logcollector Cgroup - + # Invoke the command line tool in the agent to collect logs. The --scope option starts the process as a systemd + # transient scope unit. The --property option is used to set systemd memory and cpu properties on the scope. systemd_cmd = [ - "systemd-run", "--property=CPUAccounting=no", "--property=MemoryAccounting=no", + "systemd-run", "--unit={0}".format(logcollector.CGROUPS_UNIT), "--slice={0}".format(cgroupconfigurator.LOGCOLLECTOR_SLICE), "--scope" - ] + ] + CGroupConfigurator.get_instance().get_logcollector_unit_properties() # The log tool is invoked from the current agent's egg with the command line option collect_logs_cmd = [sys.executable, "-u", sys.argv[0], "-collect-logs"] @@ -208,8 +213,7 @@ def exec_command(): # pylint has limited (i.e. no) awareness of control flow w.r.t. typing. 
we disable=no-member # here because we know e must be a CommandError but pylint still considers the case where # e is a different type of exception. - err_msg = ustr("Log Collector exited with code {0}").format( - e.returncode) # pylint: disable=no-member + err_msg = ustr("Log Collector exited with code {0}").format(e.returncode) # pylint: disable=no-member if e.returncode == logcollector.INVALID_CGROUPS_ERRCODE: # pylint: disable=no-member logger.info("Disabling periodic log collection until service restart due to process error.") @@ -262,8 +266,8 @@ def _send_logs(self): log_event=False) -def get_log_collector_monitor_handler(cgroups): - return LogCollectorMonitorHandler(cgroups) +def get_log_collector_monitor_handler(controllers): + return LogCollectorMonitorHandler(controllers) class LogCollectorMonitorHandler(ThreadHandlerInterface): @@ -277,12 +281,13 @@ class LogCollectorMonitorHandler(ThreadHandlerInterface): def get_thread_name(): return LogCollectorMonitorHandler._THREAD_NAME - def __init__(self, cgroups): + def __init__(self, controllers): self.event_thread = None self.should_run = True self.period = 2 # Log collector monitor runs every 2 secs. 
- self.cgroups = cgroups - self.__log_metrics = conf.get_cgroup_log_metrics() + self.controllers = controllers + self.max_recorded_metrics = {} + self.__should_log_metrics = conf.get_cgroup_log_metrics() def run(self): self.start() @@ -312,7 +317,8 @@ def daemon(self): while not self.stopped(): try: metrics = self._poll_resource_usage() - self._send_telemetry(metrics) + if self.__should_log_metrics: + self._log_metrics(metrics) self._verify_memory_limit(metrics) except Exception as e: logger.error("An error occurred in the log collection monitor thread loop; " @@ -324,30 +330,54 @@ def daemon(self): "An error occurred in the MonitorLogCollectorCgroupsHandler thread; will exit the thread.\n{0}", ustr(e)) + def get_max_recorded_metrics(self): + return self.max_recorded_metrics + def _poll_resource_usage(self): metrics = [] - for cgroup in self.cgroups: - metrics.extend(cgroup.get_tracked_metrics(track_throttled_time=True)) + for controller in self.controllers: + metrics.extend(controller.get_tracked_metrics(track_throttled_time=True)) + + for metric in metrics: + current_max = self.max_recorded_metrics.get(metric.counter) + self.max_recorded_metrics[metric.counter] = metric.value if current_max is None else max(current_max, metric.value) + return metrics - def _send_telemetry(self, metrics): + def _log_metrics(self, metrics): for metric in metrics: - report_metric(metric.category, metric.counter, metric.instance, metric.value, log_event=self.__log_metrics) + logger.info("Metric {0}/{1} [{2}] = {3}".format(metric.category, metric.counter, metric.instance, metric.value)) def _verify_memory_limit(self, metrics): - current_usage = 0 + current_anon_and_swap_usage = 0 + current_cache_usage = 0 + memory_throttled_events = 0 for metric in metrics: - if metric.counter == MetricsCounter.TOTAL_MEM_USAGE: - current_usage += metric.value + if metric.counter == MetricsCounter.ANON_MEM_USAGE: + current_anon_and_swap_usage += metric.value elif metric.counter == 
MetricsCounter.SWAP_MEM_USAGE: - current_usage += metric.value - - if current_usage > LOGCOLLECTOR_MEMORY_LIMIT: - msg = "Log collector memory limit {0} bytes exceeded. The max reported usage is {1} bytes.".format(LOGCOLLECTOR_MEMORY_LIMIT, current_usage) + current_anon_and_swap_usage += metric.value + elif metric.counter == MetricsCounter.CACHE_MEM_USAGE: + current_cache_usage = metric.value + elif metric.counter == MetricsCounter.MEM_THROTTLED: + memory_throttled_events = metric.value + + mem_limit_exceeded = False + if current_anon_and_swap_usage > LOGCOLLECTOR_ANON_MEMORY_LIMIT_FOR_V1_AND_V2: + mem_limit_exceeded = True + msg = "Log collector anon + swap memory limit {0} bytes exceeded. The reported usage is {1} bytes.".format(LOGCOLLECTOR_ANON_MEMORY_LIMIT_FOR_V1_AND_V2, current_anon_and_swap_usage) logger.info(msg) - add_event( - name=AGENT_NAME, - version=CURRENT_VERSION, - op=WALAEventOperation.LogCollection, - message=msg) + add_event(name=AGENT_NAME, version=CURRENT_VERSION, op=WALAEventOperation.LogCollection, message=msg) + if current_cache_usage > LOGCOLLECTOR_CACHE_MEMORY_LIMIT_FOR_V1_AND_V2: + mem_limit_exceeded = True + msg = "Log collector cache memory limit {0} bytes exceeded. The reported usage is {1} bytes.".format(LOGCOLLECTOR_CACHE_MEMORY_LIMIT_FOR_V1_AND_V2, current_cache_usage) + logger.info(msg) + add_event(name=AGENT_NAME, version=CURRENT_VERSION, op=WALAEventOperation.LogCollection, message=msg) + if memory_throttled_events > LOGCOLLECTOR_MAX_THROTTLED_EVENTS_FOR_V2: + mem_limit_exceeded = True + msg = "Log collector memory throttled events limit {0} exceeded. 
The reported number of throttled events is {1}.".format(LOGCOLLECTOR_MAX_THROTTLED_EVENTS_FOR_V2, memory_throttled_events) + logger.info(msg) + add_event(name=AGENT_NAME, version=CURRENT_VERSION, op=WALAEventOperation.LogCollection, message=msg) + + if mem_limit_exceeded: os._exit(GRACEFUL_KILL_ERRCODE) diff --git a/azurelinuxagent/ga/controllermetrics.py b/azurelinuxagent/ga/controllermetrics.py deleted file mode 100644 index 3aaeab3193..0000000000 --- a/azurelinuxagent/ga/controllermetrics.py +++ /dev/null @@ -1,404 +0,0 @@ -# Copyright 2018 Microsoft Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Requires Python 2.6+ and Openssl 1.0+ - -import errno -import os -import re -from datetime import timedelta - -from azurelinuxagent.common import logger, conf -from azurelinuxagent.common.exception import CGroupsException -from azurelinuxagent.common.future import ustr -from azurelinuxagent.common.osutil import get_osutil -from azurelinuxagent.common.utils import fileutil - -_REPORT_EVERY_HOUR = timedelta(hours=1) -_DEFAULT_REPORT_PERIOD = timedelta(seconds=conf.get_cgroup_check_period()) - -AGENT_NAME_TELEMETRY = "walinuxagent.service" # Name used for telemetry; it needs to be consistent even if the name of the service changes -AGENT_LOG_COLLECTOR = "azure-walinuxagent-logcollector" - - -class CounterNotFound(Exception): - pass - - -class MetricValue(object): - - """ - Class for defining all the required metric fields to send telemetry. 
- """ - - def __init__(self, category, counter, instance, value, report_period=_DEFAULT_REPORT_PERIOD): - self._category = category - self._counter = counter - self._instance = instance - self._value = value - self._report_period = report_period - - @property - def category(self): - return self._category - - @property - def counter(self): - return self._counter - - @property - def instance(self): - return self._instance - - @property - def value(self): - return self._value - - @property - def report_period(self): - return self._report_period - - -class MetricsCategory(object): - MEMORY_CATEGORY = "Memory" - CPU_CATEGORY = "CPU" - - -class MetricsCounter(object): - PROCESSOR_PERCENT_TIME = "% Processor Time" - TOTAL_MEM_USAGE = "Total Memory Usage" - MAX_MEM_USAGE = "Max Memory Usage" - THROTTLED_TIME = "Throttled Time" - SWAP_MEM_USAGE = "Swap Memory Usage" - AVAILABLE_MEM = "Available MBytes" - USED_MEM = "Used MBytes" - - -re_user_system_times = re.compile(r'user (\d+)\nsystem (\d+)\n') - - -class ControllerMetrics(object): - def __init__(self, name, cgroup_path): - """ - Initialize _data collection for the Memory controller - :param: name: Name of the CGroup - :param: cgroup_path: Path of the controller - :return: - """ - self.name = name - self.path = cgroup_path - - def __str__(self): - return "{0} [{1}]".format(self.name, self.path) - - def _get_cgroup_file(self, file_name): - return os.path.join(self.path, file_name) - - def _get_file_contents(self, file_name): - """ - Retrieve the contents to file. - - :param str file_name: Name of file within that metric controller - :return: Entire contents of the file - :rtype: str - """ - parameter_file = self._get_cgroup_file(file_name) - - return fileutil.read_file(parameter_file) - - def _get_parameters(self, parameter_name, first_line_only=False): - """ - Retrieve the values of a parameter from a controller. - Returns a list of values in the file. - - :param first_line_only: return only the first line. 
- :param str parameter_name: Name of file within that metric controller - :return: The first line of the file, without line terminator - :rtype: [str] - """ - result = [] - try: - values = self._get_file_contents(parameter_name).splitlines() - result = values[0] if first_line_only else values - except IndexError: - parameter_filename = self._get_cgroup_file(parameter_name) - logger.error("File {0} is empty but should not be".format(parameter_filename)) - raise CGroupsException("File {0} is empty but should not be".format(parameter_filename)) - except Exception as e: - if isinstance(e, (IOError, OSError)) and e.errno == errno.ENOENT: # pylint: disable=E1101 - raise e - parameter_filename = self._get_cgroup_file(parameter_name) - raise CGroupsException("Exception while attempting to read {0}".format(parameter_filename), e) - return result - - def is_active(self): - try: - tasks = self._get_parameters("tasks") - if tasks: - return len(tasks) != 0 - except (IOError, OSError) as e: - if e.errno == errno.ENOENT: - # only suppressing file not found exceptions. - pass - else: - logger.periodic_warn(logger.EVERY_HALF_HOUR, - 'Could not get list of tasks from "tasks" file in the cgroup: {0}.' - ' Internal error: {1}'.format(self.path, ustr(e))) - except CGroupsException as e: - logger.periodic_warn(logger.EVERY_HALF_HOUR, - 'Could not get list of tasks from "tasks" file in the cgroup: {0}.' - ' Internal error: {1}'.format(self.path, ustr(e))) - return False - - def get_tracked_metrics(self, **_): - """ - Retrieves the current value of the metrics tracked for this cgroup and returns them as an array. - - Note: Agent won't track the metrics if the current cpu ticks less than previous value and returns empty array. - """ - raise NotImplementedError() - - def get_unit_properties(self): - """ - Returns a list of the unit properties to collect for the controller. 
- """ - raise NotImplementedError() - - -class CpuMetrics(ControllerMetrics): - def __init__(self, name, cgroup_path): - super(CpuMetrics, self).__init__(name, cgroup_path) - - self._osutil = get_osutil() - self._previous_cgroup_cpu = None - self._previous_system_cpu = None - self._current_cgroup_cpu = None - self._current_system_cpu = None - self._previous_throttled_time = None - self._current_throttled_time = None - - def _get_cpu_ticks(self, allow_no_such_file_or_directory_error=False): - """ - Returns the number of USER_HZ of CPU time (user and system) consumed by this cgroup. - - If allow_no_such_file_or_directory_error is set to True and cpuacct.stat does not exist the function - returns 0; this is useful when the function can be called before the cgroup has been created. - """ - try: - cpuacct_stat = self._get_file_contents('cpuacct.stat') - except Exception as e: - if not isinstance(e, (IOError, OSError)) or e.errno != errno.ENOENT: # pylint: disable=E1101 - raise CGroupsException("Failed to read cpuacct.stat: {0}".format(ustr(e))) - if not allow_no_such_file_or_directory_error: - raise e - cpuacct_stat = None - - cpu_ticks = 0 - - if cpuacct_stat is not None: - # - # Sample file: - # # cat /sys/fs/cgroup/cpuacct/azure.slice/walinuxagent.service/cpuacct.stat - # user 10190 - # system 3160 - # - match = re_user_system_times.match(cpuacct_stat) - if not match: - raise CGroupsException( - "The contents of {0} are invalid: {1}".format(self._get_cgroup_file('cpuacct.stat'), cpuacct_stat)) - cpu_ticks = int(match.groups()[0]) + int(match.groups()[1]) - - return cpu_ticks - - def get_throttled_time(self): - try: - with open(os.path.join(self.path, 'cpu.stat')) as cpu_stat: - # - # Sample file: - # - # # cat /sys/fs/cgroup/cpuacct/azure.slice/walinuxagent.service/cpu.stat - # nr_periods 51660 - # nr_throttled 19461 - # throttled_time 1529590856339 - # - for line in cpu_stat: - match = re.match(r'throttled_time\s+(\d+)', line) - if match is not None: - return 
int(match.groups()[0]) - raise Exception("Cannot find throttled_time") - except (IOError, OSError) as e: - if e.errno == errno.ENOENT: - return 0 - raise CGroupsException("Failed to read cpu.stat: {0}".format(ustr(e))) - except Exception as e: - raise CGroupsException("Failed to read cpu.stat: {0}".format(ustr(e))) - - def _cpu_usage_initialized(self): - return self._current_cgroup_cpu is not None and self._current_system_cpu is not None - - def initialize_cpu_usage(self): - """ - Sets the initial values of CPU usage. This function must be invoked before calling get_cpu_usage(). - """ - if self._cpu_usage_initialized(): - raise CGroupsException("initialize_cpu_usage() should be invoked only once") - self._current_cgroup_cpu = self._get_cpu_ticks(allow_no_such_file_or_directory_error=True) - self._current_system_cpu = self._osutil.get_total_cpu_ticks_since_boot() - self._current_throttled_time = self.get_throttled_time() - - def get_cpu_usage(self): - """ - Computes the CPU used by the cgroup since the last call to this function. - - The usage is measured as a percentage of utilization of 1 core in the system. For example, - using 1 core all of the time on a 4-core system would be reported as 100%. 
- - NOTE: initialize_cpu_usage() must be invoked before calling get_cpu_usage() - """ - if not self._cpu_usage_initialized(): - raise CGroupsException("initialize_cpu_usage() must be invoked before the first call to get_cpu_usage()") - - self._previous_cgroup_cpu = self._current_cgroup_cpu - self._previous_system_cpu = self._current_system_cpu - self._current_cgroup_cpu = self._get_cpu_ticks() - self._current_system_cpu = self._osutil.get_total_cpu_ticks_since_boot() - - cgroup_delta = self._current_cgroup_cpu - self._previous_cgroup_cpu - system_delta = max(1, self._current_system_cpu - self._previous_system_cpu) - - return round(100.0 * self._osutil.get_processor_cores() * float(cgroup_delta) / float(system_delta), 3) - - def get_cpu_throttled_time(self, read_previous_throttled_time=True): - """ - Computes the throttled time (in seconds) since the last call to this function. - NOTE: initialize_cpu_usage() must be invoked before calling this function - Compute only current throttled time if read_previous_throttled_time set to False - """ - if not read_previous_throttled_time: - return float(self.get_throttled_time() / 1E9) - - if not self._cpu_usage_initialized(): - raise CGroupsException( - "initialize_cpu_usage() must be invoked before the first call to get_throttled_time()") - - self._previous_throttled_time = self._current_throttled_time - self._current_throttled_time = self.get_throttled_time() - - return float(self._current_throttled_time - self._previous_throttled_time) / 1E9 - - def get_tracked_metrics(self, **kwargs): - tracked = [] - cpu_usage = self.get_cpu_usage() - if cpu_usage >= float(0): - tracked.append( - MetricValue(MetricsCategory.CPU_CATEGORY, MetricsCounter.PROCESSOR_PERCENT_TIME, self.name, cpu_usage)) - - if 'track_throttled_time' in kwargs and kwargs['track_throttled_time']: - throttled_time = self.get_cpu_throttled_time() - if cpu_usage >= float(0) and throttled_time >= float(0): - tracked.append( - 
MetricValue(MetricsCategory.CPU_CATEGORY, MetricsCounter.THROTTLED_TIME, self.name, throttled_time)) - - return tracked - - def get_unit_properties(self): - return ["CPUAccounting", "CPUQuotaPerSecUSec"] - - -class MemoryMetrics(ControllerMetrics): - def __init__(self, name, cgroup_path): - super(MemoryMetrics, self).__init__(name, cgroup_path) - - self._counter_not_found_error_count = 0 - - def _get_memory_stat_counter(self, counter_name): - try: - with open(os.path.join(self.path, 'memory.stat')) as memory_stat: - # cat /sys/fs/cgroup/memory/azure.slice/memory.stat - # cache 67178496 - # rss 42340352 - # rss_huge 6291456 - # swap 0 - for line in memory_stat: - re_memory_counter = r'{0}\s+(\d+)'.format(counter_name) - match = re.match(re_memory_counter, line) - if match is not None: - return int(match.groups()[0]) - except (IOError, OSError) as e: - if e.errno == errno.ENOENT: - raise - raise CGroupsException("Failed to read memory.stat: {0}".format(ustr(e))) - except Exception as e: - raise CGroupsException("Failed to read memory.stat: {0}".format(ustr(e))) - - raise CounterNotFound("Cannot find counter: {0}".format(counter_name)) - - def get_memory_usage(self): - """ - Collect RSS+CACHE from memory.stat cgroup. - - :return: Memory usage in bytes - :rtype: int - """ - - cache = self._get_memory_stat_counter("cache") - rss = self._get_memory_stat_counter("rss") - return cache + rss - - def try_swap_memory_usage(self): - """ - Collect SWAP from memory.stat cgroup. - - :return: Memory usage in bytes - :rtype: int - Note: stat file is the only place to get the SWAP since other swap related file memory.memsw.usage_in_bytes is for total Memory+SWAP. 
- """ - try: - return self._get_memory_stat_counter("swap") - except CounterNotFound as e: - if self._counter_not_found_error_count < 1: - logger.periodic_info(logger.EVERY_HALF_HOUR, - '{0} from "memory.stat" file in the cgroup: {1}---[Note: This log for informational purpose only and can be ignored]'.format(ustr(e), self.path)) - self._counter_not_found_error_count += 1 - return 0 - - def get_max_memory_usage(self): - """ - Collect memory.max_usage_in_bytes from the cgroup. - - :return: Memory usage in bytes - :rtype: int - """ - usage = 0 - try: - usage = int(self._get_parameters('memory.max_usage_in_bytes', first_line_only=True)) - except Exception as e: - if isinstance(e, (IOError, OSError)) and e.errno == errno.ENOENT: # pylint: disable=E1101 - raise - raise CGroupsException("Exception while attempting to read {0}".format("memory.max_usage_in_bytes"), e) - - return usage - - def get_tracked_metrics(self, **_): - return [ - MetricValue(MetricsCategory.MEMORY_CATEGORY, MetricsCounter.TOTAL_MEM_USAGE, self.name, - self.get_memory_usage()), - MetricValue(MetricsCategory.MEMORY_CATEGORY, MetricsCounter.MAX_MEM_USAGE, self.name, - self.get_max_memory_usage(), _REPORT_EVERY_HOUR), - MetricValue(MetricsCategory.MEMORY_CATEGORY, MetricsCounter.SWAP_MEM_USAGE, self.name, - self.try_swap_memory_usage(), _REPORT_EVERY_HOUR) - ] - - def get_unit_properties(self): - return["MemoryAccounting"] diff --git a/azurelinuxagent/ga/cpucontroller.py b/azurelinuxagent/ga/cpucontroller.py new file mode 100644 index 0000000000..b4f56dd150 --- /dev/null +++ b/azurelinuxagent/ga/cpucontroller.py @@ -0,0 +1,293 @@ +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Requires Python 2.6+ and Openssl 1.0+ + +import errno +import os +import re + +from azurelinuxagent.common.exception import CGroupsException +from azurelinuxagent.common.future import ustr +from azurelinuxagent.common.osutil import get_osutil +from azurelinuxagent.common.utils import fileutil +from azurelinuxagent.ga.cgroupcontroller import _CgroupController, MetricValue, MetricsCategory, MetricsCounter + +re_v1_user_system_times = re.compile(r'user (\d+)\nsystem (\d+)\n') +re_v2_usage_time = re.compile(r'[\s\S]*usage_usec (\d+)[\s\S]*') + + +class _CpuController(_CgroupController): + def __init__(self, name, cgroup_path): + super(_CpuController, self).__init__(name, cgroup_path) + + self._osutil = get_osutil() + self._previous_cgroup_cpu = None + self._previous_system_cpu = None + self._current_cgroup_cpu = None + self._current_system_cpu = None + self._previous_throttled_time = None + self._current_throttled_time = None + + def _get_cpu_stat_counter(self, counter_name): + """ + Gets the value for the provided counter in cpu.stat + """ + try: + with open(os.path.join(self.path, 'cpu.stat')) as cpu_stat: + # + # Sample file v1: + # # cat cpu.stat + # nr_periods 51660 + # nr_throttled 19461 + # throttled_time 1529590856339 + # + # Sample file v2 + # # cat cpu.stat + # usage_usec 200161503 + # user_usec 199388368 + # system_usec 773134 + # core_sched.force_idle_usec 0 + # nr_periods 40059 + # nr_throttled 40022 + # throttled_usec 3565247992 + # nr_bursts 0 + # burst_usec 0 + # + for line in cpu_stat: + match = 
re.match(r'{0}\s+(\d+)'.format(counter_name), line) + if match is not None: + return int(match.groups()[0]) + raise Exception("Cannot find {0}".format(counter_name)) + except (IOError, OSError) as e: + if e.errno == errno.ENOENT: + return 0 + raise CGroupsException("Failed to read cpu.stat: {0}".format(ustr(e))) + except Exception as e: + raise CGroupsException("Failed to read cpu.stat: {0}".format(ustr(e))) + + def _cpu_usage_initialized(self): + """ + Returns True if cpu usage has been initialized, False otherwise. + """ + return self._current_cgroup_cpu is not None and self._current_system_cpu is not None + + def initialize_cpu_usage(self): + """ + Sets the initial values of CPU usage. This function must be invoked before calling get_cpu_usage(). + """ + raise NotImplementedError() + + def get_cpu_usage(self): + """ + Computes the CPU used by the cgroup since the last call to this function. + + The usage is measured as a percentage of utilization of 1 core in the system. For example, + using 1 core all of the time on a 4-core system would be reported as 100%. + + NOTE: initialize_cpu_usage() must be invoked before calling get_cpu_usage() + """ + raise NotImplementedError() + + def get_cpu_throttled_time(self, read_previous_throttled_time=True): + """ + Computes the throttled time (in seconds) since the last call to this function. + NOTE: initialize_cpu_usage() must be invoked before calling this function + Compute only current throttled time if read_previous_throttled_time set to False + """ + raise NotImplementedError() + + def get_tracked_metrics(self, **kwargs): + # Note: If the current cpu usage is less than the previous usage (metric is negative), then an empty array will + # be returned and the agent won't track the metrics. 
+ tracked = [] + cpu_usage = self.get_cpu_usage() + if cpu_usage >= float(0): + tracked.append(MetricValue(MetricsCategory.CPU_CATEGORY, MetricsCounter.PROCESSOR_PERCENT_TIME, self.name, cpu_usage)) + + if 'track_throttled_time' in kwargs and kwargs['track_throttled_time']: + throttled_time = self.get_cpu_throttled_time() + if cpu_usage >= float(0) and throttled_time >= float(0): + tracked.append(MetricValue(MetricsCategory.CPU_CATEGORY, MetricsCounter.THROTTLED_TIME, self.name, throttled_time)) + + return tracked + + def get_unit_properties(self): + return ["CPUAccounting", "CPUQuotaPerSecUSec"] + + +class CpuControllerV1(_CpuController): + def initialize_cpu_usage(self): + if self._cpu_usage_initialized(): + raise CGroupsException("initialize_cpu_usage() should be invoked only once") + self._current_cgroup_cpu = self._get_cpu_ticks(allow_no_such_file_or_directory_error=True) + self._current_system_cpu = self._osutil.get_total_cpu_ticks_since_boot() + self._current_throttled_time = self._get_cpu_stat_counter(counter_name='throttled_time') + + def _get_cpu_ticks(self, allow_no_such_file_or_directory_error=False): + """ + Returns the number of USER_HZ of CPU time (user and system) consumed by this cgroup. + + If allow_no_such_file_or_directory_error is set to True and cpuacct.stat does not exist the function + returns 0; this is useful when the function can be called before the cgroup has been created. 
+ """ + try: + cpuacct_stat = self._get_file_contents('cpuacct.stat') + except Exception as e: + if not isinstance(e, (IOError, OSError)) or e.errno != errno.ENOENT: # pylint: disable=E1101 + raise CGroupsException("Failed to read cpuacct.stat: {0}".format(ustr(e))) + if not allow_no_such_file_or_directory_error: + raise e + cpuacct_stat = None + + cpu_ticks = 0 + + if cpuacct_stat is not None: + # + # Sample file: + # # cat /sys/fs/cgroup/cpuacct/azure.slice/walinuxagent.service/cpuacct.stat + # user 10190 + # system 3160 + # + match = re_v1_user_system_times.match(cpuacct_stat) + if not match: + raise CGroupsException("The contents of {0} are invalid: {1}".format(self._get_cgroup_file('cpuacct.stat'), cpuacct_stat)) + cpu_ticks = int(match.groups()[0]) + int(match.groups()[1]) + + return cpu_ticks + + def get_cpu_usage(self): + if not self._cpu_usage_initialized(): + raise CGroupsException("initialize_cpu_usage() must be invoked before the first call to get_cpu_usage()") + + self._previous_cgroup_cpu = self._current_cgroup_cpu + self._previous_system_cpu = self._current_system_cpu + self._current_cgroup_cpu = self._get_cpu_ticks() + self._current_system_cpu = self._osutil.get_total_cpu_ticks_since_boot() + + cgroup_delta = self._current_cgroup_cpu - self._previous_cgroup_cpu + system_delta = max(1, self._current_system_cpu - self._previous_system_cpu) + + return round(100.0 * self._osutil.get_processor_cores() * float(cgroup_delta) / float(system_delta), 3) + + def get_cpu_throttled_time(self, read_previous_throttled_time=True): + # Throttled time is reported in nanoseconds in v1 + if not read_previous_throttled_time: + return float(self._get_cpu_stat_counter(counter_name='throttled_time') / 1E9) + + if not self._cpu_usage_initialized(): + raise CGroupsException("initialize_cpu_usage() must be invoked before the first call to get_cpu_throttled_time()") + + self._previous_throttled_time = self._current_throttled_time + self._current_throttled_time = 
self._get_cpu_stat_counter(counter_name='throttled_time') + + return round(float(self._current_throttled_time - self._previous_throttled_time) / 1E9, 3) + + +class CpuControllerV2(_CpuController): + @staticmethod + def get_system_uptime(): + """ + Get the uptime of the system (including time spent in suspend) in seconds. + /proc/uptime contains two numbers (values in seconds): the uptime of the system (including time spent in + suspend) and the amount of time spent in the idle process: + # cat /proc/uptime + 365380.48 722644.81 + + :return: System uptime in seconds + :rtype: float + """ + uptime_contents = fileutil.read_file('/proc/uptime').split() + return float(uptime_contents[0]) + + def _get_system_usage(self): + try: + return self.get_system_uptime() + except (OSError, IOError) as e: + raise CGroupsException("Couldn't read /proc/uptime: {0}".format(ustr(e))) + except Exception as e: + raise CGroupsException("Couldn't parse /proc/uptime: {0}".format(ustr(e))) + + def initialize_cpu_usage(self): + if self._cpu_usage_initialized(): + raise CGroupsException("initialize_cpu_usage() should be invoked only once") + self._current_cgroup_cpu = self._get_cpu_time(allow_no_such_file_or_directory_error=True) + self._current_system_cpu = self._get_system_usage() + self._current_throttled_time = self._get_cpu_stat_counter(counter_name='throttled_usec') + + def _get_cpu_time(self, allow_no_such_file_or_directory_error=False): + """ + Returns the CPU time (user and system) consumed by this cgroup in seconds. + + If allow_no_such_file_or_directory_error is set to True and cpu.stat does not exist the function + returns 0; this is useful when the function can be called before the cgroup has been created. 
+ """ + try: + cpu_stat = self._get_file_contents('cpu.stat') + except Exception as e: + if not isinstance(e, (IOError, OSError)) or e.errno != errno.ENOENT: # pylint: disable=E1101 + raise CGroupsException("Failed to read cpu.stat: {0}".format(ustr(e))) + if not allow_no_such_file_or_directory_error: + raise e + cpu_stat = None + + cpu_time = 0 + + if cpu_stat is not None: + # + # Sample file: + # # cat /sys/fs/cgroup/azure.slice/azure-walinuxagent.slice/azure-walinuxagent-logcollector.slice/collect-logs.scope/cpu.stat + # usage_usec 1990707 + # user_usec 1939858 + # system_usec 50848 + # core_sched.force_idle_usec 0 + # nr_periods 397 + # nr_throttled 397 + # throttled_usec 37994949 + # nr_bursts 0 + # burst_usec 0 + # + match = re_v2_usage_time.match(cpu_stat) + if not match: + raise CGroupsException("The contents of {0} are invalid: {1}".format(self._get_cgroup_file('cpu.stat'), cpu_stat)) + cpu_time = int(match.groups()[0]) / 1E6 + + return cpu_time + + def get_cpu_usage(self): + if not self._cpu_usage_initialized(): + raise CGroupsException("initialize_cpu_usage() must be invoked before the first call to get_cpu_usage()") + + self._previous_cgroup_cpu = self._current_cgroup_cpu + self._previous_system_cpu = self._current_system_cpu + self._current_cgroup_cpu = self._get_cpu_time() + self._current_system_cpu = self._get_system_usage() + + cgroup_delta = self._current_cgroup_cpu - self._previous_cgroup_cpu + system_delta = max(1.0, self._current_system_cpu - self._previous_system_cpu) + + return round(100.0 * float(cgroup_delta) / float(system_delta), 3) + + def get_cpu_throttled_time(self, read_previous_throttled_time=True): + # Throttled time is reported in microseconds in v2 + if not read_previous_throttled_time: + return float(self._get_cpu_stat_counter(counter_name='throttled_usec') / 1E6) + + if not self._cpu_usage_initialized(): + raise CGroupsException("initialize_cpu_usage() must be invoked before the first call to get_cpu_throttled_time()") + + 
self._previous_throttled_time = self._current_throttled_time + self._current_throttled_time = self._get_cpu_stat_counter(counter_name='throttled_usec') + + return round(float(self._current_throttled_time - self._previous_throttled_time) / 1E6, 3) diff --git a/azurelinuxagent/ga/extensionprocessutil.py b/azurelinuxagent/ga/extensionprocessutil.py index 9061fd3fff..8eb65d459b 100644 --- a/azurelinuxagent/ga/extensionprocessutil.py +++ b/azurelinuxagent/ga/extensionprocessutil.py @@ -31,7 +31,7 @@ TELEMETRY_MESSAGE_MAX_LEN = 3200 -def wait_for_process_completion_or_timeout(process, timeout, cpu_metrics): +def wait_for_process_completion_or_timeout(process, timeout, cpu_controller): """ Utility function that waits for the process to complete within the given time frame. This function will terminate the process if when the given time frame elapses. @@ -47,7 +47,7 @@ def wait_for_process_completion_or_timeout(process, timeout, cpu_metrics): throttled_time = 0 if timeout == 0: - throttled_time = get_cpu_throttled_time(cpu_metrics) + throttled_time = get_cpu_throttled_time(cpu_controller) os.killpg(os.getpgid(process.pid), signal.SIGKILL) else: # process completed or forked; sleep 1 sec to give the child process (if any) a chance to start @@ -57,7 +57,7 @@ def wait_for_process_completion_or_timeout(process, timeout, cpu_metrics): return timeout == 0, return_code, throttled_time -def handle_process_completion(process, command, timeout, stdout, stderr, error_code, cpu_metrics=None): +def handle_process_completion(process, command, timeout, stdout, stderr, error_code, cpu_controller=None): """ Utility function that waits for process completion and retrieves its output (stdout and stderr) if it completed before the timeout period. Otherwise, the process will get killed and an ExtensionError will be raised. 
@@ -68,15 +68,15 @@ def handle_process_completion(process, command, timeout, stdout, stderr, error_c :param stdout: Must be a file since we seek on it when parsing the subprocess output :param stderr: Must be a file since we seek on it when parsing the subprocess outputs :param error_code: The error code to set if we raise an ExtensionError - :param cpu_metrics: References the cpu metrics for the cgroup + :param cpu_controller: References the cpu controller for the cgroup :return: """ # Wait for process completion or timeout - timed_out, return_code, throttled_time = wait_for_process_completion_or_timeout(process, timeout, cpu_metrics) + timed_out, return_code, throttled_time = wait_for_process_completion_or_timeout(process, timeout, cpu_controller) process_output = read_output(stdout, stderr) if timed_out: - if cpu_metrics is not None: # Report CPUThrottledTime when timeout happens + if cpu_controller is not None: # Report CPUThrottledTime when timeout happens raise ExtensionError("Timeout({0});CPUThrottledTime({1}secs): {2}\n{3}".format(timeout, throttled_time, command, process_output), code=ExtensionErrorCodes.PluginHandlerScriptTimedout) @@ -211,14 +211,14 @@ def to_s(captured_stdout, stdout_offset, captured_stderr, stderr_offset): return to_s(stdout, -1*max_len_each, stderr, -1*max_len_each) -def get_cpu_throttled_time(cpu_metrics): +def get_cpu_throttled_time(cpu_controller): """ return the throttled time for the given cgroup. 
""" throttled_time = 0 - if cpu_metrics is not None: + if cpu_controller is not None: try: - throttled_time = cpu_metrics.get_cpu_throttled_time(read_previous_throttled_time=False) + throttled_time = cpu_controller.get_cpu_throttled_time(read_previous_throttled_time=False) except Exception as e: logger.warn("Failed to get cpu throttled time for the extension: {0}", ustr(e)) diff --git a/azurelinuxagent/ga/logcollector.py b/azurelinuxagent/ga/logcollector.py index eda6106b65..dfd5bfaf1e 100644 --- a/azurelinuxagent/ga/logcollector.py +++ b/azurelinuxagent/ga/logcollector.py @@ -314,21 +314,21 @@ def _get_final_list_for_archive(self, priority_file_queue): if os.path.getsize(file_path) <= _FILE_SIZE_LIMIT: final_files_to_collect.append(file_path) + total_uncompressed_size += file_size _LOGGER.info("Adding file %s, size %s b", file_path, file_size) else: truncated_file_path = self._truncate_large_file(file_path) if truncated_file_path: _LOGGER.info("Adding truncated file %s, size %s b", truncated_file_path, file_size) final_files_to_collect.append(truncated_file_path) - - total_uncompressed_size += file_size + total_uncompressed_size += file_size except IOError as e: if e.errno == 2: # [Errno 2] No such file or directory _LOGGER.warning("File %s does not exist, skipping collection for this file", file_path) _LOGGER.info("Uncompressed archive size is %s b", total_uncompressed_size) - return final_files_to_collect + return final_files_to_collect, total_uncompressed_size def _create_list_of_files_to_collect(self): # The final list of files to be collected by zip is created in three steps: @@ -338,8 +338,8 @@ def _create_list_of_files_to_collect(self): # the size limit. 
parsed_file_paths = self._process_manifest_file() prioritized_file_paths = self._get_priority_files_list(parsed_file_paths) - files_to_collect = self._get_final_list_for_archive(prioritized_file_paths) - return files_to_collect + files_to_collect, total_uncompressed_size = self._get_final_list_for_archive(prioritized_file_paths) + return files_to_collect, total_uncompressed_size def collect_logs_and_get_archive(self): """ @@ -347,6 +347,7 @@ def collect_logs_and_get_archive(self): :return: Returns the path of the collected compressed archive """ files_to_collect = [] + total_uncompressed_size = 0 try: # Clear previous run's output and create base directories if they don't exist already. @@ -356,7 +357,7 @@ def collect_logs_and_get_archive(self): _LOGGER.info("Starting log collection at %s", start_time.strftime("%Y-%m-%dT%H:%M:%SZ")) _LOGGER.info("Using log collection mode %s", "full" if self._is_full_mode else "normal") - files_to_collect = self._create_list_of_files_to_collect() + files_to_collect, total_uncompressed_size = self._create_list_of_files_to_collect() _LOGGER.info("### Creating compressed archive ###") compressed_archive = None @@ -402,7 +403,7 @@ def handle_add_file_to_archive_error(error_count, max_errors, file_to_collect, e if compressed_archive is not None: compressed_archive.close() - return COMPRESSED_ARCHIVE_PATH + return COMPRESSED_ARCHIVE_PATH, total_uncompressed_size except Exception as e: msg = "Failed to collect logs: {0}".format(ustr(e)) _LOGGER.error(msg) diff --git a/azurelinuxagent/ga/memorycontroller.py b/azurelinuxagent/ga/memorycontroller.py new file mode 100644 index 0000000000..30e7540cf3 --- /dev/null +++ b/azurelinuxagent/ga/memorycontroller.py @@ -0,0 +1,220 @@ +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Requires Python 2.6+ and Openssl 1.0+ + +import errno +import os +import re + +from azurelinuxagent.common import logger +from azurelinuxagent.common.exception import CGroupsException +from azurelinuxagent.common.future import ustr +from azurelinuxagent.ga.cgroupcontroller import _CgroupController, CounterNotFound, MetricValue, MetricsCategory, \ + MetricsCounter, _REPORT_EVERY_HOUR + + +class _MemoryController(_CgroupController): + def __init__(self, name, cgroup_path): + super(_MemoryController, self).__init__(name, cgroup_path) + self._counter_not_found_error_count = 0 + + def _get_memory_stat_counter(self, counter_name): + """ + Gets the value for the provided counter in memory.stat + """ + try: + with open(os.path.join(self.path, 'memory.stat')) as memory_stat: + # + # Sample file v1: + # # cat memory.stat + # cache 0 + # rss 0 + # rss_huge 0 + # shmem 0 + # mapped_file 0 + # dirty 0 + # writeback 0 + # swap 0 + # ... + # + # Sample file v2 + # # cat memory.stat + # anon 0 + # file 147140608 + # kernel 1421312 + # kernel_stack 0 + # pagetables 0 + # sec_pagetables 0 + # percpu 130752 + # sock 0 + # ... 
+ # + for line in memory_stat: + re_memory_counter = r'{0}\s+(\d+)'.format(counter_name) + match = re.match(re_memory_counter, line) + if match is not None: + return int(match.groups()[0]) + except (IOError, OSError) as e: + if e.errno == errno.ENOENT: + raise + raise CGroupsException("Failed to read memory.stat: {0}".format(ustr(e))) + except Exception as e: + raise CGroupsException("Failed to read memory.stat: {0}".format(ustr(e))) + + raise CounterNotFound("Cannot find counter: {0}".format(counter_name)) + + def get_memory_usage(self): + """ + Collects anon and cache usage for the cgroup and returns as a tuple + Returns anon and cache memory usage for the cgroup as a tuple -> (anon, cache) + + :return: Anon and cache memory usage in bytes + :rtype: tuple[int, int] + """ + raise NotImplementedError() + + def try_swap_memory_usage(self): + """ + Collects swap usage for the cgroup + + :return: Memory usage in bytes + :rtype: int + """ + raise NotImplementedError() + + def get_max_memory_usage(self): + """ + Collect max memory usage for the cgroup. + + :return: Memory usage in bytes + :rtype: int + """ + raise NotImplementedError() + + def get_tracked_metrics(self, **_): + # The log collector monitor tracks anon and cache memory separately. 
+ anon_mem_usage, cache_mem_usage = self.get_memory_usage() + total_mem_usage = anon_mem_usage + cache_mem_usage + return [ + MetricValue(MetricsCategory.MEMORY_CATEGORY, MetricsCounter.TOTAL_MEM_USAGE, self.name, total_mem_usage), + MetricValue(MetricsCategory.MEMORY_CATEGORY, MetricsCounter.ANON_MEM_USAGE, self.name, anon_mem_usage), + MetricValue(MetricsCategory.MEMORY_CATEGORY, MetricsCounter.CACHE_MEM_USAGE, self.name, cache_mem_usage), + MetricValue(MetricsCategory.MEMORY_CATEGORY, MetricsCounter.MAX_MEM_USAGE, self.name, + self.get_max_memory_usage(), _REPORT_EVERY_HOUR), + MetricValue(MetricsCategory.MEMORY_CATEGORY, MetricsCounter.SWAP_MEM_USAGE, self.name, + self.try_swap_memory_usage(), _REPORT_EVERY_HOUR) + ] + + def get_unit_properties(self): + return["MemoryAccounting"] + + +class MemoryControllerV1(_MemoryController): + def get_memory_usage(self): + # In v1, anon memory is reported in the 'rss' counter + return self._get_memory_stat_counter("rss"), self._get_memory_stat_counter("cache") + + def try_swap_memory_usage(self): + # In v1, swap memory should be collected from memory.stat, because memory.memsw.usage_in_bytes reports total Memory+SWAP. 
+ try: + return self._get_memory_stat_counter("swap") + except CounterNotFound as e: + if self._counter_not_found_error_count < 1: + logger.periodic_info(logger.EVERY_HALF_HOUR, + '{0} from "memory.stat" file in the cgroup: {1}---[Note: This log for informational purpose only and can be ignored]'.format(ustr(e), self.path)) + self._counter_not_found_error_count += 1 + return 0 + + def get_max_memory_usage(self): + # In v1, max memory usage is reported in memory.max_usage_in_bytes + usage = 0 + try: + usage = int(self._get_parameters('memory.max_usage_in_bytes', first_line_only=True)) + except Exception as e: + if isinstance(e, (IOError, OSError)) and e.errno == errno.ENOENT: # pylint: disable=E1101 + raise + raise CGroupsException("Exception while attempting to read {0}".format("memory.max_usage_in_bytes"), e) + + return usage + + +class MemoryControllerV2(_MemoryController): + def get_memory_usage(self): + # In v2, cache memory is reported in the 'file' counter + return self._get_memory_stat_counter("anon"), self._get_memory_stat_counter("file") + + def get_memory_throttled_events(self): + """ + Returns the number of times processes of the cgroup are throttled and routed to perform memory reclaim because + the high memory boundary was exceeded. 
+ + :return: Number of memory throttling events for the cgroup + :rtype: int + """ + try: + with open(os.path.join(self.path, 'memory.events')) as memory_events: + # + # Sample file: + # # cat memory.events + # low 0 + # high 0 + # max 0 + # oom 0 + # oom_kill 0 + # oom_group_kill 0 + # + for line in memory_events: + match = re.match(r'high\s+(\d+)', line) + if match is not None: + return int(match.groups()[0]) + except (IOError, OSError) as e: + if e.errno == errno.ENOENT: + raise + raise CGroupsException("Failed to read memory.events: {0}".format(ustr(e))) + except Exception as e: + raise CGroupsException("Failed to read memory.events: {0}".format(ustr(e))) + + raise CounterNotFound("Cannot find memory.events counter: high") + + def try_swap_memory_usage(self): + # In v2, swap memory is reported in memory.swap.current + usage = 0 + try: + usage = int(self._get_parameters('memory.swap.current', first_line_only=True)) + except Exception as e: + if isinstance(e, (IOError, OSError)) and e.errno == errno.ENOENT: # pylint: disable=E1101 + raise + raise CGroupsException("Exception while attempting to read {0}".format("memory.swap.current"), e) + + return usage + + def get_max_memory_usage(self): + # In v2, max memory usage is reported in memory.peak + usage = 0 + try: + usage = int(self._get_parameters('memory.peak', first_line_only=True)) + except Exception as e: + if isinstance(e, (IOError, OSError)) and e.errno == errno.ENOENT: # pylint: disable=E1101 + raise + raise CGroupsException("Exception while attempting to read {0}".format("memory.peak"), e) + + return usage + + def get_tracked_metrics(self, **_): + metrics = super(MemoryControllerV2, self).get_tracked_metrics() + throttled_value = MetricValue(MetricsCategory.MEMORY_CATEGORY, MetricsCounter.MEM_THROTTLED, self.name, + self.get_memory_throttled_events()) + metrics.append(throttled_value) + return metrics diff --git a/azurelinuxagent/ga/monitor.py b/azurelinuxagent/ga/monitor.py index bdf2603fa8..c1340ed69a 
100644 --- a/azurelinuxagent/ga/monitor.py +++ b/azurelinuxagent/ga/monitor.py @@ -22,7 +22,7 @@ import azurelinuxagent.common.conf as conf import azurelinuxagent.common.logger as logger import azurelinuxagent.common.utils.networkutil as networkutil -from azurelinuxagent.ga.controllermetrics import MetricValue, MetricsCategory, MetricsCounter +from azurelinuxagent.ga.cgroupcontroller import MetricValue, MetricsCategory, MetricsCounter from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.common.errorstate import ErrorState diff --git a/tests/common/utils/test_extension_process_util.py b/tests/common/utils/test_extension_process_util.py index 7771de4fd2..8058292b95 100644 --- a/tests/common/utils/test_extension_process_util.py +++ b/tests/common/utils/test_extension_process_util.py @@ -19,9 +19,9 @@ import subprocess import tempfile -from azurelinuxagent.ga.controllermetrics import CpuMetrics from azurelinuxagent.common.exception import ExtensionError, ExtensionErrorCodes from azurelinuxagent.common.future import ustr +from azurelinuxagent.ga.cpucontroller import CpuControllerV1 from azurelinuxagent.ga.extensionprocessutil import format_stdout_stderr, read_output, \ wait_for_process_completion_or_timeout, handle_process_completion from tests.lib.tools import AgentTestCase, patch, data_dir @@ -52,7 +52,7 @@ def test_wait_for_process_completion_or_timeout_should_terminate_cleanly(self): stdout=subprocess.PIPE, stderr=subprocess.PIPE) - timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=5, cpu_metrics=None) + timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=5, cpu_controller=None) self.assertEqual(timed_out, False) self.assertEqual(ret, 0) @@ -71,7 +71,7 @@ def test_wait_for_process_completion_or_timeout_should_kill_process_on_timeout(s with patch('azurelinuxagent.ga.extensionprocessutil.os.killpg', 
wraps=os.killpg) as patch_kill: with patch('time.sleep') as mock_sleep: timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=timeout, - cpu_metrics=None) + cpu_controller=None) # We're mocking sleep to avoid prolonging the test execution time, but we still want to make sure # we're "waiting" the correct amount of time before killing the process @@ -90,7 +90,7 @@ def test_handle_process_completion_should_return_nonzero_when_process_fails(self stdout=subprocess.PIPE, stderr=subprocess.PIPE) - timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=5, cpu_metrics=None) + timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=5, cpu_controller=None) self.assertEqual(timed_out, False) self.assertEqual(ret, 2) @@ -149,9 +149,9 @@ def test_handle_process_completion_should_log_throttled_time_on_timeout(self): with patch('time.sleep') as mock_sleep: with self.assertRaises(ExtensionError) as context_manager: test_file = os.path.join(self.tmp_dir, "cpu.stat") - shutil.copyfile(os.path.join(data_dir, "cgroups", "cpu.stat_t0"), + shutil.copyfile(os.path.join(data_dir, "cgroups", "v1", "cpu.stat_t0"), test_file) # throttled_time = 50 - cgroup = CpuMetrics("test", self.tmp_dir) + cpu_controller = CpuControllerV1("test", self.tmp_dir) process = subprocess.Popen(command, # pylint: disable=subprocess-popen-preexec-fn shell=True, cwd=self.tmp_dir, @@ -161,7 +161,7 @@ def test_handle_process_completion_should_log_throttled_time_on_timeout(self): preexec_fn=os.setsid) handle_process_completion(process=process, command=command, timeout=timeout, stdout=stdout, - stderr=stderr, error_code=42, cpu_metrics=cgroup) + stderr=stderr, error_code=42, cpu_controller=cpu_controller) # We're mocking sleep to avoid prolonging the test execution time, but we still want to make sure # we're "waiting" the correct amount of time before killing the process and raising an exception diff --git 
a/tests/data/cgroups/cpu_mount/cpuacct.stat b/tests/data/cgroups/cpu_mount/cpuacct.stat deleted file mode 100644 index dbdaec701d..0000000000 --- a/tests/data/cgroups/cpu_mount/cpuacct.stat +++ /dev/null @@ -1,2 +0,0 @@ -user 50000 -system 100000 diff --git a/tests/data/cgroups/cpu.stat b/tests/data/cgroups/v1/cpu.stat similarity index 100% rename from tests/data/cgroups/cpu.stat rename to tests/data/cgroups/v1/cpu.stat diff --git a/tests/data/cgroups/cpu.stat_t0 b/tests/data/cgroups/v1/cpu.stat_t0 similarity index 100% rename from tests/data/cgroups/cpu.stat_t0 rename to tests/data/cgroups/v1/cpu.stat_t0 diff --git a/tests/data/cgroups/cpu.stat_t1 b/tests/data/cgroups/v1/cpu.stat_t1 similarity index 100% rename from tests/data/cgroups/cpu.stat_t1 rename to tests/data/cgroups/v1/cpu.stat_t1 diff --git a/tests/data/cgroups/cpuacct.stat b/tests/data/cgroups/v1/cpuacct.stat similarity index 100% rename from tests/data/cgroups/cpuacct.stat rename to tests/data/cgroups/v1/cpuacct.stat diff --git a/tests/data/cgroups/cpuacct.stat_t0 b/tests/data/cgroups/v1/cpuacct.stat_t0 similarity index 100% rename from tests/data/cgroups/cpuacct.stat_t0 rename to tests/data/cgroups/v1/cpuacct.stat_t0 diff --git a/tests/data/cgroups/cpuacct.stat_t1 b/tests/data/cgroups/v1/cpuacct.stat_t1 similarity index 100% rename from tests/data/cgroups/cpuacct.stat_t1 rename to tests/data/cgroups/v1/cpuacct.stat_t1 diff --git a/tests/data/cgroups/cpuacct.stat_t2 b/tests/data/cgroups/v1/cpuacct.stat_t2 similarity index 100% rename from tests/data/cgroups/cpuacct.stat_t2 rename to tests/data/cgroups/v1/cpuacct.stat_t2 diff --git a/tests/data/cgroups/memory_mount/memory.max_usage_in_bytes b/tests/data/cgroups/v1/memory.max_usage_in_bytes similarity index 100% rename from tests/data/cgroups/memory_mount/memory.max_usage_in_bytes rename to tests/data/cgroups/v1/memory.max_usage_in_bytes diff --git a/tests/data/cgroups/memory_mount/memory.stat b/tests/data/cgroups/v1/memory.stat similarity index 100% 
rename from tests/data/cgroups/memory_mount/memory.stat rename to tests/data/cgroups/v1/memory.stat diff --git a/tests/data/cgroups/missing_memory_counters/memory.stat b/tests/data/cgroups/v1/memory.stat_missing similarity index 100% rename from tests/data/cgroups/missing_memory_counters/memory.stat rename to tests/data/cgroups/v1/memory.stat_missing diff --git a/tests/data/cgroups/proc_stat_t0 b/tests/data/cgroups/v1/proc_stat_t0 similarity index 100% rename from tests/data/cgroups/proc_stat_t0 rename to tests/data/cgroups/v1/proc_stat_t0 diff --git a/tests/data/cgroups/proc_stat_t1 b/tests/data/cgroups/v1/proc_stat_t1 similarity index 100% rename from tests/data/cgroups/proc_stat_t1 rename to tests/data/cgroups/v1/proc_stat_t1 diff --git a/tests/data/cgroups/proc_stat_t2 b/tests/data/cgroups/v1/proc_stat_t2 similarity index 100% rename from tests/data/cgroups/proc_stat_t2 rename to tests/data/cgroups/v1/proc_stat_t2 diff --git a/tests/data/cgroups/v2/cpu.stat b/tests/data/cgroups/v2/cpu.stat new file mode 100644 index 0000000000..6fcb7b86ff --- /dev/null +++ b/tests/data/cgroups/v2/cpu.stat @@ -0,0 +1,9 @@ +usage_usec 817045397 +user_usec 742283732 +system_usec 74761665 +core_sched.force_idle_usec 0 +nr_periods 165261 +nr_throttled 162912 +throttled_usec 15735198706 +nr_bursts 0 +burst_usec 0 diff --git a/tests/data/cgroups/v2/cpu.stat_t0 b/tests/data/cgroups/v2/cpu.stat_t0 new file mode 100644 index 0000000000..6fcb7b86ff --- /dev/null +++ b/tests/data/cgroups/v2/cpu.stat_t0 @@ -0,0 +1,9 @@ +usage_usec 817045397 +user_usec 742283732 +system_usec 74761665 +core_sched.force_idle_usec 0 +nr_periods 165261 +nr_throttled 162912 +throttled_usec 15735198706 +nr_bursts 0 +burst_usec 0 diff --git a/tests/data/cgroups/v2/cpu.stat_t1 b/tests/data/cgroups/v2/cpu.stat_t1 new file mode 100644 index 0000000000..a2eaecf6e1 --- /dev/null +++ b/tests/data/cgroups/v2/cpu.stat_t1 @@ -0,0 +1,9 @@ +usage_usec 819624087 +user_usec 744545316 +system_usec 75078770 
+core_sched.force_idle_usec 0 +nr_periods 165783 +nr_throttled 163430 +throttled_usec 15796563650 +nr_bursts 0 +burst_usec 0 diff --git a/tests/data/cgroups/v2/cpu.stat_t2 b/tests/data/cgroups/v2/cpu.stat_t2 new file mode 100644 index 0000000000..cca6a6e425 --- /dev/null +++ b/tests/data/cgroups/v2/cpu.stat_t2 @@ -0,0 +1,9 @@ +usage_usec 822052295 +user_usec 746640066 +system_usec 75412229 +core_sched.force_idle_usec 0 +nr_periods 166274 +nr_throttled 163917 +throttled_usec 15853013984 +nr_bursts 0 +burst_usec 0 diff --git a/tests/data/cgroups/v2/memory.events b/tests/data/cgroups/v2/memory.events new file mode 100644 index 0000000000..ee154297aa --- /dev/null +++ b/tests/data/cgroups/v2/memory.events @@ -0,0 +1,6 @@ +low 0 +high 9 +max 0 +oom 0 +oom_kill 0 +oom_group_kill 0 diff --git a/tests/data/cgroups/v2/memory.events_missing b/tests/data/cgroups/v2/memory.events_missing new file mode 100644 index 0000000000..5a5d05a345 --- /dev/null +++ b/tests/data/cgroups/v2/memory.events_missing @@ -0,0 +1,5 @@ +low 0 +max 0 +oom 0 +oom_kill 0 +oom_group_kill 0 diff --git a/tests/data/cgroups/v2/memory.peak b/tests/data/cgroups/v2/memory.peak new file mode 100644 index 0000000000..25140d458b --- /dev/null +++ b/tests/data/cgroups/v2/memory.peak @@ -0,0 +1 @@ +194494464 diff --git a/tests/data/cgroups/v2/memory.stat b/tests/data/cgroups/v2/memory.stat new file mode 100644 index 0000000000..0b0d4c52d4 --- /dev/null +++ b/tests/data/cgroups/v2/memory.stat @@ -0,0 +1,53 @@ +anon 17589300 +file 134553600 +kernel 25653248 +kernel_stack 0 +pagetables 0 +sec_pagetables 0 +percpu 726400 +sock 0 +vmalloc 0 +shmem 0 +zswap 0 +zswapped 0 +file_mapped 0 +file_dirty 12288 +file_writeback 0 +swapcached 0 +anon_thp 0 +file_thp 0 +shmem_thp 0 +inactive_anon 0 +active_anon 0 +inactive_file 127213568 +active_file 7340032 +unevictable 0 +slab_reclaimable 24061424 +slab_unreclaimable 0 +slab 24061424 +workingset_refault_anon 0 +workingset_refault_file 0 +workingset_activate_anon 0 
+workingset_activate_file 0 +workingset_restore_anon 0 +workingset_restore_file 0 +workingset_nodereclaim 128 +pgscan 56624 +pgsteal 56622 +pgscan_kswapd 56624 +pgscan_direct 0 +pgscan_khugepaged 0 +pgsteal_kswapd 56622 +pgsteal_direct 0 +pgsteal_khugepaged 0 +pgfault 3673191 +pgmajfault 1 +pgrefill 124195 +pgactivate 2 +pgdeactivate 0 +pglazyfree 0 +pglazyfreed 0 +zswpin 0 +zswpout 0 +thp_fault_alloc 255 +thp_collapse_alloc 111 diff --git a/tests/data/cgroups/v2/memory.stat_missing b/tests/data/cgroups/v2/memory.stat_missing new file mode 100644 index 0000000000..96d43db68e --- /dev/null +++ b/tests/data/cgroups/v2/memory.stat_missing @@ -0,0 +1,51 @@ +kernel 25653248 +kernel_stack 0 +pagetables 0 +sec_pagetables 0 +percpu 726400 +sock 0 +vmalloc 0 +shmem 0 +zswap 0 +zswapped 0 +file_mapped 0 +file_dirty 12288 +file_writeback 0 +swapcached 0 +anon_thp 0 +file_thp 0 +shmem_thp 0 +inactive_anon 0 +active_anon 0 +inactive_file 127213568 +active_file 7340032 +unevictable 0 +slab_reclaimable 24061424 +slab_unreclaimable 0 +slab 24061424 +workingset_refault_anon 0 +workingset_refault_file 0 +workingset_activate_anon 0 +workingset_activate_file 0 +workingset_restore_anon 0 +workingset_restore_file 0 +workingset_nodereclaim 128 +pgscan 56624 +pgsteal 56622 +pgscan_kswapd 56624 +pgscan_direct 0 +pgscan_khugepaged 0 +pgsteal_kswapd 56622 +pgsteal_direct 0 +pgsteal_khugepaged 0 +pgfault 3673191 +pgmajfault 1 +pgrefill 124195 +pgactivate 2 +pgdeactivate 0 +pglazyfree 0 +pglazyfreed 0 +zswpin 0 +zswpout 0 +thp_fault_alloc 255 +thp_collapse_alloc 111 diff --git a/tests/data/cgroups/v2/memory.swap.current b/tests/data/cgroups/v2/memory.swap.current new file mode 100644 index 0000000000..b92677edb9 --- /dev/null +++ b/tests/data/cgroups/v2/memory.swap.current @@ -0,0 +1 @@ +20000 diff --git a/tests/data/cgroups/v2/proc_uptime_t0 b/tests/data/cgroups/v2/proc_uptime_t0 new file mode 100644 index 0000000000..d035316d9c --- /dev/null +++ b/tests/data/cgroups/v2/proc_uptime_t0 @@ -0,0 
+1 @@ +776968.02 1495073.30 diff --git a/tests/data/cgroups/v2/proc_uptime_t1 b/tests/data/cgroups/v2/proc_uptime_t1 new file mode 100644 index 0000000000..f0660cf121 --- /dev/null +++ b/tests/data/cgroups/v2/proc_uptime_t1 @@ -0,0 +1 @@ +777350.57 1495797.44 diff --git a/tests/data/cgroups/v2/proc_uptime_t2 b/tests/data/cgroups/v2/proc_uptime_t2 new file mode 100644 index 0000000000..ae3e36aad1 --- /dev/null +++ b/tests/data/cgroups/v2/proc_uptime_t2 @@ -0,0 +1 @@ +779218.68 1499425.34 diff --git a/tests/ga/test_cgroupapi.py b/tests/ga/test_cgroupapi.py index 717adbb6f0..ae091ed9de 100644 --- a/tests/ga/test_cgroupapi.py +++ b/tests/ga/test_cgroupapi.py @@ -28,7 +28,8 @@ from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.common.osutil import systemd from azurelinuxagent.common.utils import fileutil -from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics +from azurelinuxagent.ga.cpucontroller import CpuControllerV1, CpuControllerV2 +from azurelinuxagent.ga.memorycontroller import MemoryControllerV1, MemoryControllerV2 from tests.lib.mock_cgroup_environment import mock_cgroup_v1_environment, mock_cgroup_v2_environment, \ mock_cgroup_hybrid_environment from tests.lib.mock_environment import MockCommand @@ -505,7 +506,7 @@ class CgroupsApiv1TestCase(AgentTestCase): def test_get_supported_controllers_returns_v1_controllers(self): with mock_cgroup_v1_environment(self.tmp_dir): cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") - controllers = cgroup.get_supported_controllers() + controllers = cgroup.get_supported_controller_names() self.assertEqual(len(controllers), 2) self.assertIn('cpu,cpuacct', controllers) self.assertIn('memory', controllers) @@ -536,55 +537,55 @@ def test_check_in_expected_slice_returns_False_if_any_paths_not_in_expected_slic cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") 
self.assertFalse(cgroup.check_in_expected_slice(expected_slice='system.slice')) - def test_get_controller_metrics_returns_all_supported_controllers_v1(self): + def test_get_controllers_returns_all_supported_controllers_v1(self): with mock_cgroup_v1_environment(self.tmp_dir): cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") - metrics = cgroup.get_controller_metrics() - self.assertEqual(len(metrics), 2) - self.assertIsInstance(metrics[0], CpuMetrics) - self.assertEqual(metrics[0].name, "walinuxagent") - self.assertEqual(metrics[0].path, "/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service") - self.assertIsInstance(metrics[1], MemoryMetrics) - self.assertEqual(metrics[1].name, "walinuxagent") - self.assertEqual(metrics[1].path, "/sys/fs/cgroup/memory/system.slice/walinuxagent.service") - - def test_get_controller_metrics_returns_only_mounted_controllers_v1(self): + controllers = cgroup.get_controllers() + self.assertEqual(len(controllers), 2) + self.assertIsInstance(controllers[0], CpuControllerV1) + self.assertEqual(controllers[0].name, "walinuxagent") + self.assertEqual(controllers[0].path, "/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service") + self.assertIsInstance(controllers[1], MemoryControllerV1) + self.assertEqual(controllers[1].name, "walinuxagent") + self.assertEqual(controllers[1].path, "/sys/fs/cgroup/memory/system.slice/walinuxagent.service") + + def test_get_controllers_returns_only_mounted_controllers_v1(self): with mock_cgroup_v1_environment(self.tmp_dir): with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct'}): cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") - metrics = cgroup.get_controller_metrics() - self.assertEqual(len(metrics), 1) - self.assertIsInstance(metrics[0], CpuMetrics) - self.assertEqual(metrics[0].name, "walinuxagent") - 
self.assertEqual(metrics[0].path, "/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service") + controllers = cgroup.get_controllers() + self.assertEqual(len(controllers), 1) + self.assertIsInstance(controllers[0], CpuControllerV1) + self.assertEqual(controllers[0].name, "walinuxagent") + self.assertEqual(controllers[0].path, "/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service") with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'memory': '/sys/fs/cgroup/memory'}): cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") - metrics = cgroup.get_controller_metrics() - self.assertEqual(len(metrics), 1) - self.assertIsInstance(metrics[0], MemoryMetrics) - self.assertEqual(metrics[0].name, "walinuxagent") - self.assertEqual(metrics[0].path, "/sys/fs/cgroup/memory/system.slice/walinuxagent.service") + controllers = cgroup.get_controllers() + self.assertEqual(len(controllers), 1) + self.assertIsInstance(controllers[0], MemoryControllerV1) + self.assertEqual(controllers[0].name, "walinuxagent") + self.assertEqual(controllers[0].path, "/sys/fs/cgroup/memory/system.slice/walinuxagent.service") with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={}): cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") - metrics = cgroup.get_controller_metrics() - self.assertEqual(len(metrics), 0) + controllers = cgroup.get_controllers() + self.assertEqual(len(controllers), 0) - def test_get_controller_metrics_returns_only_controllers_at_expected_path_v1(self): + def test_get_controllers_returns_only_controllers_at_expected_path_v1(self): with mock_cgroup_v1_environment(self.tmp_dir): with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_process_relative_controller_paths', return_value={'cpu,cpuacct': 'system.slice/walinuxagent.service', 'memory': 'unexpected/path'}): cgroup = 
get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") - metrics = cgroup.get_controller_metrics(expected_relative_path="system.slice/walinuxagent.service") - self.assertEqual(len(metrics), 1) - self.assertIsInstance(metrics[0], CpuMetrics) - self.assertEqual(metrics[0].name, "walinuxagent") - self.assertEqual(metrics[0].path, "/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service") + controllers = cgroup.get_controllers(expected_relative_path="system.slice/walinuxagent.service") + self.assertEqual(len(controllers), 1) + self.assertIsInstance(controllers[0], CpuControllerV1) + self.assertEqual(controllers[0].name, "walinuxagent") + self.assertEqual(controllers[0].path, "/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service") with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_process_relative_controller_paths', return_value={'cpu,cpuacct': 'unexpected/path', 'memory': 'unexpected/path'}): cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") - metrics = cgroup.get_controller_metrics(expected_relative_path="system.slice/walinuxagent.service") - self.assertEqual(len(metrics), 0) + controllers = cgroup.get_controllers(expected_relative_path="system.slice/walinuxagent.service") + self.assertEqual(len(controllers), 0) def test_get_procs_path_returns_correct_path_v1(self): with mock_cgroup_v1_environment(self.tmp_dir): @@ -625,7 +626,7 @@ class CgroupsApiv2TestCase(AgentTestCase): def test_get_supported_controllers_returns_v2_controllers(self): with mock_cgroup_v2_environment(self.tmp_dir): cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") - controllers = cgroup.get_supported_controllers() + controllers = cgroup.get_supported_controller_names() self.assertEqual(len(controllers), 2) self.assertIn('cpu', controllers) self.assertIn('memory', controllers) @@ -644,6 +645,57 @@ def 
test_check_in_expected_slice_returns_False_if_cgroup_path_not_in_expected_sl cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") self.assertFalse(cgroup.check_in_expected_slice(expected_slice='system.slice')) + def test_get_controllers_returns_all_supported_controllers_v2(self): + with mock_cgroup_v2_environment(self.tmp_dir): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + controllers = cgroup.get_controllers() + self.assertEqual(len(controllers), 2) + self.assertIsInstance(controllers[0], CpuControllerV2) + self.assertEqual(controllers[0].name, "walinuxagent") + self.assertEqual(controllers[0].path, "/sys/fs/cgroup/system.slice/walinuxagent.service") + self.assertIsInstance(controllers[1], MemoryControllerV2) + self.assertEqual(controllers[1].name, "walinuxagent") + self.assertEqual(controllers[1].path, "/sys/fs/cgroup/system.slice/walinuxagent.service") + + def test_get_controllers_returns_only_enabled_controllers_v2(self): + with mock_cgroup_v2_environment(self.tmp_dir): + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_controllers_enabled_at_root', return_value=["cpu"]): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + controllers = cgroup.get_controllers() + self.assertEqual(len(controllers), 1) + self.assertIsInstance(controllers[0], CpuControllerV2) + self.assertEqual(controllers[0].name, "walinuxagent") + self.assertEqual(controllers[0].path, "/sys/fs/cgroup/system.slice/walinuxagent.service") + + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_controllers_enabled_at_root', return_value=["memory"]): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + controllers = cgroup.get_controllers() + self.assertEqual(len(controllers), 1) + self.assertIsInstance(controllers[0], MemoryControllerV2) + self.assertEqual(controllers[0].name, "walinuxagent") + 
self.assertEqual(controllers[0].path, "/sys/fs/cgroup/system.slice/walinuxagent.service") + + with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_controllers_enabled_at_root', return_value=[]): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + controllers = cgroup.get_controllers() + self.assertEqual(len(controllers), 0) + + def test_get_controllers_returns_empty_if_cgroup_path_is_empty_v2(self): + with mock_cgroup_v2_environment(self.tmp_dir): + mock_cgroup_empty_path = CgroupV2(cgroup_name="test", root_cgroup_path="/sys/fs/cgroup", cgroup_path="", enabled_controllers=["cpu", "memory"]) + with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.get_process_cgroup", return_value=mock_cgroup_empty_path): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + controllers = cgroup.get_controllers() + self.assertEqual(len(controllers), 0) + + def test_get_controllers_returns_only_controllers_at_expected_path_v2(self): + with mock_cgroup_v2_environment(self.tmp_dir): + mock_cgroup_unexpected_path = CgroupV2(cgroup_name="test", root_cgroup_path="/sys/fs/cgroup", cgroup_path="/sys/fs/cgroup/unexpected/path", enabled_controllers=["cpu", "memory"]) + with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.get_process_cgroup", return_value=mock_cgroup_unexpected_path): + cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent") + controllers = cgroup.get_controllers(expected_relative_path="system.slice/walinuxagent.service") + self.assertEqual(len(controllers), 0) + def test_get_procs_path_returns_empty_if_root_cgroup_empty_v2(self): with mock_cgroup_v2_environment(self.tmp_dir): with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path', return_value=""): diff --git a/tests/ga/test_cgroupconfigurator.py b/tests/ga/test_cgroupconfigurator.py index 1d1465a47d..9af0d88d7e 100644 --- 
a/tests/ga/test_cgroupconfigurator.py +++ b/tests/ga/test_cgroupconfigurator.py @@ -27,13 +27,14 @@ import threading from azurelinuxagent.common import conf -from azurelinuxagent.ga.controllermetrics import AGENT_NAME_TELEMETRY, MetricsCounter, MetricValue, MetricsCategory, CpuMetrics +from azurelinuxagent.ga.cgroupcontroller import AGENT_NAME_TELEMETRY, MetricsCounter, MetricValue, MetricsCategory from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator, DisableCgroups from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.common.event import WALAEventOperation from azurelinuxagent.common.exception import CGroupsException, AgentMemoryExceededException from azurelinuxagent.common.future import ustr from azurelinuxagent.common.utils import shellutil, fileutil +from azurelinuxagent.ga.cpucontroller import CpuControllerV1 from tests.lib.mock_environment import MockCommand from tests.lib.mock_cgroup_environment import mock_cgroup_v1_environment, UnitFilePaths, mock_cgroup_v2_environment from tests.lib.tools import AgentTestCase, patch, mock_sleep, data_dir, is_python_version_26_or_34, skip_if_predicate_true @@ -272,7 +273,7 @@ def test_remove_extension_slice_should_remove_unit_files(self): CGroupsTelemetry._tracked['/sys/fs/cgroup/cpu,cpuacct/azure.slice/azure-vmextensions.slice/' \ 'azure-vmextensions-Microsoft.CPlat.Extension.slice'] = \ - CpuMetrics('Microsoft.CPlat.Extension', + CpuControllerV1('Microsoft.CPlat.Extension', '/sys/fs/cgroup/cpu,cpuacct/azure.slice/azure-vmextensions.slice/azure-vmextensions-Microsoft.CPlat.Extension.slice') configurator.remove_extension_slice(extension_name="Microsoft.CPlat.Extension") @@ -369,10 +370,10 @@ def test_disable_should_reset_cpu_quota_for_all_cgroups(self): configurator.setup_extension_slice(extension_name=extension_name, cpu_quota=5) configurator.set_extension_services_cpu_memory_quota(service_list) 
CGroupsTelemetry._tracked['/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service'] = \ - CpuMetrics('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service') + CpuControllerV1('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service') CGroupsTelemetry._tracked['/sys/fs/cgroup/cpu,cpuacct/azure.slice/azure-vmextensions.slice/' \ 'azure-vmextensions-Microsoft.CPlat.Extension.slice'] = \ - CpuMetrics('Microsoft.CPlat.Extension', + CpuControllerV1('Microsoft.CPlat.Extension', '/sys/fs/cgroup/cpu,cpuacct/azure.slice/azure-vmextensions.slice/azure-vmextensions-Microsoft.CPlat.Extension.slice') configurator.disable("UNIT TEST", DisableCgroups.ALL) @@ -717,7 +718,8 @@ def test_it_should_stop_tracking_extension_services_cgroups(self): with self._get_cgroup_configurator() as configurator: with patch("os.path.exists") as mock_path: mock_path.return_value = True - CGroupsTelemetry.track_cgroup(CpuMetrics('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service')) + CGroupsTelemetry.track_cgroup_controller( + CpuControllerV1('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service')) configurator.stop_tracking_extension_services_cgroups(service_list) tracked = CGroupsTelemetry._tracked @@ -776,7 +778,7 @@ def side_effect(path): with patch("os.path.exists") as mock_path: mock_path.side_effect = side_effect CGroupsTelemetry._tracked['/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service'] = \ - CpuMetrics('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service') + CpuControllerV1('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service') configurator.stop_tracking_unit_cgroups("extension.service") tracked = CGroupsTelemetry._tracked @@ -1012,8 +1014,15 @@ def test_check_agent_memory_usage_should_raise_a_cgroups_exception_when_the_limi with self.assertRaises(AgentMemoryExceededException) as context_manager: with 
self._get_cgroup_configurator() as configurator: - with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_tracked_metrics") as tracked_metrics: + with patch("azurelinuxagent.ga.memorycontroller.MemoryControllerV1.get_tracked_metrics") as tracked_metrics: tracked_metrics.return_value = metrics configurator.check_agent_memory_usage() self.assertIn("The agent memory limit {0} bytes exceeded".format(conf.get_agent_memory_quota()), ustr(context_manager.exception), "An incorrect exception was raised") + + def test_get_log_collector_properties_should_return_correct_props(self): + with self._get_cgroup_configurator() as configurator: + self.assertEqual(configurator.get_logcollector_unit_properties(), ["--property=CPUAccounting=yes", "--property=MemoryAccounting=yes", "--property=CPUQuota=5%"]) + + with self._get_cgroup_configurator_v2() as configurator: + self.assertEqual(configurator.get_logcollector_unit_properties(), ["--property=CPUAccounting=yes", "--property=MemoryAccounting=yes", "--property=CPUQuota=5%", "--property=MemoryHigh=170M"]) diff --git a/tests/ga/test_cgroupcontroller.py b/tests/ga/test_cgroupcontroller.py new file mode 100644 index 0000000000..a01237e965 --- /dev/null +++ b/tests/ga/test_cgroupcontroller.py @@ -0,0 +1,55 @@ +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Requires Python 2.4+ and Openssl 1.0+ +# + +from __future__ import print_function + +import os +import random + +from azurelinuxagent.ga.cgroupcontroller import _CgroupController +from tests.lib.tools import AgentTestCase, patch + + +def consume_cpu_time(): + waste = 0 + for x in range(1, 200000): # pylint: disable=unused-variable + waste += random.random() + return waste + + +class TestCgroupController(AgentTestCase): + def test_is_active(self): + test_metrics = _CgroupController("test_extension", self.tmp_dir) + + with open(os.path.join(self.tmp_dir, "cgroup.procs"), mode="wb") as tasks: + tasks.write(str(1000).encode()) + + self.assertEqual(True, test_metrics.is_active()) + + @patch("azurelinuxagent.common.logger.periodic_warn") + def test_is_active_file_not_present(self, patch_periodic_warn): + test_metrics = _CgroupController("test_extension", self.tmp_dir) + self.assertFalse(test_metrics.is_active()) + + self.assertEqual(0, patch_periodic_warn.call_count) + + @patch("azurelinuxagent.common.logger.periodic_warn") + def test_is_active_incorrect_file(self, patch_periodic_warn): + open(os.path.join(self.tmp_dir, "cgroup.procs"), mode="wb").close() + test_metrics = _CgroupController("test_extension", os.path.join(self.tmp_dir, "cgroup.procs")) + self.assertEqual(False, test_metrics.is_active()) + self.assertEqual(1, patch_periodic_warn.call_count) diff --git a/tests/ga/test_cgroupstelemetry.py b/tests/ga/test_cgroupstelemetry.py index 457b20e473..ab4e33048b 100644 --- a/tests/ga/test_cgroupstelemetry.py +++ b/tests/ga/test_cgroupstelemetry.py @@ -19,9 +19,11 @@ import random import time -from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics +from azurelinuxagent.ga.cgroupcontroller import MetricsCounter from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.common.utils import fileutil +from azurelinuxagent.ga.cpucontroller import CpuControllerV1 +from azurelinuxagent.ga.memorycontroller import 
MemoryControllerV1 from tests.lib.tools import AgentTestCase, data_dir, patch @@ -80,9 +82,9 @@ def setUpClass(cls): def mock_read_file(filepath, **args): if filepath == "/proc/stat": - filepath = os.path.join(data_dir, "cgroups", "proc_stat_t0") + filepath = os.path.join(data_dir, "cgroups", "v1", "proc_stat_t0") elif filepath.endswith("/cpuacct.stat"): - filepath = os.path.join(data_dir, "cgroups", "cpuacct.stat_t0") + filepath = os.path.join(data_dir, "cgroups", "v1", "cpuacct.stat_t0") return original_read_file(filepath, **args) cls._mock_read_cpu_cgroup_file = patch("azurelinuxagent.common.utils.fileutil.read_file", side_effect=mock_read_file) @@ -103,76 +105,81 @@ def tearDown(self): CGroupsTelemetry.reset() @staticmethod - def _track_new_extension_cgroups(num_extensions): + def _track_new_extension_cgroup_controllers(num_extensions): for i in range(num_extensions): - dummy_cpu_cgroup = CpuMetrics("dummy_extension_{0}".format(i), "dummy_cpu_path_{0}".format(i)) - CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup) + dummy_cpu_controller = CpuControllerV1("dummy_extension_{0}".format(i), "dummy_cpu_path_{0}".format(i)) + CGroupsTelemetry.track_cgroup_controller(dummy_cpu_controller) - dummy_memory_cgroup = MemoryMetrics("dummy_extension_{0}".format(i), "dummy_memory_path_{0}".format(i)) - CGroupsTelemetry.track_cgroup(dummy_memory_cgroup) + dummy_memory_controller = MemoryControllerV1("dummy_extension_{0}".format(i), "dummy_memory_path_{0}".format(i)) + CGroupsTelemetry.track_cgroup_controller(dummy_memory_controller) - def _assert_cgroups_are_tracked(self, num_extensions): + def _assert_cgroup_controllers_are_tracked(self, num_extensions): for i in range(num_extensions): self.assertTrue(CGroupsTelemetry.is_tracked("dummy_cpu_path_{0}".format(i))) self.assertTrue(CGroupsTelemetry.is_tracked("dummy_memory_path_{0}".format(i))) - def _assert_polled_metrics_equal(self, metrics, cpu_metric_value, memory_metric_value, max_memory_metric_value, swap_memory_value): + def 
_assert_polled_metrics_equal(self, metrics, cpu_metric_value, current_total_memory_metric_value, current_anon_memory_metric_value, current_cache_memory_metric_value, max_memory_metric_value, swap_memory_value): for metric in metrics: self.assertIn(metric.category, ["CPU", "Memory"]) if metric.category == "CPU": self.assertEqual(metric.counter, "% Processor Time") self.assertEqual(metric.value, cpu_metric_value) if metric.category == "Memory": - self.assertIn(metric.counter, ["Total Memory Usage", "Max Memory Usage", "Swap Memory Usage"]) - if metric.counter == "Total Memory Usage": - self.assertEqual(metric.value, memory_metric_value) - elif metric.counter == "Max Memory Usage": + self.assertIn(metric.counter, [MetricsCounter.TOTAL_MEM_USAGE, MetricsCounter.ANON_MEM_USAGE, MetricsCounter.CACHE_MEM_USAGE, MetricsCounter.MAX_MEM_USAGE, MetricsCounter.SWAP_MEM_USAGE]) + if metric.counter == MetricsCounter.TOTAL_MEM_USAGE: + self.assertEqual(metric.value, current_total_memory_metric_value) + elif metric.counter == MetricsCounter.ANON_MEM_USAGE: + self.assertEqual(metric.value, current_anon_memory_metric_value) + elif metric.counter == MetricsCounter.CACHE_MEM_USAGE: + self.assertEqual(metric.value, current_cache_memory_metric_value) + elif metric.counter == MetricsCounter.MAX_MEM_USAGE: self.assertEqual(metric.value, max_memory_metric_value) - elif metric.counter == "Swap Memory Usage": + elif metric.counter == MetricsCounter.SWAP_MEM_USAGE: self.assertEqual(metric.value, swap_memory_value) def test_telemetry_polling_with_active_cgroups(self, *args): # pylint: disable=unused-argument num_extensions = 3 - self._track_new_extension_cgroups(num_extensions) - - with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage") as patch_get_memory_max_usage: - with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage") as patch_get_memory_usage: - with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage") as 
patch_get_memory_usage: - with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.try_swap_memory_usage") as patch_try_swap_memory_usage: - with patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage") as patch_get_cpu_usage: - with patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active") as patch_is_active: - patch_is_active.return_value = True - - current_cpu = 30 - current_memory = 209715200 - current_max_memory = 471859200 - current_swap_memory = 20971520 - - # 1 CPU metric + 1 Current Memory + 1 Max memory + 1 swap memory - num_of_metrics_per_extn_expected = 4 - patch_get_cpu_usage.return_value = current_cpu - patch_get_memory_usage.return_value = current_memory # example 200 MB - patch_get_memory_max_usage.return_value = current_max_memory # example 450 MB - patch_try_swap_memory_usage.return_value = current_swap_memory # example 20MB - num_polls = 12 - - for data_count in range(1, num_polls + 1): # pylint: disable=unused-variable - metrics = CGroupsTelemetry.poll_all_tracked() - - self.assertEqual(len(metrics), num_extensions * num_of_metrics_per_extn_expected) - self._assert_polled_metrics_equal(metrics, current_cpu, current_memory, current_max_memory, current_swap_memory) - - @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage", side_effect=raise_ioerror) - @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage", side_effect=raise_ioerror) - @patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage", side_effect=raise_ioerror) - @patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active", return_value=False) + self._track_new_extension_cgroup_controllers(num_extensions) + + with patch("azurelinuxagent.ga.memorycontroller.MemoryControllerV1.get_max_memory_usage") as patch_get_memory_max_usage: + with patch("azurelinuxagent.ga.memorycontroller.MemoryControllerV1.get_memory_usage") as patch_get_memory_usage: + with 
patch("azurelinuxagent.ga.memorycontroller.MemoryControllerV1.try_swap_memory_usage") as patch_try_swap_memory_usage: + with patch("azurelinuxagent.ga.cpucontroller.CpuControllerV1.get_cpu_usage") as patch_get_cpu_usage: + with patch("azurelinuxagent.ga.cgroupcontroller._CgroupController.is_active") as patch_is_active: + patch_is_active.return_value = True + + current_cpu = 30 + current_anon_memory = 209715200 + current_cache_memory = 314572800 + current_total_memory = 209715200 + 314572800 + current_max_memory = 471859200 + current_swap_memory = 20971520 + + # 1 CPU metric + 1 total Memory + 1 anon memory + 1 cache memory + 1 Max memory + 1 swap memory + num_of_metrics_per_extn_expected = 6 + patch_get_cpu_usage.return_value = current_cpu + patch_get_memory_usage.return_value = current_anon_memory, current_cache_memory # example 200 MB, 300 MB + patch_get_memory_max_usage.return_value = current_max_memory # example 450 MB + patch_try_swap_memory_usage.return_value = current_swap_memory # example 20MB + num_polls = 18 + + for data_count in range(1, num_polls + 1): # pylint: disable=unused-variable + metrics = CGroupsTelemetry.poll_all_tracked() + + self.assertEqual(len(metrics), num_extensions * num_of_metrics_per_extn_expected) + self._assert_polled_metrics_equal(metrics, current_cpu, current_total_memory, current_anon_memory, current_cache_memory, current_max_memory, current_swap_memory) + + @patch("azurelinuxagent.ga.memorycontroller.MemoryControllerV1.get_max_memory_usage", side_effect=raise_ioerror) + @patch("azurelinuxagent.ga.memorycontroller.MemoryControllerV1.get_memory_usage", side_effect=raise_ioerror) + @patch("azurelinuxagent.ga.cpucontroller.CpuControllerV1.get_cpu_usage", side_effect=raise_ioerror) + @patch("azurelinuxagent.ga.cgroupcontroller._CgroupController.is_active", return_value=False) def test_telemetry_polling_with_inactive_cgroups(self, *_): num_extensions = 5 no_extensions_expected = 0 # pylint: disable=unused-variable - 
self._track_new_extension_cgroups(num_extensions) - self._assert_cgroups_are_tracked(num_extensions) + self._track_new_extension_cgroup_controllers(num_extensions) + self._assert_cgroup_controllers_are_tracked(num_extensions) metrics = CGroupsTelemetry.poll_all_tracked() @@ -182,14 +189,14 @@ def test_telemetry_polling_with_inactive_cgroups(self, *_): self.assertEqual(len(metrics), 0) - @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage") - @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage") - @patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage") - @patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active") + @patch("azurelinuxagent.ga.memorycontroller.MemoryControllerV1.get_max_memory_usage") + @patch("azurelinuxagent.ga.memorycontroller.MemoryControllerV1.get_memory_usage") + @patch("azurelinuxagent.ga.cpucontroller.CpuControllerV1.get_cpu_usage") + @patch("azurelinuxagent.ga.cgroupcontroller._CgroupController.is_active") def test_telemetry_polling_with_changing_cgroups_state(self, patch_is_active, patch_get_cpu_usage, # pylint: disable=unused-argument patch_get_mem, patch_get_max_mem, *args): num_extensions = 5 - self._track_new_extension_cgroups(num_extensions) + self._track_new_extension_cgroup_controllers(num_extensions) patch_is_active.return_value = True @@ -197,17 +204,18 @@ def test_telemetry_polling_with_changing_cgroups_state(self, patch_is_active, pa expected_data_count = 1 # pylint: disable=unused-variable current_cpu = 30 - current_memory = 209715200 + current_anon_memory = 104857600 + current_cache_memory = 104857600 current_max_memory = 471859200 patch_get_cpu_usage.return_value = current_cpu - patch_get_mem.return_value = current_memory # example 200 MB + patch_get_mem.return_value = current_anon_memory, current_cache_memory # example 100 MB, 100 MB patch_get_max_mem.return_value = current_max_memory # example 450 MB - 
self._assert_cgroups_are_tracked(num_extensions) + self._assert_cgroup_controllers_are_tracked(num_extensions) CGroupsTelemetry.poll_all_tracked() - self._assert_cgroups_are_tracked(num_extensions) + self._assert_cgroup_controllers_are_tracked(num_extensions) patch_is_active.return_value = False patch_get_cpu_usage.side_effect = raise_ioerror @@ -225,7 +233,7 @@ def test_telemetry_polling_with_changing_cgroups_state(self, patch_is_active, pa @patch("azurelinuxagent.common.logger.periodic_warn") def test_telemetry_polling_to_not_generate_transient_logs_ioerror_file_not_found(self, patch_periodic_warn): num_extensions = 1 - self._track_new_extension_cgroups(num_extensions) + self._track_new_extension_cgroup_controllers(num_extensions) self.assertEqual(0, patch_periodic_warn.call_count) # Not expecting logs present for io_error with errno=errno.ENOENT @@ -243,7 +251,7 @@ def test_telemetry_polling_to_generate_transient_logs_ioerror_permission_denied( num_extensions = 1 num_controllers = 1 is_active_check_per_controller = 2 - self._track_new_extension_cgroups(num_extensions) + self._track_new_extension_cgroup_controllers(num_extensions) self.assertEqual(0, patch_periodic_warn.call_count) @@ -254,7 +262,7 @@ def test_telemetry_polling_to_generate_transient_logs_ioerror_permission_denied( with patch("azurelinuxagent.common.utils.fileutil.read_file", side_effect=io_error_3): poll_count = 1 expected_count_per_call = num_controllers + is_active_check_per_controller - # get_max_memory_usage memory controller would generate a log statement, and each cgroup would invoke a + # get_cpu_usage cpu controller would generate a log statement, and each cgroup controller would invoke a # is active check raising an exception for data_count in range(poll_count, 10): # pylint: disable=unused-variable @@ -263,23 +271,23 @@ def test_telemetry_polling_to_generate_transient_logs_ioerror_permission_denied( def test_telemetry_polling_to_generate_transient_logs_index_error(self): num_extensions 
= 1 - self._track_new_extension_cgroups(num_extensions) + self._track_new_extension_cgroup_controllers(num_extensions) # Generating a different kind of error (non-IOError) to check the logging. # Trying to invoke IndexError during the getParameter call with patch("azurelinuxagent.common.utils.fileutil.read_file", return_value=''): with patch("azurelinuxagent.common.logger.periodic_warn") as patch_periodic_warn: - expected_call_count = 1 # 1 periodic warning for memory + expected_call_count = 1 # 1 periodic warning for cpu for data_count in range(1, 10): # pylint: disable=unused-variable CGroupsTelemetry.poll_all_tracked() self.assertEqual(expected_call_count, patch_periodic_warn.call_count) - @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.try_swap_memory_usage") - @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage") - @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage") - @patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage") - @patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active") - def test_telemetry_calculations(self, patch_is_active, patch_get_cpu_usage, patch_get_memory_usage, patch_get_memory_max_usage, patch_try_memory_swap_usage, + @patch("azurelinuxagent.ga.memorycontroller.MemoryControllerV1.try_swap_memory_usage") + @patch("azurelinuxagent.ga.memorycontroller.MemoryControllerV1.get_max_memory_usage") + @patch("azurelinuxagent.ga.memorycontroller.MemoryControllerV1.get_memory_usage") + @patch("azurelinuxagent.ga.cpucontroller.CpuControllerV1.get_cpu_usage") + @patch("azurelinuxagent.ga.cgroupcontroller._CgroupController.is_active") + def test_telemetry_calculations(self, patch_is_active, patch_get_cpu_usage, patch_get_memory_usage, patch_get_memory_max_usage, patch_try_memory_swap_usage, *args): # pylint: disable=unused-argument num_polls = 10 num_extensions = 1 @@ -287,47 +295,48 @@ def test_telemetry_calculations(self, patch_is_active, 
patch_get_cpu_usage, pat cpu_percent_values = [random.randint(0, 100) for _ in range(num_polls)] # only verifying calculations and not validity of the values. - memory_usage_values = [random.randint(0, 8 * 1024 ** 3) for _ in range(num_polls)] + anon_usage_values = [random.randint(0, 8 * 1024 ** 3) for _ in range(num_polls)] + cache_usage_values = [random.randint(0, 8 * 1024 ** 3) for _ in range(num_polls)] max_memory_usage_values = [random.randint(0, 8 * 1024 ** 3) for _ in range(num_polls)] swap_usage_values = [random.randint(0, 8 * 1024 ** 3) for _ in range(num_polls)] - self._track_new_extension_cgroups(num_extensions) + self._track_new_extension_cgroup_controllers(num_extensions) self.assertEqual(2 * num_extensions, len(CGroupsTelemetry._tracked)) for i in range(num_polls): patch_is_active.return_value = True patch_get_cpu_usage.return_value = cpu_percent_values[i] - patch_get_memory_usage.return_value = memory_usage_values[i] + patch_get_memory_usage.return_value = anon_usage_values[i], cache_usage_values[i] patch_get_memory_max_usage.return_value = max_memory_usage_values[i] patch_try_memory_swap_usage.return_value = swap_usage_values[i] metrics = CGroupsTelemetry.poll_all_tracked() - # 1 CPU metric + 1 Current Memory + 1 Max memory + 1 swap memory - self.assertEqual(len(metrics), 4 * num_extensions) - self._assert_polled_metrics_equal(metrics, cpu_percent_values[i], memory_usage_values[i], max_memory_usage_values[i], swap_usage_values[i]) + # 1 CPU metric + 1 Total Memory + 1 anon memory + 1 cache memory + 1 Max memory + 1 swap memory + self.assertEqual(len(metrics), 6 * num_extensions) + self._assert_polled_metrics_equal(metrics, cpu_percent_values[i], anon_usage_values[i] + cache_usage_values[i], anon_usage_values[i], cache_usage_values[i], max_memory_usage_values[i], swap_usage_values[i]) def test_cgroup_tracking(self, *args): # pylint: disable=unused-argument num_extensions = 5 num_controllers = 2 - self._track_new_extension_cgroups(num_extensions) - 
self._assert_cgroups_are_tracked(num_extensions) + self._track_new_extension_cgroup_controllers(num_extensions) + self._assert_cgroup_controllers_are_tracked(num_extensions) self.assertEqual(num_extensions * num_controllers, len(CGroupsTelemetry._tracked)) def test_cgroup_is_tracked(self, *args): # pylint: disable=unused-argument num_extensions = 5 - self._track_new_extension_cgroups(num_extensions) - self._assert_cgroups_are_tracked(num_extensions) + self._track_new_extension_cgroup_controllers(num_extensions) + self._assert_cgroup_controllers_are_tracked(num_extensions) self.assertFalse(CGroupsTelemetry.is_tracked("not_present_cpu_dummy_path")) self.assertFalse(CGroupsTelemetry.is_tracked("not_present_memory_dummy_path")) - @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage", side_effect=raise_ioerror) + @patch("azurelinuxagent.ga.memorycontroller.MemoryControllerV1.get_memory_usage", side_effect=raise_ioerror) def test_process_cgroup_metric_with_no_memory_cgroup_mounted(self, *args): # pylint: disable=unused-argument num_extensions = 5 - self._track_new_extension_cgroups(num_extensions) + self._track_new_extension_cgroup_controllers(num_extensions) - with patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage") as patch_get_cpu_usage: - with patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active") as patch_is_active: + with patch("azurelinuxagent.ga.cpucontroller.CpuControllerV1.get_cpu_usage") as patch_get_cpu_usage: + with patch("azurelinuxagent.ga.cgroupcontroller._CgroupController.is_active") as patch_is_active: patch_is_active.return_value = True current_cpu = 30 @@ -339,42 +348,44 @@ def test_process_cgroup_metric_with_no_memory_cgroup_mounted(self, *args): # py metrics = CGroupsTelemetry.poll_all_tracked() self.assertEqual(len(metrics), num_extensions * 1) # Only CPU populated - self._assert_polled_metrics_equal(metrics, current_cpu, 0, 0, 0) + self._assert_polled_metrics_equal(metrics, current_cpu, 
0, 0, 0, 0, 0) - @patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage", side_effect=raise_ioerror) + @patch("azurelinuxagent.ga.cpucontroller.CpuControllerV1.get_cpu_usage", side_effect=raise_ioerror) def test_process_cgroup_metric_with_no_cpu_cgroup_mounted(self, *args): # pylint: disable=unused-argument num_extensions = 5 - self._track_new_extension_cgroups(num_extensions) + self._track_new_extension_cgroup_controllers(num_extensions) - with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage") as patch_get_memory_max_usage: - with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage") as patch_get_memory_usage: - with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.try_swap_memory_usage") as patch_try_swap_memory_usage: - with patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active") as patch_is_active: + with patch("azurelinuxagent.ga.memorycontroller.MemoryControllerV1.get_max_memory_usage") as patch_get_memory_max_usage: + with patch("azurelinuxagent.ga.memorycontroller.MemoryControllerV1.get_memory_usage") as patch_get_memory_usage: + with patch("azurelinuxagent.ga.memorycontroller.MemoryControllerV1.try_swap_memory_usage") as patch_try_swap_memory_usage: + with patch("azurelinuxagent.ga.cgroupcontroller._CgroupController.is_active") as patch_is_active: patch_is_active.return_value = True - current_memory = 209715200 + current_total_memory = 209715200 + current_anon_memory = 104857600 + current_cache_memory = 104857600 current_max_memory = 471859200 current_swap_memory = 20971520 - patch_get_memory_usage.return_value = current_memory # example 200 MB + patch_get_memory_usage.return_value = current_anon_memory, current_cache_memory # example 100 MB, 100 MB patch_get_memory_max_usage.return_value = current_max_memory # example 450 MB patch_try_swap_memory_usage.return_value = current_swap_memory # example 20MB num_polls = 10 for data_count in range(1, num_polls + 1): # 
pylint: disable=unused-variable metrics = CGroupsTelemetry.poll_all_tracked() - # Memory is only populated, CPU is not. Thus 3 metrics for memory. - self.assertEqual(len(metrics), num_extensions * 3) - self._assert_polled_metrics_equal(metrics, 0, current_memory, current_max_memory, current_swap_memory) + # Memory is only populated, CPU is not. Thus 5 metrics for memory. + self.assertEqual(len(metrics), num_extensions * 5) + self._assert_polled_metrics_equal(metrics, 0, current_total_memory, current_anon_memory, current_cache_memory, current_max_memory, current_swap_memory) - @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage", side_effect=raise_ioerror) - @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage", side_effect=raise_ioerror) - @patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage", side_effect=raise_ioerror) + @patch("azurelinuxagent.ga.memorycontroller.MemoryControllerV1.get_memory_usage", side_effect=raise_ioerror) + @patch("azurelinuxagent.ga.memorycontroller.MemoryControllerV1.get_max_memory_usage", side_effect=raise_ioerror) + @patch("azurelinuxagent.ga.cpucontroller.CpuControllerV1.get_cpu_usage", side_effect=raise_ioerror) def test_extension_telemetry_not_sent_for_empty_perf_metrics(self, *args): # pylint: disable=unused-argument num_extensions = 5 - self._track_new_extension_cgroups(num_extensions) + self._track_new_extension_cgroup_controllers(num_extensions) - with patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active") as patch_is_active: + with patch("azurelinuxagent.ga.cgroupcontroller._CgroupController.is_active") as patch_is_active: patch_is_active.return_value = False poll_count = 1 @@ -383,9 +394,9 @@ def test_extension_telemetry_not_sent_for_empty_perf_metrics(self, *args): # py metrics = CGroupsTelemetry.poll_all_tracked() self.assertEqual(0, len(metrics)) - @patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage") - 
@patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_throttled_time") - @patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active") + @patch("azurelinuxagent.ga.cpucontroller.CpuControllerV1.get_cpu_usage") + @patch("azurelinuxagent.ga.cpucontroller.CpuControllerV1.get_cpu_throttled_time") + @patch("azurelinuxagent.ga.cgroupcontroller._CgroupController.is_active") def test_cgroup_telemetry_should_not_report_cpu_negative_value(self, patch_is_active, path_get_throttled_time, patch_get_cpu_usage): num_polls = 5 @@ -396,8 +407,8 @@ def test_cgroup_telemetry_should_not_report_cpu_negative_value(self, patch_is_ac cpu_percent_values.append(-1) cpu_throttled_values = [random.randint(0, 60 * 60) for _ in range(num_polls)] - dummy_cpu_cgroup = CpuMetrics("dummy_extension_name", "dummy_cpu_path") - CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup) + dummy_cpu_cgroup = CpuControllerV1("dummy_extension_name", "dummy_cpu_path") + CGroupsTelemetry.track_cgroup_controller(dummy_cpu_cgroup) self.assertEqual(1, len(CGroupsTelemetry._tracked)) for i in range(num_polls): diff --git a/tests/ga/test_collect_logs.py b/tests/ga/test_collect_logs.py index 2b8c4f412e..458cd2e693 100644 --- a/tests/ga/test_collect_logs.py +++ b/tests/ga/test_collect_logs.py @@ -18,13 +18,15 @@ import os from azurelinuxagent.common import logger, conf -from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics, MetricValue +from azurelinuxagent.ga.cgroupcontroller import MetricValue, MetricsCounter from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator from azurelinuxagent.common.logger import Logger from azurelinuxagent.common.protocol.util import ProtocolUtil from azurelinuxagent.common.utils import fileutil from azurelinuxagent.ga.collect_logs import get_collect_logs_handler, is_log_collection_allowed, \ get_log_collector_monitor_handler +from azurelinuxagent.ga.cpucontroller import CpuControllerV1, CpuControllerV2 +from 
azurelinuxagent.ga.memorycontroller import MemoryControllerV1, MemoryControllerV2 from tests.lib.mock_wire_protocol import mock_wire_protocol, MockHttpResponse from tests.lib.http_request_predicates import HttpRequestPredicates from tests.lib.wire_protocol_data import DATA_FILE @@ -32,8 +34,13 @@ is_python_version_26, data_dir +class CgroupVersions: + V1 = "v1" + V2 = "v2" + + @contextlib.contextmanager -def _create_collect_logs_handler(iterations=1, cgroups_enabled=True, collect_logs_conf=True): +def _create_collect_logs_handler(iterations=1, cgroup_version=CgroupVersions.V1, cgroups_enabled=True, collect_logs_conf=True, cgroupv2_resource_limiting_conf=False): """ Creates an instance of CollectLogsHandler that * Uses a mock_wire_protocol for network requests, @@ -52,19 +59,33 @@ def _create_collect_logs_handler(iterations=1, cgroups_enabled=True, collect_log with patch("azurelinuxagent.ga.collect_logs.CollectLogsHandler.stopped", side_effect=[False] * iterations + [True]): with patch("time.sleep"): - # Grab the singleton to patch it - cgroups_configurator_singleton = CGroupConfigurator.get_instance() - with patch.object(cgroups_configurator_singleton, "enabled", return_value=cgroups_enabled): - with patch("azurelinuxagent.ga.collect_logs.conf.get_collect_logs", - return_value=collect_logs_conf): - def run_and_wait(): - collect_logs_handler.run() - collect_logs_handler.join() - - collect_logs_handler = get_collect_logs_handler() - collect_logs_handler.get_mock_wire_protocol = lambda: protocol - collect_logs_handler.run_and_wait = run_and_wait - yield collect_logs_handler + with patch("azurelinuxagent.ga.collect_logs.conf.get_collect_logs", return_value=collect_logs_conf): + + # Grab the singleton to patch it + cgroups_configurator_singleton = CGroupConfigurator.get_instance() + + if cgroup_version == CgroupVersions.V1: + with patch.object(cgroups_configurator_singleton, "enabled", return_value=cgroups_enabled): + def run_and_wait(): + collect_logs_handler.run() + 
collect_logs_handler.join() + + collect_logs_handler = get_collect_logs_handler() + collect_logs_handler.get_mock_wire_protocol = lambda: protocol + collect_logs_handler.run_and_wait = run_and_wait + yield collect_logs_handler + else: + with patch("azurelinuxagent.ga.collect_logs.conf.get_enable_cgroup_v2_resource_limiting", return_value=cgroupv2_resource_limiting_conf): + with patch.object(cgroups_configurator_singleton, "enabled", return_value=False): + with patch("azurelinuxagent.ga.cgroupconfigurator.CGroupConfigurator._Impl.using_cgroup_v2", return_value=True): + def run_and_wait(): + collect_logs_handler.run() + collect_logs_handler.join() + + collect_logs_handler = get_collect_logs_handler() + collect_logs_handler.get_mock_wire_protocol = lambda: protocol + collect_logs_handler.run_and_wait = run_and_wait + yield collect_logs_handler @skip_if_predicate_true(is_python_version_26, "Disabled on Python 2.6") @@ -101,26 +122,124 @@ def _create_dummy_archive(self, size=1024): def test_it_should_only_collect_logs_if_conditions_are_met(self): # In order to collect logs, three conditions have to be met: - # 1) the flag must be set to true in the conf file - # 2) cgroups must be managing services - # 3) python version 2.7+ which is automatically true for these tests since they are disabled on py2.6 + # 1) It should be enabled in the configuration. + # 2) The system must be using cgroups to manage services - needed for resource limiting of the log collection. The + # agent currently fully supports resource limiting for v1, but only supports log collector resource limiting for v2 + # if enabled via configuration. + # This condition is True if either: + # a. cgroup usage in the agent is enabled; OR + # b. the machine is using cgroup v2 and v2 resource limiting is enabled in the configuration. + # 3) The python version must be greater than 2.6 in order to support the ZipFile library used when collecting. 
+ + # Note, cgroups should not be in an 'enabled' state in the configurator if v2 is in use. Resource governance is + # not fully supported on v2 yet. + + # If collect logs is not enabled in the configuration, then log collection should always be disabled + + # Case 1: + # - Cgroups are enabled in the configurator + # - Cgroup v2 is not in use + # - Cgroup v2 resource limiting conf is True + # - collect logs config flag false + with _create_collect_logs_handler(cgroups_enabled=True, cgroup_version=CgroupVersions.V1, cgroupv2_resource_limiting_conf=True, collect_logs_conf=False): + self.assertEqual(False, is_log_collection_allowed(), "Log collection should not have been enabled") + + # Case 2: + # - Cgroups are enabled in the configurator + # - Cgroup v2 is not in use + # - Cgroup v2 resource limiting conf is False + # - collect logs config flag false + with _create_collect_logs_handler(cgroups_enabled=True, cgroup_version=CgroupVersions.V1, cgroupv2_resource_limiting_conf=False, collect_logs_conf=False): + self.assertEqual(False, is_log_collection_allowed(), "Log collection should not have been enabled") + + # Case 3: + # - Cgroups are disabled in the configurator + # - Cgroup v2 is in use + # - Cgroup v2 resource limiting conf is True + # - collect logs config flag false + with _create_collect_logs_handler(cgroups_enabled=False, cgroup_version=CgroupVersions.V2, cgroupv2_resource_limiting_conf=True, collect_logs_conf=False): + self.assertEqual(False, is_log_collection_allowed(), "Log collection should not have been enabled") - # cgroups not enabled, config flag false - with _create_collect_logs_handler(cgroups_enabled=False, collect_logs_conf=False): + # Case 4: + # - Cgroups are disabled in the configurator + # - Cgroup v2 is in use + # - Cgroup v2 resource limiting conf is False + # - collect logs config flag false + with _create_collect_logs_handler(cgroups_enabled=False, cgroup_version=CgroupVersions.V2, cgroupv2_resource_limiting_conf=False, 
collect_logs_conf=False): self.assertEqual(False, is_log_collection_allowed(), "Log collection should not have been enabled") - # cgroups enabled, config flag false - with _create_collect_logs_handler(cgroups_enabled=True, collect_logs_conf=False): + # Case 5: + # - Cgroups are disabled in the configurator + # - Cgroup v2 is not in use + # - Cgroup v2 resource limiting conf is True + # - collect logs config flag false + with _create_collect_logs_handler(cgroups_enabled=False, cgroup_version=CgroupVersions.V1, cgroupv2_resource_limiting_conf=True, collect_logs_conf=False): self.assertEqual(False, is_log_collection_allowed(), "Log collection should not have been enabled") - # cgroups not enabled, config flag true - with _create_collect_logs_handler(cgroups_enabled=False, collect_logs_conf=True): + # Case 6: + # - Cgroups are disabled in the configurator + # - Cgroup v2 is not in use + # - Cgroup v2 resource limiting conf is False + # - collect logs config flag false + with _create_collect_logs_handler(cgroups_enabled=False, cgroup_version=CgroupVersions.V1, cgroupv2_resource_limiting_conf=False, collect_logs_conf=False): self.assertEqual(False, is_log_collection_allowed(), "Log collection should not have been enabled") - # cgroups enabled, config flag true - with _create_collect_logs_handler(cgroups_enabled=True, collect_logs_conf=True): + # If collect logs is enabled in the configuration and cgroups are enbaled in the configurator, then log collection should always be enabled + + # Case 7: + # - Cgroups are enabled in the configurator + # - Cgroup v2 is not in use + # - Cgroup v2 resource limiting conf is True + # - collect logs config flag true + with _create_collect_logs_handler(cgroups_enabled=True, cgroup_version=CgroupVersions.V1, cgroupv2_resource_limiting_conf=True, collect_logs_conf=True): self.assertEqual(True, is_log_collection_allowed(), "Log collection should have been enabled") + # Case 8: + # - Cgroups are enabled in the configurator + # - Cgroup v2 is 
not in use + # - Cgroup v2 resource limiting conf is False + # - collect logs config flag true + with _create_collect_logs_handler(cgroups_enabled=True, cgroup_version=CgroupVersions.V1, cgroupv2_resource_limiting_conf=False, collect_logs_conf=True): + self.assertEqual(True, is_log_collection_allowed(), "Log collection should have been enabled") + + # If collect logs is enabled in the configuration and v2 is in use with the v2 resource limiting conf enabled, then log collection should always be enabled + + # Case 9: + # - Cgroups are disabled in the configurator + # - Cgroup v2 is in use + # - Cgroup v2 resource limiting conf is True + # - collect logs config flag true + with _create_collect_logs_handler(cgroups_enabled=False, cgroup_version=CgroupVersions.V2, cgroupv2_resource_limiting_conf=True, collect_logs_conf=True): + self.assertEqual(True, is_log_collection_allowed(), "Log collection should have been enabled") + + # If collect logs is enabled in the configuration and v2 is in use but the v2 resource limiting conf disabled, then log collection should always be disabled + + # Case 10: + # - Cgroups are disabled in the configurator + # - Cgroup v2 is in use + # - Cgroup v2 resource limiting conf is False + # - collect logs config flag true + with _create_collect_logs_handler(cgroups_enabled=False, cgroup_version=CgroupVersions.V2, cgroupv2_resource_limiting_conf=False, collect_logs_conf=True): + self.assertEqual(False, is_log_collection_allowed(), "Log collection should not have been enabled") + + # If collect logs is enabled in the configuration but cgroups are disabled in the configurator and v2 is not in use, then log collections should always be disabled + + # Case 11: + # - Cgroups are disabled in the configurator + # - Cgroup v2 is not in use + # - Cgroup v2 resource limiting conf is True + # - collect logs config flag true + with _create_collect_logs_handler(cgroups_enabled=False, cgroup_version=CgroupVersions.V1, cgroupv2_resource_limiting_conf=True, 
collect_logs_conf=True): + self.assertEqual(False, is_log_collection_allowed(), "Log collection should not have been enabled") + + # Case 12: + # - Cgroups are disabled in the configurator + # - Cgroup v2 is not in use + # - Cgroup v2 resource limiting conf is False + # - collect logs config flag true + with _create_collect_logs_handler(cgroups_enabled=False, cgroup_version=CgroupVersions.V1, cgroupv2_resource_limiting_conf=False, collect_logs_conf=True): + self.assertEqual(False, is_log_collection_allowed(), "Log collection should not have been enabled") + def test_it_uploads_logs_when_collection_is_successful(self): archive_size = 42 @@ -168,7 +287,7 @@ def http_put_handler(url, _, **__): @contextlib.contextmanager -def _create_log_collector_monitor_handler(iterations=1): +def _create_log_collector_monitor_handler(iterations=1, cgroup_version=CgroupVersions.V1): """ Creates an instance of LogCollectorMonitorHandler that * Runs its main loop only the number of times given in the 'iterations' parameter, and @@ -184,22 +303,40 @@ def _create_log_collector_monitor_handler(iterations=1): original_read_file = fileutil.read_file - def mock_read_file(filepath, **args): + def mock_read_file_v1(filepath, **args): if filepath == "/proc/stat": - filepath = os.path.join(data_dir, "cgroups", "proc_stat_t0") + filepath = os.path.join(data_dir, "cgroups", "v1", "proc_stat_t0") elif filepath.endswith("/cpuacct.stat"): - filepath = os.path.join(data_dir, "cgroups", "cpuacct.stat_t0") + filepath = os.path.join(data_dir, "cgroups", "v1", "cpuacct.stat_t0") + return original_read_file(filepath, **args) + + def mock_read_file_v2(filepath, **args): + if filepath == "/proc/uptime": + filepath = os.path.join(data_dir, "cgroups", "v2", "proc_uptime_t0") + elif filepath.endswith("/cpu.stat"): + filepath = os.path.join(data_dir, "cgroups", "v2", "cpu.stat_t0") return original_read_file(filepath, **args) + mock_read_file = None + cgroups = [] + if cgroup_version == "v1": + mock_read_file = 
mock_read_file_v1 + cgroups = [ + CpuControllerV1("test", "dummy_cpu_path"), + MemoryControllerV1("test", "dummy_memory_path") + ] + else: + mock_read_file = mock_read_file_v2 + cgroups = [ + CpuControllerV2("test", "dummy_cpu_path"), + MemoryControllerV2("test", "dummy_memory_path") + ] + with patch("azurelinuxagent.common.utils.fileutil.read_file", side_effect=mock_read_file): def run_and_wait(): monitor_log_collector.run() monitor_log_collector.join() - cgroups = [ - CpuMetrics("test", "dummy_cpu_path"), - MemoryMetrics("test", "dummy_memory_path") - ] monitor_log_collector = get_log_collector_monitor_handler(cgroups) monitor_log_collector.run_and_wait = run_and_wait yield monitor_log_collector @@ -207,33 +344,78 @@ def run_and_wait(): class TestLogCollectorMonitorHandler(AgentTestCase): - @patch('azurelinuxagent.common.event.EventLogger.add_metric') - @patch("azurelinuxagent.ga.collect_logs.LogCollectorMonitorHandler._poll_resource_usage") - def test_send_extension_metrics_telemetry(self, patch_poll_resource_usage, patch_add_metric): + def test_get_max_recorded_metrics(self): + with _create_log_collector_monitor_handler(iterations=2) as log_collector_monitor_handler: + nonlocal_vars = { + 'cpu_iteration': 0, + 'mem_iteration': 0, + 'multiplier': 5 + } + + def get_different_cpu_metrics(**kwargs): # pylint: disable=W0613 + metrics = [MetricValue("Process", MetricsCounter.PROCESSOR_PERCENT_TIME, "service", 4.5), MetricValue("Process", MetricsCounter.THROTTLED_TIME, "service", nonlocal_vars['cpu_iteration']*nonlocal_vars['multiplier'] + 10.000)] + nonlocal_vars['cpu_iteration'] += 1 + return metrics + + def get_different_memory_metrics(**kwargs): # pylint: disable=W0613 + metrics = [MetricValue("Memory", MetricsCounter.TOTAL_MEM_USAGE, "service", 20), + MetricValue("Memory", MetricsCounter.ANON_MEM_USAGE, "service", 15), + MetricValue("Memory", MetricsCounter.CACHE_MEM_USAGE, "service", nonlocal_vars['mem_iteration']*nonlocal_vars['multiplier'] + 5), + 
MetricValue("Memory", MetricsCounter.MAX_MEM_USAGE, "service", 30), + MetricValue("Memory", MetricsCounter.SWAP_MEM_USAGE, "service", 0)] + nonlocal_vars['mem_iteration'] += 1 + return metrics + + with patch("azurelinuxagent.ga.cpucontroller._CpuController.get_tracked_metrics", side_effect=get_different_cpu_metrics): + with patch("azurelinuxagent.ga.memorycontroller._MemoryController.get_tracked_metrics", side_effect=get_different_memory_metrics): + log_collector_monitor_handler.run_and_wait() + max_recorded_metrics = log_collector_monitor_handler.get_max_recorded_metrics() + self.assertEqual(len(max_recorded_metrics), 7) + self.assertEqual(max_recorded_metrics[MetricsCounter.PROCESSOR_PERCENT_TIME], 4.5) + self.assertEqual(max_recorded_metrics[MetricsCounter.THROTTLED_TIME], 15.0) + self.assertEqual(max_recorded_metrics[MetricsCounter.TOTAL_MEM_USAGE], 20) + self.assertEqual(max_recorded_metrics[MetricsCounter.ANON_MEM_USAGE], 15) + self.assertEqual(max_recorded_metrics[MetricsCounter.CACHE_MEM_USAGE], 10) + self.assertEqual(max_recorded_metrics[MetricsCounter.MAX_MEM_USAGE], 30) + self.assertEqual(max_recorded_metrics[MetricsCounter.SWAP_MEM_USAGE], 0) + + def test_verify_log_collector_memory_limit_exceeded(self): with _create_log_collector_monitor_handler() as log_collector_monitor_handler: - patch_poll_resource_usage.return_value = [MetricValue("Process", "% Processor Time", "service", 1), - MetricValue("Process", "Throttled Time", "service", 1), - MetricValue("Memory", "Total Memory Usage", "service", 1), - MetricValue("Memory", "Max Memory Usage", "service", 1), - MetricValue("Memory", "Swap Memory Usage", "service", 1) - ] - log_collector_monitor_handler.run_and_wait() - self.assertEqual(1, patch_poll_resource_usage.call_count) - self.assertEqual(5, patch_add_metric.call_count) # Five metrics being sent. 
- - @patch("os._exit", side_effect=Exception) - @patch("azurelinuxagent.ga.collect_logs.LogCollectorMonitorHandler._poll_resource_usage") - def test_verify_log_collector_memory_limit_exceeded(self, patch_poll_resource_usage, mock_exit): + cache_exceeded = [MetricValue("Process", MetricsCounter.PROCESSOR_PERCENT_TIME, "service", 4.5), + MetricValue("Process", MetricsCounter.THROTTLED_TIME, "service", 10.281), + MetricValue("Memory", MetricsCounter.TOTAL_MEM_USAGE, "service", 170 * 1024 ** 2), + MetricValue("Memory", MetricsCounter.ANON_MEM_USAGE, "service", 15 * 1024 ** 2), + MetricValue("Memory", MetricsCounter.CACHE_MEM_USAGE, "service", 160 * 1024 ** 2), + MetricValue("Memory", MetricsCounter.MAX_MEM_USAGE, "service", 171 * 1024 ** 2), + MetricValue("Memory", MetricsCounter.SWAP_MEM_USAGE, "service", 0)] + with patch("azurelinuxagent.ga.collect_logs.LogCollectorMonitorHandler._poll_resource_usage", return_value=cache_exceeded): + with patch("os._exit") as mock_exit: + log_collector_monitor_handler.run_and_wait() + self.assertEqual(mock_exit.call_count, 1) + with _create_log_collector_monitor_handler() as log_collector_monitor_handler: - with patch("azurelinuxagent.ga.cgroupconfigurator.LOGCOLLECTOR_MEMORY_LIMIT", 8): - patch_poll_resource_usage.return_value = [MetricValue("Process", "% Processor Time", "service", 1), - MetricValue("Process", "Throttled Time", "service", 1), - MetricValue("Memory", "Total Memory Usage", "service", 9), - MetricValue("Memory", "Max Memory Usage", "service", 7), - MetricValue("Memory", "Swap Memory Usage", "service", 0) - - ] - try: + anon_exceeded = [MetricValue("Process", MetricsCounter.PROCESSOR_PERCENT_TIME, "service", 4.5), + MetricValue("Process", MetricsCounter.THROTTLED_TIME, "service", 10.281), + MetricValue("Memory", MetricsCounter.TOTAL_MEM_USAGE, "service", 170 * 1024 ** 2), + MetricValue("Memory", MetricsCounter.ANON_MEM_USAGE, "service", 30 * 1024 ** 2), + MetricValue("Memory", MetricsCounter.CACHE_MEM_USAGE, "service", 
140 * 1024 ** 2), + MetricValue("Memory", MetricsCounter.MAX_MEM_USAGE, "service", 171 * 1024 ** 2), + MetricValue("Memory", MetricsCounter.SWAP_MEM_USAGE, "service", 0)] + with patch("azurelinuxagent.ga.collect_logs.LogCollectorMonitorHandler._poll_resource_usage", return_value=anon_exceeded): + with patch("os._exit") as mock_exit: + log_collector_monitor_handler.run_and_wait() + self.assertEqual(mock_exit.call_count, 1) + + with _create_log_collector_monitor_handler(cgroup_version=CgroupVersions.V2) as log_collector_monitor_handler: + mem_throttled_exceeded = [MetricValue("Process", MetricsCounter.PROCESSOR_PERCENT_TIME, "service", 4.5), + MetricValue("Process", MetricsCounter.THROTTLED_TIME, "service", 10.281), + MetricValue("Memory", MetricsCounter.TOTAL_MEM_USAGE, "service", 170 * 1024 ** 2), + MetricValue("Memory", MetricsCounter.ANON_MEM_USAGE, "service", 15 * 1024 ** 2), + MetricValue("Memory", MetricsCounter.CACHE_MEM_USAGE, "service", 140 * 1024 ** 2), + MetricValue("Memory", MetricsCounter.MAX_MEM_USAGE, "service", 171 * 1024 ** 2), + MetricValue("Memory", MetricsCounter.SWAP_MEM_USAGE, "service", 0), + MetricValue("Memory", MetricsCounter.MEM_THROTTLED, "service", 11)] + with patch("azurelinuxagent.ga.collect_logs.LogCollectorMonitorHandler._poll_resource_usage", return_value=mem_throttled_exceeded): + with patch("os._exit") as mock_exit: log_collector_monitor_handler.run_and_wait() - except Exception: self.assertEqual(mock_exit.call_count, 1) diff --git a/tests/ga/test_controllermetrics.py b/tests/ga/test_controllermetrics.py deleted file mode 100644 index cdd31395f1..0000000000 --- a/tests/ga/test_controllermetrics.py +++ /dev/null @@ -1,242 +0,0 @@ -# Copyright 2018 Microsoft Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Requires Python 2.4+ and Openssl 1.0+ -# - -from __future__ import print_function - -import errno -import os -import random -import shutil - -from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics, MetricsCounter, CounterNotFound -from azurelinuxagent.common.exception import CGroupsException -from azurelinuxagent.common.osutil import get_osutil -from azurelinuxagent.common.utils import fileutil -from tests.lib.tools import AgentTestCase, patch, data_dir - - -def consume_cpu_time(): - waste = 0 - for x in range(1, 200000): # pylint: disable=unused-variable - waste += random.random() - return waste - - -class TestControllerMetrics(AgentTestCase): - def test_is_active(self): - test_metrics = CpuMetrics("test_extension", self.tmp_dir) - self.assertEqual(False, test_metrics.is_active()) - - with open(os.path.join(self.tmp_dir, "tasks"), mode="wb") as tasks: - tasks.write(str(1000).encode()) - - self.assertEqual(True, test_metrics.is_active()) - - @patch("azurelinuxagent.common.logger.periodic_warn") - def test_is_active_file_not_present(self, patch_periodic_warn): - test_metrics = CpuMetrics("test_extension", self.tmp_dir) - self.assertEqual(False, test_metrics.is_active()) - - test_metrics = MemoryMetrics("test_extension", os.path.join(self.tmp_dir, "this_cgroup_does_not_exist")) - self.assertEqual(False, test_metrics.is_active()) - - self.assertEqual(0, patch_periodic_warn.call_count) - - @patch("azurelinuxagent.common.logger.periodic_warn") - def test_is_active_incorrect_file(self, patch_periodic_warn): - 
open(os.path.join(self.tmp_dir, "tasks"), mode="wb").close() - test_metrics = CpuMetrics("test_extension", os.path.join(self.tmp_dir, "tasks")) - self.assertEqual(False, test_metrics.is_active()) - self.assertEqual(1, patch_periodic_warn.call_count) - - -class TestCpuMetrics(AgentTestCase): - @classmethod - def setUpClass(cls): - AgentTestCase.setUpClass() - - original_read_file = fileutil.read_file - - # - # Tests that need to mock the contents of /proc/stat or */cpuacct/stat can set this map from - # the file that needs to be mocked to the mock file (each test starts with an empty map). If - # an Exception is given instead of a path, the exception is raised - # - cls.mock_read_file_map = {} - - def mock_read_file(filepath, **args): - if filepath in cls.mock_read_file_map: - mapped_value = cls.mock_read_file_map[filepath] - if isinstance(mapped_value, Exception): - raise mapped_value - filepath = mapped_value - return original_read_file(filepath, **args) - - cls.mock_read_file = patch("azurelinuxagent.common.utils.fileutil.read_file", side_effect=mock_read_file) - cls.mock_read_file.start() - - @classmethod - def tearDownClass(cls): - cls.mock_read_file.stop() - AgentTestCase.tearDownClass() - - def setUp(self): - AgentTestCase.setUp(self) - TestCpuMetrics.mock_read_file_map.clear() - - def test_initialize_cpu_usage_should_set_current_cpu_usage(self): - metrics = CpuMetrics("test", "/sys/fs/cgroup/cpu/system.slice/test") - - TestCpuMetrics.mock_read_file_map = { - "/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t0"), - os.path.join(metrics.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t0") - } - - metrics.initialize_cpu_usage() - - self.assertEqual(metrics._current_cgroup_cpu, 63763) - self.assertEqual(metrics._current_system_cpu, 5496872) - - def test_get_cpu_usage_should_return_the_cpu_usage_since_its_last_invocation(self): - osutil = get_osutil() - - metrics = CpuMetrics("test", "/sys/fs/cgroup/cpu/system.slice/test") - - 
TestCpuMetrics.mock_read_file_map = { - "/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t0"), - os.path.join(metrics.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t0") - } - - metrics.initialize_cpu_usage() - - TestCpuMetrics.mock_read_file_map = { - "/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t1"), - os.path.join(metrics.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t1") - } - - cpu_usage = metrics.get_cpu_usage() - - self.assertEqual(cpu_usage, round(100.0 * 0.000307697876885 * osutil.get_processor_cores(), 3)) - - TestCpuMetrics.mock_read_file_map = { - "/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t2"), - os.path.join(metrics.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t2") - } - - cpu_usage = metrics.get_cpu_usage() - - self.assertEqual(cpu_usage, round(100.0 * 0.000445181085968 * osutil.get_processor_cores(), 3)) - - def test_initialize_cpu_usage_should_set_the_cgroup_usage_to_0_when_the_cgroup_does_not_exist(self): - metrics = CpuMetrics("test", "/sys/fs/cgroup/cpu/system.slice/test") - - io_error_2 = IOError() - io_error_2.errno = errno.ENOENT # "No such directory" - - TestCpuMetrics.mock_read_file_map = { - "/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t0"), - os.path.join(metrics.path, "cpuacct.stat"): io_error_2 - } - - metrics.initialize_cpu_usage() - - self.assertEqual(metrics._current_cgroup_cpu, 0) - self.assertEqual(metrics._current_system_cpu, 5496872) # check the system usage just for test sanity - - def test_initialize_cpu_usage_should_raise_an_exception_when_called_more_than_once(self): - metrics = CpuMetrics("test", "/sys/fs/cgroup/cpu/system.slice/test") - - TestCpuMetrics.mock_read_file_map = { - "/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t0"), - os.path.join(metrics.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t0") - } - - metrics.initialize_cpu_usage() - - with 
self.assertRaises(CGroupsException): - metrics.initialize_cpu_usage() - - def test_get_cpu_usage_should_raise_an_exception_when_initialize_cpu_usage_has_not_been_invoked(self): - metrics = CpuMetrics("test", "/sys/fs/cgroup/cpu/system.slice/test") - - with self.assertRaises(CGroupsException): - cpu_usage = metrics.get_cpu_usage() # pylint: disable=unused-variable - - def test_get_throttled_time_should_return_the_value_since_its_last_invocation(self): - test_file = os.path.join(self.tmp_dir, "cpu.stat") - shutil.copyfile(os.path.join(data_dir, "cgroups", "cpu.stat_t0"), test_file) # throttled_time = 50 - metrics = CpuMetrics("test", self.tmp_dir) - metrics.initialize_cpu_usage() - shutil.copyfile(os.path.join(data_dir, "cgroups", "cpu.stat_t1"), test_file) # throttled_time = 2075541442327 - - throttled_time = metrics.get_cpu_throttled_time() - - self.assertEqual(throttled_time, float(2075541442327 - 50) / 1E9, "The value of throttled_time is incorrect") - - def test_get_tracked_metrics_should_return_the_throttled_time(self): - metrics = CpuMetrics("test", os.path.join(data_dir, "cgroups")) - metrics.initialize_cpu_usage() - - def find_throttled_time(metrics): - return [m for m in metrics if m.counter == MetricsCounter.THROTTLED_TIME] - - found = find_throttled_time(metrics.get_tracked_metrics()) - self.assertTrue(len(found) == 0, "get_tracked_metrics should not fetch the throttled time by default. Found: {0}".format(found)) - - found = find_throttled_time(metrics.get_tracked_metrics(track_throttled_time=True)) - self.assertTrue(len(found) == 1, "get_tracked_metrics should have fetched the throttled time by default. 
Found: {0}".format(found)) - - -class TestMemoryMetrics(AgentTestCase): - def test_get_metrics(self): - test_mem_metrics = MemoryMetrics("test_extension", os.path.join(data_dir, "cgroups", "memory_mount")) - - memory_usage = test_mem_metrics.get_memory_usage() - self.assertEqual(150000, memory_usage) - - max_memory_usage = test_mem_metrics.get_max_memory_usage() - self.assertEqual(1000000, max_memory_usage) - - swap_memory_usage = test_mem_metrics.try_swap_memory_usage() - self.assertEqual(20000, swap_memory_usage) - - def test_get_metrics_when_files_not_present(self): - test_mem_metrics = MemoryMetrics("test_extension", os.path.join(data_dir, "cgroups")) - - with self.assertRaises(IOError) as e: - test_mem_metrics.get_memory_usage() - - self.assertEqual(e.exception.errno, errno.ENOENT) - - with self.assertRaises(IOError) as e: - test_mem_metrics.get_max_memory_usage() - - self.assertEqual(e.exception.errno, errno.ENOENT) - - with self.assertRaises(IOError) as e: - test_mem_metrics.try_swap_memory_usage() - - self.assertEqual(e.exception.errno, errno.ENOENT) - - def test_get_memory_usage_counters_not_found(self): - test_mem_metrics = MemoryMetrics("test_extension", os.path.join(data_dir, "cgroups", "missing_memory_counters")) - - with self.assertRaises(CounterNotFound): - test_mem_metrics.get_memory_usage() - - swap_memory_usage = test_mem_metrics.try_swap_memory_usage() - self.assertEqual(0, swap_memory_usage) diff --git a/tests/ga/test_cpucontroller.py b/tests/ga/test_cpucontroller.py new file mode 100644 index 0000000000..bc5fc4070e --- /dev/null +++ b/tests/ga/test_cpucontroller.py @@ -0,0 +1,313 @@ +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Requires Python 2.4+ and Openssl 1.0+ +# + +from __future__ import print_function + +import errno +import os +import random +import shutil + +from azurelinuxagent.ga.cgroupcontroller import MetricsCounter +from azurelinuxagent.ga.cpucontroller import CpuControllerV1, CpuControllerV2 +from azurelinuxagent.common.exception import CGroupsException +from azurelinuxagent.common.osutil import get_osutil +from azurelinuxagent.common.utils import fileutil +from tests.lib.tools import AgentTestCase, patch, data_dir + + +def consume_cpu_time(): + waste = 0 + for x in range(1, 200000): # pylint: disable=unused-variable + waste += random.random() + return waste + + +class TestCpuControllerV1(AgentTestCase): + @classmethod + def setUpClass(cls): + AgentTestCase.setUpClass() + + original_read_file = fileutil.read_file + + # + # Tests that need to mock the contents of /proc/stat or */cpuacct/stat can set this map from + # the file that needs to be mocked to the mock file (each test starts with an empty map). 
If + # an Exception is given instead of a path, the exception is raised + # + cls.mock_read_file_map = {} + + def mock_read_file(filepath, **args): + if filepath in cls.mock_read_file_map: + mapped_value = cls.mock_read_file_map[filepath] + if isinstance(mapped_value, Exception): + raise mapped_value + filepath = mapped_value + return original_read_file(filepath, **args) + + cls.mock_read_file = patch("azurelinuxagent.common.utils.fileutil.read_file", side_effect=mock_read_file) + cls.mock_read_file.start() + + @classmethod + def tearDownClass(cls): + cls.mock_read_file.stop() + AgentTestCase.tearDownClass() + + def setUp(self): + AgentTestCase.setUp(self) + TestCpuControllerV1.mock_read_file_map.clear() + + def test_initialize_cpu_usage_v1_should_set_current_cpu_usage(self): + controller = CpuControllerV1("test", "/sys/fs/cgroup/cpu/system.slice/test") + + TestCpuControllerV1.mock_read_file_map = { + "/proc/stat": os.path.join(data_dir, "cgroups", "v1", "proc_stat_t0"), + os.path.join(controller.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "v1", "cpuacct.stat_t0") + } + + controller.initialize_cpu_usage() + + self.assertEqual(controller._current_cgroup_cpu, 63763) + self.assertEqual(controller._current_system_cpu, 5496872) + + def test_get_cpu_usage_v1_should_return_the_cpu_usage_since_its_last_invocation(self): + osutil = get_osutil() + + controller = CpuControllerV1("test", "/sys/fs/cgroup/cpu/system.slice/test") + + TestCpuControllerV1.mock_read_file_map = { + "/proc/stat": os.path.join(data_dir, "cgroups", "v1", "proc_stat_t0"), + os.path.join(controller.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "v1", "cpuacct.stat_t0") + } + + controller.initialize_cpu_usage() + + TestCpuControllerV1.mock_read_file_map = { + "/proc/stat": os.path.join(data_dir, "cgroups", "v1", "proc_stat_t1"), + os.path.join(controller.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "v1", "cpuacct.stat_t1") + } + + cpu_usage = controller.get_cpu_usage() + 
+ self.assertEqual(cpu_usage, round(100.0 * 0.000307697876885 * osutil.get_processor_cores(), 3)) + + TestCpuControllerV1.mock_read_file_map = { + "/proc/stat": os.path.join(data_dir, "cgroups", "v1", "proc_stat_t2"), + os.path.join(controller.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "v1", "cpuacct.stat_t2") + } + + cpu_usage = controller.get_cpu_usage() + + self.assertEqual(cpu_usage, round(100.0 * 0.000445181085968 * osutil.get_processor_cores(), 3)) + + def test_initialize_cpu_usage_v1_should_set_the_cgroup_usage_to_0_when_the_cgroup_does_not_exist(self): + controller = CpuControllerV1("test", "/sys/fs/cgroup/cpu/system.slice/test") + + io_error_2 = IOError() + io_error_2.errno = errno.ENOENT # "No such directory" + + TestCpuControllerV1.mock_read_file_map = { + "/proc/stat": os.path.join(data_dir, "cgroups", "v1", "proc_stat_t0"), + os.path.join(controller.path, "cpuacct.stat"): io_error_2 + } + + controller.initialize_cpu_usage() + + self.assertEqual(controller._current_cgroup_cpu, 0) + self.assertEqual(controller._current_system_cpu, 5496872) # check the system usage just for test sanity + + def test_initialize_cpu_usage_v1_should_raise_an_exception_when_called_more_than_once(self): + controller = CpuControllerV1("test", "/sys/fs/cgroup/cpu/system.slice/test") + + TestCpuControllerV1.mock_read_file_map = { + "/proc/stat": os.path.join(data_dir, "cgroups", "v1", "proc_stat_t0"), + os.path.join(controller.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "v1", "cpuacct.stat_t0") + } + + controller.initialize_cpu_usage() + + with self.assertRaises(CGroupsException): + controller.initialize_cpu_usage() + + def test_get_cpu_usage_v1_should_raise_an_exception_when_initialize_cpu_usage_has_not_been_invoked(self): + controller = CpuControllerV1("test", "/sys/fs/cgroup/cpu/system.slice/test") + + with self.assertRaises(CGroupsException): + cpu_usage = controller.get_cpu_usage() # pylint: disable=unused-variable + + def 
test_get_throttled_time_v1_should_return_the_value_since_its_last_invocation(self): + test_file = os.path.join(self.tmp_dir, "cpu.stat") + shutil.copyfile(os.path.join(data_dir, "cgroups", "v1", "cpu.stat_t0"), test_file) # throttled_time = 50 + controller = CpuControllerV1("test", self.tmp_dir) + controller.initialize_cpu_usage() + shutil.copyfile(os.path.join(data_dir, "cgroups", "v1", "cpu.stat_t1"), test_file) # throttled_time = 2075541442327 + + throttled_time = controller.get_cpu_throttled_time() + + self.assertEqual(throttled_time, round(float(2075541442327 - 50) / 1E9, 3), "The value of throttled_time is incorrect") + + def test_get_tracked_metrics_v1_should_return_the_throttled_time(self): + controller = CpuControllerV1("test", os.path.join(data_dir, "cgroups", "v1")) + controller.initialize_cpu_usage() + + def find_throttled_time(metrics): + return [m for m in metrics if m.counter == MetricsCounter.THROTTLED_TIME] + + found = find_throttled_time(controller.get_tracked_metrics()) + self.assertTrue(len(found) == 0, "get_tracked_metrics should not fetch the throttled time by default. Found: {0}".format(found)) + + found = find_throttled_time(controller.get_tracked_metrics(track_throttled_time=True)) + self.assertTrue(len(found) == 1, "get_tracked_metrics should have fetched the throttled time by default. Found: {0}".format(found)) + + +class TestCpuControllerV2(AgentTestCase): + @classmethod + def setUpClass(cls): + AgentTestCase.setUpClass() + + original_read_file = fileutil.read_file + + # + # Tests that need to mock the contents of /proc/stat or */cpuacct/stat can set this map from + # the file that needs to be mocked to the mock file (each test starts with an empty map). 
If + # an Exception is given instead of a path, the exception is raised + # + cls.mock_read_file_map = {} + + def mock_read_file(filepath, **args): + if filepath in cls.mock_read_file_map: + mapped_value = cls.mock_read_file_map[filepath] + if isinstance(mapped_value, Exception): + raise mapped_value + filepath = mapped_value + return original_read_file(filepath, **args) + + cls.mock_read_file = patch("azurelinuxagent.common.utils.fileutil.read_file", side_effect=mock_read_file) + cls.mock_read_file.start() + + @classmethod + def tearDownClass(cls): + cls.mock_read_file.stop() + AgentTestCase.tearDownClass() + + def setUp(self): + AgentTestCase.setUp(self) + TestCpuControllerV2.mock_read_file_map.clear() + + def test_initialize_cpu_usage_v2_should_set_current_cpu_usage(self): + controller = CpuControllerV2("test", "/sys/fs/cgroup/cpu/system.slice/test") + + TestCpuControllerV2.mock_read_file_map = { + "/proc/uptime": os.path.join(data_dir, "cgroups", "v2", "proc_uptime_t0"), + os.path.join(controller.path, "cpu.stat"): os.path.join(data_dir, "cgroups", "v2", "cpu.stat_t0") + } + + controller.initialize_cpu_usage() + + self.assertEqual(controller._current_cgroup_cpu, 817045397 / 1E6) + self.assertEqual(controller._current_system_cpu, 776968.02) + + def test_get_cpu_usage_v2_should_return_the_cpu_usage_since_its_last_invocation(self): + controller = CpuControllerV2("test", "/sys/fs/cgroup/cpu/system.slice/test") + + TestCpuControllerV2.mock_read_file_map = { + "/proc/uptime": os.path.join(data_dir, "cgroups", "v2", "proc_uptime_t0"), + os.path.join(controller.path, "cpu.stat"): os.path.join(data_dir, "cgroups", "v2", "cpu.stat_t0") + } + + controller.initialize_cpu_usage() + + TestCpuControllerV2.mock_read_file_map = { + "/proc/uptime": os.path.join(data_dir, "cgroups", "v2", "proc_uptime_t1"), + os.path.join(controller.path, "cpu.stat"): os.path.join(data_dir, "cgroups", "v2", "cpu.stat_t1") + } + + cpu_usage = controller.get_cpu_usage() + + cgroup_usage_delta = 
(819624087 / 1E6) - (817045397 / 1E6) + system_usage_delta = 777350.57 - 776968.02 + self.assertEqual(cpu_usage, round(100.0 * cgroup_usage_delta/system_usage_delta, 3)) + + TestCpuControllerV2.mock_read_file_map = { + "/proc/uptime": os.path.join(data_dir, "cgroups", "v2", "proc_uptime_t2"), + os.path.join(controller.path, "cpu.stat"): os.path.join(data_dir, "cgroups", "v2", "cpu.stat_t2") + } + + cpu_usage = controller.get_cpu_usage() + + cgroup_usage_delta = (822052295 / 1E6) - (819624087 / 1E6) + system_usage_delta = 779218.68 - 777350.57 + self.assertEqual(cpu_usage, round(100.0 * cgroup_usage_delta/system_usage_delta, 3)) + + def test_initialize_cpu_usage_v2_should_set_the_cgroup_usage_to_0_when_the_cgroup_does_not_exist(self): + controller = CpuControllerV2("test", "/sys/fs/cgroup/cpu/system.slice/test") + + io_error_2 = IOError() + io_error_2.errno = errno.ENOENT # "No such directory" + + TestCpuControllerV2.mock_read_file_map = { + "/proc/uptime": os.path.join(data_dir, "cgroups", "v2", "proc_uptime_t0"), + os.path.join(controller.path, "cpu.stat"): io_error_2 + } + + controller.initialize_cpu_usage() + + self.assertEqual(controller._current_cgroup_cpu, 0) + self.assertEqual(controller._current_system_cpu, 776968.02) # check the system usage just for test sanity + + def test_initialize_cpu_usage_v2_should_raise_an_exception_when_called_more_than_once(self): + controller = CpuControllerV2("test", "/sys/fs/cgroup/cpu/system.slice/test") + + TestCpuControllerV2.mock_read_file_map = { + "/proc/uptime": os.path.join(data_dir, "cgroups", "v2", "proc_uptime_t0"), + os.path.join(controller.path, "cpu.stat"): os.path.join(data_dir, "cgroups", "v2", "cpu.stat_t0") + } + + controller.initialize_cpu_usage() + + with self.assertRaises(CGroupsException): + controller.initialize_cpu_usage() + + def test_get_cpu_usage_v2_should_raise_an_exception_when_initialize_cpu_usage_has_not_been_invoked(self): + controller = CpuControllerV2("test", 
"/sys/fs/cgroup/cpu/system.slice/test") + + with self.assertRaises(CGroupsException): + cpu_usage = controller.get_cpu_usage() # pylint: disable=unused-variable + + def test_get_throttled_time_v2_should_return_the_value_since_its_last_invocation(self): + test_file = os.path.join(self.tmp_dir, "cpu.stat") + shutil.copyfile(os.path.join(data_dir, "cgroups", "v2", "cpu.stat_t0"), test_file) # throttled_time = 15735198706 + controller = CpuControllerV2("test", self.tmp_dir) + controller.initialize_cpu_usage() + shutil.copyfile(os.path.join(data_dir, "cgroups", "v2", "cpu.stat_t1"), test_file) # throttled_usec = 15796563650 + + throttled_time = controller.get_cpu_throttled_time() + + self.assertEqual(throttled_time, round(float(15796563650 - 15735198706) / 1E6, 3), "The value of throttled_time is incorrect") + + def test_get_tracked_metrics_v2_should_return_the_throttled_time(self): + controller = CpuControllerV2("test", os.path.join(data_dir, "cgroups", "v2")) + controller.initialize_cpu_usage() + + def find_throttled_time(metrics): + return [m for m in metrics if m.counter == MetricsCounter.THROTTLED_TIME] + + found = find_throttled_time(controller.get_tracked_metrics()) + self.assertTrue(len(found) == 0, "get_tracked_metrics should not fetch the throttled time by default. Found: {0}".format(found)) + + found = find_throttled_time(controller.get_tracked_metrics(track_throttled_time=True)) + self.assertTrue(len(found) == 1, "get_tracked_metrics should have fetched the throttled time by default. 
Found: {0}".format(found)) diff --git a/tests/ga/test_logcollector.py b/tests/ga/test_logcollector.py index cedf894b09..6a8be83afb 100644 --- a/tests/ga/test_logcollector.py +++ b/tests/ga/test_logcollector.py @@ -79,9 +79,9 @@ def _mock_cgroup(cls): def mock_read_file(filepath, **args): if filepath == "/proc/stat": - filepath = os.path.join(data_dir, "cgroups", "proc_stat_t0") + filepath = os.path.join(data_dir, "cgroups", "v1", "proc_stat_t0") elif filepath.endswith("/cpuacct.stat"): - filepath = os.path.join(data_dir, "cgroups", "cpuacct.stat_t0") + filepath = os.path.join(data_dir, "cgroups", "v1", "cpuacct.stat_t0") return original_read_file(filepath, **args) cls._mock_read_cpu_cgroup_file = patch("azurelinuxagent.common.utils.fileutil.read_file", side_effect=mock_read_file) @@ -213,7 +213,7 @@ def test_log_collector_parses_commands_in_manifest(self): with patch("azurelinuxagent.ga.logcollector.MANIFEST_NORMAL", manifest): with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): log_collector = LogCollector() - archive = log_collector.collect_logs_and_get_archive() + archive, uncompressed_file_size = log_collector.collect_logs_and_get_archive() with open(self.output_results_file_path, "r") as fh: results = fh.readlines() @@ -227,6 +227,7 @@ def test_log_collector_parses_commands_in_manifest(self): # Assert copy was parsed self._assert_archive_created(archive) self._assert_files_are_in_archive(expected_files=[file_to_collect]) + self.assertEqual(uncompressed_file_size, os.path.getsize(file_to_collect)) no_files = self._get_number_of_files_in_archive() self.assertEqual(1, no_files, "Expected 1 file in archive, found {0}!".format(no_files)) @@ -242,10 +243,11 @@ def test_log_collector_uses_full_manifest_when_full_mode_enabled(self): with patch("azurelinuxagent.ga.logcollector.MANIFEST_FULL", manifest): with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): log_collector = LogCollector(is_full_mode=True) - 
archive = log_collector.collect_logs_and_get_archive() + archive, uncompressed_file_size = log_collector.collect_logs_and_get_archive() self._assert_archive_created(archive) self._assert_files_are_in_archive(expected_files=[file_to_collect]) + self.assertEqual(uncompressed_file_size, os.path.getsize(file_to_collect)) no_files = self._get_number_of_files_in_archive() self.assertEqual(1, no_files, "Expected 1 file in archive, found {0}!".format(no_files)) @@ -256,7 +258,7 @@ def test_log_collector_should_collect_all_files(self): with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): log_collector = LogCollector() - archive = log_collector.collect_logs_and_get_archive() + archive, uncompressed_file_size = log_collector.collect_logs_and_get_archive() self._assert_archive_created(archive) @@ -269,6 +271,10 @@ def test_log_collector_should_collect_all_files(self): os.path.join(self.root_collect_dir, "another_dir", "least_important_file") ] self._assert_files_are_in_archive(expected_files) + expected_total_uncompressed_size = 0 + for file in expected_files: + expected_total_uncompressed_size += os.path.getsize(file) + self.assertEqual(uncompressed_file_size, expected_total_uncompressed_size) no_files = self._get_number_of_files_in_archive() self.assertEqual(6, no_files, "Expected 6 files in archive, found {0}!".format(no_files)) @@ -278,7 +284,7 @@ def test_log_collector_should_truncate_large_text_files_and_ignore_large_binary_ with patch("azurelinuxagent.ga.logcollector._FILE_SIZE_LIMIT", SMALL_FILE_SIZE): with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): log_collector = LogCollector() - archive = log_collector.collect_logs_and_get_archive() + archive, uncompressed_file_size = log_collector.collect_logs_and_get_archive() self._assert_archive_created(archive) @@ -294,6 +300,13 @@ def test_log_collector_should_truncate_large_text_files_and_ignore_large_binary_ ] self._assert_files_are_in_archive(expected_files) 
self._assert_files_are_not_in_archive(unexpected_files) + total_uncompressed_file_size = 0 + for file in expected_files: + if file.startswith("truncated_"): + total_uncompressed_file_size += SMALL_FILE_SIZE + else: + total_uncompressed_file_size += os.path.getsize(file) + self.assertEqual(total_uncompressed_file_size, uncompressed_file_size) no_files = self._get_number_of_files_in_archive() self.assertEqual(5, no_files, "Expected 5 files in archive, found {0}!".format(no_files)) @@ -312,7 +325,7 @@ def test_log_collector_should_prioritize_important_files_if_archive_too_big(self with patch("azurelinuxagent.ga.logcollector._MUST_COLLECT_FILES", must_collect_files): with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): log_collector = LogCollector() - archive = log_collector.collect_logs_and_get_archive() + archive, uncompressed_file_size = log_collector.collect_logs_and_get_archive() self._assert_archive_created(archive) @@ -328,6 +341,10 @@ def test_log_collector_should_prioritize_important_files_if_archive_too_big(self ] self._assert_files_are_in_archive(expected_files) self._assert_files_are_not_in_archive(unexpected_files) + expected_total_uncompressed_size = 0 + for file in expected_files: + expected_total_uncompressed_size += os.path.getsize(file) + self.assertEqual(uncompressed_file_size, expected_total_uncompressed_size) no_files = self._get_number_of_files_in_archive() self.assertEqual(3, no_files, "Expected 3 files in archive, found {0}!".format(no_files)) @@ -338,7 +355,7 @@ def test_log_collector_should_prioritize_important_files_if_archive_too_big(self with patch("azurelinuxagent.ga.logcollector._UNCOMPRESSED_ARCHIVE_SIZE_LIMIT", 10 * 1024 * 1024): with patch("azurelinuxagent.ga.logcollector._MUST_COLLECT_FILES", must_collect_files): - second_archive = log_collector.collect_logs_and_get_archive() + second_archive, second_uncompressed_file_size = log_collector.collect_logs_and_get_archive() expected_files = [ 
os.path.join(self.root_collect_dir, "waagent.log"), @@ -352,6 +369,10 @@ def test_log_collector_should_prioritize_important_files_if_archive_too_big(self ] self._assert_files_are_in_archive(expected_files) self._assert_files_are_not_in_archive(unexpected_files) + expected_total_uncompressed_size = 0 + for file in expected_files: + expected_total_uncompressed_size += os.path.getsize(file) + self.assertEqual(second_uncompressed_file_size, expected_total_uncompressed_size) self._assert_archive_created(second_archive) @@ -363,7 +384,7 @@ def test_log_collector_should_update_archive_when_files_are_new_or_modified_or_d # needs to be updated in the archive, deleted if removed from disk, and added if not previously seen. with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): log_collector = LogCollector() - first_archive = log_collector.collect_logs_and_get_archive() + first_archive, first_uncompressed_file_size = log_collector.collect_logs_and_get_archive() self._assert_archive_created(first_archive) # Everything should be in the archive @@ -376,6 +397,10 @@ def test_log_collector_should_update_archive_when_files_are_new_or_modified_or_d os.path.join(self.root_collect_dir, "another_dir", "least_important_file") ] self._assert_files_are_in_archive(expected_files) + expected_total_uncompressed_size = 0 + for file in expected_files: + expected_total_uncompressed_size += os.path.getsize(file) + self.assertEqual(first_uncompressed_file_size, expected_total_uncompressed_size) no_files = self._get_number_of_files_in_archive() self.assertEqual(6, no_files, "Expected 6 files in archive, found {0}!".format(no_files)) @@ -392,7 +417,7 @@ def test_log_collector_should_update_archive_when_files_are_new_or_modified_or_d LARGE_FILE_SIZE) rm_files(os.path.join(self.root_collect_dir, "waagent.log.1")) - second_archive = log_collector.collect_logs_and_get_archive() + second_archive, second_uncompressed_file_size = log_collector.collect_logs_and_get_archive() 
self._assert_archive_created(second_archive) expected_files = [ @@ -408,6 +433,10 @@ def test_log_collector_should_update_archive_when_files_are_new_or_modified_or_d ] self._assert_files_are_in_archive(expected_files) self._assert_files_are_not_in_archive(unexpected_files) + expected_total_uncompressed_size = 0 + for file in expected_files: + expected_total_uncompressed_size += os.path.getsize(file) + self.assertEqual(second_uncompressed_file_size, expected_total_uncompressed_size) file = os.path.join(self.root_collect_dir, "waagent.log") # pylint: disable=redefined-builtin new_file_size = self._get_uncompressed_file_size(file) @@ -434,7 +463,7 @@ def test_log_collector_should_clean_up_uncollected_truncated_files(self): with patch("azurelinuxagent.ga.logcollector._FILE_SIZE_LIMIT", SMALL_FILE_SIZE): with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): log_collector = LogCollector() - archive = log_collector.collect_logs_and_get_archive() + archive, uncompressed_file_size = log_collector.collect_logs_and_get_archive() self._assert_archive_created(archive) @@ -443,6 +472,13 @@ def test_log_collector_should_clean_up_uncollected_truncated_files(self): self._truncated_path(os.path.join(self.root_collect_dir, "waagent.log.1")), # this file should be truncated ] self._assert_files_are_in_archive(expected_files) + expected_total_uncompressed_size = 0 + for file in expected_files: + if file.startswith("truncated_"): + expected_total_uncompressed_size += SMALL_FILE_SIZE + else: + expected_total_uncompressed_size += os.path.getsize(file) + self.assertEqual(uncompressed_file_size, expected_total_uncompressed_size) no_files = self._get_number_of_files_in_archive() self.assertEqual(2, no_files, "Expected 2 files in archive, found {0}!".format(no_files)) @@ -456,7 +492,7 @@ def test_log_collector_should_clean_up_uncollected_truncated_files(self): with patch("azurelinuxagent.ga.logcollector._FILE_SIZE_LIMIT", SMALL_FILE_SIZE): with 
patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'): log_collector = LogCollector() - second_archive = log_collector.collect_logs_and_get_archive() + second_archive, second_uncompressed_file_size = log_collector.collect_logs_and_get_archive() expected_files = [ os.path.join(self.root_collect_dir, "waagent.log"), @@ -467,6 +503,13 @@ def test_log_collector_should_clean_up_uncollected_truncated_files(self): ] self._assert_files_are_in_archive(expected_files) self._assert_files_are_not_in_archive(unexpected_files) + expected_total_uncompressed_size = 0 + for file in expected_files: + if file.startswith("truncated_"): + expected_total_uncompressed_size += SMALL_FILE_SIZE + else: + expected_total_uncompressed_size += os.path.getsize(file) + self.assertEqual(second_uncompressed_file_size, expected_total_uncompressed_size) self._assert_archive_created(second_archive) diff --git a/tests/ga/test_memorycontroller.py b/tests/ga/test_memorycontroller.py new file mode 100644 index 0000000000..1beb9a33f0 --- /dev/null +++ b/tests/ga/test_memorycontroller.py @@ -0,0 +1,124 @@ +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Requires Python 2.4+ and Openssl 1.0+ +# + +from __future__ import print_function + +import errno +import os +import shutil + +from azurelinuxagent.ga.cgroupcontroller import CounterNotFound +from azurelinuxagent.ga.memorycontroller import MemoryControllerV1, MemoryControllerV2 +from tests.lib.tools import AgentTestCase, data_dir + + +class TestMemoryControllerV1(AgentTestCase): + def test_get_metrics_v1(self): + test_mem_controller = MemoryControllerV1("test_extension", os.path.join(data_dir, "cgroups", "v1")) + + rss_memory_usage, cache_memory_usage = test_mem_controller.get_memory_usage() + self.assertEqual(100000, rss_memory_usage) + self.assertEqual(50000, cache_memory_usage) + + max_memory_usage = test_mem_controller.get_max_memory_usage() + self.assertEqual(1000000, max_memory_usage) + + swap_memory_usage = test_mem_controller.try_swap_memory_usage() + self.assertEqual(20000, swap_memory_usage) + + def test_get_metrics_v1_when_files_not_present(self): + test_mem_controller = MemoryControllerV1("test_extension", os.path.join(data_dir, "cgroups")) + + with self.assertRaises(IOError) as e: + test_mem_controller.get_memory_usage() + + self.assertEqual(e.exception.errno, errno.ENOENT) + + with self.assertRaises(IOError) as e: + test_mem_controller.get_max_memory_usage() + + self.assertEqual(e.exception.errno, errno.ENOENT) + + with self.assertRaises(IOError) as e: + test_mem_controller.try_swap_memory_usage() + + self.assertEqual(e.exception.errno, errno.ENOENT) + + def test_get_memory_usage_v1_counters_not_found(self): + test_file = os.path.join(self.tmp_dir, "memory.stat") + shutil.copyfile(os.path.join(data_dir, "cgroups", "v1", "memory.stat_missing"), test_file) + test_mem_controller = MemoryControllerV1("test_extension", self.tmp_dir) + + with self.assertRaises(CounterNotFound): + test_mem_controller.get_memory_usage() + + swap_memory_usage = test_mem_controller.try_swap_memory_usage() + self.assertEqual(0, swap_memory_usage) + + +class 
TestMemoryControllerV2(AgentTestCase): + def test_get_metrics_v2(self): + test_mem_controller = MemoryControllerV2("test_extension", os.path.join(data_dir, "cgroups", "v2")) + + anon_memory_usage, cache_memory_usage = test_mem_controller.get_memory_usage() + self.assertEqual(17589300, anon_memory_usage) + self.assertEqual(134553600, cache_memory_usage) + + max_memory_usage = test_mem_controller.get_max_memory_usage() + self.assertEqual(194494464, max_memory_usage) + + swap_memory_usage = test_mem_controller.try_swap_memory_usage() + self.assertEqual(20000, swap_memory_usage) + + memory_throttled_events = test_mem_controller.get_memory_throttled_events() + self.assertEqual(9, memory_throttled_events) + + def test_get_metrics_v2_when_files_not_present(self): + test_mem_controller = MemoryControllerV2("test_extension", os.path.join(data_dir, "cgroups")) + + with self.assertRaises(IOError) as e: + test_mem_controller.get_memory_usage() + + self.assertEqual(e.exception.errno, errno.ENOENT) + + with self.assertRaises(IOError) as e: + test_mem_controller.get_max_memory_usage() + + self.assertEqual(e.exception.errno, errno.ENOENT) + + with self.assertRaises(IOError) as e: + test_mem_controller.try_swap_memory_usage() + + self.assertEqual(e.exception.errno, errno.ENOENT) + + with self.assertRaises(IOError) as e: + test_mem_controller.get_memory_throttled_events() + + self.assertEqual(e.exception.errno, errno.ENOENT) + + def test_get_memory_usage_v1_counters_not_found(self): + test_stat_file = os.path.join(self.tmp_dir, "memory.stat") + shutil.copyfile(os.path.join(data_dir, "cgroups", "v2", "memory.stat_missing"), test_stat_file) + test_events_file = os.path.join(self.tmp_dir, "memory.events") + shutil.copyfile(os.path.join(data_dir, "cgroups", "v2", "memory.stat_missing"), test_events_file) + test_mem_controller = MemoryControllerV2("test_extension", self.tmp_dir) + + with self.assertRaises(CounterNotFound): + test_mem_controller.get_memory_usage() + + with 
self.assertRaises(CounterNotFound): + test_mem_controller.get_memory_throttled_events() diff --git a/tests/ga/test_monitor.py b/tests/ga/test_monitor.py index 420645fe0e..a2100cde58 100644 --- a/tests/ga/test_monitor.py +++ b/tests/ga/test_monitor.py @@ -21,12 +21,14 @@ import string from azurelinuxagent.common import event, logger -from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics, MetricValue, _REPORT_EVERY_HOUR +from azurelinuxagent.ga.cgroupcontroller import MetricValue, _REPORT_EVERY_HOUR from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.common.event import EVENTS_DIRECTORY from azurelinuxagent.common.protocol.healthservice import HealthService from azurelinuxagent.common.protocol.util import ProtocolUtil from azurelinuxagent.common.protocol.wire import WireProtocol +from azurelinuxagent.ga.cpucontroller import CpuControllerV1 +from azurelinuxagent.ga.memorycontroller import MemoryControllerV1 from azurelinuxagent.ga.monitor import get_monitor_handler, PeriodicOperation, SendImdsHeartbeat, \ ResetPeriodicLogMessages, SendHostPluginHeartbeat, PollResourceUsage, \ ReportNetworkErrors, ReportNetworkConfigurationChanges, PollSystemWideResourceUsage @@ -222,23 +224,23 @@ def test_send_extension_metrics_telemetry_for_empty_cgroup(self, patch_poll_all_ self.assertEqual(0, patch_add_metric.call_count) @patch('azurelinuxagent.common.event.EventLogger.add_metric') - @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage") + @patch("azurelinuxagent.ga.memorycontroller.MemoryControllerV1.get_memory_usage") @patch('azurelinuxagent.common.logger.Logger.periodic_warn') def test_send_extension_metrics_telemetry_handling_memory_cgroup_exceptions_errno2(self, patch_periodic_warn, # pylint: disable=unused-argument - patch_get_memory_usage, + get_memory_usage, patch_add_metric, *args): ioerror = IOError() ioerror.errno = 2 - patch_get_memory_usage.side_effect = ioerror + get_memory_usage.side_effect 
= ioerror - CGroupsTelemetry._tracked["/test/path"] = MemoryMetrics("_cgroup_name", "/test/path") + CGroupsTelemetry._tracked["/test/path"] = MemoryControllerV1("_cgroup_name", "/test/path") PollResourceUsage().run() self.assertEqual(0, patch_periodic_warn.call_count) self.assertEqual(0, patch_add_metric.call_count) # No metrics should be sent. @patch('azurelinuxagent.common.event.EventLogger.add_metric') - @patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage") + @patch("azurelinuxagent.ga.cpucontroller.CpuControllerV1.get_cpu_usage") @patch('azurelinuxagent.common.logger.Logger.periodic_warn') def test_send_extension_metrics_telemetry_handling_cpu_cgroup_exceptions_errno2(self, patch_periodic_warn, # pylint: disable=unused-argument patch_cpu_usage, patch_add_metric, @@ -247,7 +249,7 @@ def test_send_extension_metrics_telemetry_handling_cpu_cgroup_exceptions_errno2( ioerror.errno = 2 patch_cpu_usage.side_effect = ioerror - CGroupsTelemetry._tracked["/test/path"] = CpuMetrics("_cgroup_name", "/test/path") + CGroupsTelemetry._tracked["/test/path"] = CpuControllerV1("_cgroup_name", "/test/path") PollResourceUsage().run() self.assertEqual(0, patch_periodic_warn.call_count) diff --git a/tests/test_agent.py b/tests/test_agent.py index df1a7ca131..7073af6b73 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -24,10 +24,10 @@ from azurelinuxagent.common.exception import CGroupsException from azurelinuxagent.ga import logcollector, cgroupconfigurator from azurelinuxagent.common.utils import fileutil -from azurelinuxagent.ga.cgroupapi import InvalidCgroupMountpointException, CgroupV1 +from azurelinuxagent.ga.cgroupapi import InvalidCgroupMountpointException, CgroupV1, CgroupV2 from azurelinuxagent.ga.collect_logs import CollectLogsHandler -from azurelinuxagent.ga.controllermetrics import AGENT_LOG_COLLECTOR -from tests.lib.mock_cgroup_environment import mock_cgroup_v1_environment +from azurelinuxagent.ga.cgroupcontroller import AGENT_LOG_COLLECTOR 
+from tests.lib.mock_cgroup_environment import mock_cgroup_v1_environment, mock_cgroup_v2_environment from tests.lib.tools import AgentTestCase, data_dir, Mock, patch EXPECTED_CONFIGURATION = \ @@ -48,10 +48,12 @@ Debug.CgroupMonitorExpiryTime = 2022-03-31 Debug.CgroupMonitorExtensionName = Microsoft.Azure.Monitor.AzureMonitorLinuxAgent Debug.EnableAgentMemoryUsageCheck = False +Debug.EnableCgroupV2ResourceLimiting = False Debug.EnableFastTrack = True Debug.EnableGAVersioning = True Debug.EtpCollectionPeriod = 300 Debug.FirewallRulesLogPeriod = 86400 +Debug.LogCollectorInitialDelay = 300 DetectScvmmEnv = False EnableOverProvisioning = True Extension.LogDir = /var/log/azure @@ -232,7 +234,7 @@ def test_rejects_invalid_log_collector_mode(self, mock_exit, mock_stderr): # py @patch("azurelinuxagent.agent.LogCollector") def test_calls_collect_logs_with_proper_mode(self, mock_log_collector, *args): # pylint: disable=unused-argument agent = Agent(False, conf_file_path=os.path.join(data_dir, "test_waagent.conf")) - mock_log_collector.run = Mock() + mock_log_collector.return_value.collect_logs_and_get_archive.return_value = (Mock(), Mock()) # LogCollector.collect_logs_and_get_archive returns a tuple agent.collect_logs(is_full_mode=True) full_mode = mock_log_collector.call_args_list[0][0][0] @@ -246,7 +248,7 @@ def test_calls_collect_logs_with_proper_mode(self, mock_log_collector, *args): def test_calls_collect_logs_on_valid_cgroups_v1(self, mock_log_collector): try: CollectLogsHandler.enable_monitor_cgroups_check() - mock_log_collector.run = Mock() + mock_log_collector.return_value.collect_logs_and_get_archive.return_value = (Mock(), Mock()) # LogCollector.collect_logs_and_get_archive returns a tuple # Mock cgroup so process is in the log collector slice def mock_cgroup(*args, **kwargs): # pylint: disable=W0613 @@ -274,6 +276,33 @@ def mock_cgroup(*args, **kwargs): # pylint: disable=W0613 finally: CollectLogsHandler.disable_monitor_cgroups_check() + 
@patch("azurelinuxagent.agent.LogCollector") + def test_calls_collect_logs_on_valid_cgroups_v2(self, mock_log_collector): + try: + CollectLogsHandler.enable_monitor_cgroups_check() + mock_log_collector.return_value.collect_logs_and_get_archive.return_value = ( + Mock(), Mock()) # LogCollector.collect_logs_and_get_archive returns a tuple + + # Mock cgroup so process is in the log collector slice + def mock_cgroup(*args, **kwargs): # pylint: disable=W0613 + relative_path = "{0}/{1}".format(cgroupconfigurator.LOGCOLLECTOR_SLICE, logcollector.CGROUPS_UNIT) + return CgroupV2( + cgroup_name=AGENT_LOG_COLLECTOR, + root_cgroup_path="/sys/fs/cgroup", + cgroup_path="/sys/fs/cgroup/{0}".format(relative_path), + enabled_controllers=["cpu", "memory"] + ) + + with mock_cgroup_v2_environment(self.tmp_dir): + with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.get_process_cgroup", side_effect=mock_cgroup): + agent = Agent(False, conf_file_path=os.path.join(data_dir, "test_waagent.conf")) + agent.collect_logs(is_full_mode=True) + + mock_log_collector.assert_called_once() + + finally: + CollectLogsHandler.disable_monitor_cgroups_check() + @patch("azurelinuxagent.agent.LogCollector") def test_doesnt_call_collect_logs_when_cgroup_api_cannot_be_determined(self, mock_log_collector): try: @@ -336,6 +365,39 @@ def raise_on_sys_exit(*args): finally: CollectLogsHandler.disable_monitor_cgroups_check() + @patch("azurelinuxagent.agent.LogCollector") + def test_doesnt_call_collect_logs_on_invalid_cgroups_v2(self, mock_log_collector): + try: + CollectLogsHandler.enable_monitor_cgroups_check() + mock_log_collector.run = Mock() + + # Mock cgroup so process is in incorrect slice + def mock_cgroup(*args, **kwargs): # pylint: disable=W0613 + relative_path = "NOT_THE_CORRECT_PATH" + return CgroupV2( + cgroup_name=AGENT_LOG_COLLECTOR, + root_cgroup_path="/sys/fs/cgroup", + cgroup_path="/sys/fs/cgroup/{0}".format(relative_path), + enabled_controllers=["cpu", "memory"] + ) + + def 
raise_on_sys_exit(*args): + raise RuntimeError(args[0] if args else "Exiting") + + with mock_cgroup_v2_environment(self.tmp_dir): + with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.get_process_cgroup", + side_effect=mock_cgroup): + agent = Agent(False, conf_file_path=os.path.join(data_dir, "test_waagent.conf")) + + with patch("sys.exit", side_effect=raise_on_sys_exit) as mock_exit: + try: + agent.collect_logs(is_full_mode=True) + except RuntimeError as re: + self.assertEqual(logcollector.INVALID_CGROUPS_ERRCODE, re.args[0]) + mock_exit.assert_called_once_with(logcollector.INVALID_CGROUPS_ERRCODE) + finally: + CollectLogsHandler.disable_monitor_cgroups_check() + @patch('azurelinuxagent.agent.get_cgroup_api', side_effect=InvalidCgroupMountpointException("Test")) @patch("azurelinuxagent.agent.LogCollector") def test_doesnt_call_collect_logs_on_non_systemd_cgroups_v1_mountpoints(self, mock_log_collector, _): @@ -393,7 +455,41 @@ def raise_on_sys_exit(*args): mock_exit.assert_called_once_with(logcollector.INVALID_CGROUPS_ERRCODE) finally: CollectLogsHandler.disable_monitor_cgroups_check() - + + @patch("azurelinuxagent.agent.LogCollector") + @patch("azurelinuxagent.ga.collect_logs.LogCollectorMonitorHandler.get_max_recorded_metrics") + def test_collect_log_should_output_resource_usage_summary(self, mock_get_metrics_summary, mock_log_collector): + try: + CollectLogsHandler.enable_monitor_cgroups_check() + mock_log_collector.return_value.collect_logs_and_get_archive.return_value = (Mock(), Mock()) # LogCollector.collect_logs_and_get_archive returns a tuple + mock_get_metrics_summary.return_value = "metric summary" + + # Mock cgroup so process is in the log collector slice + def mock_cgroup(*args, **kwargs): # pylint: disable=W0613 + relative_path = "{0}/{1}".format(cgroupconfigurator.LOGCOLLECTOR_SLICE, logcollector.CGROUPS_UNIT) + return CgroupV1( + cgroup_name=AGENT_LOG_COLLECTOR, + controller_mountpoints={ + 'cpu,cpuacct': "/sys/fs/cgroup/cpu,cpuacct", + 
'memory': "/sys/fs/cgroup/memory" + }, + controller_paths={ + 'cpu,cpuacct': "/sys/fs/cgroup/cpu,cpuacct/{0}".format(relative_path), + 'memory': "/sys/fs/cgroup/memory/{0}".format(relative_path) + } + ) + + with mock_cgroup_v1_environment(self.tmp_dir): + with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup", side_effect=mock_cgroup): + agent = Agent(False, conf_file_path=os.path.join(data_dir, "test_waagent.conf")) + agent.collect_logs(is_full_mode=True) + + mock_log_collector.assert_called_once() + mock_get_metrics_summary.assert_called_once() + + finally: + CollectLogsHandler.disable_monitor_cgroups_check() + def test_it_should_parse_setup_firewall_properly(self): test_firewall_meta = { diff --git a/tests_e2e/orchestrator/runbook.yml b/tests_e2e/orchestrator/runbook.yml index dfed709afe..b96cc5107c 100644 --- a/tests_e2e/orchestrator/runbook.yml +++ b/tests_e2e/orchestrator/runbook.yml @@ -54,6 +54,7 @@ variable: - publish_hostname - recover_network_interface - cgroup_v2_disabled + - log_collector # # Additional arguments pass to the test suites diff --git a/tests_e2e/test_suites/images.yml b/tests_e2e/test_suites/images.yml index 976c987f92..d7d578cbaf 100644 --- a/tests_e2e/test_suites/images.yml +++ b/tests_e2e/test_suites/images.yml @@ -55,6 +55,19 @@ image-sets: - "oracle_610" - "rhel_610" + # These are the distros which have periodic log collector support. 
+ log-collector-endorsed: + - "centos_82" + - "rhel_82" + - "ubuntu_1604" + - "ubuntu_1804" + - "ubuntu_2004" + - "ubuntu_2204" + - "ubuntu_2204_minimal" + - "ubuntu_2204_arm64" + - "ubuntu_2404" + - "ubuntu_2404_minimal" + # # An image can be specified by a string giving its urn, as in # diff --git a/tests_e2e/test_suites/log_collector.yml b/tests_e2e/test_suites/log_collector.yml new file mode 100644 index 0000000000..496198f921 --- /dev/null +++ b/tests_e2e/test_suites/log_collector.yml @@ -0,0 +1,8 @@ +# +# This test is used to verify that the log collector logs the expected behavior on periodic runs. +# +name: "LogCollector" +tests: + - "log_collector/log_collector.py" +images: + - "random(log-collector-endorsed, 1)" diff --git a/tests_e2e/tests/log_collector/log_collector.py b/tests_e2e/tests/log_collector/log_collector.py new file mode 100755 index 0000000000..c98a33ae86 --- /dev/null +++ b/tests_e2e/tests/log_collector/log_collector.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 + +# Microsoft Azure Linux Agent +# +# Copyright 2018 Microsoft Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import re +import time + +from assertpy import fail + +import tests_e2e.tests.lib.logging +from azurelinuxagent.common.utils.shellutil import CommandError +from tests_e2e.tests.lib.agent_test import AgentVmTest + + +class LogCollector(AgentVmTest): + """ + Tests that the log collector logs the expected behavior on periodic runs. 
+ """ + def run(self): + ssh_client = self._context.create_ssh_client() + ssh_client.run_command("update-waagent-conf Logs.Collect=y Debug.EnableCgroupV2ResourceLimiting=y Debug.LogCollectorInitialDelay=60", use_sudo=True) + # Wait for log collector to finish uploading logs + for _ in range(3): + time.sleep(90) + try: + ssh_client.run_command("grep 'Successfully uploaded logs' /var/log/waagent.log") + break + except CommandError: + tests_e2e.tests.lib.logging.log.info("The Agent has not finished log collection, will check again after a short delay") + else: + raise Exception("Timeout while waiting for the Agent to finish log collection") + + # Get any agent logs between log collector start and finish + try: + output = ssh_client.run_command( + "sed -n " + + "'/INFO CollectLogsHandler ExtHandler Starting log collection/, /INFO CollectLogsHandler ExtHandler Successfully uploaded logs/p' " + + "/var/log/waagent.log").rstrip().splitlines() + except Exception as e: + raise Exception("Unable to get log collector logs from waagent.log: {0}".format(e)) + + # These logs indicate a successful log collector run with resource enforcement and monitoring + expected = [ + r'.*Starting log collection', + r'.*Using cgroup v\d for resource enforcement and monitoring', + r'.*cpu(,cpuacct)? 
controller for cgroup: azure-walinuxagent-logcollector \[\/sys\/fs\/cgroup(\/cpu,cpuacct)?\/azure.slice\/azure-walinuxagent.slice\/azure-walinuxagent\-logcollector.slice\/collect\-logs.scope\]', + r'.*memory controller for cgroup: azure-walinuxagent-logcollector \[\/sys\/fs\/cgroup(\/memory)?\/azure.slice\/azure-walinuxagent.slice\/azure-walinuxagent\-logcollector.slice\/collect\-logs.scope\]', + r'.*Log collection successfully completed', + r'.*Successfully collected logs', + r'.*Successfully uploaded logs' + ] + + # Filter output to only include relevant log collector logs + lc_logs = [log for log in output if len([pattern for pattern in expected if re.match(pattern, log)]) > 0] + + # Check that all expected logs exist and are in the correct order + indent = lambda lines: "\n".join([f" {ln}" for ln in lines]) + if len(lc_logs) == len(expected) and all([re.match(expected[i], lc_logs[i]) is not None for i in range(len(expected))]): + tests_e2e.tests.lib.logging.log.info("The log collector run completed as expected.\nLog messages:\n%s", indent(lc_logs)) + else: + fail(f"The log collector run did not complete as expected.\nExpected:\n{indent(expected)}\nActual:\n{indent(lc_logs)}") + + ssh_client.run_command("update-waagent-conf Debug.EnableCgroupV2ResourceLimiting=n Debug.LogCollectorInitialDelay=5*60", + use_sudo=True) + + +if __name__ == "__main__": + LogCollector.run_from_command_line() diff --git a/tests_e2e/tests/scripts/agent_cpu_quota-check_agent_cpu_quota.py b/tests_e2e/tests/scripts/agent_cpu_quota-check_agent_cpu_quota.py index 5dfc55be82..29758d02b7 100755 --- a/tests_e2e/tests/scripts/agent_cpu_quota-check_agent_cpu_quota.py +++ b/tests_e2e/tests/scripts/agent_cpu_quota-check_agent_cpu_quota.py @@ -115,7 +115,7 @@ def check_agent_log_for_metrics() -> bool: if match is not None: processor_time.append(float(match.group(1))) else: - match = re.search(r"Throttled Time\s*\[walinuxagent.service\]\s*=\s*([0-9.]+)", record.message) + match = 
re.search(r"Throttled Time \(s\)\s*\[walinuxagent.service\]\s*=\s*([0-9.]+)", record.message) if match is not None: throttled_time.append(float(match.group(1))) if len(processor_time) < 1 or len(throttled_time) < 1: From ef87f403391dfab44de872a7d5233c0ff676705e Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Mon, 26 Aug 2024 17:24:35 -0500 Subject: [PATCH 228/240] workaround for python3.5 UTs build setup and replace assert_called_once mock method (#3191) * python3.5 workaround * replace assert_called_once * addressing comment --- .github/workflows/ci_pr.yml | 5 +++++ tests/common/protocol/test_protocol_util.py | 8 ++++---- tests/ga/test_exthandlers.py | 2 +- tests/ga/test_exthandlers_download_extension.py | 9 +++++---- tests/test_agent.py | 2 +- 5 files changed, 16 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci_pr.yml b/.github/workflows/ci_pr.yml index 05d2744273..96db6e7f9e 100644 --- a/.github/workflows/ci_pr.yml +++ b/.github/workflows/ci_pr.yml @@ -88,6 +88,9 @@ jobs: matrix: include: - python-version: "3.5" + # workaround found in https://github.com/actions/setup-python/issues/866 + # for issue "[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:728)" on Python 3.5 + pip_trusted_host: "pypi.python.org pypi.org files.pythonhosted.org" - python-version: "3.6" - python-version: "3.7" - python-version: "3.8" @@ -111,6 +114,8 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} + env: + PIP_TRUSTED_HOST: ${{ matrix.pip_trusted_host }} - name: Install dependencies id: install-dependencies diff --git a/tests/common/protocol/test_protocol_util.py b/tests/common/protocol/test_protocol_util.py index b60ca9af95..494d25319d 100644 --- a/tests/common/protocol/test_protocol_util.py +++ b/tests/common/protocol/test_protocol_util.py @@ -188,8 +188,8 @@ def test_get_protocol_wireserver_to_wireserver_update_removes_metadataserver_art 
self.assertFalse(os.path.exists(mds_cert_path)) # Check firewall rules was reset - protocol_util.osutil.remove_firewall.assert_called_once() - protocol_util.osutil.enable_firewall.assert_called_once() + self.assertEqual(1, protocol_util.osutil.remove_firewall.call_count, "remove_firewall should be called once") + self.assertEqual(1, protocol_util.osutil.enable_firewall.call_count, "enable_firewall should be called once") @patch('azurelinuxagent.common.conf.get_lib_dir') @patch('azurelinuxagent.common.conf.enable_firewall') @@ -234,8 +234,8 @@ def test_get_protocol_metadataserver_to_wireserver_update_removes_metadataserver self.assertTrue(os.path.isfile(ws_cert_path)) # Check firewall rules was reset - protocol_util.osutil.remove_firewall.assert_called_once() - protocol_util.osutil.enable_firewall.assert_called_once() + self.assertEqual(1, protocol_util.osutil.remove_firewall.call_count, "remove_firewall should be called once") + self.assertEqual(1, protocol_util.osutil.enable_firewall.call_count, "enable_firewall should be called once") # Check Protocol File is updated to WireProtocol with open(os.path.join(dir, PROTOCOL_FILE_NAME), "r") as f: diff --git a/tests/ga/test_exthandlers.py b/tests/ga/test_exthandlers.py index f56ebce14b..3252dcb239 100644 --- a/tests/ga/test_exthandlers.py +++ b/tests/ga/test_exthandlers.py @@ -681,7 +681,7 @@ def test_it_should_read_only_the_head_of_large_outputs(self): self.assertGreaterEqual(len(output), 1024) self.assertLessEqual(len(output), TELEMETRY_MESSAGE_MAX_LEN) - mock_format.assert_called_once() + self.assertEqual(1, mock_format.call_count, "format_stdout_stderr should be called once") args, kwargs = mock_format.call_args # pylint: disable=unused-variable stdout, stderr = args diff --git a/tests/ga/test_exthandlers_download_extension.py b/tests/ga/test_exthandlers_download_extension.py index b3ed96a89a..9f56a0202f 100644 --- a/tests/ga/test_exthandlers_download_extension.py +++ 
b/tests/ga/test_exthandlers_download_extension.py @@ -127,8 +127,8 @@ def stream(_, destination, **__): self.ext_handler_instance.download() # first download attempt should succeed - mock_stream.assert_called_once() - mock_report_event.assert_called_once() + self.assertEqual(1, mock_stream.call_count, "wireserver stream should be called once") + self.assertEqual(1, mock_report_event.call_count, "report_event should be called once") self._assert_download_and_expand_succeeded() @@ -154,7 +154,7 @@ def stream(_, destination, **__): with DownloadExtensionTestCase.create_mock_stream(stream) as mock_stream: self.ext_handler_instance.download() - mock_stream.assert_called_once() + self.assertEqual(1, mock_stream.call_count, "wireserver stream should be called once") self._assert_download_and_expand_succeeded() @@ -179,7 +179,8 @@ def stream(_, destination, **__): with DownloadExtensionTestCase.create_mock_stream(stream) as mock_stream: self.ext_handler_instance.download() - mock_stream.assert_called_once() + self.assertEqual(1, mock_stream.call_count, "wireserver stream should be called once") + self._assert_download_and_expand_succeeded() self.assertEqual(self.ext_handler_instance.get_handler_state(), ExtHandlerState.NotInstalled, "Ensure that the state is maintained for extension HandlerState") diff --git a/tests/test_agent.py b/tests/test_agent.py index 7073af6b73..62509d3fd7 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -271,7 +271,7 @@ def mock_cgroup(*args, **kwargs): # pylint: disable=W0613 agent = Agent(False, conf_file_path=os.path.join(data_dir, "test_waagent.conf")) agent.collect_logs(is_full_mode=True) - mock_log_collector.assert_called_once() + self.assertEqual(1, mock_log_collector.call_count, "LogCollector should be called once") finally: CollectLogsHandler.disable_monitor_cgroups_check() From 76da663fffa2da0bb4e7c7c0b1b036a1fa6e9e5f Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Mon, 26 Aug 
2024 15:41:51 -0700 Subject: [PATCH 229/240] Fix log collector unit tests on 3.5 (#3193) * Fix unit tests 3.5 * Fix ut --- tests/test_agent.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_agent.py b/tests/test_agent.py index 62509d3fd7..ad3024113b 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -298,7 +298,7 @@ def mock_cgroup(*args, **kwargs): # pylint: disable=W0613 agent = Agent(False, conf_file_path=os.path.join(data_dir, "test_waagent.conf")) agent.collect_logs(is_full_mode=True) - mock_log_collector.assert_called_once() + self.assertEqual(1, mock_log_collector.call_count, "LogCollector should be called once") finally: CollectLogsHandler.disable_monitor_cgroups_check() @@ -458,11 +458,11 @@ def raise_on_sys_exit(*args): @patch("azurelinuxagent.agent.LogCollector") @patch("azurelinuxagent.ga.collect_logs.LogCollectorMonitorHandler.get_max_recorded_metrics") - def test_collect_log_should_output_resource_usage_summary(self, mock_get_metrics_summary, mock_log_collector): + def test_collect_log_should_output_resource_usage_summary(self, mock_get_max_recorded_metrics, mock_log_collector): try: CollectLogsHandler.enable_monitor_cgroups_check() mock_log_collector.return_value.collect_logs_and_get_archive.return_value = (Mock(), Mock()) # LogCollector.collect_logs_and_get_archive returns a tuple - mock_get_metrics_summary.return_value = "metric summary" + mock_get_max_recorded_metrics.return_value = {} # Mock cgroup so process is in the log collector slice def mock_cgroup(*args, **kwargs): # pylint: disable=W0613 @@ -484,8 +484,8 @@ def mock_cgroup(*args, **kwargs): # pylint: disable=W0613 agent = Agent(False, conf_file_path=os.path.join(data_dir, "test_waagent.conf")) agent.collect_logs(is_full_mode=True) - mock_log_collector.assert_called_once() - mock_get_metrics_summary.assert_called_once() + self.assertEqual(1, mock_log_collector.call_count, "LogCollector should be called once") + self.assertEqual(1, 
mock_get_max_recorded_metrics.call_count, "get_max_recorded_metrics should be called once") finally: CollectLogsHandler.disable_monitor_cgroups_check() From 8dbb0163c6c9c21634f883722d66ed594d004a47 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Mon, 26 Aug 2024 16:01:18 -0700 Subject: [PATCH 230/240] Fix JIT for FIPS 140-3 (#3190) * . * . * ., --------- Co-authored-by: narrieta@microsoft --- azurelinuxagent/common/protocol/goal_state.py | 4 +-- azurelinuxagent/common/protocol/wire.py | 27 ++++++++++++++++--- tests/common/protocol/test_wire.py | 22 +++++++++++++++ tests/lib/mock_wire_protocol.py | 5 ++-- 4 files changed, 51 insertions(+), 7 deletions(-) diff --git a/azurelinuxagent/common/protocol/goal_state.py b/azurelinuxagent/common/protocol/goal_state.py index 2eb89c1ebb..607710e3f3 100644 --- a/azurelinuxagent/common/protocol/goal_state.py +++ b/azurelinuxagent/common/protocol/goal_state.py @@ -212,7 +212,7 @@ def update(self, silent=False): except GoalStateInconsistentError as e: message = "Detected an inconsistency in the goal state: {0}".format(ustr(e)) self.logger.warn(message) - add_event(op=WALAEventOperation.GoalState, is_success=False, message=message) + add_event(op=WALAEventOperation.GoalState, is_success=False, log_event=False, message=message) self._update(force_update=True) @@ -503,7 +503,7 @@ def _fetch_full_wire_server_goal_state(self, incarnation, xml_doc): if GoalStateProperties.RemoteAccessInfo & self._goal_state_properties: remote_access_uri = findtext(container, "RemoteAccessInfo") if remote_access_uri is not None: - xml_text = self._wire_client.fetch_config(remote_access_uri, self._wire_client.get_header_for_cert()) + xml_text = self._wire_client.fetch_config(remote_access_uri, self._wire_client.get_header_for_remote_access()) remote_access = RemoteAccess(xml_text) if self._save_to_history: self._history.save_remote_access(xml_text) diff --git a/azurelinuxagent/common/protocol/wire.py b/azurelinuxagent/common/protocol/wire.py index 
6ec5b1e23c..751dd2afb8 100644 --- a/azurelinuxagent/common/protocol/wire.py +++ b/azurelinuxagent/common/protocol/wire.py @@ -38,7 +38,7 @@ ResourceGoneError, ExtensionDownloadError, InvalidContainerError, ProtocolError, HttpError, ExtensionErrorCodes from azurelinuxagent.common.future import httpclient, bytebuffer, ustr from azurelinuxagent.common.protocol.goal_state import GoalState, TRANSPORT_CERT_FILE_NAME, TRANSPORT_PRV_FILE_NAME, \ - GoalStateProperties + GoalStateProperties, GoalStateInconsistentError from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol from azurelinuxagent.common.protocol.restapi import DataContract, ProvisionStatus, VMInfo, VMStatus from azurelinuxagent.common.telemetryevent import GuestAgentExtensionEventsSchema @@ -86,7 +86,22 @@ def detect(self, init_goal_state=True, save_to_history=False): # Initialize the goal state, including all the inner properties if init_goal_state: logger.info('Initializing goal state during protocol detection') - self.client.reset_goal_state(save_to_history=save_to_history) + # + # TODO: Currently protocol detection retrieves the entire goal state. This is not needed; in particular, retrieving the Extensions goal state + # is not needed. However, the goal state is cached in self.client._goal_state and other components, including the Extension Handler, + # depend on this cached value. This has been a long-standing issue that causes multiple problems. Before removing the cached goal state, + # though, a careful review of these dependencies is needed. + # + # One of the problems of fetching the full goal state is that issues while retrieving it can block protocol detection and make the + # Agent go into a retry loop that can last 1 full hour. 
One particular error, GoalStateInconsistentError, can arise if the certificates + # needed by extensions are missing from the goal state; for example, if a FastTrack goal state is out of sync with the corresponding + # Fabric goal state that contains the certificates, or if decryption of the certificates fais (and hence, the certificate list is + # empty). The try/except below handles only this one particular problem. + # + try: + self.client.reset_goal_state(save_to_history=save_to_history) + except GoalStateInconsistentError as error: + logger.warn("{0}", ustr(error)) def update_host_plugin_from_goal_state(self): self.client.update_host_plugin_from_goal_state() @@ -1126,6 +1141,12 @@ def get_header_for_xml_content(self): } def get_header_for_cert(self): + return self._get_header_for_encrypted_request("DES_EDE3_CBC") + + def get_header_for_remote_access(self): + return self._get_header_for_encrypted_request("AES128_CBC") + + def _get_header_for_encrypted_request(self, cypher): trans_cert_file = os.path.join(conf.get_lib_dir(), TRANSPORT_CERT_FILE_NAME) try: content = fileutil.read_file(trans_cert_file) @@ -1136,7 +1157,7 @@ def get_header_for_cert(self): return { "x-ms-agent-name": "WALinuxAgent", "x-ms-version": PROTOCOL_VERSION, - "x-ms-cipher-name": "DES_EDE3_CBC", + "x-ms-cipher-name": cypher, "x-ms-guest-agent-public-x509-cert": cert } diff --git a/tests/common/protocol/test_wire.py b/tests/common/protocol/test_wire.py index 8cf0c7d462..2fbd817ff8 100644 --- a/tests/common/protocol/test_wire.py +++ b/tests/common/protocol/test_wire.py @@ -479,6 +479,28 @@ def test_report_event_large_event(self, patch_send_event, *args): # pylint: dis self.assertEqual(patch_send_event.call_count, 0) + def test_get_header_for_cert_should_use_triple_des(self, *_): + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: + headers = protocol.client.get_header_for_cert() + self.assertIn("x-ms-cipher-name", headers) + self.assertEqual(headers["x-ms-cipher-name"], 
"DES_EDE3_CBC", "Unexpected x-ms-cipher-name") + + def test_get_header_for_remote_access_should_use_aes128(self, *_): + with mock_wire_protocol(wire_protocol_data.DATA_FILE) as protocol: + headers = protocol.client.get_header_for_remote_access() + self.assertIn("x-ms-cipher-name", headers) + self.assertEqual(headers["x-ms-cipher-name"], "AES128_CBC", "Unexpected x-ms-cipher-name") + + def test_detect_should_handle_inconsistent_goal_state_errors(self, *_): + data_file = wire_protocol_data.DATA_FILE_VM_SETTINGS # Certificates are checked only on FastTrack goal states + data_file['certs'] = "wire/certs-2.xml" # Change the certificates to force a GoalStateInconsistentError + with mock_wire_protocol(data_file, detect_protocol=False) as protocol: + with patch("azurelinuxagent.common.logger.warn") as mock_warn: + protocol.detect() + self.assertTrue( + any(len(args) == 2 and args[1].startswith("[GoalStateInconsistentError]") for args, _ in mock_warn.call_args_list), + "Did not find any warnings about an GoalStateInconsistentError: {0}".format(mock_warn.call_args_list)) + class TestWireClient(HttpRequestPredicates, AgentTestCase): def test_get_ext_conf_without_extensions_should_retrieve_vmagent_manifests_info(self, *args): # pylint: disable=unused-argument diff --git a/tests/lib/mock_wire_protocol.py b/tests/lib/mock_wire_protocol.py index 78cbc59e2e..2cf2b10e0a 100644 --- a/tests/lib/mock_wire_protocol.py +++ b/tests/lib/mock_wire_protocol.py @@ -22,7 +22,7 @@ @contextlib.contextmanager -def mock_wire_protocol(mock_wire_data_file, http_get_handler=None, http_post_handler=None, http_put_handler=None, do_not_mock=lambda method, url: False, fail_on_unknown_request=True, save_to_history=False): +def mock_wire_protocol(mock_wire_data_file, http_get_handler=None, http_post_handler=None, http_put_handler=None, do_not_mock=lambda method, url: False, fail_on_unknown_request=True, save_to_history=False, detect_protocol=True): """ Creates a WireProtocol object that handles requests 
to the WireServer, the Host GA Plugin, and some requests to storage (requests that provide mock data in wire_protocol_data.py). @@ -149,7 +149,8 @@ def stop(): # go do it try: protocol.start() - protocol.detect(save_to_history=save_to_history) + if detect_protocol: + protocol.detect(save_to_history=save_to_history) yield protocol finally: protocol.stop() From 04490fb9b844b4df059aef6f2ac2cddafbcd5551 Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Tue, 27 Aug 2024 11:43:30 -0700 Subject: [PATCH 231/240] Capture logcollector pattern only once (#3194) * Capture logcollector pattern only once * Add comment --- tests_e2e/tests/log_collector/log_collector.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests_e2e/tests/log_collector/log_collector.py b/tests_e2e/tests/log_collector/log_collector.py index c98a33ae86..fed8159c0f 100755 --- a/tests_e2e/tests/log_collector/log_collector.py +++ b/tests_e2e/tests/log_collector/log_collector.py @@ -46,10 +46,10 @@ def run(self): # Get any agent logs between log collector start and finish try: - output = ssh_client.run_command( - "sed -n " + - "'/INFO CollectLogsHandler ExtHandler Starting log collection/, /INFO CollectLogsHandler ExtHandler Successfully uploaded logs/p' " + - "/var/log/waagent.log").rstrip().splitlines() + # We match the first full log collector run in the agent log (this test just needs to validate any full log collector run, does not matter if it's the first or last) + lc_start_pattern = "INFO CollectLogsHandler ExtHandler Starting log collection" + lc_end_pattern = "INFO CollectLogsHandler ExtHandler Successfully uploaded logs" + output = ssh_client.run_command("sed -n '/{0}/,/{1}/{{p;/{1}/q}}' /var/log/waagent.log".format(lc_start_pattern, lc_end_pattern)).rstrip().splitlines() except Exception as e: raise Exception("Unable to get log collector logs from waagent.log: {0}".format(e)) From 730510ec07530f31b13a9466121b0bd069541c84 Mon 
Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Wed, 28 Aug 2024 10:40:24 -0700 Subject: [PATCH 232/240] Check agent Slice unit property before setting up azure.slice (#3196) (#3198) (cherry picked from commit bdd4a4b1ebf1954364a96d6b5f87fb5f2bed3153) --- azurelinuxagent/ga/cgroupconfigurator.py | 27 ++++++++++++++++-------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/azurelinuxagent/ga/cgroupconfigurator.py b/azurelinuxagent/ga/cgroupconfigurator.py index 2a75344b42..efb00d4810 100644 --- a/azurelinuxagent/ga/cgroupconfigurator.py +++ b/azurelinuxagent/ga/cgroupconfigurator.py @@ -181,21 +181,30 @@ def initialize(self): log_cgroup_warning("Unable to determine which cgroup version to use: {0}".format(ustr(e)), send_event=True) return - # Setup the slices before v2 check. Cgroup v2 usage is disabled for agent and extensions, but can be - # enabled for log collector in waagent.conf. The log collector slice should be created in case v2 - # usage is enabled for log collector. - self.__setup_azure_slice() - - if self.using_cgroup_v2(): - log_cgroup_info("Agent and extensions resource monitoring is not currently supported on cgroup v2") - return - + # We check the agent unit 'Slice' property before setting up azure.slice. This check is done first + # because the agent's Slice unit property will be 'azure.slice' if the slice drop-in file exists, even + # though systemd has not moved the agent to azure.slice yet. Systemd will only move the agent to + # azure.slice after a service restart. agent_unit_name = systemd.get_agent_unit_name() agent_slice = systemd.get_unit_property(agent_unit_name, "Slice") if agent_slice not in (AZURE_SLICE, "system.slice"): log_cgroup_warning("The agent is within an unexpected slice: {0}".format(agent_slice)) return + # Notes about slice setup: + # 1. 
On first agent update (for machines where daemon version did not already create azure.slice), the + # agent creates azure.slice and the agent unit Slice drop-in file, but systemd does not move the agent + # unit to azure.slice until service restart. It is ok to enable cgroup usage in this case if agent is + # running in system.slice. + # 2. We setup the slices before v2 check. Cgroup v2 usage is disabled for agent and extensions, but + # can be enabled for log collector in waagent.conf. The log collector slice should be created in case + # v2 usage is enabled for log collector. + self.__setup_azure_slice() + + if self.using_cgroup_v2(): + log_cgroup_info("Agent and extensions resource monitoring is not currently supported on cgroup v2") + return + # Log mount points/root paths for cgroup controllers self._cgroups_api.log_root_paths() From 3195fdc768da9ed8790ab10b18c614f6b30842ca Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 3 Sep 2024 10:05:17 -0700 Subject: [PATCH 233/240] version update to 2.12.0.0 (#3195) --- azurelinuxagent/common/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/common/version.py b/azurelinuxagent/common/version.py index 98065489c3..2787d246dc 100644 --- a/azurelinuxagent/common/version.py +++ b/azurelinuxagent/common/version.py @@ -209,7 +209,7 @@ def has_logrotate(): # # When doing a release, be sure to use the actual agent version. 
Current agent version: 2.4.0.0 # -AGENT_VERSION = '9.9.9.9' +AGENT_VERSION = '2.12.0.0' AGENT_LONG_VERSION = "{0}-{1}".format(AGENT_NAME, AGENT_VERSION) AGENT_DESCRIPTION = """ The Azure Linux Agent supports the provisioning and running of Linux From 83213bc0f33d435dd2a13ff149de949e68d5e2d2 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 3 Sep 2024 13:17:45 -0700 Subject: [PATCH 234/240] fixing attribute error (#3202) --- tests_e2e/orchestrator/lib/agent_test_suite_combinator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py index 22b1afd9b4..07bb366328 100644 --- a/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py +++ b/tests_e2e/orchestrator/lib/agent_test_suite_combinator.py @@ -451,7 +451,7 @@ def _get_runbook_images(self, loader: AgentTestLoader) -> List[VmImageInfo]: return images # If it is not image or image set, it must be a URN, VHD, or an image from a gallery - if not self._is_urn(self.runbook.image) and not self._is_vhd(self.runbook.image) and not self._is_image_from_gallery(self.runbook.image): + if not self._is_urn(self.runbook.image) and not self._is_vhd(self.runbook.image) and not CustomImage._is_image_from_gallery(self.runbook.image): raise Exception(f"The 'image' parameter must be an image, image set name, urn, vhd, or an image from a shared gallery: {self.runbook.image}") i = VmImageInfo() From 8fb3982f07460709839d90833bcfeeb1dc1c9578 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Tue, 3 Sep 2024 13:27:18 -0700 Subject: [PATCH 235/240] version update to 2.12.0.1" (#3203) --- azurelinuxagent/common/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/common/version.py b/azurelinuxagent/common/version.py index 2787d246dc..f62a3791a9 100644 --- 
a/azurelinuxagent/common/version.py +++ b/azurelinuxagent/common/version.py @@ -209,7 +209,7 @@ def has_logrotate(): # # When doing a release, be sure to use the actual agent version. Current agent version: 2.4.0.0 # -AGENT_VERSION = '2.12.0.0' +AGENT_VERSION = '2.12.0.1' AGENT_LONG_VERSION = "{0}-{1}".format(AGENT_NAME, AGENT_VERSION) AGENT_DESCRIPTION = """ The Azure Linux Agent supports the provisioning and running of Linux From 6e849db5015e24059368b75e1a194c511a0b990a Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Fri, 20 Sep 2024 14:27:29 -0700 Subject: [PATCH 236/240] supress too-many-positional-args pylint warn (#3224) (#3225) (cherry picked from commit 4dcf95c07a4b84d3a48f68ae41030b2489cb1ea0) --- .github/workflows/ci_pr.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci_pr.yml b/.github/workflows/ci_pr.yml index 96db6e7f9e..9ae44ec8cb 100644 --- a/.github/workflows/ci_pr.yml +++ b/.github/workflows/ci_pr.yml @@ -148,9 +148,14 @@ jobs: # * 'contextmanager-generator-missing-cleanup' are false positives if yield is used inside an if-else block for contextmanager generator functions. # (https://pylint.readthedocs.io/en/latest/user_guide/messages/warning/contextmanager-generator-missing-cleanup.html). # This is not implemented on versions (3.0-3.7) Bad option value 'contextmanager-generator-missing-cleanup' (bad-option-value) + # * 3.9-3.11 will produce "too-many-positional-arguments" for several methods that are having more than 5 args, so we suppress that warning. 
+ # (R0917: Too many positional arguments (8/5) (too-many-positional-arguments)) PYLINT_OPTIONS="--rcfile=ci/pylintrc --jobs=0" if [[ "${{ matrix.python-version }}" == "3.9" ]]; then - PYLINT_OPTIONS="$PYLINT_OPTIONS --disable=no-member --ignore=main.py" + PYLINT_OPTIONS="$PYLINT_OPTIONS --disable=no-member,too-many-positional-arguments --ignore=main.py" + fi + if [[ "${{ matrix.python-version }}" =~ ^3\.(10|11)$ ]]; then + PYLINT_OPTIONS="$PYLINT_OPTIONS --disable=too-many-positional-arguments" fi if [[ "${{ matrix.python-version }}" =~ ^3\.[0-7]$ ]]; then PYLINT_OPTIONS="$PYLINT_OPTIONS --disable=no-self-use,bad-option-value" From f09ccd165d3fc237932f4370a5ad4b911dc45c0e Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Fri, 20 Sep 2024 14:35:11 -0700 Subject: [PATCH 237/240] move setupslice after cgroupsv2 check, remove unit file for log collector and remove fiirewall daemon-reload (#3223) (#3226) * move daemon reload * test fix * UT test * firewall daemon-reload * address comments * address comments (cherry picked from commit 47e969a87b7c740e2e4dd057608bf2dd2e77dacd) --- azurelinuxagent/ga/cgroupconfigurator.py | 30 ++++++-------------- azurelinuxagent/ga/persist_firewall_rules.py | 10 +------ tests/ga/test_cgroupconfigurator.py | 18 ++++-------- tests/ga/test_persist_firewall_rules.py | 11 ------- tests/lib/mock_command.py | 10 +++++-- tests/lib/mock_environment.py | 5 ++++ 6 files changed, 29 insertions(+), 55 deletions(-) diff --git a/azurelinuxagent/ga/cgroupconfigurator.py b/azurelinuxagent/ga/cgroupconfigurator.py index efb00d4810..22634bb64c 100644 --- a/azurelinuxagent/ga/cgroupconfigurator.py +++ b/azurelinuxagent/ga/cgroupconfigurator.py @@ -68,16 +68,6 @@ LOGCOLLECTOR_SLICE = "azure-walinuxagent-logcollector.slice" # More info on resource limits properties in systemd here: # 
https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/resource_management_guide/sec-modifying_control_groups -_LOGCOLLECTOR_SLICE_CONTENTS_FMT = """ -[Unit] -Description=Slice for Azure VM Agent Periodic Log Collector -DefaultDependencies=no -Before=slices.target -[Slice] -CPUAccounting=yes -CPUQuota={cpu_quota} -MemoryAccounting=yes -""" LOGCOLLECTOR_CPU_QUOTA_FOR_V1_AND_V2 = "5%" LOGCOLLECTOR_MEMORY_THROTTLE_LIMIT_FOR_V2 = "170M" LOGCOLLECTOR_MAX_THROTTLED_EVENTS_FOR_V2 = 10 @@ -181,6 +171,11 @@ def initialize(self): log_cgroup_warning("Unable to determine which cgroup version to use: {0}".format(ustr(e)), send_event=True) return + # TODO: Move this and systemd system check to cgroups_supported logic above + if self.using_cgroup_v2(): + log_cgroup_info("Agent and extensions resource monitoring is not currently supported on cgroup v2") + return + # We check the agent unit 'Slice' property before setting up azure.slice. This check is done first # because the agent's Slice unit property will be 'azure.slice' if the slice drop-in file exists, even # though systemd has not moved the agent to azure.slice yet. Systemd will only move the agent to @@ -192,18 +187,12 @@ def initialize(self): return # Notes about slice setup: - # 1. On first agent update (for machines where daemon version did not already create azure.slice), the + # On first agent update (for machines where daemon version did not already create azure.slice), the # agent creates azure.slice and the agent unit Slice drop-in file, but systemd does not move the agent # unit to azure.slice until service restart. It is ok to enable cgroup usage in this case if agent is # running in system.slice. - # 2. We setup the slices before v2 check. Cgroup v2 usage is disabled for agent and extensions, but - # can be enabled for log collector in waagent.conf. The log collector slice should be created in case - # v2 usage is enabled for log collector. 
- self.__setup_azure_slice() - if self.using_cgroup_v2(): - log_cgroup_info("Agent and extensions resource monitoring is not currently supported on cgroup v2") - return + self.__setup_azure_slice() # Log mount points/root paths for cgroup controllers self._cgroups_api.log_root_paths() @@ -295,9 +284,8 @@ def __setup_azure_slice(): if not os.path.exists(vmextensions_slice): files_to_create.append((vmextensions_slice, _VMEXTENSIONS_SLICE_CONTENTS)) - # Update log collector slice contents - slice_contents = _LOGCOLLECTOR_SLICE_CONTENTS_FMT.format(cpu_quota=LOGCOLLECTOR_CPU_QUOTA_FOR_V1_AND_V2) - files_to_create.append((logcollector_slice, slice_contents)) + # New agent will setup limits for scope instead slice, so removing existing logcollector slice. + CGroupConfigurator._Impl.__cleanup_unit_file(logcollector_slice) if fileutil.findre_in_file(agent_unit_file, r"Slice=") is not None: CGroupConfigurator._Impl.__cleanup_unit_file(agent_drop_in_file_slice) diff --git a/azurelinuxagent/ga/persist_firewall_rules.py b/azurelinuxagent/ga/persist_firewall_rules.py index a20e2874aa..e7c8373ecb 100644 --- a/azurelinuxagent/ga/persist_firewall_rules.py +++ b/azurelinuxagent/ga/persist_firewall_rules.py @@ -199,8 +199,7 @@ def _setup_network_setup_service(self): # Create unit file with default values self.__set_service_unit_file() - # Reload systemd configurations when we setup the service for the first time to avoid systemctl warnings - self.__reload_systemd_conf() + # After modifying the service, systemctl may issue a warning when checking the service, and daemon-reload should not be used to clear the warning, since it can affect other services logger.info("Successfully added and enabled the {0}".format(self._network_setup_service_name)) def __setup_binary_file(self): @@ -297,13 +296,6 @@ def __log_network_setup_service_logs(self): message=msg, log_event=False) - def __reload_systemd_conf(self): - try: - logger.info("Executing systemctl daemon-reload for setting up 
{0}".format(self._network_setup_service_name)) - shellutil.run_command(["systemctl", "daemon-reload"]) - except Exception as exception: - logger.warn("Unable to reload systemctl configurations: {0}".format(ustr(exception))) - def __get_unit_file_version(self): if not os.path.exists(self.get_service_file_path()): raise OSError("{0} not found".format(self.get_service_file_path())) diff --git a/tests/ga/test_cgroupconfigurator.py b/tests/ga/test_cgroupconfigurator.py index 9af0d88d7e..1ea7d9325c 100644 --- a/tests/ga/test_cgroupconfigurator.py +++ b/tests/ga/test_cgroupconfigurator.py @@ -221,26 +221,20 @@ def test_initialize_should_create_unit_files_when_the_agent_service_file_is_not_ self.assertTrue(os.path.exists(agent_drop_in_file_cpu_accounting), "{0} was not created".format(agent_drop_in_file_cpu_accounting)) self.assertTrue(os.path.exists(agent_drop_in_file_memory_accounting), "{0} was not created".format(agent_drop_in_file_memory_accounting)) - def test_initialize_should_update_logcollector_memorylimit(self): + def test_initialize_should_clear_logcollector_slice(self): with self._get_cgroup_configurator(initialize=False) as configurator: log_collector_unit_file = configurator.mocks.get_mapped_path(UnitFilePaths.logcollector) - original_memory_limit = "MemoryLimit=30M" - # The mock creates the slice unit file with memory limit + # The mock creates the slice unit file configurator.mocks.add_data_file(os.path.join(data_dir, 'init', "azure-walinuxagent-logcollector.slice"), UnitFilePaths.logcollector) - if not os.path.exists(log_collector_unit_file): - raise Exception("{0} should have been created during test setup".format(log_collector_unit_file)) - if not fileutil.findre_in_file(log_collector_unit_file, original_memory_limit): - raise Exception("MemoryLimit was not set correctly. Expected: {0}. 
Got:\n{1}".format( - original_memory_limit, fileutil.read_file(log_collector_unit_file))) + + self.assertTrue(os.path.exists(log_collector_unit_file), "{0} was not created".format(log_collector_unit_file)) configurator.initialize() - # initialize() should update the unit file to remove the memory limit - self.assertFalse(fileutil.findre_in_file(log_collector_unit_file, original_memory_limit), - "Log collector slice unit file was not updated correctly. Expected no memory limit. Got:\n{0}".format( - fileutil.read_file(log_collector_unit_file))) + # initialize() should remove the unit file + self.assertFalse(os.path.exists(log_collector_unit_file), "{0} should not have been created".format(log_collector_unit_file)) def test_setup_extension_slice_should_create_unit_files(self): with self._get_cgroup_configurator() as configurator: diff --git a/tests/ga/test_persist_firewall_rules.py b/tests/ga/test_persist_firewall_rules.py index adcf43b752..7754f1efb2 100644 --- a/tests/ga/test_persist_firewall_rules.py +++ b/tests/ga/test_persist_firewall_rules.py @@ -127,13 +127,6 @@ def __assert_systemctl_called(self, cmd="enable", validate_command_called=True): else: self.assertNotIn(systemctl_command, self.__executed_commands, "Systemctl command {0} found".format(cmd)) - def __assert_systemctl_reloaded(self, validate_command_called=True): - systemctl_reload = ["systemctl", "daemon-reload"] - if validate_command_called: - self.assertIn(systemctl_reload, self.__executed_commands, "Systemctl config not reloaded") - else: - self.assertNotIn(systemctl_reload, self.__executed_commands, "Systemctl config reloaded") - def __assert_firewall_cmd_running_called(self, validate_command_called=True): cmd = PersistFirewallRulesHandler._FIREWALLD_RUNNING_CMD if validate_command_called: @@ -144,7 +137,6 @@ def __assert_firewall_cmd_running_called(self, validate_command_called=True): def __assert_network_service_setup_properly(self): self.__assert_systemctl_called(cmd="is-enabled", 
validate_command_called=True) self.__assert_systemctl_called(cmd="enable", validate_command_called=True) - self.__assert_systemctl_reloaded() self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.PassThrough, validate_command_called=False) self.assertTrue(os.path.exists(self._network_service_unit_file), "Service unit file should be there") self.assertTrue(os.path.exists(self._binary_file), "Binary file should be there") @@ -200,7 +192,6 @@ def __setup_and_assert_network_service_setup_scenario(self, handler, mock_popen= self.__assert_systemctl_called(cmd="is-enabled", validate_command_called=True) self.__assert_systemctl_called(cmd="enable", validate_command_called=True) - self.__assert_systemctl_reloaded(validate_command_called=True) self.__assert_firewall_cmd_running_called(validate_command_called=True) self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.QueryPassThrough, validate_command_called=False) self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.RemovePassThrough, validate_command_called=False) @@ -234,7 +225,6 @@ def test_it_should_skip_setup_if_agent_network_setup_service_already_enabled_and self.__assert_systemctl_called(cmd="is-enabled", validate_command_called=True) self.__assert_systemctl_called(cmd="enable", validate_command_called=False) - self.__assert_systemctl_reloaded(validate_command_called=False) self.__assert_firewall_cmd_running_called(validate_command_called=True) self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.QueryPassThrough, validate_command_called=False) self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.RemovePassThrough, validate_command_called=False) @@ -396,7 +386,6 @@ def test_it_should_delete_custom_service_files_if_firewalld_enabled(self): self.__assert_firewall_called(cmd=FirewallCmdDirectCommands.PassThrough, validate_command_called=True) self.__assert_systemctl_called(cmd="is-enabled", validate_command_called=False) self.__assert_systemctl_called(cmd="enable", 
validate_command_called=False) - self.__assert_systemctl_reloaded(validate_command_called=False) self.assertFalse(os.path.exists(handler.get_service_file_path()), "Service unit file found") self.assertFalse(os.path.exists(os.path.join(conf.get_lib_dir(), handler.BINARY_FILE_NAME)), "Binary file found") diff --git a/tests/lib/mock_command.py b/tests/lib/mock_command.py index e181d26d97..83509c3d37 100755 --- a/tests/lib/mock_command.py +++ b/tests/lib/mock_command.py @@ -2,12 +2,18 @@ import os import sys -if len(sys.argv) != 4: +if len(sys.argv) < 4: sys.stderr.write("usage: {0} ".format(os.path.basename(__file__))) # W0632: Possible unbalanced tuple unpacking with sequence: left side has 3 label(s), right side has 0 value(s) (unbalanced-tuple-unpacking) # Disabled: Unpacking is balanced: there is a check for the length on line 5 -stdout, return_value, stderr = sys.argv[1:] # pylint: disable=W0632 + +# This script will be used for mocking cgroups commands in test, when popen called this script will be executed instead of actual commands +# We pass stdout, return_value, stderr of the mocked command output as arguments to this script and this script will print them to stdout, stderr and exit with the return value +# So that popen gets the output of the mocked command. Ideally we should get 4 arguments in sys.argv, first one is the script name, next 3 are the actual command output +# But somehow when we run the tests from pycharm, it adds extra arguments next to the script name, so we need to handle that when reading the arguments +# ex: /home/nag/Documents/repos/WALinuxAgent/tests/lib/mock_command.py /snap/pycharm-professional/412/plugins/python-ce/helpers/py... 
+BLKID +ELFUTILS +KMOD -IDN2 +IDN -PCRE2 default-hierarchy=hybrid\n 0 +stdout, return_value, stderr = sys.argv[-3:] # pylint: disable=W0632 if stdout != '': sys.stdout.write(stdout) diff --git a/tests/lib/mock_environment.py b/tests/lib/mock_environment.py index 8f5682cf8e..5b72093584 100644 --- a/tests/lib/mock_environment.py +++ b/tests/lib/mock_environment.py @@ -76,12 +76,14 @@ def __init__(self, tmp_dir, commands=None, paths=None, files=None, data_files=No self._original_popen = subprocess.Popen self._original_mkdir = fileutil.mkdir self._original_path_exists = os.path.exists + self._original_os_remove = os.remove self._original_open = open self.patchers = [ patch_builtin("open", side_effect=self._mock_open), patch("subprocess.Popen", side_effect=self._mock_popen), patch("os.path.exists", side_effect=self._mock_path_exists), + patch("os.remove", side_effect=self._mock_os_remove), patch("azurelinuxagent.common.utils.fileutil.mkdir", side_effect=self._mock_mkdir) ] @@ -166,3 +168,6 @@ def _mock_open(self, path, *args, **kwargs): def _mock_path_exists(self, path): return self._original_path_exists(self.get_mapped_path(path)) + def _mock_os_remove(self, path): + return self._original_os_remove(self.get_mapped_path(path)) + From c1bc00481f0cdda8414294b4769e9de8680f693a Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Mon, 23 Sep 2024 12:46:22 -0700 Subject: [PATCH 238/240] Ubuntu 24 image (#25) (#3229) (#3230) * Update ubuntu 24 * Add ubuntu 24 to nat clouds * Add arm64 ubuntu 24 * Update all ubuntu images * Skip arm64 in nat clouds * Fix syntax issues (cherry picked from commit 31adf25ca97abd8c7a83d71c2a3a5d20f32c82fe) --- tests_e2e/test_suites/images.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests_e2e/test_suites/images.yml b/tests_e2e/test_suites/images.yml index d7d578cbaf..0935bffd7e 100644 --- a/tests_e2e/test_suites/images.yml +++ b/tests_e2e/test_suites/images.yml @@ -40,6 
+40,7 @@ image-sets: - "azure-linux_3_arm64" - "rhel_90_arm64" - "ubuntu_2204_arm64" + - "ubuntu_2404_arm64" # As of today agent only support and enabled resource governance feature on following distros cgroups-endorsed: @@ -67,6 +68,7 @@ image-sets: - "ubuntu_2204_arm64" - "ubuntu_2404" - "ubuntu_2404_minimal" + - "ubuntu_2404_arm64" # # An image can be specified by a string giving its urn, as in @@ -205,9 +207,9 @@ images: AzureChinaCloud: [] AzureUSGovernment: [] ubuntu_2204_minimal: "Canonical 0001-com-ubuntu-minimal-jammy minimal-22_04-lts-gen2 latest" - ubuntu_2404: - # TODO: Currently using the daily build, update to the release build once it is available - urn: "Canonical 0001-com-ubuntu-server-noble-daily 24_04-daily-lts-gen2 latest" + ubuntu_2404: "Canonical ubuntu-24_04-lts server latest" + ubuntu_2404_arm64: + urn: "Canonical ubuntu-24_04-lts server-arm64 latest" locations: AzureChinaCloud: [] AzureUSGovernment: [] From 9d639a303fff8017dd571a41d2c8e44a32156a1a Mon Sep 17 00:00:00 2001 From: maddieford <93676569+maddieford@users.noreply.github.com> Date: Mon, 23 Sep 2024 16:07:47 -0700 Subject: [PATCH 239/240] Add controller/cgroup path telemetry (#3231) --- azurelinuxagent/ga/cgroupapi.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/azurelinuxagent/ga/cgroupapi.py b/azurelinuxagent/ga/cgroupapi.py index 72b41ec773..b030633f23 100644 --- a/azurelinuxagent/ga/cgroupapi.py +++ b/azurelinuxagent/ga/cgroupapi.py @@ -375,9 +375,9 @@ def log_root_paths(self): for controller in CgroupV1.get_supported_controller_names(): mount_point = self._cgroup_mountpoints.get(controller) if mount_point is None: - log_cgroup_info("The {0} controller is not mounted".format(controller), send_event=False) + log_cgroup_info("The {0} controller is not mounted".format(controller)) else: - log_cgroup_info("The {0} controller is mounted at {1}".format(controller, mount_point), send_event=False) + log_cgroup_info("The {0} controller is 
mounted at {1}".format(controller, mount_point)) def start_extension_command(self, extension_name, command, cmd_name, timeout, shell, cwd, env, stdout, stderr, error_code=ExtensionErrorCodes.PluginUnknownFailure): @@ -546,12 +546,12 @@ def get_process_cgroup(self, process_id, cgroup_name): return CgroupV2(cgroup_name=cgroup_name, root_cgroup_path=self._root_cgroup_path, cgroup_path=cgroup_path, enabled_controllers=self._controllers_enabled_at_root) def log_root_paths(self): - log_cgroup_info("The root cgroup path is {0}".format(self._root_cgroup_path), send_event=False) + log_cgroup_info("The root cgroup path is {0}".format(self._root_cgroup_path)) for controller in CgroupV2.get_supported_controller_names(): if controller in self._controllers_enabled_at_root: - log_cgroup_info("The {0} controller is enabled at the root cgroup".format(controller), send_event=False) + log_cgroup_info("The {0} controller is enabled at the root cgroup".format(controller)) else: - log_cgroup_info("The {0} controller is not enabled at the root cgroup".format(controller), send_event=False) + log_cgroup_info("The {0} controller is not enabled at the root cgroup".format(controller)) def start_extension_command(self, extension_name, command, cmd_name, timeout, shell, cwd, env, stdout, stderr, error_code=ExtensionErrorCodes.PluginUnknownFailure): @@ -630,17 +630,18 @@ def get_controllers(self, expected_relative_path=None): controller_mountpoint = self._controller_mountpoints.get(supported_controller_name) if controller_mountpoint is None: + # Do not send telemetry here. 
We already have telemetry for unmounted controllers in cgroup init log_cgroup_warning("{0} controller is not mounted; will not track".format(supported_controller_name), send_event=False) continue if controller_path is None: - log_cgroup_warning("{0} is not mounted for the {1} cgroup; will not track".format(supported_controller_name, self._cgroup_name), send_event=False) + log_cgroup_warning("{0} is not mounted for the {1} cgroup; will not track".format(supported_controller_name, self._cgroup_name)) continue if expected_relative_path is not None: expected_path = os.path.join(controller_mountpoint, expected_relative_path) if controller_path != expected_path: - log_cgroup_warning("The {0} controller is not mounted at the expected path for the {1} cgroup; will not track. Actual cgroup path:[{2}] Expected:[{3}]".format(supported_controller_name, self._cgroup_name, controller_path, expected_path), send_event=False) + log_cgroup_warning("The {0} controller is not mounted at the expected path for the {1} cgroup; will not track. Actual cgroup path:[{2}] Expected:[{3}]".format(supported_controller_name, self._cgroup_name, controller_path, expected_path)) continue if supported_controller_name == self.CPU_CONTROLLER: @@ -650,7 +651,7 @@ def get_controllers(self, expected_relative_path=None): if controller is not None: msg = "{0} controller for cgroup: {1}".format(supported_controller_name, controller) - log_cgroup_info(msg, send_event=False) + log_cgroup_info(msg) controllers.append(controller) return controllers @@ -705,13 +706,13 @@ def get_controllers(self, expected_relative_path=None): controller = None if supported_controller_name not in self._enabled_controllers: + # Do not send telemetry here. 
We already have telemetry for disabled controllers in cgroup init log_cgroup_warning("{0} controller is not enabled; will not track".format(supported_controller_name), send_event=False) continue if self._cgroup_path == "": - log_cgroup_warning("Cgroup path for {0} cannot be determined; will not track".format(self._cgroup_name), - send_event=False) + log_cgroup_warning("Cgroup path for {0} cannot be determined; will not track".format(self._cgroup_name)) continue if expected_relative_path is not None: @@ -719,7 +720,7 @@ def get_controllers(self, expected_relative_path=None): if self._cgroup_path != expected_path: log_cgroup_warning( "The {0} cgroup is not mounted at the expected path; will not track. Actual cgroup path:[{1}] Expected:[{2}]".format( - self._cgroup_name, self._cgroup_path, expected_path), send_event=False) + self._cgroup_name, self._cgroup_path, expected_path)) continue if supported_controller_name == self.CPU_CONTROLLER: @@ -729,7 +730,7 @@ def get_controllers(self, expected_relative_path=None): if controller is not None: msg = "{0} controller for cgroup: {1}".format(supported_controller_name, controller) - log_cgroup_info(msg, send_event=False) + log_cgroup_info(msg) controllers.append(controller) return controllers From f9ce539ab3fc02eb8c732067023a08d48975456b Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Mon, 23 Sep 2024 16:59:35 -0700 Subject: [PATCH 240/240] version update to 2.12.0.2 (#3233) --- azurelinuxagent/common/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/common/version.py b/azurelinuxagent/common/version.py index f62a3791a9..9700236c0d 100644 --- a/azurelinuxagent/common/version.py +++ b/azurelinuxagent/common/version.py @@ -209,7 +209,7 @@ def has_logrotate(): # # When doing a release, be sure to use the actual agent version. 
Current agent version: 2.4.0.0 # -AGENT_VERSION = '2.12.0.1' +AGENT_VERSION = '2.12.0.2' AGENT_LONG_VERSION = "{0}-{1}".format(AGENT_NAME, AGENT_VERSION) AGENT_DESCRIPTION = """ The Azure Linux Agent supports the provisioning and running of Linux