Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/develop' into imp-event
Browse files Browse the repository at this point in the history
  • Loading branch information
nagworld9 committed Sep 23, 2024
2 parents efd2fb4 + 69c7bfb commit 9400195
Show file tree
Hide file tree
Showing 172 changed files with 4,898 additions and 2,251 deletions.
1 change: 1 addition & 0 deletions .github/PULL_REQUEST_TEMPLATE.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ This will expedite the process of getting your pull request merged and avoid ext
---

### PR information
- [ ] Ensure development PR is based on the `develop` branch.
- [ ] The title of the PR is clear and informative.
- [ ] There are a small number of commits, each of which has an informative message. This means that previously merged commits do not appear in the history of the PR. For information on cleaning up the commits in your pull request, [see this page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md).
- [ ] If applicable, the PR references the bug/issue that it fixes in the description.
Expand Down
15 changes: 13 additions & 2 deletions .github/workflows/ci_pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ jobs:

env:
NOSEOPTS: "--verbose"

ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true

steps:
- uses: actions/checkout@v3

Expand Down Expand Up @@ -87,6 +88,9 @@ jobs:
matrix:
include:
- python-version: "3.5"
# workaround found in https://github.com/actions/setup-python/issues/866
# for issue "[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:728)" on Python 3.5
pip_trusted_host: "pypi.python.org pypi.org files.pythonhosted.org"
- python-version: "3.6"
- python-version: "3.7"
- python-version: "3.8"
Expand All @@ -110,6 +114,8 @@ jobs:
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
env:
PIP_TRUSTED_HOST: ${{ matrix.pip_trusted_host }}

- name: Install dependencies
id: install-dependencies
Expand Down Expand Up @@ -142,9 +148,14 @@ jobs:
# * 'contextmanager-generator-missing-cleanup' are false positives if yield is used inside an if-else block for contextmanager generator functions.
# (https://pylint.readthedocs.io/en/latest/user_guide/messages/warning/contextmanager-generator-missing-cleanup.html).
# This is not implemented on versions (3.0-3.7) Bad option value 'contextmanager-generator-missing-cleanup' (bad-option-value)
# * 3.9-3.11 will produce "too-many-positional-arguments" for several methods that have more than 5 positional arguments, so we suppress that warning.
# (R0917: Too many positional arguments (8/5) (too-many-positional-arguments))
PYLINT_OPTIONS="--rcfile=ci/pylintrc --jobs=0"
if [[ "${{ matrix.python-version }}" == "3.9" ]]; then
PYLINT_OPTIONS="$PYLINT_OPTIONS --disable=no-member --ignore=main.py"
PYLINT_OPTIONS="$PYLINT_OPTIONS --disable=no-member,too-many-positional-arguments --ignore=main.py"
fi
if [[ "${{ matrix.python-version }}" =~ ^3\.(10|11)$ ]]; then
PYLINT_OPTIONS="$PYLINT_OPTIONS --disable=too-many-positional-arguments"
fi
if [[ "${{ matrix.python-version }}" =~ ^3\.[0-7]$ ]]; then
PYLINT_OPTIONS="$PYLINT_OPTIONS --disable=no-self-use,bad-option-value"
Expand Down
63 changes: 33 additions & 30 deletions azurelinuxagent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

from __future__ import print_function

import json
import os
import re
import subprocess
Expand All @@ -31,7 +32,8 @@

from azurelinuxagent.common.exception import CGroupsException
from azurelinuxagent.ga import logcollector, cgroupconfigurator
from azurelinuxagent.ga.cgroup import AGENT_LOG_COLLECTOR, CpuCgroup, MemoryCgroup
from azurelinuxagent.ga.cgroupcontroller import AGENT_LOG_COLLECTOR
from azurelinuxagent.ga.cpucontroller import _CpuController
from azurelinuxagent.ga.cgroupapi import get_cgroup_api, log_cgroup_warning, InvalidCgroupMountpointException

import azurelinuxagent.common.conf as conf
Expand Down Expand Up @@ -208,8 +210,7 @@ def collect_logs(self, is_full_mode):

# Check the cgroups unit
log_collector_monitor = None
cpu_cgroup_path = None
memory_cgroup_path = None
tracked_controllers = []
if CollectLogsHandler.is_enabled_monitor_cgroups_check():
try:
cgroup_api = get_cgroup_api()
Expand All @@ -220,44 +221,46 @@ def collect_logs(self, is_full_mode):
log_cgroup_warning("Unable to determine which cgroup version to use: {0}".format(ustr(e)), send_event=True)
sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)

cpu_cgroup_path, memory_cgroup_path = cgroup_api.get_process_cgroup_paths("self")
cpu_slice_matches = False
memory_slice_matches = False
if cpu_cgroup_path is not None:
cpu_slice_matches = (cgroupconfigurator.LOGCOLLECTOR_SLICE in cpu_cgroup_path)
if memory_cgroup_path is not None:
memory_slice_matches = (cgroupconfigurator.LOGCOLLECTOR_SLICE in memory_cgroup_path)

if not cpu_slice_matches or not memory_slice_matches:
log_cgroup_warning("The Log Collector process is not in the proper cgroups:", send_event=False)
if not cpu_slice_matches:
log_cgroup_warning("\tunexpected cpu slice: {0}".format(cpu_cgroup_path), send_event=False)
if not memory_slice_matches:
log_cgroup_warning("\tunexpected memory slice: {0}".format(memory_cgroup_path), send_event=False)
log_collector_cgroup = cgroup_api.get_process_cgroup(process_id="self", cgroup_name=AGENT_LOG_COLLECTOR)
tracked_controllers = log_collector_cgroup.get_controllers()

if len(tracked_controllers) != len(log_collector_cgroup.get_supported_controller_names()):
log_cgroup_warning("At least one required controller is missing. The following controllers are required for the log collector to run: {0}".format(log_collector_cgroup.get_supported_controller_names()))
sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)

def initialize_cgroups_tracking(cpu_cgroup_path, memory_cgroup_path):
cpu_cgroup = CpuCgroup(AGENT_LOG_COLLECTOR, cpu_cgroup_path)
msg = "Started tracking cpu cgroup {0}".format(cpu_cgroup)
logger.info(msg)
cpu_cgroup.initialize_cpu_usage()
memory_cgroup = MemoryCgroup(AGENT_LOG_COLLECTOR, memory_cgroup_path)
msg = "Started tracking memory cgroup {0}".format(memory_cgroup)
logger.info(msg)
return [cpu_cgroup, memory_cgroup]
if not log_collector_cgroup.check_in_expected_slice(cgroupconfigurator.LOGCOLLECTOR_SLICE):
log_cgroup_warning("The Log Collector process is not in the proper cgroups", send_event=False)
sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)

try:
log_collector = LogCollector(is_full_mode)
# Running log collector resource(CPU, Memory) monitoring only if agent starts the log collector.
# Running log collector resource monitoring only if agent starts the log collector.
# If the log collector is started by any other means, it will not be monitored.
if CollectLogsHandler.is_enabled_monitor_cgroups_check():
tracked_cgroups = initialize_cgroups_tracking(cpu_cgroup_path, memory_cgroup_path)
log_collector_monitor = get_log_collector_monitor_handler(tracked_cgroups)
for controller in tracked_controllers:
if isinstance(controller, _CpuController):
controller.initialize_cpu_usage()
break
log_collector_monitor = get_log_collector_monitor_handler(tracked_controllers)
log_collector_monitor.run()
archive = log_collector.collect_logs_and_get_archive()

archive, total_uncompressed_size = log_collector.collect_logs_and_get_archive()
logger.info("Log collection successfully completed. Archive can be found at {0} "
"and detailed log output can be found at {1}".format(archive, OUTPUT_RESULTS_FILE_PATH))

if log_collector_monitor is not None:
log_collector_monitor.stop()
try:
metrics_summary = log_collector_monitor.get_max_recorded_metrics()
metrics_summary['Total Uncompressed File Size (B)'] = total_uncompressed_size
msg = json.dumps(metrics_summary)
logger.info(msg)
event.add_event(op=event.WALAEventOperation.LogCollection, message=msg, log_event=False)
except Exception as e:
msg = "An error occurred while reporting log collector resource usage summary: {0}".format(ustr(e))
logger.warn(msg)
event.add_event(op=event.WALAEventOperation.LogCollection, is_success=False, message=msg, log_event=False)

except Exception as e:
logger.error("Log collection completed unsuccessfully. Error: {0}".format(ustr(e)))
logger.info("Detailed log output can be found at {0}".format(OUTPUT_RESULTS_FILE_PATH))
Expand Down
5 changes: 3 additions & 2 deletions azurelinuxagent/common/agent_supported_feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,15 @@ def __init__(self):
class _GAVersioningGovernanceFeature(AgentSupportedFeature):
"""
CRP drives the RSM update if the agent reports, via this flag, that it supports RSM upgrades; otherwise CRP falls back to the largest version.
Agent doesn't report supported feature flag if auto update is disabled or old version of agent running that doesn't understand GA versioning.
The agent doesn't report the supported feature flag if auto update is disabled, if an old version of the agent that doesn't understand GA versioning is running,
or if support for versioning is explicitly disabled in the agent.
Note: Windows in particular needs this flag to report to CRP that the GA doesn't support updates, so Linux adopted the same flag to have a common solution.
"""

__NAME = SupportedFeatureNames.GAVersioningGovernance
__VERSION = "1.0"
__SUPPORTED = conf.get_auto_update_to_latest_version()
__SUPPORTED = conf.get_auto_update_to_latest_version() and conf.get_enable_ga_versioning()

def __init__(self):
super(_GAVersioningGovernanceFeature, self).__init__(name=self.__NAME,
Expand Down
55 changes: 31 additions & 24 deletions azurelinuxagent/common/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,9 @@ def load_conf_from_file(conf_file_path, conf=__conf__):
"Debug.CgroupDisableOnQuotaCheckFailure": True,
"Debug.EnableAgentMemoryUsageCheck": False,
"Debug.EnableFastTrack": True,
"Debug.EnableGAVersioning": True
"Debug.EnableGAVersioning": True,
"Debug.EnableCgroupV2ResourceLimiting": False,
"Debug.EnableExtensionPolicy": False
}


Expand All @@ -168,9 +170,7 @@ def load_conf_from_file(conf_file_path, conf=__conf__):
"ResourceDisk.MountPoint": "/mnt/resource",
"ResourceDisk.MountOptions": None,
"ResourceDisk.Filesystem": "ext3",
"AutoUpdate.GAFamily": "Prod",
"Debug.CgroupMonitorExpiryTime": "2022-03-31",
"Debug.CgroupMonitorExtensionName": "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent",
"AutoUpdate.GAFamily": "Prod"
}


Expand Down Expand Up @@ -200,7 +200,8 @@ def load_conf_from_file(conf_file_path, conf=__conf__):
"Debug.EtpCollectionPeriod": 300,
"Debug.AutoUpdateHotfixFrequency": 14400,
"Debug.AutoUpdateNormalFrequency": 86400,
"Debug.FirewallRulesLogPeriod": 86400
"Debug.FirewallRulesLogPeriod": 86400,
"Debug.LogCollectorInitialDelay": 5 * 60
}


Expand Down Expand Up @@ -613,25 +614,6 @@ def get_enable_agent_memory_usage_check(conf=__conf__):
"""
return conf.get_switch("Debug.EnableAgentMemoryUsageCheck", False)


def get_cgroup_monitor_expiry_time(conf=__conf__):
"""
cgroups monitoring for pilot extensions disabled after expiry time
NOTE: This option is experimental and may be removed in later versions of the Agent.
"""
return conf.get("Debug.CgroupMonitorExpiryTime", "2022-03-31")


def get_cgroup_monitor_extension_name (conf=__conf__):
"""
cgroups monitoring extension name
NOTE: This option is experimental and may be removed in later versions of the Agent.
"""
return conf.get("Debug.CgroupMonitorExtensionName", "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent")


def get_enable_fast_track(conf=__conf__):
"""
If True, the agent use FastTrack when retrieving goal states
Expand Down Expand Up @@ -680,3 +662,28 @@ def get_firewall_rules_log_period(conf=__conf__):
NOTE: This option is experimental and may be removed in later versions of the Agent.
"""
return conf.get_int("Debug.FirewallRulesLogPeriod", 86400)


def get_extension_policy_enabled(conf=__conf__):
    """
    Report whether extension policy is enabled. When it is, policy will be enforced before
    installing any extensions.

    The value is read from the "Debug.EnableExtensionPolicy" switch, with False supplied as the
    fallback argument.
    NOTE: This option is experimental and may be removed in later versions of the Agent.
    """
    policy_enabled = conf.get_switch("Debug.EnableExtensionPolicy", False)
    return policy_enabled


def get_enable_cgroup_v2_resource_limiting(conf=__conf__):
    """
    Report whether the agent should enable resource monitoring and enforcement for the log
    collector on machines using cgroup v2.

    The value is read from the "Debug.EnableCgroupV2ResourceLimiting" switch, with False supplied
    as the fallback argument.
    NOTE: This option is experimental and may be removed in later versions of the Agent.
    """
    limiting_enabled = conf.get_switch("Debug.EnableCgroupV2ResourceLimiting", False)
    return limiting_enabled


def get_log_collector_initial_delay(conf=__conf__):
    """
    Return the initial delay applied at service start before the first periodic log collection.

    The value is read from the "Debug.LogCollectorInitialDelay" integer setting; 5 * 60 is
    supplied as the fallback argument (presumably seconds, i.e. five minutes -- confirm against
    the caller).
    NOTE: This option is experimental and may be removed in later versions of the Agent.
    """
    fallback_delay = 5 * 60
    initial_delay = conf.get_int("Debug.LogCollectorInitialDelay", fallback_delay)
    return initial_delay
5 changes: 4 additions & 1 deletion azurelinuxagent/common/event.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ class WALAEventOperation:
Downgrade = "Downgrade"
Download = "Download"
Enable = "Enable"
ExtensionHandlerManifest = "ExtensionHandlerManifest"
ExtensionPolicy = "ExtensionPolicy"
ExtensionProcessing = "ExtensionProcessing"
ExtensionTelemetryEventProcessing = "ExtensionTelemetryEventProcessing"
FetchGoalState = "FetchGoalState"
Expand All @@ -111,6 +113,7 @@ class WALAEventOperation:
OpenSsl = "OpenSsl"
Partition = "Partition"
PersistFirewallRules = "PersistFirewallRules"
Policy = "Policy"
ProvisionAfterExtensions = "ProvisionAfterExtensions"
PluginSettingsVersionMismatch = "PluginSettingsVersionMismatch"
InvalidExtensionConfig = "InvalidExtensionConfig"
Expand Down Expand Up @@ -433,7 +436,7 @@ def initialize_vminfo_common_parameters(self, protocol):
logger.warn("Failed to get VM info from goal state; will be missing from telemetry: {0}", ustr(e))

try:
imds_client = get_imds_client(protocol.get_endpoint())
imds_client = get_imds_client()
imds_info = imds_client.get_compute()
parameters[CommonTelemetryEventSchema.Location].value = imds_info.location
parameters[CommonTelemetryEventSchema.SubscriptionId].value = imds_info.subscriptionId
Expand Down
2 changes: 1 addition & 1 deletion azurelinuxagent/common/osutil/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def _get_osutil(distro_name, distro_code_name, distro_version, distro_full_name)
if distro_name == "iosxe":
return IosxeOSUtil()

if distro_name == "mariner":
if distro_name in ["mariner", "azurelinux"]:
return MarinerOSUtil()

if distro_name == "nsbsd":
Expand Down
Loading

0 comments on commit 9400195

Please sign in to comment.