Update logcollector telemetry with common properties #3242

Merged 3 commits on Oct 11, 2024

Changes from all commits
16 changes: 10 additions & 6 deletions azurelinuxagent/agent.py
@@ -34,12 +34,13 @@
from azurelinuxagent.ga import logcollector, cgroupconfigurator
from azurelinuxagent.ga.cgroupcontroller import AGENT_LOG_COLLECTOR
from azurelinuxagent.ga.cpucontroller import _CpuController
- from azurelinuxagent.ga.cgroupapi import get_cgroup_api, log_cgroup_warning, InvalidCgroupMountpointException
+ from azurelinuxagent.ga.cgroupapi import get_cgroup_api, InvalidCgroupMountpointException
from azurelinuxagent.ga.firewall_manager import FirewallManager

import azurelinuxagent.common.conf as conf
import azurelinuxagent.common.event as event
import azurelinuxagent.common.logger as logger
+ from azurelinuxagent.common.event import WALAEventOperation
from azurelinuxagent.common.future import ustr
from azurelinuxagent.ga.logcollector import LogCollector, OUTPUT_RESULTS_FILE_PATH
from azurelinuxagent.common.osutil import get_osutil
@@ -208,28 +209,31 @@ def collect_logs(self, is_full_mode):
else:
logger.info("Running log collector mode normal")

+ LogCollector.initialize_telemetry()
Contributor Author:
Previously this happened in LogCollector's init. We don't initialize the LogCollector until after the cgroup checks, so the cgroup check events were sent with uninitialized common properties (see PR description; a condensed sketch of the new ordering follows this file's diff).


# Check the cgroups unit
log_collector_monitor = None
tracked_controllers = []
if CollectLogsHandler.is_enabled_monitor_cgroups_check():
try:
cgroup_api = get_cgroup_api()
except InvalidCgroupMountpointException as e:
log_cgroup_warning("The agent does not support cgroups if the default systemd mountpoint is not being used: {0}".format(ustr(e)), send_event=True)
event.warn(WALAEventOperation.LogCollection, "The agent does not support cgroups if the default systemd mountpoint is not being used: {0}", ustr(e))
sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)
except CGroupsException as e:
log_cgroup_warning("Unable to determine which cgroup version to use: {0}".format(ustr(e)), send_event=True)
event.warn(WALAEventOperation.LogCollection, "Unable to determine which cgroup version to use: {0}", ustr(e))
sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)

log_collector_cgroup = cgroup_api.get_process_cgroup(process_id="self", cgroup_name=AGENT_LOG_COLLECTOR)
tracked_controllers = log_collector_cgroup.get_controllers()

if len(tracked_controllers) != len(log_collector_cgroup.get_supported_controller_names()):
log_cgroup_warning("At least one required controller is missing. The following controllers are required for the log collector to run: {0}".format(log_collector_cgroup.get_supported_controller_names()))
event.warn(WALAEventOperation.LogCollection, "At least one required controller is missing. The following controllers are required for the log collector to run: {0}", log_collector_cgroup.get_supported_controller_names())
sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)

- if not log_collector_cgroup.check_in_expected_slice(cgroupconfigurator.LOGCOLLECTOR_SLICE):
-     log_cgroup_warning("The Log Collector process is not in the proper cgroups", send_event=False)
+ expected_slice = cgroupconfigurator.LOGCOLLECTOR_SLICE
+ if not log_collector_cgroup.check_in_expected_slice(expected_slice):
+     event.warn(WALAEventOperation.LogCollection, "The Log Collector process is not in the proper cgroups. Expected slice: {0}", expected_slice)
sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)

try:
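The comment above explains the motivation for the new call site: the telemetry common parameters must be initialized before any of the cgroup-check events are sent. Below is a condensed sketch of the resulting flow in Agent.collect_logs, with abbreviated messages; it is an illustration, not the agent's verbatim code:

    # Condensed sketch (not verbatim): initialize telemetry first so that events
    # emitted during the cgroup checks carry the common properties.
    LogCollector.initialize_telemetry()

    if CollectLogsHandler.is_enabled_monitor_cgroups_check():
        try:
            cgroup_api = get_cgroup_api()
        except InvalidCgroupMountpointException as e:
            # event.warn(op, format_string, *args) sends a telemetry warning;
            # the message here is shortened relative to the real one.
            event.warn(WALAEventOperation.LogCollection, "Unsupported cgroup mountpoint: {0}", ustr(e))
            sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)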
9 changes: 5 additions & 4 deletions azurelinuxagent/ga/logcollector.py
@@ -27,7 +27,7 @@
from heapq import heappush, heappop

from azurelinuxagent.common.conf import get_lib_dir, get_ext_log_dir, get_agent_log_file
- from azurelinuxagent.common.event import initialize_event_logger_vminfo_common_parameters
+ from azurelinuxagent.common.event import initialize_event_logger_vminfo_common_parameters, add_event, WALAEventOperation
from azurelinuxagent.common.future import ustr
from azurelinuxagent.ga.logcollector_manifests import MANIFEST_NORMAL, MANIFEST_FULL

@@ -76,7 +76,6 @@ def __init__(self, is_full_mode=False):
self._must_collect_files = self._expand_must_collect_files()
self._create_base_dirs()
self._set_logger()
- self._initialize_telemetry()
Contributor Author:
This is now done in azurelinuxagent.agent.Agent.collect_logs (see the caller sketch after this file's diff).


@staticmethod
def _mkdir(dirname):
@@ -104,7 +103,7 @@ def _set_logger():
_LOGGER.setLevel(logging.INFO)

@staticmethod
- def _initialize_telemetry():
+ def initialize_telemetry():
protocol = get_protocol_util().get_protocol(init_goal_state=False)
protocol.client.reset_goal_state(goal_state_properties=GoalStateProperties.RoleConfig | GoalStateProperties.HostingEnv)
# Initialize the common parameters for telemetry events
@@ -326,7 +325,9 @@ def _get_final_list_for_archive(self, priority_file_queue):
if e.errno == 2: # [Errno 2] No such file or directory
_LOGGER.warning("File %s does not exist, skipping collection for this file", file_path)

_LOGGER.info("Uncompressed archive size is %s b", total_uncompressed_size)
msg = "Uncompressed archive size is {0} b".format(total_uncompressed_size)
_LOGGER.info(msg)
add_event(op=WALAEventOperation.LogCollection, message=msg)
Contributor Author:
This event is added before the process enters the main loop, in an effort to capture this information even for runs that fail because the memory limit is exceeded.


return final_files_to_collect, total_uncompressed_size

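Because _initialize_telemetry is no longer invoked from __init__ and is now the public static method initialize_telemetry, whoever drives LogCollector has to set up telemetry first. A minimal, hypothetical driver for illustration (the real entry point is Agent.collect_logs, shown in the previous file):

    from azurelinuxagent.ga.logcollector import LogCollector

    # Hypothetical standalone driver, for illustration only.
    LogCollector.initialize_telemetry()      # must run before any event is sent
    log_collector = LogCollector(is_full_mode=False)
    archive, uncompressed_size = log_collector.collect_logs_and_get_archive()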
40 changes: 16 additions & 24 deletions tests/ga/test_logcollector.py
@@ -211,9 +211,8 @@ def test_log_collector_parses_commands_in_manifest(self):
diskinfo,""".format(folder_to_list, file_to_collect)

with patch("azurelinuxagent.ga.logcollector.MANIFEST_NORMAL", manifest):
- with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'):
Contributor Author:
These unit tests mocked _initialize_telemetry because it was called from LogCollector's init. Now that it has been removed from init, the mock is no longer necessary (a hypothetical assertion on the new telemetry event follows this file's diff).

-     log_collector = LogCollector()
-     archive, uncompressed_file_size = log_collector.collect_logs_and_get_archive()
+ log_collector = LogCollector()
+ archive, uncompressed_file_size = log_collector.collect_logs_and_get_archive()

with open(self.output_results_file_path, "r") as fh:
results = fh.readlines()
@@ -241,9 +240,8 @@ def test_log_collector_uses_full_manifest_when_full_mode_enabled(self):
""".format(file_to_collect)

with patch("azurelinuxagent.ga.logcollector.MANIFEST_FULL", manifest):
- with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'):
-     log_collector = LogCollector(is_full_mode=True)
-     archive, uncompressed_file_size = log_collector.collect_logs_and_get_archive()
+ log_collector = LogCollector(is_full_mode=True)
+ archive, uncompressed_file_size = log_collector.collect_logs_and_get_archive()

self._assert_archive_created(archive)
self._assert_files_are_in_archive(expected_files=[file_to_collect])
@@ -256,9 +254,8 @@ def test_log_collector_should_collect_all_files(self):
# All files in the manifest should be collected, since none of them are over the individual file size limit,
# and combined they do not cross the archive size threshold.

- with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'):
-     log_collector = LogCollector()
-     archive, uncompressed_file_size = log_collector.collect_logs_and_get_archive()
+ log_collector = LogCollector()
+ archive, uncompressed_file_size = log_collector.collect_logs_and_get_archive()

self._assert_archive_created(archive)

@@ -282,9 +279,8 @@ def test_log_collector_should_truncate_large_text_files_and_ignore_large_binary_files(self):
def test_log_collector_should_truncate_large_text_files_and_ignore_large_binary_files(self):
# Set the size limit so that some files are too large to collect in full.
with patch("azurelinuxagent.ga.logcollector._FILE_SIZE_LIMIT", SMALL_FILE_SIZE):
- with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'):
-     log_collector = LogCollector()
-     archive, uncompressed_file_size = log_collector.collect_logs_and_get_archive()
+ log_collector = LogCollector()
+ archive, uncompressed_file_size = log_collector.collect_logs_and_get_archive()

self._assert_archive_created(archive)

@@ -323,9 +319,8 @@ def test_log_collector_should_prioritize_important_files_if_archive_too_big(self

with patch("azurelinuxagent.ga.logcollector._UNCOMPRESSED_ARCHIVE_SIZE_LIMIT", 10 * 1024 * 1024):
with patch("azurelinuxagent.ga.logcollector._MUST_COLLECT_FILES", must_collect_files):
- with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'):
-     log_collector = LogCollector()
-     archive, uncompressed_file_size = log_collector.collect_logs_and_get_archive()
+ log_collector = LogCollector()
+ archive, uncompressed_file_size = log_collector.collect_logs_and_get_archive()

self._assert_archive_created(archive)

@@ -382,9 +377,8 @@ def test_log_collector_should_prioritize_important_files_if_archive_too_big(self
def test_log_collector_should_update_archive_when_files_are_new_or_modified_or_deleted(self):
# Ensure the archive reflects the state of files on the disk at collection time. If a file was updated, it
# needs to be updated in the archive, deleted if removed from disk, and added if not previously seen.
- with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'):
-     log_collector = LogCollector()
-     first_archive, first_uncompressed_file_size = log_collector.collect_logs_and_get_archive()
+ log_collector = LogCollector()
+ first_archive, first_uncompressed_file_size = log_collector.collect_logs_and_get_archive()
self._assert_archive_created(first_archive)

# Everything should be in the archive
@@ -461,9 +455,8 @@ def test_log_collector_should_clean_up_uncollected_truncated_files(self):
with patch("azurelinuxagent.ga.logcollector._UNCOMPRESSED_ARCHIVE_SIZE_LIMIT", 2 * SMALL_FILE_SIZE):
with patch("azurelinuxagent.ga.logcollector._MUST_COLLECT_FILES", must_collect_files):
with patch("azurelinuxagent.ga.logcollector._FILE_SIZE_LIMIT", SMALL_FILE_SIZE):
with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'):
log_collector = LogCollector()
archive, uncompressed_file_size = log_collector.collect_logs_and_get_archive()
log_collector = LogCollector()
archive, uncompressed_file_size = log_collector.collect_logs_and_get_archive()

self._assert_archive_created(archive)

@@ -490,9 +483,8 @@ def test_log_collector_should_clean_up_uncollected_truncated_files(self):
with patch("azurelinuxagent.ga.logcollector._UNCOMPRESSED_ARCHIVE_SIZE_LIMIT", 2 * SMALL_FILE_SIZE):
with patch("azurelinuxagent.ga.logcollector._MUST_COLLECT_FILES", must_collect_files):
with patch("azurelinuxagent.ga.logcollector._FILE_SIZE_LIMIT", SMALL_FILE_SIZE):
with patch('azurelinuxagent.ga.logcollector.LogCollector._initialize_telemetry'):
log_collector = LogCollector()
second_archive, second_uncompressed_file_size = log_collector.collect_logs_and_get_archive()
log_collector = LogCollector()
second_archive, second_uncompressed_file_size = log_collector.collect_logs_and_get_archive()

expected_files = [
os.path.join(self.root_collect_dir, "waagent.log"),
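None of the updated tests asserts on the new add_event call; the sketch below shows how such an assertion could look inside the existing fixtures of tests/ga/test_logcollector.py. The patch target and the assertion are assumptions for illustration, not part of this PR:

    # Hypothetical test body (assumes this test class's usual setup/teardown fixtures).
    with patch("azurelinuxagent.ga.logcollector.add_event") as mock_add_event:
        log_collector = LogCollector()
        archive, uncompressed_file_size = log_collector.collect_logs_and_get_archive()

    # add_event is called with op=... and message=...; verify the size event was sent.
    self.assertTrue(
        any("Uncompressed archive size" in kwargs.get("message", "")
            for _, kwargs in mock_add_event.call_args_list),
        "Expected an 'Uncompressed archive size' telemetry event")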