Skip to content

Commit

Permalink
Add docstring to create_template() function and make it faster (#90)
Browse files Browse the repository at this point in the history
* Update create_template with documentation and change the function to use a list comprehension to make it faster

* Add unit tests for the create_template function

* Add a slight speed improvement, as local variable access is faster than instance variable access

* Update docstring and move length assertion to top of function.

Removed the param_str local variable, as it only adds overhead to function calls in which param_str is never accessed.

Signed-off-by: Nikolai Kummer <nikolai.kummer@gmail.com>

---------

Signed-off-by: Nikolai Kummer <nikolai.kummer@gmail.com>
Co-authored-by: Superskyyy <Superskyyy@outlook.com>
  • Loading branch information
nikolai-kummer and Superskyyy authored Apr 24, 2024
1 parent c7496f6 commit 9c50c5f
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 7 deletions.
16 changes: 9 additions & 7 deletions drain3/drain.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,14 +413,16 @@ def get_seq_distance(self, seq1: Sequence[str], seq2: Sequence[str], include_par
return ret_val, param_count

def create_template(self, seq1: Sequence[str], seq2: Sequence[str]) -> Sequence[str]:
    """
    Loop through two sequences and create a template sequence that
    replaces unmatched tokens with the parameter string.

    Positions where both sequences hold the same token keep that token;
    every other position is replaced with self.param_str.

    :param seq1: first sequence
    :param seq2: second sequence
    :return: new list with param_str in place of unmatched tokens
    :raises AssertionError: if the two sequences differ in length
    """
    # Raised explicitly rather than via `assert` so the check survives
    # `python -O`; without it zip() would silently truncate to the shorter
    # sequence and return a template of the wrong length.
    if len(seq1) != len(seq2):
        raise AssertionError("seq1 and seq2 must have the same length")

    # self.param_str is only looked up for mismatching positions, so fully
    # matching sequences never pay for the attribute access.
    return [token2 if token1 == token2 else self.param_str for token1, token2 in zip(seq1, seq2)]

def match(self, content: str, full_search_strategy: str = "never") -> Optional[LogCluster]:
"""
Expand Down
15 changes: 15 additions & 0 deletions tests/test_drain.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,3 +257,18 @@ def test_match_only(self):
c: LogCluster = model.match("nothing")
self.assertIsNone(c)

def test_create_template(self):
    """Exercise Drain.create_template() on differing, identical and unequal-length inputs."""
    drain = Drain(param_str="*")

    first = ["aa", "bb", "dd"]
    second = ["aa", "bb", "cc"]

    # The only differing position (index 2) is replaced by the parameter string.
    self.assertListEqual(["aa", "bb", "*"], drain.create_template(first, second))

    # Identical sequences produce a template equal to the input.
    self.assertListEqual(first, drain.create_template(first, first))

    # Sequences of different lengths are rejected.
    with self.assertRaises(AssertionError):
        drain.create_template(first, ["aa"])

0 comments on commit 9c50c5f

Please sign in to comment.