Skip to content

Commit

Permalink
implements globs for container matching
Browse files Browse the repository at this point in the history
  • Loading branch information
richardlehane committed May 27, 2023
1 parent 528698f commit 6c1f379
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 15 deletions.
82 changes: 68 additions & 14 deletions internal/containermatcher/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ import (
"encoding/binary"
"errors"
"fmt"
"path/filepath"
"strings"

"github.com/richardlehane/siegfried/internal/bytematcher"
"github.com/richardlehane/siegfried/internal/bytematcher/frames"
Expand Down Expand Up @@ -144,28 +146,42 @@ type ContainerMatcher struct {
ctype
startIndexes []int // added to hits - these place all container matches in a single slice
conType containerType
nameCTest map[string]*cTest
parts []int // corresponds with each signature: represents the number of CTests for each sig
nameCTest map[string]*cTest // map of literal paths to ctests
globs []string // corresponds with globCtests
globCtests []*cTest //
parts []int // corresponds with each signature: represents the number of CTests for each sig
priorities *priority.Set
extension string
entryBufs *siegreader.Buffers
}

func loadCM(ls *persist.LoadSaver) *ContainerMatcher {
return &ContainerMatcher{
ct := &ContainerMatcher{
startIndexes: ls.LoadInts(),
conType: containerType(ls.LoadTinyUInt()),
nameCTest: loadCTests(ls),
parts: ls.LoadInts(),
priorities: priority.Load(ls),
extension: ls.LoadString(),
globs: ls.LoadStrings(),
}
gcts := make([]*cTest, ls.LoadSmallInt())
for i := range gcts {
gcts[i] = loadCTest(ls)
}
ct.globCtests = gcts
ct.parts = ls.LoadInts()
ct.priorities = priority.Load(ls)
ct.extension = ls.LoadString()
return ct
}

func (c *ContainerMatcher) save(ls *persist.LoadSaver) {
ls.SaveInts(c.startIndexes)
ls.SaveTinyUInt(int(c.conType))
saveCTests(ls, c.nameCTest)
ls.SaveStrings(c.globs)
ls.SaveSmallInt(len(c.globCtests))
for _, v := range c.globCtests {
saveCTest(ls, v)
}
ls.SaveInts(c.parts)
c.priorities.Save(ls)
ls.SaveString(c.extension)
Expand All @@ -176,6 +192,7 @@ func (c *ContainerMatcher) String() string {
str += fmt.Sprintf("Type: %d\n", c.conType)
str += fmt.Sprintf("Priorities: %v\n", c.priorities)
str += fmt.Sprintf("Parts: %v\n", c.parts)
str += fmt.Sprintf("%d literal tests, %d glob tests\n", len(c.nameCTest), len(c.globCtests))
for k, v := range c.nameCTest {
str += "-----------\n"
str += fmt.Sprintf("Name: %v\n", k)
Expand All @@ -187,6 +204,17 @@ func (c *ContainerMatcher) String() string {
str += "Bytematcher:\n" + v.bm.String()
}
}
for i, v := range c.globs {
str += "-----------\n"
str += fmt.Sprintf("Glob: %v\n", v)
str += fmt.Sprintf("Satisfied: %v\n", c.globCtests[i].satisfied)
str += fmt.Sprintf("Unsatisfied: %v\n", c.globCtests[i].unsatisfied)
if c.globCtests[i].bm == nil {
str += "Bytematcher: None\n"
} else {
str += "Bytematcher:\n" + c.globCtests[i].bm.String()
}
}
return str
}

Expand Down Expand Up @@ -240,7 +268,25 @@ func (c *ContainerMatcher) addSignature(nameParts []string, sigParts []frames.Si
return errors.New("container matcher: nameParts and sigParts must be equal")
}
c.parts = append(c.parts, len(nameParts))
outer:
for i, nm := range nameParts {
// Names containing glob metacharacters are registered as glob tests rather
// than literal paths. "[Content_Types].xml" contains brackets but is
// excluded by name here, so it is always handled as a literal.
if nm != "[Content_Types].xml" && strings.ContainsAny(nm, "*?[]") {
	// Is the glob pattern valid? A malformed pattern is not registered as a
	// glob and falls through to the literal-name handling below.
	if _, err := filepath.Match(nm, ""); err == nil {
		// Do we already have this glob? If so, attach this signature part to
		// the existing test. The inner index is named gi so that it does not
		// shadow i, which must keep indexing sigParts for the current name.
		for gi, v := range c.globs {
			if nm == v {
				c.globCtests[gi].add(sigParts[i], len(c.parts)-1)
				continue outer
			}
		}
		// First time we see this glob: record the pattern and its test at
		// matching positions in c.globs and c.globCtests.
		c.globs = append(c.globs, nm)
		ct := &cTest{}
		ct.add(sigParts[i], len(c.parts)-1)
		c.globCtests = append(c.globCtests, ct)
		continue
	}
}
ct, ok := c.nameCTest[nm]
if !ok {
ct = &cTest{}
Expand All @@ -263,11 +309,7 @@ func loadCTests(ls *persist.LoadSaver) map[string]*cTest {
ret := make(map[string]*cTest)
l := ls.LoadSmallInt()
for i := 0; i < l; i++ {
ret[ls.LoadString()] = &cTest{
satisfied: ls.LoadInts(),
unsatisfied: ls.LoadInts(),
bm: bytematcher.Load(ls),
}
ret[ls.LoadString()] = loadCTest(ls)
}
return ret
}
Expand All @@ -276,12 +318,24 @@ func saveCTests(ls *persist.LoadSaver, ct map[string]*cTest) {
ls.SaveSmallInt(len(ct))
for k, v := range ct {
ls.SaveString(k)
ls.SaveInts(v.satisfied)
ls.SaveInts(v.unsatisfied)
bytematcher.Save(v.bm, ls)
saveCTest(ls, v)
}
}

// loadCTest reads one cTest from ls. The fields are read in the order
// satisfied, unsatisfied, bytematcher, which is the order saveCTest writes
// them in.
func loadCTest(ls *persist.LoadSaver) *cTest {
	test := new(cTest)
	test.satisfied = ls.LoadInts()
	test.unsatisfied = ls.LoadInts()
	test.bm = bytematcher.Load(ls)
	return test
}

// saveCTest writes a single cTest to ls: first the satisfied ints, then the
// unsatisfied ints, then the bytematcher. loadCTest reads the fields back in
// this same order, so the sequence of calls must stay in step with it.
func saveCTest(ls *persist.LoadSaver, ct *cTest) {
ls.SaveInts(ct.satisfied)
ls.SaveInts(ct.unsatisfied)
bytematcher.Save(ct.bm, ls)
}

func (ct *cTest) add(s frames.Signature, t int) {
if s == nil {
ct.satisfied = append(ct.satisfied, t)
Expand Down
13 changes: 13 additions & 0 deletions internal/containermatcher/identify.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,22 @@ func (c *ContainerMatcher) identify(n string, rdr Reader, res chan core.Result,
}
id := c.newIdentifier(len(c.parts), hints...)
var err error
outer:
for err = rdr.Next(); err == nil; err = rdr.Next() {
ct, ok := c.nameCTest[rdr.Name()]
if !ok {
for i, glob := range c.globs {
if m, _ := filepath.Match(glob, rdr.Name()); m {
if config.Debug() {
fmt.Fprintf(config.Out(), "{Glob match (%s) - %s (container %d))}\n", glob, rdr.Name(), c.conType)
}
// processHits returns true if we can stop; otherwise other globs may also
// match this entry, so we keep trying the remaining globs
if c.processHits(c.globCtests[i].identify(c, id, rdr, rdr.Name()), id, c.globCtests[i], rdr.Name(), res) {
break outer
}
}
}
continue
}
if config.Debug() {
Expand Down
2 changes: 1 addition & 1 deletion pkg/config/siegfried.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ var siegfried = struct {
checkpoint int64
userAgent string
}{
version: [3]int{1, 10, 1},
version: [3]int{1, 11, 0},
signature: "default.sig",
conf: "sf.conf",
magic: []byte{'s', 'f', 0x00, 0xFF},
Expand Down

0 comments on commit 6c1f379

Please sign in to comment.