Skip to content

Commit

Permalink
fs: Fix splitPath in DeepFS
Browse files Browse the repository at this point in the history
  • Loading branch information
mholt committed Dec 3, 2024
1 parent ff6062f commit ac0b248
Show file tree
Hide file tree
Showing 2 changed files with 112 additions and 15 deletions.
66 changes: 51 additions & 15 deletions fs.go
Original file line number Diff line number Diff line change
Expand Up @@ -781,7 +781,7 @@ func (fsys *DeepFS) ReadDir(name string) ([]fs.DirEntry, error) {
// make sure entries that appear to be archive files indicate they are a directory
// so the fs package will try to walk them
for i, entry := range entries {
if slices.Contains(archiveExtensions, path.Ext(entry.Name())) {
if slices.Contains(archiveExtensions, strings.ToLower(path.Ext(entry.Name()))) {
entries[i] = alwaysDirEntry{entry}
}
}
Expand Down Expand Up @@ -811,28 +811,64 @@ func (fsys *DeepFS) getInnerFsys(realPath string) fs.FS {
}

// splitPath splits a file path into the "real" path and the "inner" path components,
// where the split point is the extension of an archive filetype like ".zip" or ".tar.gz".
// where the split point is the first extension of an archive filetype like ".zip" or
// ".tar.gz" that occurs in the path.
//
// The real path is the path that can be accessed on disk and will be returned with
// filepath separators. The inner path is the path that can be used within the archive.
// platform filepath separators. The inner path is the io/fs-compatible path that can
// be used within the archive.
//
// If no archive extension is found in the path, only the realPath is returned.
// If the input path is precisely an archive file (i.e. ends with an archive file
// extension), then innerPath is returned as "." which indicates the root of the archive.
func (*DeepFS) splitPath(path string) (realPath, innerPath string) {
for _, ext := range archiveExtensions {
idx := strings.Index(path+"/", ext+"/")
if idx < 0 {
continue
if len(path) < 2 {
realPath = path
return
}

// slightly more LoC, but more efficient, than exploding the path on every slash,
// is segmenting the path by using indices and looking at slices of the same
// string on every iteration; this avoids many allocations which can be valuable
// since this can be a hot path

// start at 1 instead of 0 because we know if the first slash is at 0, the part will be empty
start, end := 1, strings.Index(path[1:], "/")+1
if end-start <= 0 {
end = len(path)
}

for {
part := strings.TrimRight(strings.ToLower(path[start:end]), " ")

for _, ext := range archiveExtensions {
if strings.HasSuffix(part, ext) {
// we've found an archive extension, so the path until the end of this segment is
// the "real" OS path, and what remains (if anything) is the path within the archive
realPath = filepath.Clean(filepath.FromSlash(path[:end]))
if end < len(path) {
innerPath = path[end+1:]
} else {
// signal to the caller that this is an archive,
// even though it is the very root of the archive
innerPath = "."
}
return
}
}
splitPos := idx + len(ext)
realPath = filepath.Clean(filepath.FromSlash(path[:splitPos]))
innerPath = strings.TrimPrefix(path[splitPos:], "/")
if innerPath == "" {
// signal to the caller that this is an archive,
// even though it is the very root of the archive
innerPath = "."

// advance to the next segment, or end of string
start = end + 1
if start > len(path) {
break
}
end = strings.Index(path[start:], "/") + start
if end-start <= 0 {
end = len(path)
}
return
}

// no archive extension found, so entire path is real path
realPath = filepath.Clean(filepath.FromSlash(path))
return
}
Expand Down
61 changes: 61 additions & 0 deletions fs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,67 @@ func TestPathWithoutTopDir(t *testing.T) {
}
}

// TestSplitPath is a table-driven check of DeepFS.splitPath. Each row gives an
// input path together with the real (on-disk) path and the inner (in-archive)
// path that splitPath is expected to return for it.
//
// The table covers paths with no archive extension (inner path stays empty),
// a path that ends exactly at an archive (inner path is "."), paths that
// descend into an archive, and paths with a second archive nested inside the
// first, where the split is expected at the first archive only.
//
// NOTE(review): the expected real paths are written with "/" separators;
// confirm whether this test is meant to pass on platforms with a different
// filepath separator.
func TestSplitPath(t *testing.T) {
	type splitCase struct {
		path      string // argument passed to splitPath
		wantReal  string // expected first return value
		wantInner string // expected second return value
	}

	table := []splitCase{
		// no archive extension anywhere in the path
		{path: "/", wantReal: "/", wantInner: ""},
		{path: "/foo", wantReal: "/foo", wantInner: ""},
		{path: "/foo/bar", wantReal: "/foo/bar", wantInner: ""},

		// the path is exactly the archive file
		{path: "/foo.zip", wantReal: "/foo.zip", wantInner: "."},

		// the path continues inside the archive
		{path: "/foo.zip/a", wantReal: "/foo.zip", wantInner: "a"},
		{path: "/foo.zip/a/b", wantReal: "/foo.zip", wantInner: "a/b"},
		{path: "/a/b/foobar.zip/c", wantReal: "/a/b/foobar.zip", wantInner: "c"},

		// a second archive below the first one does not move the split point
		{path: "/a/foo.zip/b/test.tar", wantReal: "/a/foo.zip", wantInner: "b/test.tar"},
		{path: "/a/foo.zip/b/test.tar/c", wantReal: "/a/foo.zip", wantInner: "b/test.tar/c"},
	}

	var deep DeepFS
	for n, tc := range table {
		gotReal, gotInner := deep.splitPath(tc.path)

		if gotReal != tc.wantReal {
			t.Errorf("Test %d (input=%q): expected real path %q but got %q", n, tc.path, tc.wantReal, gotReal)
		}
		if gotInner != tc.wantInner {
			t.Errorf("Test %d (input=%q): expected inner path %q but got %q", n, tc.path, tc.wantInner, gotInner)
		}
	}
}

var (
//go:embed testdata/test.zip
testZIP []byte
Expand Down

0 comments on commit ac0b248

Please sign in to comment.