Skip to content

Commit

Permalink
feat: added CacheWholeIndex option
Browse files Browse the repository at this point in the history
doc: added some documentation
bug: fixed wrong behaviour in searchNode
  • Loading branch information
DeltaLaboratory committed Sep 30, 2023
1 parent fe7f7fd commit 1562734
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 46 deletions.
31 changes: 23 additions & 8 deletions internal/util/util.go
Original file line number Diff line number Diff line change
@@ -1,17 +1,32 @@
package util

import "bytes"
// compareByteSlices orders two byte slices by the bytes they have in common.
//
// Only the first min(len(slice1), len(slice2)) bytes are inspected. The result
// is -1 if the first differing byte is smaller in slice1, 1 if it is larger,
// and 0 if no byte differs within that shared length. Length is never used as
// a tiebreaker, so a slice and any of its prefixes compare as equal (this is
// different from bytes.Compare).
func compareByteSlices(slice1, slice2 []byte) int {
	shared := len(slice2)
	if len(slice1) < shared {
		shared = len(slice1)
	}

	for idx := range slice1[:shared] {
		left, right := slice1[idx], slice2[idx]
		switch {
		case left < right:
			return -1
		case left > right:
			return 1
		}
	}

	return 0
}

func SliceContains(S [][]byte, E []byte) (bool, int) {
for i := 0; i < len(S); i++ {
if t := bytes.Compare(S[i], E); t >= 0 {
if t == 0 {
return true, i
}
return false, i
cmpResult := -1
i := 0
for i = 0; i < len(S); i++ {
cmpResult = compareByteSlices(E, S[i])
if cmpResult <= 0 {
break
}
}
return false, 0
return cmpResult == 0, i
}

func IsLeaf(S []int) bool {
Expand Down
23 changes: 21 additions & 2 deletions options.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,21 @@ import (
)

// Options holds the settings shared by the client and the search helpers.
type Options struct {
	// Client is the HTTP client used to send requests.
	Client *http.Client
	// Logger receives the package's log output.
	Logger zerolog.Logger

	// Client-specific options

	// UpdateScriptInterval is the interval at which the script is updated.
	// If it is set to -1, the script is never updated.
	UpdateScriptInterval time.Duration

	// Search-specific options

	// CacheWholeIndex makes the search download a whole index and cache it.
	// The first lookup can be extremely slow (especially for the gallery index)
	// and the cache can consume a lot of memory, but subsequent searches are
	// much faster.
	CacheWholeIndex bool
}

func (o *Options) WithClient(c *http.Client) *Options {
Expand All @@ -29,10 +41,17 @@ func (o *Options) WithUpdateScriptInterval(t time.Duration) *Options {
return o
}

// WithCacheWholeIndex sets the CacheWholeIndex option to b and returns the
// same Options pointer so that calls can be chained.
func (o *Options) WithCacheWholeIndex(b bool) *Options {
o.CacheWholeIndex = b
return o
}

// DefaultOptions builds an Options value populated with the package defaults:
// a zero-value http.Client, a logger derived from log.Logger that carries a
// "caller" field naming this package, UpdateScriptInterval set to -1, and
// CacheWholeIndex turned off.
func DefaultOptions() *Options {
	defaults := new(Options)
	defaults.Client = &http.Client{}
	defaults.Logger = log.Logger.With().Str("caller", "github.com/EINNN7/hitomi").Logger()
	defaults.UpdateScriptInterval = -1

	defaults.CacheWholeIndex = false
	return defaults
}
108 changes: 75 additions & 33 deletions search.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,17 @@ const MaxNodeSize = 464
// Search performs index lookups using the settings held in options.
type Search struct {
	options *Options

	logger zerolog.Logger

	// indexVersion maps an index name (for example "tagindex") to the version
	// string that was fetched for it.
	indexVersion map[string]string
	// indexCache maps a field name to the complete downloaded index for that
	// field; it is filled only when Options.CacheWholeIndex is enabled.
	indexCache map[string][]byte
}

// NewSearch returns a Search that uses the given options and starts with empty
// index-version and index-cache maps.
func NewSearch(options *Options) *Search {
	s := new(Search)
	s.options = options
	s.indexVersion = make(map[string]string)
	s.indexCache = make(map[string][]byte)
	return s
}

Expand All @@ -46,12 +49,28 @@ func (s *Search) IndexVersion(name string) (string, error) {
return string(version), nil
}

func (s *Search) NodeByAddress(field string, address int) (*SearchNode, error) {
s.options.Logger.Debug().Msgf("retrieve node at %s:%d", field, address)
// TagSuggestion resolves a query of the form "field:term" to tag suggestions.
//
// The query must split on ':' into exactly two parts; otherwise an
// "invalid query" error is returned. The term is hashed with HashTerm and
// looked up starting from the node at address 0 of the field's index. The
// data location produced by that lookup is handed to tagSuggestionData, whose
// result is returned unchanged.
func (s *Search) TagSuggestion(query string) ([]string, error) {
	parts := strings.Split(query, ":")
	if len(parts) != 2 {
		return nil, fmt.Errorf("invalid query: %s", query)
	}
	namespace, term := parts[0], parts[1]

	root, err := s.nodeByAddress(namespace, 0)
	if err != nil {
		return nil, err
	}

	location, err := s.searchNode(namespace, HashTerm(term), root)
	if err != nil {
		return nil, fmt.Errorf("cannot find search result: %w", err)
	}

	return s.tagSuggestionData(namespace, location)
}

func (s *Search) nodeByAddress(field string, address int) (*node, error) {
var url string
switch field {
case "galleries":
if _, ok := s.indexVersion["galleriesindex"]; !ok {
s.options.Logger.Debug().Msg("galleriesindex version not found, fetch fresh one")
version, err := s.IndexVersion("galleriesindex")
if err != nil {
return nil, err
Expand All @@ -61,6 +80,7 @@ func (s *Search) NodeByAddress(field string, address int) (*SearchNode, error) {
url = fmt.Sprintf("https://ltn.hitomi.la/galleriesindex/galleries.%s.index", s.indexVersion["galleriesindex"])
case "languages":
if _, ok := s.indexVersion["languagesindex"]; !ok {
s.options.Logger.Debug().Msg("languagesindex version not found, fetch fresh one")
version, err := s.IndexVersion("languagesindex")
if err != nil {
return nil, err
Expand All @@ -70,6 +90,7 @@ func (s *Search) NodeByAddress(field string, address int) (*SearchNode, error) {
url = fmt.Sprintf("https://ltn.hitomi.la/languagesindex/languages.%s.index", s.indexVersion["languagesindex"])
case "nozomiurl":
if _, ok := s.indexVersion["nozomiurlindex"]; !ok {
s.options.Logger.Debug().Msg("nozomiurlindex version not found, fetch fresh one")
version, err := s.IndexVersion("nozomiurlindex")
if err != nil {
return nil, err
Expand All @@ -79,6 +100,7 @@ func (s *Search) NodeByAddress(field string, address int) (*SearchNode, error) {
url = fmt.Sprintf("https://ltn.hitomi.la/nozomiurlindex/nozomiurl.%s.index", s.indexVersion["nozomiurlindex"])
default:
if _, ok := s.indexVersion["tagindex"]; !ok {
s.options.Logger.Debug().Msg("tagindex version not found, fetch fresh one")
version, err := s.IndexVersion("tagindex")
if err != nil {
return nil, err
Expand All @@ -87,34 +109,57 @@ func (s *Search) NodeByAddress(field string, address int) (*SearchNode, error) {
}
url = fmt.Sprintf("https://ltn.hitomi.la/tagindex/%s.%s.index", field, s.indexVersion["tagindex"])
}
s.options.Logger.Debug().Msgf("calling %s", url)
req, _ := http.NewRequest("GET", url, nil)
req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", address, address+MaxNodeSize-1))
resp, err := s.options.Client.Do(req)
if err != nil {
return nil, err
}
defer func(Body io.ReadCloser) {
_ = Body.Close()
}(resp.Body)
if resp.StatusCode >= 400 {
return nil, fmt.Errorf("failed to get node: %d", resp.StatusCode)
}
content, err := io.ReadAll(resp.Body)
if err != nil {
return nil, err
if s.options.CacheWholeIndex {
if v, ok := s.indexCache[field]; ok {
return decodeNode(v[address : address+MaxNodeSize-1])
}
s.options.Logger.Debug().Msgf("indexCache for %s not found, fetch fresh one", field)
req, _ := http.NewRequest("GET", url, nil)
resp, err := s.options.Client.Do(req)
if err != nil {
return nil, err
}
defer func(Body io.ReadCloser) {
_ = Body.Close()
}(resp.Body)
if resp.StatusCode >= 400 {
return nil, fmt.Errorf("failed to get node: %d", resp.StatusCode)
}
content, err := io.ReadAll(resp.Body)
if err != nil {
return nil, err
}
s.indexCache[field] = content
return decodeNode(content[address : address+MaxNodeSize-1])
} else {
req, _ := http.NewRequest("GET", url, nil)
req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", address, address+MaxNodeSize-1))
resp, err := s.options.Client.Do(req)
if err != nil {
return nil, err
}
defer func(Body io.ReadCloser) {
_ = Body.Close()
}(resp.Body)
if resp.StatusCode >= 400 {
return nil, fmt.Errorf("failed to get node: %d", resp.StatusCode)
}
content, err := io.ReadAll(resp.Body)
if err != nil {
return nil, err
}
return decodeNode(content)
}
return DecodeNode(content)
}

// node is one decoded node of a search index.
type node struct {
	// Key holds the node's keys as raw bytes; searchNode looks the hashed
	// search term up in this list.
	Key [][]byte
	// Data holds one two-int entry per key.
	// NOTE(review): presumably (offset, length) into the matching .data file,
	// which is how tagSuggestionData uses the pair — confirm.
	Data [][2]int
	// SubNodeAddress holds the addresses of the child nodes within the index.
	// searchNode rejects a child address of 0.
	SubNodeAddress []int
}

func DecodeNode(data []byte) (*SearchNode, error) {
node := new(SearchNode)
func decodeNode(data []byte) (*node, error) {
node := new(node)
node.Key = [][]byte{}
node.Data = [][2]int{}
node.SubNodeAddress = []int{}
Expand Down Expand Up @@ -154,13 +199,10 @@ func DecodeNode(data []byte) (*SearchNode, error) {
return node, nil
}

func (s *Search) SearchNode(field string, key []byte, node *SearchNode) ([2]int, error) {
func (s *Search) searchNode(field string, key []byte, node *node) ([2]int, error) {
if node == nil {
return [2]int{}, fmt.Errorf("node is nil")
}

s.options.Logger.Debug().Ints("nodes", node.SubNodeAddress).Msg("nodes")

var found bool
var next int
if found, next = util.SliceContains(node.Key, key); found {
Expand All @@ -173,14 +215,14 @@ func (s *Search) SearchNode(field string, key []byte, node *SearchNode) ([2]int,
if node.SubNodeAddress[next] == 0 {
return [2]int{}, fmt.Errorf("non-root node address 0")
}
subNode, err := s.NodeByAddress(field, node.SubNodeAddress[next])
subNode, err := s.nodeByAddress(field, node.SubNodeAddress[next])
if err != nil {
return [2]int{}, fmt.Errorf("failed to retrive subNode %d", next)
}
return s.SearchNode(field, key, subNode)
return s.searchNode(field, key, subNode)
}

func (s *Search) TagSuggestionData(field string, data [2]int) ([]string, error) {
func (s *Search) tagSuggestionData(field string, data [2]int) ([]string, error) {
req, _ := http.NewRequest("GET", fmt.Sprintf("https://ltn.hitomi.la/tagindex/%s.%s.data", field, s.indexVersion["tagindex"]), nil)
req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", data[0], data[0]+data[1]))
resp, err := s.options.Client.Do(req)
Expand All @@ -195,9 +237,9 @@ func (s *Search) TagSuggestionData(field string, data [2]int) ([]string, error)
return nil, err
}
var position = 4
suggestionsLength := int32(binary.BigEndian.Uint32(content[0:4]))
var suggestions = make([]string, suggestionsLength)
for i := int32(0); i < suggestionsLength; i++ {
suggestionLength := int32(binary.BigEndian.Uint32(content[0:4]))
var suggestions = make([]string, suggestionLength)
for i := int32(0); i < suggestionLength; i++ {
headerLength := int32(binary.BigEndian.Uint32(content[position : position+4]))
position += 4
header := string(content[position : position+int(headerLength)])
Expand Down
37 changes: 34 additions & 3 deletions search_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,49 @@ import (
var search *Search

// TestSearch_SearchNode exercises the low-level lookup steps one at a time for
// the "female" field and the term "big": it fetches the node at address 0,
// searches it for the hashed term, and then loads the suggestions for the data
// location that search produced. Intermediate results are logged.
func TestSearch_SearchNode(t *testing.T) {
	root, err := search.nodeByAddress("female", 0)
	if err != nil {
		t.Fatal(err)
	}
	data, err := search.searchNode("female", HashTerm("big"), root)
	if err != nil {
		t.Fatal(err)
	}
	t.Log(data)
	tags, err := search.tagSuggestionData("female", data)
	if err != nil {
		t.Fatal(err)
	}
	t.Log(tags)
}

// TestSearch_TagSuggestion runs TagSuggestion on the shared search value with
// the query "tag:" (an empty term) and logs whatever comes back; any error
// fails the test.
func TestSearch_TagSuggestion(t *testing.T) {
	suggestions, err := search.TagSuggestion("tag:")
	if err != nil {
		t.Fatal(err)
	}
	t.Log(suggestions)
}

// TestSearch_TagSuggestion_CacheWholeIndex runs the query "female:big" on a
// fresh Search created with CacheWholeIndex enabled and logs the result; any
// error fails the test.
func TestSearch_TagSuggestion_CacheWholeIndex(t *testing.T) {
	cached := NewSearch(DefaultOptions().WithCacheWholeIndex(true))
	suggestions, err := cached.TagSuggestion("female:big")
	if err != nil {
		t.Fatal(err)
	}
	t.Log(suggestions)
}

// BenchmarkSearch_TagSuggestion_CacheWholeIndex times repeated "female:big"
// queries on a Search created with CacheWholeIndex enabled. One untimed
// "female:" query is issued first as a warm-up, so the timed loop starts after
// that call has completed.
func BenchmarkSearch_TagSuggestion_CacheWholeIndex(b *testing.B) {
	b.StopTimer()
	cached := NewSearch(DefaultOptions().WithCacheWholeIndex(true))
	// The warm-up result and error are deliberately discarded.
	_, _ = cached.TagSuggestion("female:")
	b.Log("warmup done")
	b.StartTimer()
	for iter := 0; iter < b.N; iter++ {
		if _, err := cached.TagSuggestion("female:big"); err != nil {
			b.Fatal(err)
		}
	}
}

0 comments on commit 1562734

Please sign in to comment.