diff --git a/internal/util/util.go b/internal/util/util.go
index 5d299ba..24d99a7 100644
--- a/internal/util/util.go
+++ b/internal/util/util.go
@@ -1,17 +1,38 @@
 package util
 
-import "bytes"
+// compareByteSlices compares slice1 and slice2 byte by byte over their common
+// prefix and returns -1, 1, or 0. Unlike bytes.Compare, length is not
+// compared, so a slice that is a prefix of the other compares as equal.
+func compareByteSlices(slice1, slice2 []byte) int {
+	minLength := len(slice1)
+	if len(slice2) < minLength {
+		minLength = len(slice2)
+	}
+
+	for i := 0; i < minLength; i++ {
+		if slice1[i] < slice2[i] {
+			return -1
+		} else if slice1[i] > slice2[i] {
+			return 1
+		}
+	}
+
+	return 0
+}
 
+// SliceContains scans S in order and stops at the first element that is not
+// ordered before E. It reports whether that element compared equal to E and
+// the index where the scan stopped (len(S) if every element is before E).
 func SliceContains(S [][]byte, E []byte) (bool, int) {
-	for i := 0; i < len(S); i++ {
-		if t := bytes.Compare(S[i], E); t >= 0 {
-			if t == 0 {
-				return true, i
-			}
-			return false, i
+	cmpResult := -1
+	i := 0
+	for i = 0; i < len(S); i++ {
+		cmpResult = compareByteSlices(E, S[i])
+		if cmpResult <= 0 {
+			break
 		}
 	}
-	return false, 0
+	return cmpResult == 0, i
 }
 
 func IsLeaf(S []int) bool {
diff --git a/options.go b/options.go
index 46c4f6a..26ce8d1 100644
--- a/options.go
+++ b/options.go
@@ -9,9 +9,21 @@ import (
 )
 
 type Options struct {
-	Client               *http.Client
-	Logger               zerolog.Logger
+	Client *http.Client
+	Logger zerolog.Logger
+
+	// Client-specific options
+
+	// UpdateScriptInterval is an option to update the script every interval.
+	// If it is set to -1, it will never update the script.
 	UpdateScriptInterval time.Duration
+
+	// Search-specific options
+
+	// CacheWholeIndex is an option to download the whole index and cache it.
+	// It can be extremely slow the first time (especially for gallery index) and consume much memory space,
+	// but it will be a lot faster when you search.
+	CacheWholeIndex bool
 }
 
 func (o *Options) WithClient(c *http.Client) *Options {
@@ -29,10 +41,18 @@ func (o *Options) WithUpdateScriptInterval(t time.Duration) *Options {
 	return o
 }
 
+// WithCacheWholeIndex sets CacheWholeIndex and returns o for chaining.
+func (o *Options) WithCacheWholeIndex(b bool) *Options {
+	o.CacheWholeIndex = b
+	return o
+}
+
 func DefaultOptions() *Options {
 	return &Options{
 		Client:               &http.Client{},
 		Logger:               log.Logger.With().Str("caller", "github.com/EINNN7/hitomi").Logger(),
 		UpdateScriptInterval: -1,
+
+		CacheWholeIndex: false,
 	}
 }
diff --git a/search.go b/search.go
index 23e0cd9..87832e0 100644
--- a/search.go
+++ b/search.go
@@ -19,14 +19,17 @@ const MaxNodeSize = 464
 
 type Search struct {
 	options *Options
-	logger zerolog.Logger
+	logger  zerolog.Logger
+
 	indexVersion map[string]string
+	indexCache   map[string][]byte
 }
 
 func NewSearch(options *Options) *Search {
 	return &Search{
 		options:      options,
 		indexVersion: map[string]string{},
+		indexCache:   map[string][]byte{},
 	}
 }
 
@@ -46,12 +49,35 @@ func (s *Search) IndexVersion(name string) (string, error) {
 	return string(version), nil
 }
 
-func (s *Search) NodeByAddress(field string, address int) (*SearchNode, error) {
-	s.options.Logger.Debug().Msgf("retrieve node at %s:%d", field, address)
+// TagSuggestion looks up tag suggestions for a query of the form
+// "field:term". The term is hashed with HashTerm and searched in the field's
+// index starting from the node at address 0.
+func (s *Search) TagSuggestion(query string) ([]string, error) {
+	field := strings.Split(query, ":")
+	if len(field) != 2 {
+		return nil, fmt.Errorf("invalid query: %s", query)
+	}
+	firstNode, err := s.nodeByAddress(field[0], 0)
+	if err != nil {
+		return nil, err
+	}
+	dataOffset, err := s.searchNode(field[0], HashTerm(field[1]), firstNode)
+	if err != nil {
+		return nil, fmt.Errorf("cannot find search result: %w", err)
+	}
+	return s.tagSuggestionData(field[0], dataOffset)
+}
+
+// nodeByAddress fetches and decodes the index node of field at the given byte
+// address. With Options.CacheWholeIndex set, the whole index file is
+// downloaded once per field and later nodes are sliced from that cached copy;
+// otherwise each node is fetched with an HTTP Range request.
+func (s *Search) nodeByAddress(field string, address int) (*node, error) {
 	var url string
 	switch field {
 	case "galleries":
 		if _, ok := s.indexVersion["galleriesindex"]; !ok {
+			s.options.Logger.Debug().Msg("galleriesindex version not found, fetch fresh one")
 			version, err := s.IndexVersion("galleriesindex")
 			if err != nil {
 				return nil, err
@@ -61,6 +87,7 @@ func (s *Search) NodeByAddress(field string, address int) (*SearchNode, error) {
 		url = fmt.Sprintf("https://ltn.hitomi.la/galleriesindex/galleries.%s.index", s.indexVersion["galleriesindex"])
 	case "languages":
 		if _, ok := s.indexVersion["languagesindex"]; !ok {
+			s.options.Logger.Debug().Msg("languagesindex version not found, fetch fresh one")
 			version, err := s.IndexVersion("languagesindex")
 			if err != nil {
 				return nil, err
@@ -70,6 +97,7 @@ func (s *Search) NodeByAddress(field string, address int) (*SearchNode, error) {
 		url = fmt.Sprintf("https://ltn.hitomi.la/languagesindex/languages.%s.index", s.indexVersion["languagesindex"])
 	case "nozomiurl":
 		if _, ok := s.indexVersion["nozomiurlindex"]; !ok {
+			s.options.Logger.Debug().Msg("nozomiurlindex version not found, fetch fresh one")
 			version, err := s.IndexVersion("nozomiurlindex")
 			if err != nil {
 				return nil, err
@@ -79,6 +107,7 @@ func (s *Search) NodeByAddress(field string, address int) (*SearchNode, error) {
 		url = fmt.Sprintf("https://ltn.hitomi.la/nozomiurlindex/nozomiurl.%s.index", s.indexVersion["nozomiurlindex"])
 	default:
 		if _, ok := s.indexVersion["tagindex"]; !ok {
+			s.options.Logger.Debug().Msg("tagindex version not found, fetch fresh one")
 			version, err := s.IndexVersion("tagindex")
 			if err != nil {
 				return nil, err
@@ -87,34 +116,66 @@ func (s *Search) NodeByAddress(field string, address int) (*SearchNode, error) {
 		}
 		url = fmt.Sprintf("https://ltn.hitomi.la/tagindex/%s.%s.index", field, s.indexVersion["tagindex"])
 	}
-	s.options.Logger.Debug().Msgf("calling %s", url)
-	req, _ := http.NewRequest("GET", url, nil)
-	req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", address, address+MaxNodeSize-1))
-	resp, err := s.options.Client.Do(req)
-	if err != nil {
-		return nil, err
-	}
-	defer func(Body io.ReadCloser) {
-		_ = Body.Close()
-	}(resp.Body)
-	if resp.StatusCode >= 400 {
-		return nil, fmt.Errorf("failed to get node: %d", resp.StatusCode)
-	}
-	content, err := io.ReadAll(resp.Body)
-	if err != nil {
-		return nil, err
+	if s.options.CacheWholeIndex {
+		index, ok := s.indexCache[field]
+		if !ok {
+			s.options.Logger.Debug().Msgf("indexCache for %s not found, fetch fresh one", field)
+			req, _ := http.NewRequest("GET", url, nil)
+			resp, err := s.options.Client.Do(req)
+			if err != nil {
+				return nil, err
+			}
+			defer func(Body io.ReadCloser) {
+				_ = Body.Close()
+			}(resp.Body)
+			if resp.StatusCode >= 400 {
+				return nil, fmt.Errorf("failed to get node: %d", resp.StatusCode)
+			}
+			index, err = io.ReadAll(resp.Body)
+			if err != nil {
+				return nil, err
+			}
+			s.indexCache[field] = index
+		}
+		// The Range request in the other branch is inclusive and yields up to
+		// MaxNodeSize bytes; take the same window here, clamped to the index end.
+		if address < 0 || address >= len(index) {
+			return nil, fmt.Errorf("node address %d out of range for %s index", address, field)
+		}
+		end := address + MaxNodeSize
+		if end > len(index) {
+			end = len(index)
+		}
+		return decodeNode(index[address:end])
+	} else {
+		req, _ := http.NewRequest("GET", url, nil)
+		req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", address, address+MaxNodeSize-1))
+		resp, err := s.options.Client.Do(req)
+		if err != nil {
+			return nil, err
+		}
+		defer func(Body io.ReadCloser) {
+			_ = Body.Close()
+		}(resp.Body)
+		if resp.StatusCode >= 400 {
+			return nil, fmt.Errorf("failed to get node: %d", resp.StatusCode)
+		}
+		content, err := io.ReadAll(resp.Body)
+		if err != nil {
+			return nil, err
+		}
+		return decodeNode(content)
 	}
-	return DecodeNode(content)
 }
 
-type SearchNode struct {
+type node struct {
 	Key            [][]byte
 	Data           [][2]int
 	SubNodeAddress []int
 }
 
-func DecodeNode(data []byte) (*SearchNode, error) {
-	node := new(SearchNode)
+func decodeNode(data []byte) (*node, error) {
+	node := new(node)
 	node.Key = [][]byte{}
 	node.Data = [][2]int{}
 	node.SubNodeAddress = []int{}
@@ -154,13 +215,10 @@ func DecodeNode(data []byte) (*SearchNode, error) {
 	return node, nil
 }
 
-func (s *Search) SearchNode(field string, key []byte, node *SearchNode) ([2]int, error) {
+func (s *Search) searchNode(field string, key []byte, node *node) ([2]int, error) {
 	if node == nil {
 		return [2]int{}, fmt.Errorf("node is nil")
 	}
-
-	s.options.Logger.Debug().Ints("nodes", node.SubNodeAddress).Msg("nodes")
-
 	var found bool
 	var next int
 	if found, next = util.SliceContains(node.Key, key); found {
@@ -173,14 +231,14 @@ func (s *Search) SearchNode(field string, key []byte, node *SearchNode) ([2]int,
 	if node.SubNodeAddress[next] == 0 {
 		return [2]int{}, fmt.Errorf("non-root node address 0")
 	}
-	subNode, err := s.NodeByAddress(field, node.SubNodeAddress[next])
+	subNode, err := s.nodeByAddress(field, node.SubNodeAddress[next])
 	if err != nil {
 		return [2]int{}, fmt.Errorf("failed to retrive subNode %d", next)
 	}
-	return s.SearchNode(field, key, subNode)
+	return s.searchNode(field, key, subNode)
 }
 
-func (s *Search) TagSuggestionData(field string, data [2]int) ([]string, error) {
+func (s *Search) tagSuggestionData(field string, data [2]int) ([]string, error) {
 	req, _ := http.NewRequest("GET", fmt.Sprintf("https://ltn.hitomi.la/tagindex/%s.%s.data", field, s.indexVersion["tagindex"]), nil)
 	req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", data[0], data[0]+data[1]))
 	resp, err := s.options.Client.Do(req)
@@ -195,9 +253,9 @@ func (s *Search) TagSuggestionData(field string, data [2]int) ([]string, error)
 		return nil, err
 	}
 	var position = 4
-	suggestionsLength := int32(binary.BigEndian.Uint32(content[0:4]))
-	var suggestions = make([]string, suggestionsLength)
-	for i := int32(0); i < suggestionsLength; i++ {
+	suggestionLength := int32(binary.BigEndian.Uint32(content[0:4]))
+	var suggestions = make([]string, suggestionLength)
+	for i := int32(0); i < suggestionLength; i++ {
 		headerLength := int32(binary.BigEndian.Uint32(content[position : position+4]))
 		position += 4
 		header := string(content[position : position+int(headerLength)])
diff --git a/search_test.go b/search_test.go
index f5b0afd..0a289f8 100644
--- a/search_test.go
+++ b/search_test.go
@@ -7,18 +7,49 @@ import (
 var search *Search
 
 func TestSearch_SearchNode(t *testing.T) {
-	node, err := search.NodeByAddress("female", 0)
+	node, err := search.nodeByAddress("female", 0)
 	if err != nil {
 		t.Fatal(err)
 	}
-	data, err := search.SearchNode("female", HashTerm("big"), node)
+	data, err := search.searchNode("female", HashTerm("big"), node)
 	if err != nil {
 		t.Fatal(err)
 	}
 	t.Log(data)
-	tags, err := search.TagSuggestionData("female", data)
+	tags, err := search.tagSuggestionData("female", data)
 	if err != nil {
 		t.Fatal(err)
 	}
 	t.Log(tags)
 }
+
+func TestSearch_TagSuggestion(t *testing.T) {
+	result, err := search.TagSuggestion("tag:")
+	if err != nil {
+		t.Fatal(err)
+	}
+	t.Log(result)
+}
+
+func TestSearch_TagSuggestion_CacheWholeIndex(t *testing.T) {
+	csc := NewSearch(DefaultOptions().WithCacheWholeIndex(true))
+	result, err := csc.TagSuggestion("female:big")
+	if err != nil {
+		t.Fatal(err)
+	}
+	t.Log(result)
+}
+
+func BenchmarkSearch_TagSuggestion_CacheWholeIndex(b *testing.B) {
+	b.StopTimer()
+	csc := NewSearch(DefaultOptions().WithCacheWholeIndex(true))
+	_, _ = csc.TagSuggestion("female:")
+	b.Log("warmup done")
+	b.StartTimer()
+	for i := 0; i < b.N; i++ {
+		_, err := csc.TagSuggestion("female:big")
+		if err != nil {
+			b.Fatal(err)
+		}
+	}
+}