Skip to content

Commit

Permalink
небольшой тюнинг поиска
Browse files Browse the repository at this point in the history
Теперь точные совпадения (без учета морфологии) будут выше в выдаче.
  • Loading branch information
maxcom committed Jul 28, 2024
1 parent fdd93fc commit 31e0070
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 39 deletions.
31 changes: 15 additions & 16 deletions src/main/java/ru/org/linux/search/SearchController.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 1998-2022 Linux.org.ru
* Copyright 1998-2024 Linux.org.ru
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
Expand All @@ -25,7 +25,6 @@
import com.sksamuel.elastic4s.requests.searches.aggs.responses.bucket.TermBucket;
import com.sksamuel.elastic4s.requests.searches.aggs.responses.bucket.Terms;
import org.joda.time.DateTimeZone;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.validation.BindingResult;
Expand Down Expand Up @@ -57,20 +56,20 @@

@Controller
public class SearchController {
@Autowired
private SectionService sectionService;

@Autowired
private UserService userService;

@Autowired
private GroupDao groupDao;

@Autowired
private ElasticClient client;

@Autowired
private SearchResultsService resultsService;
private final SectionService sectionService;
private final UserService userService;
private final GroupDao groupDao;
private final ElasticClient client;
private final SearchResultsService resultsService;

/**
 * Creates the controller from its five collaborators.
 *
 * Each argument is stored as-is in the final field of the same name; the
 * constructor performs no other work.
 *
 * @param sectionService value assigned to the {@code sectionService} field
 * @param userService    value assigned to the {@code userService} field
 * @param groupDao       value assigned to the {@code groupDao} field
 * @param client         value assigned to the {@code client} field
 * @param resultsService value assigned to the {@code resultsService} field
 */
public SearchController(
    SectionService sectionService,
    UserService userService,
    GroupDao groupDao,
    ElasticClient client,
    SearchResultsService resultsService
) {
  this.sectionService = sectionService;
  this.userService = userService;
  this.groupDao = groupDao;
  this.client = client;
  this.resultsService = resultsService;
}

@ModelAttribute("sorts")
public static Map<String, String> getSorts() {
Expand Down
46 changes: 30 additions & 16 deletions src/main/scala/ru/org/linux/search/ElasticsearchIndexService.scala
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,13 @@
package ru.org.linux.search

import com.sksamuel.elastic4s.ElasticDsl.*
import com.sksamuel.elastic4s.requests.analyzers.*
import com.sksamuel.elastic4s.analysis.{Analysis, CustomAnalyzer, LengthTokenFilter, MappingCharFilter, SnowballTokenFilter, StandardTokenizer}
import com.sksamuel.elastic4s.requests.bulk.{BulkCompatibleRequest, BulkRequest}
import com.sksamuel.elastic4s.requests.indexes.IndexRequest
import com.sksamuel.elastic4s.requests.mappings.{MappingDefinition, TermVector}
import com.sksamuel.elastic4s.{ElasticClient, Index}
import com.typesafe.scalalogging.StrictLogging
import org.apache.commons.text.StringEscapeUtils
import org.joda.time.DateTime
import org.springframework.stereotype.Service
import ru.org.linux.comment.{Comment, CommentReadService}
import ru.org.linux.group.{Group, GroupDao}
Expand Down Expand Up @@ -54,19 +53,34 @@ object ElasticsearchIndexService {
longField("topic_id"),
textField("topic_title").index(false),
textField("title").analyzer("text_analyzer"),
textField("message").analyzer("text_analyzer").termVector(TermVector.WithPositionsOffsets),
textField("message").analyzer("text_analyzer").termVector(TermVector.WithPositionsOffsets).fields {
textField("raw").termVector(TermVector.WithPositionsOffsets).analyzer("exact_analyzer")
},
booleanField("topic_awaits_commit"))

private val Analyzers: Seq[CustomAnalyzerDefinition] = Seq(
CustomAnalyzerDefinition(
"text_analyzer",
tokenizer = StandardTokenizer("text_tokenizer"),
filters = Seq(
LengthTokenFilter("m_long_word").max(100),
LowercaseTokenFilter,
MappingCharFilter("m_ee", "ё" -> "е", "Ё" -> "Е"),
SnowballTokenFilter("m_my_snow_ru", "Russian"),
SnowballTokenFilter("m_my_snow_en", "English")))
private val Analyzers = Analysis(
analyzers = List(
CustomAnalyzer(
name = "text_analyzer",
tokenizer = "text_tokenizer",
tokenFilters = List("m_long_word", "lowercase", "m_my_snow_ru", "m_my_snow_en"),
charFilters = List("m_ee")),
CustomAnalyzer(
name = "exact_analyzer",
tokenizer = "text_tokenizer",
tokenFilters = List("m_long_word", "lowercase"),
charFilters = List("m_ee"))),
tokenizers = List(
StandardTokenizer("text_tokenizer")
),
tokenFilters = List(
LengthTokenFilter("m_long_word").max(100),
SnowballTokenFilter("m_my_snow_ru", "Russian"),
SnowballTokenFilter("m_my_snow_en", "English")
),
charFilters = List(
MappingCharFilter("m_ee", Map("ё" -> "е", "Ё" -> "Е"))
)
)
}

Expand Down Expand Up @@ -197,7 +211,7 @@ class ElasticsearchIndexService(sectionService: SectionService, groupDao: GroupD
"topic_title" -> topicTitle,
COLUMN_TOPIC_AWAITS_COMMIT -> topicAwaitsCommit(topic),
"message" -> message,
"postdate" -> new DateTime(comment.postdate),
"postdate" -> comment.postdate.toInstant,
"tag" -> topicTagService.getTags(topic),
"is_comment" -> true) ++ title.map("title" -> _)
)
Expand All @@ -213,7 +227,7 @@ class ElasticsearchIndexService(sectionService: SectionService, groupDao: GroupD
val section = sectionService.getSection(topic.sectionId)
val author = userService.getUserCached(topic.authorUserId)

indexInto(MessageIndexType) id topic.id.toString fields(
indexInto(MessageIndexType).id(topic.id.toString).fields(
"section" -> section.getUrlName,
"topic_author" -> author.getNick,
"topic_id" -> topic.id,
Expand All @@ -222,7 +236,7 @@ class ElasticsearchIndexService(sectionService: SectionService, groupDao: GroupD
"title" -> topic.getTitleUnescaped,
"topic_title" -> topic.getTitleUnescaped,
"message" -> messageTextService.extractPlainText(msgbaseDao.getMessageText(topic.id)),
"postdate" -> new DateTime(topic.postdate),
"postdate" -> topic.postdate.toInstant,
"tag" -> topicTagService.getTags(topic),
COLUMN_TOPIC_AWAITS_COMMIT -> topicAwaitsCommit(topic),
"is_comment" -> false)
Expand Down
18 changes: 11 additions & 7 deletions src/main/scala/ru/org/linux/search/SearchViewer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,20 @@ class SearchViewer(query: SearchRequest, elastic: ElasticClient) {
if (queryText.isEmpty) {
matchAllQuery()
} else {
boolQuery().
should(
MatchQuery("title", queryText).minimumShouldMatch("2"),
MatchQuery("message", queryText).minimumShouldMatch("2"),
matchPhraseQuery("message", queryText)).minimumShouldMatch(1)
boolQuery()
.must(
should(
MatchQuery("title", queryText).minimumShouldMatch("2"),
MatchQuery("message", queryText).minimumShouldMatch("2")))
.should(
matchPhraseQuery("message", queryText),
MatchQuery("message.raw", queryText).minimumShouldMatch("2")
).minimumShouldMatch(0)
}
}

private def boost(query: Query) = {
functionScoreQuery(query) functions(
functionScoreQuery(query).functions(
WeightScore(TopicBoost).filter(termQuery("is_comment", "false")),
WeightScore(RecentBoost).filter(rangeQuery("postdate").gte("now/d-3y"))
)
Expand All @@ -56,7 +60,7 @@ class SearchViewer(query: SearchRequest, elastic: ElasticClient) {
}
}

def performSearch(tz:DateTimeZone): SearchResponse = {
def performSearch(tz: DateTimeZone): SearchResponse = {
val typeFilter = Option(query.getRange.getValue) map { value =>
termQuery(query.getRange.getColumn, value)
}
Expand Down

1 comment on commit 31e0070

@blogdron
Copy link

@blogdron blogdron commented on 31e0070 Jul 28, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ураааaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!1!

Please sign in to comment.