Experimental domain-searching feature

This commit is contained in:
vlofgren 2022-07-28 20:06:47 +02:00
parent 3c76043d88
commit ea5dbb301e
4 changed files with 9 additions and 5 deletions

View File

@@ -18,7 +18,7 @@ public interface EdgeDataStoreDao {
List<BrowseResult> getRandomDomains(int count, EdgeDomainBlacklist backlist);
List<BrowseResult> getBrowseResultFromUrlIds(List<EdgeId<EdgeUrl>> urlId, int count);
List<BrowseResult> getBrowseResultFromUrlIds(List<EdgeId<EdgeUrl>> urlId);
List<EdgeUrlDetails> getUrlDetailsMulti(List<EdgeId<EdgeUrl>> ids);

View File

@@ -266,7 +266,7 @@ public class EdgeDataStoreDaoImpl implements EdgeDataStoreDao {
}
@Override
public List<BrowseResult> getBrowseResultFromUrlIds(List<EdgeId<EdgeUrl>> urlId, int count) {
public List<BrowseResult> getBrowseResultFromUrlIds(List<EdgeId<EdgeUrl>> urlId) {
if (urlId.isEmpty())
return Collections.emptyList();
@@ -278,7 +278,7 @@ public class EdgeDataStoreDaoImpl implements EdgeDataStoreDao {
String inStmt = urlId.stream().map(id -> Integer.toString(id.id())).collect(Collectors.joining(", ", "(", ")"));
var rsp = stmt.executeQuery("SELECT DOMAIN_ID, DOMAIN_NAME FROM EC_URL_VIEW INNER JOIN DOMAIN_METADATA ON EC_URL_VIEW.DOMAIN_ID=DOMAIN_METADATA.ID WHERE VISITED_URLS<500 AND QUALITY>-10 AND EC_URL_VIEW.ID IN " + inStmt + " ORDER BY RANK ASC");
while (rsp.next() && ret.size() < count) {
while (rsp.next()) {
int id = rsp.getInt(1);
String domain = rsp.getString(2);

View File

@@ -117,7 +117,7 @@ public class EdgeSearchOperator {
.flatMap(sq -> sq.searchTermsInclude.stream())
.distinct()
.flatMap(keyword ->
specs.buckets.stream().map(bucket -> new EdgeDomainSearchSpecification(bucket, IndexBlock.Title, keyword, 2_000_000/specs.buckets.size(), 20, 25/specs.buckets.size()))
specs.buckets.stream().map(bucket -> new EdgeDomainSearchSpecification(bucket, IndexBlock.Title, keyword, 2_000_000/specs.buckets.size(), 10, 25))
)
.toArray(EdgeDomainSearchSpecification[]::new);
@@ -127,7 +127,7 @@ public class EdgeSearchOperator {
List<EdgeId<EdgeUrl>> results = indexClient.queryDomains(ctx, requests)
.stream().flatMap(rs -> rs.results.stream()).distinct().toList();
return edgeDataStoreDao.getBrowseResultFromUrlIds(results, 5);
return edgeDataStoreDao.getBrowseResultFromUrlIds(results);
}
private String getEvalResult(@Nullable Future<String> eval) {

View File

@@ -52,7 +52,11 @@ public class SearchCommand implements SearchCommandInterface {
DecoratedSearchResults results = searchOperator.doSearch(ctx, params, eval);
results.results.removeIf(detail -> blacklist.isBlacklisted(dataStoreDao.getDomainId(detail.url.domain)));
results.domainResults.removeIf(browseResultCleaner.shouldRemoveResultPredicate());
if (results.domainResults.size() > 5) {
results.domainResults.subList(5, results.domainResults.size()).clear();
}
return Optional.of(searchResultsRenderer.render(results));
}