Implement Meilisearch Indexing

This commit is contained in:
PrivateGER 2023-05-25 00:55:33 +02:00 committed by PrivateGER
parent 9e0e305aef
commit 99b4e5e13c
6 changed files with 177 additions and 5 deletions

View File

@ -85,6 +85,7 @@
"koa-send": "5.0.1",
"koa-slow": "2.1.0",
"koa-views": "7.0.2",
"meilisearch": "^0.32.4",
"mfm-js": "0.23.3",
"mime-types": "2.1.35",
"multer": "1.4.4-lts.1",

View File

@ -40,6 +40,12 @@ export type Source = {
bucket?: string;
};
meilisearch: {
host: string;
port: number;
apiKey?: string;
};
proxy?: string;
proxySmtp?: string;
proxyBypassHosts?: string[];

View File

@ -0,0 +1,61 @@
import { MeiliSearch } from 'meilisearch';
import { dbLogger } from "./logger.js";
import config from "@/config/index.js";
import {Note} from "@/models/entities/note";
import {normalizeForSearch} from "@/misc/normalize-for-search";
const logger = dbLogger.createSubLogger("meilisearch", "gray", false);

// Meilisearch is treated as configured as soon as any one of its settings is present.
const hasConfig =
	config.meilisearch && (config.meilisearch.host || config.meilisearch.port || config.meilisearch.apiKey);

// Resolved connection settings. Missing values fall back to a local default
// instance when Meilisearch is configured, and to empty values otherwise.
const host = hasConfig ? config.meilisearch.host ?? "localhost" : "";
const port = hasConfig ? config.meilisearch.port ?? 7700 : 0;
const auth = hasConfig ? config.meilisearch.apiKey ?? "" : "";

// Accept a host that already carries a scheme; otherwise default to plain HTTP.
const baseUrl = /^https?:\/\//.test(host) ? host : `http://${host}`;

// The client and index handle are always created so that `posts` stays defined
// for the rest of this module. When Meilisearch is not configured they point at
// a placeholder address and the setup below never sends a request to it.
const client = new MeiliSearch({
	host: hasConfig ? `${baseUrl}:${port}` : "http://localhost:7700",
	// Only send credentials when an API key was actually configured.
	...(auth === "" ? {} : { apiKey: auth }),
});

const posts = client.index('posts');

if (hasConfig) {
	logger.info("Connecting to MeiliSearch");
	// Restrict full-text matching to the note text. The promise is handled
	// explicitly so an unreachable server is logged instead of surfacing as an
	// unhandled rejection at startup.
	posts
		.updateSearchableAttributes(['text'])
		.then(() => {
			logger.info("Connected to MeiliSearch");
		})
		.catch((err: unknown) => {
			const reason = err instanceof Error ? err.message : String(err);
			logger.error(`Failed to configure MeiliSearch index: ${reason}`);
		});
}
/**
 * Shape of a note document stored in the `posts` index.
 *
 * Mirrors the fields that `ingestNote` copies from a `Note` entity when it
 * adds a document to the index.
 */
export type MeilisearchNote = {
// Note id, stored in its string form.
id: string;
// Note body; the only attribute registered as searchable for the index.
text: string;
// Id of the note's author.
userId: string;
// Copied from `note.userHost` (presumably empty/absent for local users — confirm against the Note entity).
userHost: string;
// Copied from `note.channelId`.
channelId: string;
}
/**
 * Runs a full-text query against the `posts` index.
 *
 * @param query - Text to search for.
 * @param limit - Maximum number of hits to return.
 * @param offset - Number of hits to skip before the first returned hit.
 * @returns The pending search response from the index.
 */
function search(query: string, limit: number, offset: number) {
	logger.info(`Searching for ${query}`);
	const options = { limit, offset };
	return posts.search(query, options);
}

/**
 * Adds a single note to the `posts` index.
 *
 * @param note - Note entity whose id, text, author, host and channel are indexed.
 * @returns The pending result of the add-documents request.
 */
function ingestNote(note: Note) {
	logger.info(`Indexing note in MeiliSearch: ${note.id}`);
	const document = {
		id: note.id.toString(),
		text: note.text,
		userId: note.userId,
		userHost: note.userHost,
		channelId: note.channelId,
	};
	return posts.addDocuments([document]);
}

// The search API is exported only when Meilisearch is configured; otherwise
// callers receive `null` and are expected to use another search backend.
export default hasConfig ? { search, ingestNote } : null;

View File

@ -4,6 +4,7 @@ import { Note } from "@/models/entities/note.js";
import config from "@/config/index.js";
import es from "../../../../db/elasticsearch.js";
import sonic from "../../../../db/sonic.js";
import meilisearch, {MeilisearchNote} from "../../../../db/meilisearch.js";
import define from "../../define.js";
import { makePaginationQuery } from "../../common/make-pagination-query.js";
import { generateVisibilityQuery } from "../../common/generate-visibility-query.js";
@ -62,7 +63,7 @@ export const paramDef = {
} as const;
export default define(meta, paramDef, async (ps, me) => {
if (es == null && sonic == null) {
if (es == null && sonic == null && meilisearch == null) {
const query = makePaginationQuery(
Notes.createQueryBuilder("note"),
ps.sinceId,
@ -171,6 +172,73 @@ export default define(meta, paramDef, async (ps, me) => {
}
return found;
} else if(meilisearch) {
// Meilisearch only matches on text, so the remaining request filters
// (user, channel, id range) are applied here on the returned documents.
const chunkSize = 100;
let start = 0;

// Step through every search result page and collect the ids that satisfy
// the request filters.
const ids: string[] = [];
while (true) {
	// search() takes (query, limit, offset): page size first, then position.
	const results = await meilisearch.search(ps.query, chunkSize, start);
	start += chunkSize;

	if (results.hits.length === 0) {
		break;
	}

	for (const hit of results.hits) {
		const note = hit as MeilisearchNote;
		if (ps.userId && note.userId !== ps.userId) continue;
		if (ps.channelId && note.channelId !== ps.channelId) continue;
		if (ps.sinceId && note.id <= ps.sinceId) continue;
		if (ps.untilId && note.id >= ps.untilId) continue;
		ids.push(note.id);
	}

	// A short page means the result set is exhausted; skip the extra request.
	if (results.hits.length < chunkSize) {
		break;
	}
}

// Sort ids in descending order (newest first). Ids are strings, so they are
// compared as strings, consistent with the sinceId/untilId checks above;
// numeric subtraction would yield NaN and leave the order undefined.
ids.sort((a, b) => (a < b ? 1 : a > b ? -1 : 0));

// Fetch the notes from the database chunk by chunk until the limit is met
start = 0;
const found = [];
while (found.length < ps.limit && start < ids.length) {
	const chunk = ids.slice(start, start + chunkSize);
	const notes: Note[] = await Notes.find({
		where: {
			id: In(chunk),
		},
		order: {
			id: "DESC",
		},
	});

	// The notes are checked for visibility and muted/blocked users when packed
	found.push(...(await Notes.packMany(notes, me)));
	start += chunkSize;
}

// If we have more results than the limit, trim them
if (found.length > ps.limit) {
	found.length = ps.limit;
}

return found;
} else {
const userQuery =
ps.userId != null

View File

@ -67,6 +67,7 @@ import type { UserProfile } from "@/models/entities/user-profile.js";
import { db } from "@/db/postgre.js";
import { getActiveWebhooks } from "@/misc/webhook-cache.js";
import { shouldSilenceInstance } from "@/misc/should-block-instance.js";
import meilisearch from "@/db/meilisearch";
const mutedWordsCache = new Cache<
{ userId: UserProfile["userId"]; mutedWords: UserProfile["mutedWords"] }[]
@ -776,6 +777,10 @@ export async function index(note: Note): Promise<void> {
note.text,
);
}
if (meilisearch) {
await meilisearch.ingestNote(note);
}
}
async function notifyToWatchersOfRenotee(

View File

@ -263,6 +263,9 @@ importers:
koa-views:
specifier: 7.0.2
version: 7.0.2(@types/koa@2.13.5)(ejs@3.1.8)(pug@3.0.2)
meilisearch:
specifier: ^0.32.4
version: 0.32.4
mfm-js:
specifier: 0.23.3
version: 0.23.3
@ -2819,7 +2822,7 @@ packages:
'@types/webgl-ext': 0.0.30
'@webgpu/types': 0.1.16
long: 4.0.0
node-fetch: 2.6.8
node-fetch: 2.6.11
seedrandom: 3.0.5
transitivePeerDependencies:
- encoding
@ -2835,7 +2838,7 @@ packages:
'@types/webgl-ext': 0.0.30
'@webgpu/types': 0.1.21
long: 4.0.0
node-fetch: 2.6.8
node-fetch: 2.6.11
seedrandom: 3.0.5
transitivePeerDependencies:
- encoding
@ -2849,7 +2852,7 @@ packages:
dependencies:
'@tensorflow/tfjs-core': 3.21.0
'@types/node-fetch': 2.6.2
node-fetch: 2.6.8
node-fetch: 2.6.11
seedrandom: 3.0.5
string_decoder: 1.3.0
transitivePeerDependencies:
@ -2864,7 +2867,7 @@ packages:
dependencies:
'@tensorflow/tfjs-core': 4.2.0
'@types/node-fetch': 2.6.2
node-fetch: 2.6.8
node-fetch: 2.6.11
seedrandom: 3.0.5
string_decoder: 1.3.0
transitivePeerDependencies:
@ -5938,6 +5941,14 @@ packages:
- encoding
dev: true
/cross-fetch@3.1.6:
resolution: {integrity: sha512-riRvo06crlE8HiqOwIpQhxwdOk4fOeR7FVM/wXoxchFEqMNUjvbs3bfo4OTgMEMHzppd4DxFBDbyySj8Cv781g==}
dependencies:
node-fetch: 2.6.11
transitivePeerDependencies:
- encoding
dev: false
/cross-spawn@5.1.0:
resolution: {integrity: sha512-pTgQJ5KC0d2hcY8eyL1IzlBPYjTkyH72XRZPnLyKus2mBfNjQs3klqbJU2VILqZryAZUt9JOb3h/mWMy23/f5A==}
dependencies:
@ -10386,6 +10397,14 @@ packages:
engines: {node: '>= 0.6'}
dev: false
/meilisearch@0.32.4:
resolution: {integrity: sha512-QvPtQ6F2TaqAT9fw072/MDjSCMpQifdtUBFeIk3M5jSnFpeSiv1iwfJWNfP6ByaCgR/s++K1Cqtf9vjcZe7prg==}
dependencies:
cross-fetch: 3.1.6
transitivePeerDependencies:
- encoding
dev: false
/meow@9.0.0:
resolution: {integrity: sha512-+obSblOQmRhcyBt62furQqRAQpNyWXo8BuQ5bN7dG8wmwQ+vwHKp/rCFD4CrTP8CsDQD1sjoZ94K417XEUk8IQ==}
engines: {node: '>=10'}
@ -10854,6 +10873,18 @@ packages:
resolution: {integrity: sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==}
engines: {node: '>=10.5.0'}
/node-fetch@2.6.11:
resolution: {integrity: sha512-4I6pdBY1EthSqDmJkiNk3JIT8cswwR9nfeW/cPdUagJYEQG7R95WRH74wpz7ma8Gh/9dI9FP+OU+0E4FvtA55w==}
engines: {node: 4.x || >=6.0.0}
peerDependencies:
encoding: ^0.1.0
peerDependenciesMeta:
encoding:
optional: true
dependencies:
whatwg-url: 5.0.0
dev: false
/node-fetch@2.6.7:
resolution: {integrity: sha512-ZjMPFEfVx5j+y2yF35Kzx5sF7kDzxuDj6ziH4FFbOp87zKDZNx8yExJIb05OGF4Nlt9IHFIMBkRl41VdvcNdbQ==}
engines: {node: 4.x || >=6.0.0}