From 079942eda480f1e270a2d0437636d98830d06c58 Mon Sep 17 00:00:00 2001
From: Laura Hausmann
Date: Tue, 1 Aug 2023 23:02:11 +0200
Subject: [PATCH] Improved media cleanup task

Add an optional `mediaCleanup` config section and rework the
clean-remote-files processor around it:

- `cron`: register a repeatable "cleanRemoteFiles" job (daily at 00:00).
- `maxAgeDays`: only delete cached remote files created more than this
  many days ago (default 0).
- `cleanAvatars` / `cleanHeaders`: also delete files that are still in
  use as a user's avatar / banner (default false for both).

The same option names are used in example.yml, the `Source` config type
and the processor.

---
 .config/example.yml                           |  7 ++
 packages/backend/src/config/types.ts          |  7 ++
 packages/backend/src/queue/index.ts           | 15 +++++
 .../object-storage/clean-remote-files.ts      | 72 ++++++++++++-------
 4 files changed, 76 insertions(+), 25 deletions(-)

diff --git a/.config/example.yml b/.config/example.yml
index 70fb38de6..69751739e 100644
--- a/.config/example.yml
+++ b/.config/example.yml
@@ -190,6 +190,13 @@ reservedUsernames: [
 # Proxy remote files (default: false)
 #proxyRemoteFiles: true
 
+# Media cleanup settings (defaults: false, 0, false, false)
+#mediaCleanup:
+#  cron: true
+#  maxAgeDays: 30
+#  cleanAvatars: false
+#  cleanHeaders: false
+
 #allowedPrivateNetworks: [
 #  '127.0.0.1/32'
 #]
diff --git a/packages/backend/src/config/types.ts b/packages/backend/src/config/types.ts
index d41bf4831..89085bd1b 100644
--- a/packages/backend/src/config/types.ts
+++ b/packages/backend/src/config/types.ts
@@ -59,6 +59,13 @@ export type Source = {
 		ssl: boolean;
 	};
 
+	mediaCleanup?: {
+		cron?: boolean;
+		maxAgeDays?: number;
+		cleanAvatars?: boolean;
+		cleanHeaders?: boolean;
+	};
+
 	proxy?: string;
 	proxySmtp?: string;
 	proxyBypassHosts?: string[];
diff --git a/packages/backend/src/queue/index.ts b/packages/backend/src/queue/index.ts
index 93aed7cb8..787fa848f 100644
--- a/packages/backend/src/queue/index.ts
+++ b/packages/backend/src/queue/index.ts
@@ -522,6 +522,21 @@ export default function () {
 	endedPollNotificationQueue.process(endedPollNotification);
 	webhookDeliverQueue.process(64, processWebhookDeliver);
 	processDb(dbQueue);
+
+	if (config.mediaCleanup?.cron) {
+		objectStorageQueue.add(
+			"cleanRemoteFiles",
+			{},
+			{
+				repeat: {
+					cron: "0 0 * * *",
+				},
+				removeOnComplete: true,
+				removeOnFail: true,
+			},
+		);
+	}
+
 	processObjectStorage(objectStorageQueue);
 	processBackground(backgroundQueue);
 
diff --git a/packages/backend/src/queue/processors/object-storage/clean-remote-files.ts b/packages/backend/src/queue/processors/object-storage/clean-remote-files.ts
index fdfe05d1a..60be026e4 100644
--- a/packages/backend/src/queue/processors/object-storage/clean-remote-files.ts
+++ b/packages/backend/src/queue/processors/object-storage/clean-remote-files.ts
@@ -2,8 +2,10 @@ import type Bull from "bull";
 
 import { queueLogger } from "../../logger.js";
 import { deleteFileSync } from "@/services/drive/delete-file.js";
-import { DriveFiles } from "@/models/index.js";
+import { DriveFiles, Users } from "@/models/index.js";
 import { MoreThan, Not, IsNull } from "typeorm";
+import { User } from "@/models/entities/user.js";
+import config from "@/config/index.js";
 
 const logger = queueLogger.createSubLogger("clean-remote-files");
 
@@ -11,43 +13,63 @@ export default async function cleanRemoteFiles(
 	job: Bull.Job<Record<string, unknown>>,
 	done: any,
 ): Promise<void> {
-	logger.info("Deleting cached remote files...");
+	let progress = 0;
+	// Only files created before this cutoff are eligible; maxAgeDays defaults to 0.
+	const cutoff = new Date();
+	cutoff.setDate(cutoff.getDate() - (config.mediaCleanup?.maxAgeDays ?? 0));
+	const avatars = config.mediaCleanup?.cleanAvatars ?? false;
+	const headers = config.mediaCleanup?.cleanHeaders ?? false;
 
-	let deletedCount = 0;
-	let cursor: any = null;
+	// Zone-less UTC timestamp ("YYYY-MM-DD HH:mm:ss.SSS") bound as :until below.
+	const until = cutoff.toISOString().replace("T", " ").replace("Z", "");
+
+	let target = "files";
+	if (avatars)
+		if (headers) target += ", avatars & headers";
+		else target += " & avatars";
+	else if (headers) target += " & headers";
+
+	logger.info(`Deleting cached remote ${target} created before ${until}...`);
+
+	let query = DriveFiles.createQueryBuilder("file")
+		.where(`file.isLink = FALSE`)
+		.andWhere(`file.userHost IS NOT NULL`)
+		.andWhere("file.createdAt < :until", { until });
+
+	// Unless both are opted in, skip files still referenced as a user's avatar and/or banner.
+	if (!avatars || !headers) {
+		query = query.andWhere((qb) => {
+			let sq = qb.subQuery().from(User, "user");
+
+			if (!avatars) sq = sq.where("file.id = user.avatarId");
+			if (!headers) sq = sq.orWhere("file.id = user.bannerId");
+
+			return `NOT EXISTS ${sq.getQuery()}`;
+		});
+	}
+
+	query = query.take(8);
+
+	const total = await query.getCount();
+	logger.info(`Deleting ${total} files, please wait...`);
 
 	while (true) {
-		const files = await DriveFiles.find({
-			where: {
-				userHost: Not(IsNull()),
-				isLink: false,
-				...(cursor ? { id: MoreThan(cursor) } : {}),
-			},
-			take: 8,
-			order: {
-				id: 1,
-			},
-		});
+		// NOTE(review): no cursor/offset is used; this assumes deleteFileSync makes each
+		// processed file stop matching the query, otherwise the loop never ends - confirm.
+		const files = await query.getMany();
 
 		if (files.length === 0) {
 			job.progress(100);
 			break;
 		}
 
-		cursor = files[files.length - 1].id;
-
 		await Promise.all(files.map((file) => deleteFileSync(file, true)));
 
-		deletedCount += 8;
+		progress += files.length;
 
-		const total = await DriveFiles.countBy({
-			userHost: Not(IsNull()),
-			isLink: false,
-		});
-
-		job.progress(deletedCount / total);
+		job.progress((progress / total) * 100);
 	}
 
-	logger.succ("All cahced remote files has been deleted.");
+	logger.succ(`Remote media cleanup job completed successfully.`);
 	done();
 }