From 296ccc5f8e7c0a595c19e39015b44969efb89044 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Sun, 18 Feb 2024 08:16:48 +0100 Subject: [PATCH] (blacklist) Clean up blacklist impl The domain blacklist blocked the start-up of each process that injected it, adding roughly 30 seconds to the start-up time in production. This change moves the loading to a separate thread entirely. For threads or processes that require the blacklist to have been loaded, a helper method (waitUntilLoaded) was added that blocks until the first load attempt has finished, whether or not that attempt succeeded. --- .../nu/marginalia/db/DomainBlacklistImpl.java | 69 +++++++++++++++---- 1 file changed, 56 insertions(+), 13 deletions(-) diff --git a/code/common/db/src/main/java/nu/marginalia/db/DomainBlacklistImpl.java b/code/common/db/src/main/java/nu/marginalia/db/DomainBlacklistImpl.java index 3ccc7731..662b5344 100644 --- a/code/common/db/src/main/java/nu/marginalia/db/DomainBlacklistImpl.java +++ b/code/common/db/src/main/java/nu/marginalia/db/DomainBlacklistImpl.java @@ -4,11 +4,10 @@ import com.google.inject.Inject; import com.google.inject.Singleton; import com.zaxxer.hikari.HikariDataSource; import gnu.trove.set.hash.TIntHashSet; -import io.reactivex.rxjava3.schedulers.Schedulers; -import lombok.SneakyThrows; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.sql.SQLException; import java.util.concurrent.TimeUnit; @Singleton @@ -17,33 +16,67 @@ public class DomainBlacklistImpl implements DomainBlacklist { private final HikariDataSource dataSource; private final Logger logger = LoggerFactory.getLogger(getClass()); private final boolean blacklistDisabled = Boolean.getBoolean("blacklist.disable"); + + private volatile boolean isLoaded = false; + @Inject public DomainBlacklistImpl(HikariDataSource dataSource) { this.dataSource = dataSource; - Schedulers.io().schedulePeriodicallyDirect(this::updateSpamList, 5, 600, TimeUnit.SECONDS); - - updateSpamList(); + Thread.ofPlatform().daemon().name("BlacklistUpdater").start(this::updateSpamList); } private void 
updateSpamList() { + // If the blacklist is disabled, we don't need to do anything + if (blacklistDisabled) { + isLoaded = true; - try { - int oldSetSize = spamDomainSet.size(); + flagLoaded(); + return; + } + + for (;;) { spamDomainSet = getSpamDomains(); - if (oldSetSize == 0 && spamDomainSet.size() > 0) { - logger.info("Synchronized {} spam domains", spamDomainSet.size()); + // Set the flag to true after the first loading attempt, regardless of success, + // to avoid deadlocking threads that are waiting for this condition + flagLoaded(); + + // Sleep for 10 minutes before trying again + try { + TimeUnit.MINUTES.sleep(10); + } + catch (InterruptedException ex) { + break; } } - catch (Exception ex) { - logger.error("Failed to synchronize spam domains", ex); + + } + + private void flagLoaded() { + if (!isLoaded) { + synchronized (this) { + isLoaded = true; + notifyAll(); + } } } + /** Block until the blacklist has been loaded */ + public boolean waitUntilLoaded() throws InterruptedException { + if (!isLoaded) { + synchronized (this) { + while (!isLoaded) { + wait(5000); + } + } + } + + return true; + } + - @SneakyThrows public TIntHashSet getSpamDomains() { final TIntHashSet result = new TIntHashSet(1_000_000); @@ -52,15 +85,25 @@ public class DomainBlacklistImpl implements DomainBlacklist { } try (var connection = dataSource.getConnection()) { - try (var stmt = connection.prepareStatement("SELECT EC_DOMAIN.ID FROM EC_DOMAIN INNER JOIN EC_DOMAIN_BLACKLIST ON (EC_DOMAIN_BLACKLIST.URL_DOMAIN = EC_DOMAIN.DOMAIN_TOP OR EC_DOMAIN_BLACKLIST.URL_DOMAIN = EC_DOMAIN.DOMAIN_NAME)")) { + try (var stmt = connection.prepareStatement(""" + SELECT EC_DOMAIN.ID + FROM EC_DOMAIN + INNER JOIN EC_DOMAIN_BLACKLIST + ON (EC_DOMAIN_BLACKLIST.URL_DOMAIN = EC_DOMAIN.DOMAIN_TOP + OR EC_DOMAIN_BLACKLIST.URL_DOMAIN = EC_DOMAIN.DOMAIN_NAME) + """)) + { stmt.setFetchSize(1000); var rsp = stmt.executeQuery(); while (rsp.next()) { result.add(rsp.getInt(1)); } } + } catch (SQLException ex) { + 
logger.error("Failed to load spam domain list", ex); } + return result; }