(blacklist) Clean up blacklist impl

The domain blacklist blocked the start-up of every process that injected it, adding roughly 30 seconds to the start-up time in production.

This change moves the loading to a separate thread entirely. For threads or processes that require the blacklist to be loaded before they proceed, a helper method was added that blocks until loading has completed.
This commit is contained in:
Viktor Lofgren 2024-02-18 08:16:48 +01:00
parent 8cb5825617
commit 296ccc5f8e

View File

@ -4,11 +4,10 @@ import com.google.inject.Inject;
import com.google.inject.Singleton; import com.google.inject.Singleton;
import com.zaxxer.hikari.HikariDataSource; import com.zaxxer.hikari.HikariDataSource;
import gnu.trove.set.hash.TIntHashSet; import gnu.trove.set.hash.TIntHashSet;
import io.reactivex.rxjava3.schedulers.Schedulers;
import lombok.SneakyThrows;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.sql.SQLException;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
@Singleton @Singleton
@ -17,33 +16,67 @@ public class DomainBlacklistImpl implements DomainBlacklist {
private final HikariDataSource dataSource; private final HikariDataSource dataSource;
private final Logger logger = LoggerFactory.getLogger(getClass()); private final Logger logger = LoggerFactory.getLogger(getClass());
private final boolean blacklistDisabled = Boolean.getBoolean("blacklist.disable"); private final boolean blacklistDisabled = Boolean.getBoolean("blacklist.disable");
private volatile boolean isLoaded = false;
@Inject @Inject
public DomainBlacklistImpl(HikariDataSource dataSource) { public DomainBlacklistImpl(HikariDataSource dataSource) {
this.dataSource = dataSource; this.dataSource = dataSource;
Schedulers.io().schedulePeriodicallyDirect(this::updateSpamList, 5, 600, TimeUnit.SECONDS); Thread.ofPlatform().daemon().name("BlacklistUpdater").start(this::updateSpamList);
updateSpamList();
} }
private void updateSpamList() { private void updateSpamList() {
// If the blacklist is disabled, we don't need to do anything
if (blacklistDisabled) {
isLoaded = true;
try { flagLoaded();
int oldSetSize = spamDomainSet.size();
return;
}
for (;;) {
spamDomainSet = getSpamDomains(); spamDomainSet = getSpamDomains();
if (oldSetSize == 0 && spamDomainSet.size() > 0) { // Set the flag to true after the first loading attempt, regardless of success,
logger.info("Synchronized {} spam domains", spamDomainSet.size()); // to avoid deadlocking threads that are waiting for this condition
flagLoaded();
// Sleep for 10 minutes before trying again
try {
TimeUnit.MINUTES.sleep(10);
}
catch (InterruptedException ex) {
break;
} }
} }
catch (Exception ex) {
logger.error("Failed to synchronize spam domains", ex); }
private void flagLoaded() {
if (!isLoaded) {
synchronized (this) {
isLoaded = true;
notifyAll();
}
} }
} }
/**
 * Blocks the calling thread until the {@code isLoaded} flag is observed to be set.
 *
 * <p>If the flag is already set, the method returns immediately without taking
 * the monitor. Otherwise the caller waits on this object's monitor, re-checking
 * the flag each time it wakes up; each wait is bounded to 5000 ms.
 *
 * @return always {@code true}
 * @throws InterruptedException if the calling thread is interrupted while waiting
 */
public boolean waitUntilLoaded() throws InterruptedException {
    // Fast path: nothing to wait for, so skip the monitor entirely
    if (isLoaded) {
        return true;
    }

    synchronized (this) {
        // Loop rather than a single wait: a wake-up (notification, timeout or
        // spurious) does not by itself mean the flag has been set
        while (!isLoaded) {
            this.wait(5000);
        }
    }

    return true;
}
@SneakyThrows
public TIntHashSet getSpamDomains() { public TIntHashSet getSpamDomains() {
final TIntHashSet result = new TIntHashSet(1_000_000); final TIntHashSet result = new TIntHashSet(1_000_000);
@ -52,15 +85,25 @@ public class DomainBlacklistImpl implements DomainBlacklist {
} }
try (var connection = dataSource.getConnection()) { try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement("SELECT EC_DOMAIN.ID FROM EC_DOMAIN INNER JOIN EC_DOMAIN_BLACKLIST ON (EC_DOMAIN_BLACKLIST.URL_DOMAIN = EC_DOMAIN.DOMAIN_TOP OR EC_DOMAIN_BLACKLIST.URL_DOMAIN = EC_DOMAIN.DOMAIN_NAME)")) { try (var stmt = connection.prepareStatement("""
SELECT EC_DOMAIN.ID
FROM EC_DOMAIN
INNER JOIN EC_DOMAIN_BLACKLIST
ON (EC_DOMAIN_BLACKLIST.URL_DOMAIN = EC_DOMAIN.DOMAIN_TOP
OR EC_DOMAIN_BLACKLIST.URL_DOMAIN = EC_DOMAIN.DOMAIN_NAME)
"""))
{
stmt.setFetchSize(1000); stmt.setFetchSize(1000);
var rsp = stmt.executeQuery(); var rsp = stmt.executeQuery();
while (rsp.next()) { while (rsp.next()) {
result.add(rsp.getInt(1)); result.add(rsp.getInt(1));
} }
} }
} catch (SQLException ex) {
logger.error("Failed to load spam domain list", ex);
} }
return result; return result;
} }