(blacklist) Clean up blacklist impl

The domain blacklist blocked the start-up of every process that injected it, adding roughly 30 seconds to the start-up time in production.

This change moves the loading to a separate thread entirely. For threads or processes that require the blacklist to have been loaded, a helper method was added that blocks until the first load attempt has finished.
This commit is contained in:
Viktor Lofgren 2024-02-18 08:16:48 +01:00
parent 8cb5825617
commit 296ccc5f8e

View File

@ -4,11 +4,10 @@ import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.zaxxer.hikari.HikariDataSource;
import gnu.trove.set.hash.TIntHashSet;
import io.reactivex.rxjava3.schedulers.Schedulers;
import lombok.SneakyThrows;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.sql.SQLException;
import java.util.concurrent.TimeUnit;
@Singleton
@ -17,33 +16,67 @@ public class DomainBlacklistImpl implements DomainBlacklist {
private final HikariDataSource dataSource;
private final Logger logger = LoggerFactory.getLogger(getClass());
private final boolean blacklistDisabled = Boolean.getBoolean("blacklist.disable");
private volatile boolean isLoaded = false;
@Inject
public DomainBlacklistImpl(HikariDataSource dataSource) {
this.dataSource = dataSource;
Schedulers.io().schedulePeriodicallyDirect(this::updateSpamList, 5, 600, TimeUnit.SECONDS);
updateSpamList();
Thread.ofPlatform().daemon().name("BlacklistUpdater").start(this::updateSpamList);
}
private void updateSpamList() {
// If the blacklist is disabled, we don't need to do anything
if (blacklistDisabled) {
isLoaded = true;
try {
int oldSetSize = spamDomainSet.size();
flagLoaded();
return;
}
for (;;) {
spamDomainSet = getSpamDomains();
if (oldSetSize == 0 && spamDomainSet.size() > 0) {
logger.info("Synchronized {} spam domains", spamDomainSet.size());
// Set the flag to true after the first loading attempt, regardless of success,
// to avoid deadlocking threads that are waiting for this condition
flagLoaded();
// Sleep for 10 minutes before trying again
try {
TimeUnit.MINUTES.sleep(10);
}
}
catch (Exception ex) {
logger.error("Failed to synchronize spam domains", ex);
catch (InterruptedException ex) {
break;
}
}
}
/** Marks the blacklist as loaded and wakes every thread waiting on this object's monitor. */
private void flagLoaded() {
    // Already flagged: there is nothing left to signal
    if (isLoaded) {
        return;
    }

    synchronized (this) {
        isLoaded = true;
        this.notifyAll();
    }
}
/**
 * Block until the blacklist has been flagged as loaded.
 *
 * @return always {@code true}
 * @throws InterruptedException if the calling thread is interrupted while waiting
 */
public boolean waitUntilLoaded() throws InterruptedException {
    // Fast path: the flag is already set, so no monitor is needed
    if (isLoaded) {
        return true;
    }

    final long recheckIntervalMillis = 5000;

    synchronized (this) {
        // Re-test the flag under the monitor, waking at least once per interval
        while (!isLoaded) {
            this.wait(recheckIntervalMillis);
        }
    }

    return true;
}
@SneakyThrows
public TIntHashSet getSpamDomains() {
final TIntHashSet result = new TIntHashSet(1_000_000);
@ -52,15 +85,25 @@ public class DomainBlacklistImpl implements DomainBlacklist {
}
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement("SELECT EC_DOMAIN.ID FROM EC_DOMAIN INNER JOIN EC_DOMAIN_BLACKLIST ON (EC_DOMAIN_BLACKLIST.URL_DOMAIN = EC_DOMAIN.DOMAIN_TOP OR EC_DOMAIN_BLACKLIST.URL_DOMAIN = EC_DOMAIN.DOMAIN_NAME)")) {
try (var stmt = connection.prepareStatement("""
SELECT EC_DOMAIN.ID
FROM EC_DOMAIN
INNER JOIN EC_DOMAIN_BLACKLIST
ON (EC_DOMAIN_BLACKLIST.URL_DOMAIN = EC_DOMAIN.DOMAIN_TOP
OR EC_DOMAIN_BLACKLIST.URL_DOMAIN = EC_DOMAIN.DOMAIN_NAME)
"""))
{
stmt.setFetchSize(1000);
var rsp = stmt.executeQuery();
while (rsp.next()) {
result.add(rsp.getInt(1));
}
}
} catch (SQLException ex) {
logger.error("Failed to load spam domain list", ex);
}
return result;
}