Add JVM flags for disabling the domain blacklist and the IP blocklist.

This commit is contained in:
Viktor Lofgren 2023-06-30 17:07:47 +02:00
parent 42afe490b7
commit 8274e8a953
2 changed files with 21 additions and 2 deletions

View File

@ -16,7 +16,7 @@ public class DomainBlacklistImpl implements DomainBlacklist {
private volatile TIntHashSet spamDomainSet = new TIntHashSet(); private volatile TIntHashSet spamDomainSet = new TIntHashSet();
private final HikariDataSource dataSource; private final HikariDataSource dataSource;
private final Logger logger = LoggerFactory.getLogger(getClass()); private final Logger logger = LoggerFactory.getLogger(getClass());
private final boolean blacklistDisabled = Boolean.getBoolean("no-domain-blacklist");
@Inject @Inject
public DomainBlacklistImpl(HikariDataSource dataSource) { public DomainBlacklistImpl(HikariDataSource dataSource) {
this.dataSource = dataSource; this.dataSource = dataSource;
@ -27,6 +27,7 @@ public class DomainBlacklistImpl implements DomainBlacklist {
} }
private void updateSpamList() { private void updateSpamList() {
try { try {
int oldSetSize = spamDomainSet.size(); int oldSetSize = spamDomainSet.size();
@ -46,6 +47,10 @@ public class DomainBlacklistImpl implements DomainBlacklist {
public TIntHashSet getSpamDomains() { public TIntHashSet getSpamDomains() {
final TIntHashSet result = new TIntHashSet(1_000_000); final TIntHashSet result = new TIntHashSet(1_000_000);
if (blacklistDisabled) {
return result;
}
try (var connection = dataSource.getConnection()) { try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement("SELECT EC_DOMAIN.ID FROM EC_DOMAIN INNER JOIN EC_DOMAIN_BLACKLIST ON EC_DOMAIN_BLACKLIST.URL_DOMAIN = EC_DOMAIN.DOMAIN_TOP")) { try (var stmt = connection.prepareStatement("SELECT EC_DOMAIN.ID FROM EC_DOMAIN INNER JOIN EC_DOMAIN_BLACKLIST ON EC_DOMAIN_BLACKLIST.URL_DOMAIN = EC_DOMAIN.DOMAIN_TOP")) {
stmt.setFetchSize(1000); stmt.setFetchSize(1000);
@ -61,6 +66,7 @@ public class DomainBlacklistImpl implements DomainBlacklist {
@Override @Override
public boolean isBlacklisted(int domainId) { public boolean isBlacklisted(int domainId) {
if (spamDomainSet.contains(domainId)) { if (spamDomainSet.contains(domainId)) {
return true; return true;
} }

View File

@ -22,11 +22,18 @@ public class IpBlockList {
private final GeoIpBlocklist geoIpBlocklist; private final GeoIpBlocklist geoIpBlocklist;
private final Logger logger = LoggerFactory.getLogger(getClass()); private final Logger logger = LoggerFactory.getLogger(getClass());
private final List<SubnetUtils.SubnetInfo> badSubnets = new ArrayList<>(); private final List<SubnetUtils.SubnetInfo> badSubnets = new ArrayList<>();
private final boolean blocklistDisabled = Boolean.getBoolean("no-ip-blocklist");
@Inject @Inject
public IpBlockList(GeoIpBlocklist geoIpBlocklist) { public IpBlockList(GeoIpBlocklist geoIpBlocklist) {
this.geoIpBlocklist = geoIpBlocklist; this.geoIpBlocklist = geoIpBlocklist;
if (blocklistDisabled) {
logger.warn("IP blocklist disabled");
// no point loading the list here
return;
}
var resource = Objects.requireNonNull( var resource = Objects.requireNonNull(
ClassLoader.getSystemResourceAsStream("ip-banned-cidr.txt"), ClassLoader.getSystemResourceAsStream("ip-banned-cidr.txt"),
"Could not load IP blacklist"); "Could not load IP blacklist");
@ -52,6 +59,9 @@ public class IpBlockList {
final Predicate<String> numericPattern = Pattern.compile(".*\\d{4}.*").asMatchPredicate(); final Predicate<String> numericPattern = Pattern.compile(".*\\d{4}.*").asMatchPredicate();
public boolean isAllowed(EdgeDomain domain) { public boolean isAllowed(EdgeDomain domain) {
if (blocklistDisabled)
return true;
if (domain.domain.endsWith(".cn")) { if (domain.domain.endsWith(".cn")) {
logger.debug("Blocking {} on .cn-end", domain); logger.debug("Blocking {} on .cn-end", domain);
return false; return false;
@ -64,12 +74,15 @@ public class IpBlockList {
try { try {
var hostAddress = InetAddressCache.getAddress(domain).getHostAddress(); var hostAddress = InetAddressCache.getAddress(domain).getHostAddress();
var subnet = badSubnets.stream().filter(sn -> sn.isInRange(hostAddress)).findFirst(); var subnet = badSubnets.stream().filter(sn -> sn.isInRange(hostAddress)).findFirst();
if (subnet.isPresent()) { if (subnet.isPresent()) {
logger.debug("Blocking {} on IP range: {}", domain, subnet.get()); logger.debug("Blocking {} on IP range: {}", domain, subnet.get());
return false; return false;
} }
} catch (Throwable t) { } catch (Throwable t) {
return false; // Host failed to resolve, deal with crawling error upstream
// to avoid flagging this as a blocked domain
} }
var geo = geoIpBlocklist.isAllowed(domain); var geo = geoIpBlocklist.isAllowed(domain);