JVM flags (-Dno-domain-blacklist=true, -Dno-ip-blocklist=true) for disabling the domain blacklist and the IP blocklist.
This commit is contained in:
parent
42afe490b7
commit
8274e8a953
@ -16,7 +16,7 @@ public class DomainBlacklistImpl implements DomainBlacklist {
|
||||
private volatile TIntHashSet spamDomainSet = new TIntHashSet();
|
||||
private final HikariDataSource dataSource;
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
private final boolean blacklistDisabled = Boolean.getBoolean("no-domain-blacklist");
|
||||
@Inject
|
||||
public DomainBlacklistImpl(HikariDataSource dataSource) {
|
||||
this.dataSource = dataSource;
|
||||
@ -27,6 +27,7 @@ public class DomainBlacklistImpl implements DomainBlacklist {
|
||||
}
|
||||
|
||||
private void updateSpamList() {
|
||||
|
||||
try {
|
||||
int oldSetSize = spamDomainSet.size();
|
||||
|
||||
@ -46,6 +47,10 @@ public class DomainBlacklistImpl implements DomainBlacklist {
|
||||
public TIntHashSet getSpamDomains() {
|
||||
final TIntHashSet result = new TIntHashSet(1_000_000);
|
||||
|
||||
if (blacklistDisabled) {
|
||||
return result;
|
||||
}
|
||||
|
||||
try (var connection = dataSource.getConnection()) {
|
||||
try (var stmt = connection.prepareStatement("SELECT EC_DOMAIN.ID FROM EC_DOMAIN INNER JOIN EC_DOMAIN_BLACKLIST ON EC_DOMAIN_BLACKLIST.URL_DOMAIN = EC_DOMAIN.DOMAIN_TOP")) {
|
||||
stmt.setFetchSize(1000);
|
||||
@ -61,6 +66,7 @@ public class DomainBlacklistImpl implements DomainBlacklist {
|
||||
|
||||
@Override
|
||||
public boolean isBlacklisted(int domainId) {
|
||||
|
||||
if (spamDomainSet.contains(domainId)) {
|
||||
return true;
|
||||
}
|
||||
|
@ -22,11 +22,18 @@ public class IpBlockList {
|
||||
private final GeoIpBlocklist geoIpBlocklist;
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
private final List<SubnetUtils.SubnetInfo> badSubnets = new ArrayList<>();
|
||||
private final boolean blocklistDisabled = Boolean.getBoolean("no-ip-blocklist");
|
||||
|
||||
@Inject
|
||||
public IpBlockList(GeoIpBlocklist geoIpBlocklist) {
|
||||
this.geoIpBlocklist = geoIpBlocklist;
|
||||
|
||||
if (blocklistDisabled) {
|
||||
logger.warn("IP blocklist disabled");
|
||||
// no point loading the list here
|
||||
return;
|
||||
}
|
||||
|
||||
var resource = Objects.requireNonNull(
|
||||
ClassLoader.getSystemResourceAsStream("ip-banned-cidr.txt"),
|
||||
"Could not load IP blacklist");
|
||||
@ -52,6 +59,9 @@ public class IpBlockList {
|
||||
final Predicate<String> numericPattern = Pattern.compile(".*\\d{4}.*").asMatchPredicate();
|
||||
|
||||
public boolean isAllowed(EdgeDomain domain) {
|
||||
if (blocklistDisabled)
|
||||
return true;
|
||||
|
||||
if (domain.domain.endsWith(".cn")) {
|
||||
logger.debug("Blocking {} on .cn-end", domain);
|
||||
return false;
|
||||
@ -64,12 +74,15 @@ public class IpBlockList {
|
||||
try {
|
||||
var hostAddress = InetAddressCache.getAddress(domain).getHostAddress();
|
||||
var subnet = badSubnets.stream().filter(sn -> sn.isInRange(hostAddress)).findFirst();
|
||||
|
||||
if (subnet.isPresent()) {
|
||||
logger.debug("Blocking {} on IP range: {}", domain, subnet.get());
|
||||
return false;
|
||||
}
|
||||
|
||||
} catch (Throwable t) {
|
||||
return false;
|
||||
// Host failed to resolve, deal with crawling error upstream
|
||||
// to avoid flagging this as a blocked domain
|
||||
}
|
||||
|
||||
var geo = geoIpBlocklist.isAllowed(domain);
|
||||
|
Loading…
Reference in New Issue
Block a user