JVM flags for disabling the domain blacklist and the IP blocklist.
This commit is contained in:
parent
42afe490b7
commit
8274e8a953
@ -16,7 +16,7 @@ public class DomainBlacklistImpl implements DomainBlacklist {
|
|||||||
private volatile TIntHashSet spamDomainSet = new TIntHashSet();
|
private volatile TIntHashSet spamDomainSet = new TIntHashSet();
|
||||||
private final HikariDataSource dataSource;
|
private final HikariDataSource dataSource;
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
|
private final boolean blacklistDisabled = Boolean.getBoolean("no-domain-blacklist");
|
||||||
@Inject
|
@Inject
|
||||||
public DomainBlacklistImpl(HikariDataSource dataSource) {
|
public DomainBlacklistImpl(HikariDataSource dataSource) {
|
||||||
this.dataSource = dataSource;
|
this.dataSource = dataSource;
|
||||||
@ -27,6 +27,7 @@ public class DomainBlacklistImpl implements DomainBlacklist {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private void updateSpamList() {
|
private void updateSpamList() {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
int oldSetSize = spamDomainSet.size();
|
int oldSetSize = spamDomainSet.size();
|
||||||
|
|
||||||
@ -46,6 +47,10 @@ public class DomainBlacklistImpl implements DomainBlacklist {
|
|||||||
public TIntHashSet getSpamDomains() {
|
public TIntHashSet getSpamDomains() {
|
||||||
final TIntHashSet result = new TIntHashSet(1_000_000);
|
final TIntHashSet result = new TIntHashSet(1_000_000);
|
||||||
|
|
||||||
|
if (blacklistDisabled) {
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
try (var connection = dataSource.getConnection()) {
|
try (var connection = dataSource.getConnection()) {
|
||||||
try (var stmt = connection.prepareStatement("SELECT EC_DOMAIN.ID FROM EC_DOMAIN INNER JOIN EC_DOMAIN_BLACKLIST ON EC_DOMAIN_BLACKLIST.URL_DOMAIN = EC_DOMAIN.DOMAIN_TOP")) {
|
try (var stmt = connection.prepareStatement("SELECT EC_DOMAIN.ID FROM EC_DOMAIN INNER JOIN EC_DOMAIN_BLACKLIST ON EC_DOMAIN_BLACKLIST.URL_DOMAIN = EC_DOMAIN.DOMAIN_TOP")) {
|
||||||
stmt.setFetchSize(1000);
|
stmt.setFetchSize(1000);
|
||||||
@ -61,6 +66,7 @@ public class DomainBlacklistImpl implements DomainBlacklist {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isBlacklisted(int domainId) {
|
public boolean isBlacklisted(int domainId) {
|
||||||
|
|
||||||
if (spamDomainSet.contains(domainId)) {
|
if (spamDomainSet.contains(domainId)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -22,11 +22,18 @@ public class IpBlockList {
|
|||||||
private final GeoIpBlocklist geoIpBlocklist;
|
private final GeoIpBlocklist geoIpBlocklist;
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
private final List<SubnetUtils.SubnetInfo> badSubnets = new ArrayList<>();
|
private final List<SubnetUtils.SubnetInfo> badSubnets = new ArrayList<>();
|
||||||
|
private final boolean blocklistDisabled = Boolean.getBoolean("no-ip-blocklist");
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public IpBlockList(GeoIpBlocklist geoIpBlocklist) {
|
public IpBlockList(GeoIpBlocklist geoIpBlocklist) {
|
||||||
this.geoIpBlocklist = geoIpBlocklist;
|
this.geoIpBlocklist = geoIpBlocklist;
|
||||||
|
|
||||||
|
if (blocklistDisabled) {
|
||||||
|
logger.warn("IP blocklist disabled");
|
||||||
|
// no point loading the list here
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
var resource = Objects.requireNonNull(
|
var resource = Objects.requireNonNull(
|
||||||
ClassLoader.getSystemResourceAsStream("ip-banned-cidr.txt"),
|
ClassLoader.getSystemResourceAsStream("ip-banned-cidr.txt"),
|
||||||
"Could not load IP blacklist");
|
"Could not load IP blacklist");
|
||||||
@ -52,6 +59,9 @@ public class IpBlockList {
|
|||||||
final Predicate<String> numericPattern = Pattern.compile(".*\\d{4}.*").asMatchPredicate();
|
final Predicate<String> numericPattern = Pattern.compile(".*\\d{4}.*").asMatchPredicate();
|
||||||
|
|
||||||
public boolean isAllowed(EdgeDomain domain) {
|
public boolean isAllowed(EdgeDomain domain) {
|
||||||
|
if (blocklistDisabled)
|
||||||
|
return true;
|
||||||
|
|
||||||
if (domain.domain.endsWith(".cn")) {
|
if (domain.domain.endsWith(".cn")) {
|
||||||
logger.debug("Blocking {} on .cn-end", domain);
|
logger.debug("Blocking {} on .cn-end", domain);
|
||||||
return false;
|
return false;
|
||||||
@ -64,12 +74,15 @@ public class IpBlockList {
|
|||||||
try {
|
try {
|
||||||
var hostAddress = InetAddressCache.getAddress(domain).getHostAddress();
|
var hostAddress = InetAddressCache.getAddress(domain).getHostAddress();
|
||||||
var subnet = badSubnets.stream().filter(sn -> sn.isInRange(hostAddress)).findFirst();
|
var subnet = badSubnets.stream().filter(sn -> sn.isInRange(hostAddress)).findFirst();
|
||||||
|
|
||||||
if (subnet.isPresent()) {
|
if (subnet.isPresent()) {
|
||||||
logger.debug("Blocking {} on IP range: {}", domain, subnet.get());
|
logger.debug("Blocking {} on IP range: {}", domain, subnet.get());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
} catch (Throwable t) {
|
} catch (Throwable t) {
|
||||||
return false;
|
// Host failed to resolve, deal with crawling error upstream
|
||||||
|
// to avoid flagging this as a blocked domain
|
||||||
}
|
}
|
||||||
|
|
||||||
var geo = geoIpBlocklist.isAllowed(domain);
|
var geo = geoIpBlocklist.isAllowed(domain);
|
||||||
|
Loading…
Reference in New Issue
Block a user